eZ Publish  [trunk]
ezmbstringmapper.php
Go to the documentation of this file.
00001 <?php
00002 /**
00003  * File containing the eZMBStringMapper class.
00004  *
00005  * @copyright Copyright (C) 1999-2012 eZ Systems AS. All rights reserved.
00006  * @license http://www.gnu.org/licenses/gpl-2.0.txt GNU General Public License v2
00007  * @version //autogentag//
00008  * @package lib
00009  */
00010 
00011 /*!
00012   \class eZMBStringMapper ezmbstringmapper.php
00013   \ingroup eZI18N
00014   \brief Converts and inspects strings between two charsets using PHP's mbstring extension
00015 
00016   The mbstring extension supports the following charsets:
00017   UCS-4, UCS-4BE, UCS-4LE, UCS-2, UCS-2BE, UCS-2LE, UTF-32, UTF-32BE, UTF-32LE, UTF-16,
00018   UTF-16BE, UTF-16LE, UTF-8, UTF-7, ASCII, EUC-JP, SJIS, eucJP-win, SJIS-win, ISO-2022-JP, JIS,
00019   ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, ISO-8859-6, ISO-8859-7, ISO-8859-8,
00020   ISO-8859-9, ISO-8859-10, ISO-8859-13, ISO-8859-14, ISO-8859-15, byte2be, byte2le, byte4be,
00021   byte4le, BASE64, 7bit, 8bit and UTF7-IMAP.
00022 */
00023 
class eZMBStringMapper
{
    /// Input charset code exactly as it was handed to the constructor.
    public $RequestedInputCharsetCode;

    /// Input charset code after being passed through eZCharsetInfo::realCharsetCode().
    public $InputCharsetCode;

    /// Output charset code exactly as it was handed to the constructor.
    public $RequestedOutputCharsetCode;

    /// Output charset code after being passed through eZCharsetInfo::realCharsetCode().
    public $OutputCharsetCode;

    /// \c true only when both charsets are in charsetList() and the mbstring functions exist.
    public $Valid = false;

    /*!
     Constructor

     Stores the requested and the resolved charset codes and decides whether
     the mapper is usable. An error is written to eZDebug when the input
     charset, the output charset or the mbstring functions are missing; only
     the first failing check is reported.

     \param $input_charset_code  Charset code of the strings given to the mapper.
     \param $output_charset_code Charset code convertString() converts to.
    */
    function __construct( $input_charset_code, $output_charset_code )
    {
        $this->RequestedInputCharsetCode = $input_charset_code;
        $this->InputCharsetCode = eZCharsetInfo::realCharsetCode( $input_charset_code );
        $this->RequestedOutputCharsetCode = $output_charset_code;
        $this->OutputCharsetCode = eZCharsetInfo::realCharsetCode( $output_charset_code );
        $this->Valid = false;
        if ( !$this->isCharsetSupported( $input_charset_code ) )
        {
            eZDebug::writeError( "Input charset $input_charset_code not supported", "eZMBStringMapper" );
        }
        else if ( !$this->isCharsetSupported( $output_charset_code ) )
        {
            eZDebug::writeError( "Output charset $output_charset_code not supported", "eZMBStringMapper" );
        }
        else if ( self::hasMBStringExtension() )
        {
            $this->Valid = true;
        }
        else
        {
            eZDebug::writeError( "No mbstring functions available", "eZMBStringMapper" );
        }
    }

    /*!
     \deprecated Use the regular constructor instead.

     Method carrying the old PHP 4 constructor name. It is kept so that code
     calling the constructor explicitly by the class name keeps working; it
     simply forwards to __construct().
     \note It must stay below __construct(): PHP 5 raises a strict-standards
           notice when the class-named method is declared first.
    */
    function eZMBStringMapper( $input_charset_code, $output_charset_code )
    {
        self::__construct( $input_charset_code, $output_charset_code );
    }

    /*!
     \static
     \return A reference to the array of charset codes handled by this class.
             Keys and values are both the lowercase charset code. The array is
             built on first use and stored in $GLOBALS["eZMBCharsetList"].
     \note This function is duplicated in eZTextCodec::eZTextCodec(), remember to update both places.
    */
    static function &charsetList()
    {
        $charsets =& $GLOBALS["eZMBCharsetList"];
        if ( !is_array( $charsets ) )
        {
            $charsetList = array( "ucs-4", "ucs-4be", "ucs-4le", "ucs-2", "ucs-2be", "ucs-2le", "utf-32", "utf-32be", "utf-32le", "utf-16",
                                  "utf-16be", "utf-16le", "utf-8", "utf-7", "ascii", "euc-jp", "sjis", "eucjp-win", "sjis-win", "iso-2022-jp", "jis",
                                  "iso-8859-1", "iso-8859-2", "iso-8859-3", "iso-8859-4", "iso-8859-5", "iso-8859-6", "iso-8859-7", "iso-8859-8",
                                  "iso-8859-9", "iso-8859-10", "iso-8859-13", "iso-8859-14", "iso-8859-15", "byte2be", "byte2le", "byte4be",
                                  "byte4le", "base64", "7bit", "8bit", "utf7-imap" );
            $charsets = array();
            foreach ( $charsetList as $charset )
            {
                $charsets[$charset] = $charset;
            }
        }
        return $charsets;
    }

    /*!
     \static
     \return \c true if the mbstring can be used.
     \note The following function must be present for the function to return \c true.
           mb_convert_encoding
           mb_substitute_character
           mb_strcut
           mb_strlen
           mb_strpos
           mb_strrpos
           mb_strwidth
           mb_substr
     \note This function is duplicated in eZTextCodec::eZTextCodec(), remember to update both places.
    */
    static function hasMBStringExtension()
    {
        return ( function_exists( "mb_convert_encoding" ) and
                 function_exists( "mb_substitute_character" ) and
                 function_exists( "mb_strcut" ) and
                 function_exists( "mb_strlen" ) and
                 function_exists( "mb_strpos" ) and
                 function_exists( "mb_strrpos" ) and
                 function_exists( "mb_strwidth" ) and
                 function_exists( "mb_substr" ) );
    }

    /*!
     \return The input charset code as resolved by eZCharsetInfo::realCharsetCode().
    */
    function inputCharsetCode()
    {
        return $this->InputCharsetCode;
    }

    /*!
     \return The output charset code as resolved by eZCharsetInfo::realCharsetCode().
    */
    function outputCharsetCode()
    {
        return $this->OutputCharsetCode;
    }

    /*!
     \return The input charset code exactly as it was passed to the constructor.
    */
    function requestedInputCharsetCode()
    {
        return $this->RequestedInputCharsetCode;
    }

    /*!
     \return The output charset code exactly as it was passed to the constructor.
    */
    function requestedOutputCharsetCode()
    {
        return $this->RequestedOutputCharsetCode;
    }

    /*!
     \return \c true if \a $charset_code, after being resolved with
             eZCharsetInfo::realCharsetCode(), is one of the codes in charsetList().
    */
    function isCharsetSupported( $charset_code )
    {
        $charset_code = eZCharsetInfo::realCharsetCode( $charset_code );
        // Strict comparison so that a non-string value can never match a charset name.
        return in_array( $charset_code, self::charsetList(), true );
    }

    /*!
     \return The current mbstring substitution character as reported by
             mb_substitute_character(), or \c null if the mapper is not valid.
    */
    function substituteCharacter()
    {
        if ( !$this->Valid )
            return null;
        return mb_substitute_character();
    }

    /*!
     Sets the mbstring substitution character to \a $char.
     Does nothing if the mapper is not valid.
    */
    function setSubstituteCharacter( $char )
    {
        if ( $this->Valid )
            mb_substitute_character( $char );
    }

    /*!
     \return \a $str converted from the input charset to the output charset,
             or \a $str unchanged if the mapper is not valid.
    */
    function convertString( $str )
    {
        if ( !$this->Valid )
            return $str;
        return mb_convert_encoding( $str, $this->OutputCharsetCode, $this->InputCharsetCode );
    }

    /*!
     \return The result of mb_strlen() on \a $str using the input charset.
     \note Unlike convertString() this does not check whether the mapper is valid.
    */
    function strlen( $str )
    {
        return mb_strlen( $str, $this->InputCharsetCode );
    }

    /*!
     \return The result of mb_strpos() for \a $needle in \a $haystack, searching
             from \a $offset and using the input charset.
     \note Unlike convertString() this does not check whether the mapper is valid.
    */
    function strpos( $haystack, $needle, $offset = 0 )
    {
        return mb_strpos( $haystack, $needle, $offset, $this->InputCharsetCode );
    }

    /*!
     \return The result of mb_strrpos() for \a $needle in \a $haystack using the
             input charset.
     \note Unlike convertString() this does not check whether the mapper is valid.
    */
    function strrpos( $haystack, $needle )
    {
        // The third parameter of mb_strrpos() is the offset; the encoding is the
        // fourth. Passing the encoding in third position is rejected by PHP 8.
        return mb_strrpos( $haystack, $needle, 0, $this->InputCharsetCode );
    }

    /*!
     \return The result of mb_substr() on \a $str for \a $start and \a $length
             using the input charset.
     \note Unlike convertString() this does not check whether the mapper is valid.
    */
    function substr( $str, $start, $length )
    {
        return mb_substr( $str, $start, $length, $this->InputCharsetCode );
    }

    /**
     * Returns a shared instance of the eZMBStringMapper for the given
     * $input_charset_code and $output_charset_code pair.
     *
     * The instance is stored in $GLOBALS under a key built from both codes and
     * is created again if that entry is missing or is not an eZMBStringMapper.
     *
     * @param string $input_charset_code
     * @param string $output_charset_code
     * @return eZMBStringMapper
     */
    static function instance( $input_charset_code, $output_charset_code )
    {
        $globalsKey = "eZMBStringMapper-$input_charset_code-$output_charset_code";

        if ( !isset( $GLOBALS[$globalsKey] ) ||
             !( $GLOBALS[$globalsKey] instanceof eZMBStringMapper ) )
        {
            $GLOBALS[$globalsKey] = new eZMBStringMapper( $input_charset_code, $output_charset_code );
        }

        return $GLOBALS[$globalsKey];
    }
}
00186 
00187 ?>