eZ Publish  [4.0]
ezmbstringmapper.php
Go to the documentation of this file.
00001 <?php
00002 //
00003 // Definition of eZMBStringMapper class
00004 //
00005 // Created on: <12-Jul-2002 12:56:48 amos>
00006 //
00007 // ## BEGIN COPYRIGHT, LICENSE AND WARRANTY NOTICE ##
00008 // SOFTWARE NAME: eZ Publish
00009 // SOFTWARE RELEASE: 4.0.x
00010 // COPYRIGHT NOTICE: Copyright (C) 1999-2008 eZ Systems AS
00011 // SOFTWARE LICENSE: GNU General Public License v2.0
00012 // NOTICE: >
00013 //   This program is free software; you can redistribute it and/or
00014 //   modify it under the terms of version 2.0  of the GNU General
00015 //   Public License as published by the Free Software Foundation.
00016 //
00017 //   This program is distributed in the hope that it will be useful,
00018 //   but WITHOUT ANY WARRANTY; without even the implied warranty of
00019 //   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00020 //   GNU General Public License for more details.
00021 //
00022 //   You should have received a copy of version 2.0 of the GNU General
00023 //   Public License along with this program; if not, write to the Free
00024 //   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
00025 //   MA 02110-1301, USA.
00026 //
00027 //
00028 // ## END COPYRIGHT, LICENSE AND WARRANTY NOTICE ##
00029 //
00030 
00031 /*! \file ezmbstringmapper.php
00032 */
00033 
00034 /*!
00035   \class eZMBStringMapper ezmbstringmapper.php
00036   \ingroup eZI18N
00037   \brief Converts and measures strings between charsets using the PHP mbstring extension.
00038 
00039   The mbstring extension supports the following charsets:
00040   UCS-4, UCS-4BE, UCS-4LE, UCS-2, UCS-2BE, UCS-2LE, UTF-32, UTF-32BE, UTF-32LE, UTF-16,
00041   UTF-16BE, UTF-16LE, UTF-8, UTF-7, ASCII, EUC-JP, SJIS, eucJP-win, SJIS-win, ISO-2022-JP, JIS,
00042   ISO-8859-1, ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, ISO-8859-6, ISO-8859-7, ISO-8859-8,
00043   ISO-8859-9, ISO-8859-10, ISO-8859-13, ISO-8859-14, ISO-8859-15, byte2be, byte2le, byte4be,
00044   byte4le, BASE64, 7bit, 8bit and UTF7-IMAP.
00045 */
00046 
00047 //include_once( "lib/ezi18n/classes/ezcharsetinfo.php" );
00048 
class eZMBStringMapper
{
    /// Input charset code exactly as it was passed to the constructor.
    public $RequestedInputCharsetCode;

    /// Input charset code as returned by eZCharsetInfo::realCharsetCode().
    public $InputCharsetCode;

    /// Output charset code exactly as it was passed to the constructor.
    public $RequestedOutputCharsetCode;

    /// Output charset code as returned by eZCharsetInfo::realCharsetCode().
    public $OutputCharsetCode;

    /// \c true only when both charsets are in charsetList() and the mbstring functions exist.
    public $Valid = false;

    /*!
     Initializes the mapper for converting from \a $input_charset_code to
     \a $output_charset_code.

     The mapper is marked valid only when both charsets are supported and
     the required mbstring functions are available; otherwise an error is
     written with eZDebug::writeError() and the mapper stays invalid.

     \note This is declared as __construct because PHP 8 no longer treats a
           method named after the class as a constructor.
    */
    function __construct( $input_charset_code, $output_charset_code )
    {
        $this->RequestedInputCharsetCode = $input_charset_code;
        $this->InputCharsetCode = eZCharsetInfo::realCharsetCode( $input_charset_code );
        $this->RequestedOutputCharsetCode = $output_charset_code;
        $this->OutputCharsetCode = eZCharsetInfo::realCharsetCode( $output_charset_code );
        $this->Valid = false;
        if ( !$this->isCharsetSupported( $input_charset_code ) )
        {
            eZDebug::writeError( "Input charset $input_charset_code not supported", "eZMBStringMapper" );
        }
        else if ( !$this->isCharsetSupported( $output_charset_code ) )
        {
            eZDebug::writeError( "Output charset $output_charset_code not supported", "eZMBStringMapper" );
        }
        else if ( self::hasMBStringExtension() )
        {
            $this->Valid = true;
        }
        else
        {
            eZDebug::writeError( "No mbstring functions available", "eZMBStringMapper" );
        }
    }

    /*!
     \deprecated Kept only so existing code calling the old PHP 4 style
                 constructor by name keeps working; forwards to __construct().
    */
    function eZMBStringMapper( $input_charset_code, $output_charset_code )
    {
        $this->__construct( $input_charset_code, $output_charset_code );
    }

    /*!
     \static
     \return a reference to the array of charset codes handled by this mapper,
             keyed by charset code with the same code as value. The array is
             built on first use and cached in $GLOBALS["eZMBCharsetList"].
     \note This function is duplicated in eZTextCodec::eZTextCodec(), remember to update both places.
    */
    static function &charsetList()
    {
        // Bind to the global cache entry so the list built below is stored there.
        $charsets =& $GLOBALS["eZMBCharsetList"];
        if ( !is_array( $charsets ) )
        {
            $charsetList = array( "ucs-4", "ucs-4be", "ucs-4le", "ucs-2", "ucs-2be", "ucs-2le", "utf-32", "utf-32be", "utf-32le", "utf-16",
                                  "utf-16be", "utf-16le", "utf-8", "utf-7", "ascii", "euc-jp", "sjis", "eucjp-win", "sjis-win", "iso-2022-jp", "jis",
                                  "iso-8859-1", "iso-8859-2", "iso-8859-3", "iso-8859-4", "iso-8859-5", "iso-8859-6", "iso-8859-7", "iso-8859-8",
                                  "iso-8859-9", "iso-8859-10", "iso-8859-13", "iso-8859-14", "iso-8859-15", "byte2be", "byte2le", "byte4be",
                                  "byte4le", "base64", "7bit", "8bit", "utf7-imap" );
            $charsets = array();
            foreach ( $charsetList as $charset )
            {
                $charsets[$charset] = $charset;
            }
        }
        return $charsets;
    }

    /*!
     \static
     \return \c true if the mbstring can be used.
     \note The following function must be present for the function to return \c true.
           mb_convert_encoding
           mb_substitute_character
           mb_strcut
           mb_strlen
           mb_strpos
           mb_strrpos
           mb_strwidth
           mb_substr
     \note This function is duplicated in eZTextCodec::eZTextCodec(), remember to update both places.
    */
    static function hasMBStringExtension()
    {
        return ( function_exists( "mb_convert_encoding" ) and
                 function_exists( "mb_substitute_character" ) and
                 function_exists( "mb_strcut" ) and
                 function_exists( "mb_strlen" ) and
                 function_exists( "mb_strpos" ) and
                 function_exists( "mb_strrpos" ) and
                 function_exists( "mb_strwidth" ) and
                 function_exists( "mb_substr" ) );
    }

    /*!
     \return the input charset code as returned by eZCharsetInfo::realCharsetCode().
    */
    function inputCharsetCode()
    {
        return $this->InputCharsetCode;
    }

    /*!
     \return the output charset code as returned by eZCharsetInfo::realCharsetCode().
    */
    function outputCharsetCode()
    {
        return $this->OutputCharsetCode;
    }

    /*!
     \return the input charset code exactly as it was given to the constructor.
    */
    function requestedInputCharsetCode()
    {
        return $this->RequestedInputCharsetCode;
    }

    /*!
     \return the output charset code exactly as it was given to the constructor.
    */
    function requestedOutputCharsetCode()
    {
        return $this->RequestedOutputCharsetCode;
    }

    /*!
     \return \c true if \a $charset_code, after being passed through
             eZCharsetInfo::realCharsetCode(), is one of the codes in charsetList().
    */
    function isCharsetSupported( $charset_code )
    {
        $charset_code = eZCharsetInfo::realCharsetCode( $charset_code );
        $charsets = self::charsetList();
        // The list is keyed by charset code, so a key lookup replaces the
        // loose in_array() scan; the string check keeps non-string values
        // (e.g. 0, which loosely equals any non-numeric string on PHP < 8)
        // from matching.
        return is_string( $charset_code ) && isset( $charsets[$charset_code] );
    }

    /*!
     \return the current mbstring substitution character as reported by
             mb_substitute_character(), or \c null if the mapper is not valid.
    */
    function substituteCharacter()
    {
        if ( !$this->Valid )
            return null;
        return mb_substitute_character();
    }

    /*!
     Sets the mbstring substitution character to \a $char.
     Does nothing if the mapper is not valid.
    */
    function setSubstituteCharacter( $char )
    {
        if ( $this->Valid )
            mb_substitute_character( $char );
    }

    /*!
     \return \a $str converted from the input charset to the output charset,
             or \a $str unchanged if the mapper is not valid.
    */
    function convertString( $str )
    {
        if ( !$this->Valid )
            return $str;
        return mb_convert_encoding( $str, $this->OutputCharsetCode, $this->InputCharsetCode );
    }

    /*!
     \return the length of \a $str as reported by mb_strlen() for the input charset.
     \note Calls mbstring directly without checking whether the mapper is valid.
    */
    function strlen( $str )
    {
        return mb_strlen( $str, $this->InputCharsetCode );
    }

    /*!
     \return the result of mb_strpos() for \a $needle in \a $haystack,
             starting at \a $offset, using the input charset.
     \note Calls mbstring directly without checking whether the mapper is valid.
    */
    function strpos( $haystack, $needle, $offset = 0 )
    {
        return mb_strpos( $haystack, $needle, $offset, $this->InputCharsetCode );
    }

    /*!
     \return the result of mb_strrpos() for \a $needle in \a $haystack,
             starting at \a $offset, using the input charset.
     \note The encoding is passed as the fourth argument; the third argument
           of mb_strrpos() is the offset, and passing the encoding there is
           rejected by PHP 8.
     \note Calls mbstring directly without checking whether the mapper is valid.
    */
    function strrpos( $haystack, $needle, $offset = 0 )
    {
        return mb_strrpos( $haystack, $needle, $offset, $this->InputCharsetCode );
    }

    /*!
     \return the result of mb_substr() on \a $str from \a $start for
             \a $length characters, using the input charset.
     \note Calls mbstring directly without checking whether the mapper is valid.
    */
    function substr( $str, $start, $length )
    {
        return mb_substr( $str, $start, $length, $this->InputCharsetCode );
    }

    /*!
     \static
     \return the shared mapper for the given charset pair, creating it on
             first use. Instances are cached in $GLOBALS under the key
             "eZMBStringMapper-<input>-<output>".
    */
    static function instance( $input_charset_code, $output_charset_code )
    {
        $globalsKey = "eZMBStringMapper-$input_charset_code-$output_charset_code";

        if ( !isset( $GLOBALS[$globalsKey] ) ||
             !( $GLOBALS[$globalsKey] instanceof eZMBStringMapper ) )
        {
            $GLOBALS[$globalsKey] = new eZMBStringMapper( $input_charset_code, $output_charset_code );
        }

        return $GLOBALS[$globalsKey];
    }
}
00203 
00204 ?>