eZ Publish  [4.0]
ezcharsetinfo.php
Go to the documentation of this file.
00001 <?php
00002 //
00003 // Definition of eZCharsetInfo class
00004 //
00005 // Created on: <10-Jul-2002 16:44:29 amos>
00006 //
00007 // ## BEGIN COPYRIGHT, LICENSE AND WARRANTY NOTICE ##
00008 // SOFTWARE NAME: eZ Publish
00009 // SOFTWARE RELEASE: 4.0.x
00010 // COPYRIGHT NOTICE: Copyright (C) 1999-2008 eZ Systems AS
00011 // SOFTWARE LICENSE: GNU General Public License v2.0
00012 // NOTICE: >
00013 //   This program is free software; you can redistribute it and/or
00014 //   modify it under the terms of version 2.0  of the GNU General
00015 //   Public License as published by the Free Software Foundation.
00016 //
00017 //   This program is distributed in the hope that it will be useful,
00018 //   but WITHOUT ANY WARRANTY; without even the implied warranty of
00019 //   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00020 //   GNU General Public License for more details.
00021 //
00022 //   You should have received a copy of version 2.0 of the GNU General
00023 //   Public License along with this program; if not, write to the Free
00024 //   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
00025 //   MA 02110-1301, USA.
00026 //
00027 //
00028 // ## END COPYRIGHT, LICENSE AND WARRANTY NOTICE ##
00029 //
00030 
00031 /*! \file ezcharsetinfo.php
00032   Provides information on charset.
00033 */
00034 
00035 /*!
00036   \class eZCharsetInfo ezcharsetinfo.php
00037   \ingroup eZI18N
00038   \brief Allows for querying information about charsets
00039 
00040   A charset can be known by multiple names but the internationalization
00041   system only works with one name. To fetch the real internal name use
00042   the static realCharsetCode() function.
00043   Each charset also has a specific encoding scheme associated with it
00044   which can be fetched with characterEncodingScheme().
00045 
00046 */
00047 
class eZCharsetInfo
{
    /*!
     \private
     \static
     Returns the charset alias table, building it on first use and caching it
     in $GLOBALS['eZCharsetInfoTable'].

     Keys are lowercase alias names, values are the lowercase charset codes
     used internally.
     \return a reference to the hash table with aliases, creates it if it doesn't already exist.
    */
    public static function &aliasTable()
    {
        $aliasTable =& $GLOBALS['eZCharsetInfoTable'];
        if ( !is_array( $aliasTable ) )
        {
            $aliasTable = array( 'ascii' => 'us-ascii',
                                 'latin1' => 'iso-8859-1',
                                 'latin2' => 'iso-8859-2',
                                 'latin3' => 'iso-8859-3',
                                 'latin4' => 'iso-8859-4',
                                 'latin5' => 'iso-8859-9',
                                 'latin6' => 'iso-8859-10',
                                 'latin7' => 'iso-8859-13',
                                 'latin8' => 'iso-8859-14',
                                 'latin9' => 'iso-8859-15',
                                 'cyrillic' => 'iso-8859-5',
                                 'arabic' => 'iso-8859-6',
                                 'greek' => 'iso-8859-7',
                                 'hebrew' => 'iso-8859-8',
                                 'thai' => 'iso-8859-11',

                                 'koi8-r' => 'koi8-r',
                                 'koi-8-r' => 'koi8-r',
                                 'koi8r' => 'koi8-r',

                                 'koi8-u' => 'koi8-u',
                                 'koi-8-u' => 'koi8-u',
                                 'koi8u' => 'koi8-u',

                                 'cp1250' => 'windows-1250',
                                 'cp1251' => 'windows-1251',
                                 'cp1252' => 'windows-1252',
                                 'cp1253' => 'windows-1253',
                                 'cp1254' => 'windows-1254',
                                 'cp1255' => 'windows-1255',
                                 'cp1256' => 'windows-1256',
                                 'cp1257' => 'windows-1257',
                                 'cp1258' => 'windows-1258',
                                 'winlatin1' => 'windows-1252',
                                 'winlatin2' => 'windows-1250',
                                 'wincyrillic' => 'windows-1251',
                                 'wingreek' => 'windows-1253',
                                 'winturkish' => 'windows-1254',
                                 'winhebrew' => 'windows-1255',
                                 'winarabic' => 'windows-1256',
                                 'winbaltic' => 'windows-1257',
                                 'winvietnamese' => 'windows-1258',

                                 'doslatinus' => 'cp437',
                                 'dosgreek' => 'cp737',
                                 'dosbaltrim' => 'cp775',
                                 'doslatin1' => 'cp850',
                                 'doslatin2' => 'cp852',
                                 'doscyrillic' => 'cp855',
                                 'dosturkish' => 'cp857',
                                 'dosportuguese' => 'cp860',
                                 'dosicelandic' => 'cp861',
                                 'doshebrew' => 'cp862',
                                 'doscanadaf' => 'cp863',
                                 'dosarabic' => 'cp864',
                                 'dosnordic' => 'cp865',
                                 'dosgreek2' => 'cp869',
                                 'doscyrillicrussian' => 'cp866',
                                 'dosthai' => 'cp874',

                                 'macroman' => 'macintosh',
                                 'nextstep' => 'next',

                                 'utf8' => 'utf-8',
                                 'utf7' => 'utf-7',

                                 'utf16' => 'utf-16',
                                 'utf16be' => 'utf-16be',
                                 'utf16le' => 'utf-16le',

                                 'utf32' => 'utf-32',
                                 'utf32be' => 'utf-32be',
                                 'utf32le' => 'utf-32le',

                                 'ucs4' => 'ucs-4',
                                 'ucs4be' => 'ucs-4be',
                                 'ucs4le' => 'ucs-4le',

                                 // 'ucs2le' used to be listed twice with the
                                 // same value; it is now listed once.
                                 'ucs2' => 'ucs-2',
                                 'ucs2be' => 'ucs-2be',
                                 'ucs2le' => 'ucs-2le',

                                 'shift-jis' => 'cp932',
                                 'gbk' => 'gbk',
                                 'euc-cn' => 'euc-cn',
                                 // NOTE(review): 'cp849' and 'cp850' look like typos
                                 // for cp949 (Unified Hangul Code) and cp950 (Big5).
                                 // 'cp850' is also the target of 'doslatin1' above, so
                                 // that DOS code page ends up classified as
                                 // 'doublebyte' by characterEncodingScheme().
                                 // Left unchanged - confirm which codepage names the
                                 // rest of the system expects before changing them.
                                 'unifiedhangul' => 'cp849',
                                 'uhc' => 'cp849',
                                 'big5' => 'cp850'
                                 );
            // Accept the ISO-8859 family written without the first dash
            // ("iso8859-1") or without any dash ("iso88591").
            for ( $i = 1; $i <= 15; ++$i )
            {
                $aliasTable["iso8859-$i"] = "iso-8859-$i";
                $aliasTable["iso8859$i"] = "iso-8859-$i";
            }
            $aliasTable['unicode'] = 'unicode';
        }
        return $aliasTable;
    }

    /*!
     \private
     \static
     Returns the character encoding table, building it on first use and
     caching it in $GLOBALS['eZCharsetInfoEncodingTable'].

     The table maps from a character encoding scheme to an array of character sets.
     Charset names are stored in lowercase so that they match the codes
     returned by realCharsetCode().
     \return a reference to the character encoding hash table, creates it if it does not exist.
     \sa reverseEncodingTable
    */
    public static function &encodingTable()
    {
        $encodingTable =& $GLOBALS['eZCharsetInfoEncodingTable'];
        if ( !is_array( $encodingTable ) )
        {
            $encodingTable = array( 'doublebyte' => array( 'cp932',
                                                           // Was 'GBK'; real charset codes are
                                                           // lowercase, so the uppercase entry
                                                           // could never be matched for 'gbk'.
                                                           'gbk',
                                                           'euc-cn',
                                                           'cp849',
                                                           'cp850' ),
                                    'unicode' => array( 'unicode' ),
                                    'utf-8' => array( 'utf-8' ) );
        }
        return $encodingTable;
    }

    /*!
     \private
     \static
     Returns the reverse character encoding table, building it from
     encodingTable() on first use and caching it in
     $GLOBALS['eZCharsetInfoReverseEncodingTable'].

     The table maps from a (lowercase) character set to a character encoding scheme.
     \return a reference to the reverse character encoding hash table, creates it if it does not exist.
     \sa encodingTable
    */
    public static function &reverseEncodingTable()
    {
        $reverseEncodingTable =& $GLOBALS['eZCharsetInfoReverseEncodingTable'];
        if ( !is_array( $reverseEncodingTable ) )
        {
            $encodingTable =& self::encodingTable();
            $reverseEncodingTable = array();
            foreach ( $encodingTable as $encodingScheme => $charsetMatches )
            {
                foreach ( $charsetMatches as $charsetMatch )
                {
                    // Keys are normalized to lowercase so lookups with the
                    // lowercase codes from realCharsetCode() always match,
                    // even if the cached encoding table holds mixed case.
                    $reverseEncodingTable[strtolower( $charsetMatch )] = $encodingScheme;
                }
            }
        }
        return $reverseEncodingTable;
    }

    /*!
     \static
     Tries to find an alias for the charset code and returns it. The lookup is
     done on the lowercased code first, and then on the lowercased code with
     all dashes removed. If no alias could be found the lowercased charset
     code is returned.
     \param $charsetCode the charset name or alias to resolve.
     \return the real (internal) charset code.
     \note The resulting charset code will be in all lowercase letters.
    */
    public static function realCharsetCode( $charsetCode )
    {
        $aliasTable =& self::aliasTable();
        $charsetCode = strtolower( $charsetCode );
        if ( isset( $aliasTable[$charsetCode] ) )
            return $aliasTable[$charsetCode];
        // Check alias without any dashes
        $charsetCodeNoDash = str_replace( '-', '', $charsetCode );
        if ( isset( $aliasTable[$charsetCodeNoDash] ) )
            return $aliasTable[$charsetCodeNoDash];
        return $charsetCode;
    }

    /*!
     \static
     Tries to figure out the character encoding scheme for the given character set.
     Unless \a $isRealCharset is true it uses realCharsetCode() to get the
     correct internal charset, so any charset name or alias can be given to
     this function.
     \param $charsetCode the charset name, alias or real charset code.
     \param $isRealCharset set to true if \a $charsetCode is already a real
                           charset code, this skips the alias lookup.
     \return the found encoding scheme or 'singlebyte' if no scheme was found.
     \sa realCharsetCode
    */
    public static function characterEncodingScheme( $charsetCode, $isRealCharset = false )
    {
        if ( $isRealCharset )
        {
            // Real charset codes are lowercase; normalizing here keeps callers
            // that pass e.g. 'GBK' as a real charset working.
            $charsetCode = strtolower( $charsetCode );
        }
        else
        {
            $charsetCode = self::realCharsetCode( $charsetCode );
        }
        $reverseEncodingTable =& self::reverseEncodingTable();
        if ( isset( $reverseEncodingTable[$charsetCode] ) )
            return $reverseEncodingTable[$charsetCode];
        return 'singlebyte';
    }
}
00242 
00243 ?>