|
eZ Publish
[4.0]
|
<?php
//
// Definition of eZCharsetInfo class
//
// Created on: <10-Jul-2002 16:44:29 amos>
//
// ## BEGIN COPYRIGHT, LICENSE AND WARRANTY NOTICE ##
// SOFTWARE NAME: eZ Publish
// SOFTWARE RELEASE: 4.0.x
// COPYRIGHT NOTICE: Copyright (C) 1999-2008 eZ Systems AS
// SOFTWARE LICENSE: GNU General Public License v2.0
// NOTICE: >
//   This program is free software; you can redistribute it and/or
//   modify it under the terms of version 2.0  of the GNU General
//   Public License as published by the Free Software Foundation.
//
//   This program is distributed in the hope that it will be useful,
//   but WITHOUT ANY WARRANTY; without even the implied warranty of
//   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//   GNU General Public License for more details.
//
//   You should have received a copy of version 2.0 of the GNU General
//   Public License along with this program; if not, write to the Free
//   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
//   MA 02110-1301, USA.
//
//
// ## END COPYRIGHT, LICENSE AND WARRANTY NOTICE ##
//

/*! \file ezcharsetinfo.php
 Provides information on charset.
*/

/*!
  \class eZCharsetInfo ezcharsetinfo.php
  \ingroup eZI18N
  \brief Allows for querying information about charsets

  A charset can be known by multiple names but the internationalization
  system only works with one name. To fetch the real internal name use
  the static realCharsetCode() function.
  Each charset also has a specific encoding scheme associated with it
  which can be fetched with characterEncodingScheme().

  All lookup tables are built lazily on first use and cached in $GLOBALS.
*/

class eZCharsetInfo
{
    /*!
     \private
     \static
     \return the hash table with aliases, creates it if it doesn't already exist.
             Keys are lowercase alias names, values are the internal charset codes.
             The table is cached in $GLOBALS['eZCharsetInfoTable'].
    */
    static function &aliasTable()
    {
        $aliasTable =& $GLOBALS['eZCharsetInfoTable'];
        if ( !is_array( $aliasTable ) )
        {
            $aliasTable = array( 'ascii' => 'us-ascii',
                                 'latin1' => 'iso-8859-1',
                                 'latin2' => 'iso-8859-2',
                                 'latin3' => 'iso-8859-3',
                                 'latin4' => 'iso-8859-4',
                                 'latin5' => 'iso-8859-9',
                                 'latin6' => 'iso-8859-10',
                                 'latin7' => 'iso-8859-13',
                                 'latin8' => 'iso-8859-14',
                                 'latin9' => 'iso-8859-15',
                                 'cyrillic' => 'iso-8859-5',
                                 'arabic' => 'iso-8859-6',
                                 'greek' => 'iso-8859-7',
                                 'hebrew' => 'iso-8859-8',
                                 'thai' => 'iso-8859-11',

                                 'koi8-r' => 'koi8-r',
                                 'koi-8-r' => 'koi8-r',
                                 'koi8r' => 'koi8-r',

                                 'koi8-u' => 'koi8-u',
                                 'koi-8-u' => 'koi8-u',
                                 'koi8u' => 'koi8-u',

                                 'cp1250' => 'windows-1250',
                                 'cp1251' => 'windows-1251',
                                 'cp1252' => 'windows-1252',
                                 'cp1253' => 'windows-1253',
                                 'cp1254' => 'windows-1254',
                                 'cp1255' => 'windows-1255',
                                 'cp1256' => 'windows-1256',
                                 'cp1257' => 'windows-1257',
                                 'cp1258' => 'windows-1258',
                                 'winlatin1' => 'windows-1252',
                                 'winlatin2' => 'windows-1250',
                                 'wincyrillic' => 'windows-1251',
                                 'wingreek' => 'windows-1253',
                                 'winturkish' => 'windows-1254',
                                 'winhebrew' => 'windows-1255',
                                 'winarabic' => 'windows-1256',
                                 'winbaltic' => 'windows-1257',
                                 'winvietnamese' => 'windows-1258',

                                 'doslatinus' => 'cp437',
                                 'dosgreek' => 'cp737',
                                 'dosbaltrim' => 'cp775',
                                 'doslatin1' => 'cp850',
                                 'doslatin2' => 'cp852',
                                 'doscyrillic' => 'cp855',
                                 'dosturkish' => 'cp857',
                                 'dosportuguese' => 'cp860',
                                 'dosicelandic' => 'cp861',
                                 'doshebrew' => 'cp862',
                                 'doscanadaf' => 'cp863',
                                 'dosarabic' => 'cp864',
                                 'dosnordic' => 'cp865',
                                 'dosgreek2' => 'cp869',
                                 'doscyrillicrussian' => 'cp866',
                                 'dosthai' => 'cp874',

                                 'macroman' => 'macintosh',
                                 'nextstep' => 'next',

                                 'utf8' => 'utf-8',
                                 'utf7' => 'utf-7',

                                 'utf16' => 'utf-16',
                                 'utf16be' => 'utf-16be',
                                 'utf16le' => 'utf-16le',

                                 'utf32' => 'utf-32',
                                 'utf32be' => 'utf-32be',
                                 'utf32le' => 'utf-32le',

                                 'ucs4' => 'ucs-4',
                                 'ucs4be' => 'ucs-4be',
                                 'ucs4le' => 'ucs-4le',

                                 'ucs2' => 'ucs-2',
                                 'ucs2be' => 'ucs-2be',
                                 'ucs2le' => 'ucs-2le',

                                 'shift-jis' => 'cp932',
                                 'gbk' => 'gbk',
                                 'euc-cn' => 'euc-cn',
                                 // NOTE(review): 'cp849' looks like a typo for 'cp949'
                                 // (Unified Hangul Code) -- confirm against the codepage
                                 // data used by the rest of the system before changing.
                                 'unifiedhangul' => 'cp849',
                                 'uhc' => 'cp849',
                                 // NOTE(review): 'cp850' is also the target of 'doslatin1'
                                 // above; Big5 is presumably meant to be 'cp950' -- confirm.
                                 'big5' => 'cp850'
                                 );
            // Accept the dash-less spellings of the ISO-8859 family.
            for ( $i = 1; $i <= 15; ++$i )
            {
                $aliasTable["iso8859-$i"] = "iso-8859-$i";
                $aliasTable["iso8859$i"] = "iso-8859-$i";
            }
            $aliasTable['unicode'] = 'unicode';
        }
        return $aliasTable;
    }

    /*!
     \private
     \static
     \return the character encoding hash table, creates it if it does not exist.
     The table will map from a character encoding scheme to an array of character sets.
     The table is cached in $GLOBALS['eZCharsetInfoEncodingTable'].
     \note Charset codes must be written in lowercase since lookups are done
           with lowercased codes, see realCharsetCode().
     \sa reverseEncodingTable
    */
    static function &encodingTable()
    {
        $encodingTable =& $GLOBALS['eZCharsetInfoEncodingTable'];
        if ( !is_array( $encodingTable ) )
        {
            // NOTE(review): 'cp849' and 'cp850' mirror the alias table values for
            // UHC and Big5; as a consequence 'cp850' (DOS Latin 1) is reported as
            // double-byte. Presumably 'cp949'/'cp950' were intended -- confirm.
            $encodingTable = array( 'doublebyte' => array( 'cp932',
                                                           'gbk',
                                                           'euc-cn',
                                                           'cp849',
                                                           'cp850' ),
                                    'unicode' => array( 'unicode' ),
                                    'utf-8' => array( 'utf-8' ) );
        }
        return $encodingTable;
    }

    /*!
     \private
     \static
     \return the reverse character encoding hash table, creates it if it does not exist.
     The table will map from a character set to a character encoding scheme.
     The table is cached in $GLOBALS['eZCharsetInfoReverseEncodingTable'].
     \sa encodingTable
    */
    static function &reverseEncodingTable()
    {
        $reverseEncodingTable =& $GLOBALS['eZCharsetInfoReverseEncodingTable'];
        if ( !is_array( $reverseEncodingTable ) )
        {
            $encodingTable =& self::encodingTable();
            $reverseEncodingTable = array();
            foreach ( $encodingTable as $encodingScheme => $charsetMatches )
            {
                foreach ( $charsetMatches as $charsetMatch )
                {
                    $reverseEncodingTable[$charsetMatch] = $encodingScheme;
                }
            }
        }
        return $reverseEncodingTable;
    }

    /*!
     Tries to find an alias for the charset code and returns it. If no
     alias code could be found the original charset code is returned.
     The lookup is first done with the lowercased code as-is and then
     with all dashes removed from it.
     \param $charsetCode The charset name to resolve, in any letter case.
     \return The internal charset code, or the lowercased input if it is unknown.
     \note The resulting charset code will be in all lowercase letters.
    */
    static function realCharsetCode( $charsetCode )
    {
        $aliasTable =& self::aliasTable();
        $charsetCode = strtolower( $charsetCode );
        if ( isset( $aliasTable[$charsetCode] ) )
        {
            return $aliasTable[$charsetCode];
        }
        // Check alias without any dashes
        $charsetCodeNoDash = str_replace( '-', '', $charsetCode );
        if ( isset( $aliasTable[$charsetCodeNoDash] ) )
        {
            return $aliasTable[$charsetCodeNoDash];
        }
        return $charsetCode;
    }

    /*!
     Tries to figure out the character encoding scheme for the given character set.
     It uses realCharsetCode() to get the correct internal charset so any charset
     can be given to this function.
     \param $charsetCode The charset name to look up.
     \param $isRealCharset If \c true the code is taken to be an internal charset
                           code already and the alias lookup is skipped.
     \return Either the found encoding scheme or 'singlebyte' if no scheme was found.
     \sa realCharsetCode
    */
    static function characterEncodingScheme( $charsetCode, $isRealCharset = false )
    {
        if ( $isRealCharset )
        {
            // Internal charset codes are all lowercase; normalize so that
            // a code given in another letter case still matches the table.
            $charsetCode = strtolower( $charsetCode );
        }
        else
        {
            $charsetCode = self::realCharsetCode( $charsetCode );
        }
        $reverseEncodingTable =& self::reverseEncodingTable();
        if ( isset( $reverseEncodingTable[$charsetCode] ) )
        {
            return $reverseEncodingTable[$charsetCode];
        }
        return 'singlebyte';
    }
}