00001 <?php
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
/*!
  \class eZTextCodec
  \brief Converts strings from one character set to another and measures string length.

  A codec is configured for one input charset and one output charset. The
  constructor picks the name of a conversion method and of a string length
  method and stores them in \c ConversionFunction and \c StrlenFunction;
  convertString() and strlen() then dispatch to those methods by name.

  Codecs are normally fetched with instance(), which resolves charset aliases
  through eZCharsetInfo and keeps one codec per charset pair in $GLOBALS.
*/
class eZTextCodec
{
    /// Input charset code exactly as it was requested by the caller
    var $RequestedInputCharsetCode;
    /// Output charset code exactly as it was requested by the caller
    var $RequestedOutputCharsetCode;
    /// Resolved input charset code, used for codepage lookups and mbstring calls
    var $InputCharsetCode;
    /// Resolved output charset code, used for codepage lookups and mbstring calls
    var $OutputCharsetCode;
    /// Encoding scheme of the input; the lookup tables know 'unicode', 'utf-8', 'singlebyte' and 'doublebyte'
    var $InputCharacterEncodingScheme;
    /// Encoding scheme of the output, same set of values as for the input
    var $OutputCharacterEncodingScheme;
    /// Name of the method convertString() dispatches to
    var $ConversionFunction;
    /// Name of the method strlen() dispatches to
    var $StrlenFunction;
    /// True when the conversion method is anything but 'convertNone'
    var $RequireConversion;
    /// eZCodepage object, set by initializeInputCodepage() or initializeOutputCodepage()
    var $Codepage;
    /// eZCodepageMapper object, set by initializeCodepageMapper()
    var $CodepageMapper;

    /*!
     Sets up the codec for the given charset pair and selects the conversion
     and string length methods.

     \param $inputCharsetCode      input charset as requested (kept for error messages and accessors)
     \param $outputCharsetCode     output charset as requested
     \param $realInputCharsetCode  resolved input charset code
     \param $realOutputCharsetCode resolved output charset code
     \param $inputEncoding         encoding scheme of the input charset
     \param $outputEncoding        encoding scheme of the output charset

     Selection order for the conversion method:
     -# identical resolved charsets: no conversion,
     -# both charsets known to mbstring (and mbstring allowed): mbstring,
     -# otherwise the method found in the encoding scheme table.

     If no conversion or string length method can be found an error is
     written with eZDebug::writeError() and the pass-through methods are used.
    */
    function eZTextCodec( $inputCharsetCode, $outputCharsetCode,
                          $realInputCharsetCode, $realOutputCharsetCode,
                          $inputEncoding, $outputEncoding )
    {
        include_once( "lib/ezi18n/classes/ezcharsetinfo.php" );
        $this->RequestedInputCharsetCode = $inputCharsetCode;
        $this->RequestedOutputCharsetCode = $outputCharsetCode;
        $this->InputCharsetCode = $realInputCharsetCode;
        $this->OutputCharsetCode = $realOutputCharsetCode;
        $this->InputCharacterEncodingScheme = $inputEncoding;
        $this->OutputCharacterEncodingScheme = $outputEncoding;

        // The extension setting is written by updateSettings(); default is to allow mbstring.
        $useMBStringExtension = true;
        if ( isset( $GLOBALS['eZTextCodecMBStringExtension'] ) )
            $useMBStringExtension = $GLOBALS['eZTextCodecMBStringExtension'];

        // mbstring is only considered available when all of these functions exist.
        $hasMBString = ( function_exists( "mb_convert_encoding" ) and
                         function_exists( "mb_substitute_character" ) and
                         function_exists( "mb_strcut" ) and
                         function_exists( "mb_strlen" ) and
                         function_exists( "mb_strpos" ) and
                         function_exists( "mb_strrpos" ) and
                         function_exists( "mb_strwidth" ) and
                         function_exists( "mb_substr" ) );

        $useMBString = ( $useMBStringExtension and
                         eZTextCodec::useMBString() and
                         $hasMBString );

        // String length method per input encoding scheme.
        $encodingStrlenMap = array( 'unicode' => 'strlenUnicode',
                                    'utf-8' => 'strlenUTF8',
                                    'singlebyte' => 'strlenCodepage',
                                    'doublebyte' => 'strlenCodepage' );

        // Conversion method, indexed by [input scheme][output scheme].
        $encodingConvertMap = array(
            'unicode' => array( 'unicode' => 'convertNone',
                                'utf-8' => 'convertUnicodeToUTF8',
                                'singlebyte' => 'convertUnicodeToCodepage',
                                'doublebyte' => 'convertUnicodeToCodepage' ),
            'utf-8' => array( 'unicode' => 'convertUTF8ToUnicode',
                              'utf-8' => 'convertNone',
                              'singlebyte' => 'convertCodepageRev',
                              'doublebyte' => 'convertCodepageRev' ),
            'singlebyte' => array( 'unicode' => 'convertCodepageToUnicode',
                                   'utf-8' => 'convertCodepage',
                                   'singlebyte' => 'convertCodepageMapper',
                                   'doublebyte' => 'convertCodepageMapper' ),
            'doublebyte' => array( 'unicode' => 'convertCodepageToUnicode',
                                   'utf-8' => 'convertCodepage',
                                   'singlebyte' => 'convertCodepageMapper',
                                   'doublebyte' => 'convertCodepageMapper' ) );

        // Initialization method, indexed the same way. Every entry must name an
        // initialize*() method since it is called without arguments.
        $encodingConvertInitMap = array(
            'unicode' => array( 'singlebyte' => 'initializeOutputCodepage',
                                'doublebyte' => 'initializeOutputCodepage' ),
            'utf-8' => array( 'singlebyte' => 'initializeOutputCodepage',
                              'doublebyte' => 'initializeOutputCodepage' ),
            'singlebyte' => array( 'unicode' => 'initializeInputCodepage',
                                   'utf-8' => 'initializeInputCodepage',
                                   'singlebyte' => 'initializeCodepageMapper',
                                   'doublebyte' => 'initializeCodepageMapper' ),
            'doublebyte' => array( 'unicode' => 'initializeInputCodepage',
                                   'utf-8' => 'initializeInputCodepage',
                                   'singlebyte' => 'initializeCodepageMapper',
                                   // Was 'convertCodepageMapper', which left CodepageMapper unset.
                                   'doublebyte' => 'initializeCodepageMapper' ) );

        $noneConversionFunction = 'convertNone';
        $noneStrlenFunction = 'strlenNone';
        $conversionFunction = null;
        $strlenFunction = null;
        // null means "look it up in $encodingConvertInitMap", false means "no initialization".
        $encodingConvertInitFunction = null;

        // The list of charsets handled by mbstring is built once and shared through $GLOBALS.
        $mbStringCharsets =& $GLOBALS["eZMBCharsetList"];
        if ( $useMBString and
             !is_array( $mbStringCharsets ) )
        {
            $charsetList = array( "ucs-4", "ucs-4be", "ucs-4le", "ucs-2", "ucs-2be", "ucs-2le", "utf-32", "utf-32be", "utf-32le", "utf-16",
                                  "utf-16be", "utf-16le", "utf-8", "utf-7", "ascii", "euc-jp", "sjis", "eucjp-win", "sjis-win", "iso-2022-jp", "jis",
                                  "iso-8859-1", "iso-8859-2", "iso-8859-3", "iso-8859-4", "iso-8859-5", "iso-8859-6", "iso-8859-7", "iso-8859-8",
                                  "iso-8859-9", "iso-8859-10", "iso-8859-13", "iso-8859-14", "iso-8859-15", "byte2be", "byte2le", "byte4be",
                                  "byte4le", "base64", "7bit", "8bit", "utf7-imap" );
            $mbStringCharsets = array();
            foreach ( $charsetList as $charset )
            {
                $mbStringCharsets[$charset] = $charset;
            }
        }

        $isSinglebyteSame = false;
        $isSame = false;

        if ( $this->InputCharsetCode == $this->OutputCharsetCode )
        {
            // Same charset on both sides, strings are passed through untouched.
            $conversionFunction = $noneConversionFunction;
            $encodingConvertInitFunction = 'initializeInputCodepage';
            $inpenc = $this->InputCharacterEncodingScheme;
            if ( $inpenc == 'singlebyte' )
            {
                $isSinglebyteSame = true;
            }
            $isSame = true;
        }
        else if ( $useMBString and
                  isset( $mbStringCharsets[$this->InputCharsetCode] ) and
                  isset( $mbStringCharsets[$this->OutputCharsetCode] ) )
        {
            // Both charsets are handled by mbstring, no codepage needs to be loaded.
            $conversionFunction = "convertMBString";
            $strlenFunction = "strlenMBString";
            $encodingConvertInitFunction = false;
        }
        else
        {
            $inpenc = $this->InputCharacterEncodingScheme;
            $outenc = $this->OutputCharacterEncodingScheme;
            if ( isset( $encodingConvertMap[$inpenc][$outenc] ) )
            {
                $conversionFunction = $encodingConvertMap[$inpenc][$outenc];
            }
        }

        if ( $strlenFunction === null )
        {
            $inpenc = $this->InputCharacterEncodingScheme;
            if ( $isSinglebyteSame )
            {
                // One byte per character, the native strlen() is enough.
                $strlenFunction = 'strlenNone';
            }
            else if ( $useMBString and isset( $mbStringCharsets[$this->InputCharsetCode] ) )
            {
                $strlenFunction = 'strlenMBString';
            }
            else if ( isset( $encodingStrlenMap[$inpenc] ) )
            {
                $strlenFunction = $encodingStrlenMap[$inpenc];
                if ( $inpenc == 'utf-8')
                {
                    // strlenUTF8() relies on eZUTF8Codec being loaded.
                    include_once( "lib/ezi18n/classes/ezutf8codec.php" );
                }
            }
        }

        // Identical charsets never run an initialization method.
        if ( !$isSame and
             $conversionFunction and
             $strlenFunction )
        {
            $this->initializeConversionFunction( $encodingConvertInitMap, $encodingConvertInitFunction );
        }
        if ( !$conversionFunction or
             !$strlenFunction )
        {
            eZDebug::writeError( "Cannot create textcodec from characterset " . $this->RequestedInputCharsetCode .
                                 " to characterset " . $this->RequestedOutputCharsetCode,
                                 "eZTextCodec" );
            if ( !$conversionFunction )
                $conversionFunction = $noneConversionFunction;
            if ( !$strlenFunction )
                $strlenFunction = $noneStrlenFunction;
        }

        $this->ConversionFunction = $conversionFunction;
        $this->StrlenFunction = $strlenFunction;
        $this->RequireConversion = $conversionFunction != $noneConversionFunction;
    }

    /*!
     Runs the initialization method that belongs to the current encoding scheme pair.

     \param $encodingConvertInitMap      table of method names indexed by [input scheme][output scheme]
     \param $encodingConvertInitFunction explicit method name; \c null means the table is consulted,
                                         any other false value means nothing is run
    */
    function initializeConversionFunction( $encodingConvertInitMap, $encodingConvertInitFunction )
    {
        $inpenc = $this->InputCharacterEncodingScheme;
        $outenc = $this->OutputCharacterEncodingScheme;
        $initFunction = false;
        if ( $encodingConvertInitFunction !== null )
        {
            if ( $encodingConvertInitFunction )
            {
                $initFunction = $encodingConvertInitFunction;
            }
        }
        else if ( isset( $encodingConvertInitMap[$inpenc][$outenc] ) )
        {
            $initFunction = $encodingConvertInitMap[$inpenc][$outenc];
        }
        if ( $initFunction )
        {
            $this->$initFunction();
        }
    }

    /*!
     Loads the codepage mapper for the input/output charset pair into \c CodepageMapper.
    */
    function initializeCodepageMapper()
    {
        include_once( 'lib/ezi18n/classes/ezcodepagemapper.php' );
        $this->CodepageMapper =& eZCodepageMapper::instance( $this->InputCharsetCode,
                                                             $this->OutputCharsetCode );
    }

    /*!
     Loads the codepage of the input charset into \c Codepage.
    */
    function initializeInputCodepage()
    {
        include_once( 'lib/ezi18n/classes/ezcodepage.php' );
        $this->Codepage =& eZCodepage::instance( $this->InputCharsetCode );
    }

    /*!
     Loads the codepage of the output charset into \c Codepage.
    */
    function initializeOutputCodepage()
    {
        include_once( 'lib/ezi18n/classes/ezcodepage.php' );
        $this->Codepage =& eZCodepage::instance( $this->OutputCharsetCode );
    }

    /*!
     \return \c true if convertString() does anything else than returning its input.
    */
    function conversionRequired()
    {
        return $this->RequireConversion;
    }

    /*!
     Stores \a $use in $GLOBALS as the switch read by useMBString().
    */
    function setUseMBString( $use )
    {
        $GLOBALS["eZTextCodecUseMBString"] = $use;
    }

    /*!
     \return the switch set with setUseMBString(). When it has not been set yet
             it is initialized to \c true.
    */
    function useMBString()
    {
        $use =& $GLOBALS["eZTextCodecUseMBString"];
        if ( !isset( $use ) )
            $use = true;
        return $use;
    }

    /*!
     \return the input charset code as it was requested.
    */
    function requestedInputCharsetCode()
    {
        return $this->RequestedInputCharsetCode;
    }

    /*!
     \return the output charset code as it was requested.
    */
    function requestedOutputCharsetCode()
    {
        return $this->RequestedOutputCharsetCode;
    }

    /*!
     \return the resolved input charset code.
    */
    function inputCharsetCode()
    {
        return $this->InputCharsetCode;
    }

    /*!
     \return the resolved output charset code.
    */
    function outputCharsetCode()
    {
        return $this->OutputCharsetCode;
    }

    /*!
     Converts \a $str with the conversion method selected by the constructor
     and returns the result. The call is timed with the eZDebug accumulator
     'textcodec_conversion'.
    */
    function convertString( $str )
    {
        eZDebug::accumulatorStart( 'textcodec_conversion', false, 'String conversion' );
        $conversionFunction = $this->ConversionFunction;
        $tmp = $this->$conversionFunction( $str );
        eZDebug::accumulatorStop( 'textcodec_conversion' );
        return $tmp;
    }

    /*!
     \return the length of \a $str as reported by the string length method
             selected by the constructor.
    */
    function strlen( $str )
    {
        $strlenFunction = $this->StrlenFunction;
        return $this->$strlenFunction( $str );
    }

    /*!
     Ignores \a $str and returns an empty array.
    */
    function convertNoneToUnicode( $str )
    {
        return array();
    }

    /*!
     \return the result of eZCodepage::convertStringToUnicode() for \a $str,
             using the loaded codepage.
    */
    function convertCodepageToUnicode( $str )
    {
        eZDebug::accumulatorStart( 'textcodec_codepage_unicode', false, 'String conversion w/ codepage to Unicode' );
        $tmp = $this->Codepage->convertStringToUnicode( $str );
        eZDebug::accumulatorStop( 'textcodec_codepage_unicode' );
        return $tmp;
    }

    /*!
     \return the result of eZUTF8Codec::convertStringToUnicode() for \a $str.
    */
    function convertUTF8ToUnicode( $str )
    {
        include_once ( 'lib/ezi18n/classes/ezutf8codec.php' );
        eZDebug::accumulatorStart( 'textcodec_utf8_unicode', false, 'String conversion w/ UTF-8 to Unicode' );
        $tmp = eZUTF8Codec::convertStringToUnicode( $str );
        eZDebug::accumulatorStop( 'textcodec_utf8_unicode' );
        return $tmp;
    }

    /*!
     \return the result of eZCodepage::convertUnicodeToString() for
             \a $unicodeValues, using the loaded codepage.
    */
    function convertUnicodeToCodepage( $unicodeValues )
    {
        eZDebug::accumulatorStart( 'textcodec_unicode_codepage', false, 'String conversion w/ Unicode to codepage' );
        $tmp = $this->Codepage->convertUnicodeToString( $unicodeValues );
        eZDebug::accumulatorStop( 'textcodec_unicode_codepage' );
        return $tmp;
    }

    /*!
     \return the result of eZUTF8Codec::convertUnicodeToString() for \a $unicodeValues.
    */
    function convertUnicodeToUTF8( $unicodeValues )
    {
        include_once ( 'lib/ezi18n/classes/ezutf8codec.php' );
        eZDebug::accumulatorStart( 'textcodec_unicode_utf8', false, 'String conversion w/ Unicode to UTF8' );
        $tmp = eZUTF8Codec::convertUnicodeToString( $unicodeValues );
        eZDebug::accumulatorStop( 'textcodec_unicode_utf8' );
        return $tmp;
    }

    /*!
     Pass-through conversion, returns \a $str unchanged.
    */
    function convertNone( $str )
    {
        return $str;
    }

    /*!
     \return the result of eZCodepage::convertString() for \a $str. This is the
             method selected for singlebyte/doublebyte input with utf-8 output.
    */
    function convertCodepage( $str )
    {
        eZDebug::accumulatorStart( 'textcodec_codepage', false, 'String conversion w/ codepage' );
        $tmp = $this->Codepage->convertString( $str );
        eZDebug::accumulatorStop( 'textcodec_codepage' );
        return $tmp;
    }

    /*!
     \return the result of eZCodepage::convertStringFromUTF8() for \a $str. This
             is the method selected for utf-8 input with singlebyte/doublebyte output.
    */
    function convertCodepageRev( $str )
    {
        eZDebug::accumulatorStart( 'textcodec_codepage_rev', false, 'String conversion w/ codepage reverse' );
        $tmp = $this->Codepage->convertStringFromUTF8( $str );
        eZDebug::accumulatorStop( 'textcodec_codepage_rev' );
        return $tmp;
    }

    /*!
     \return the result of eZCodepageMapper::convertString() for \a $str, using
             the mapper loaded by initializeCodepageMapper().
    */
    function convertCodepageMapper( $str )
    {
        eZDebug::accumulatorStart( 'textcodec_codepage_mapper', false, 'String conversion w/ codepage mapper' );
        $tmp = $this->CodepageMapper->convertString( $str );
        eZDebug::accumulatorStop( 'textcodec_codepage_mapper' );
        return $tmp;
    }

    /*!
     \return \a $str converted from the input charset to the output charset
             with mb_convert_encoding().
    */
    function convertMBString( $str )
    {
        eZDebug::accumulatorStart( 'textcodec_mbstring', false, 'String conversion w/ mbstring' );
        $tmp = mb_convert_encoding( $str, $this->OutputCharsetCode, $this->InputCharsetCode );
        eZDebug::accumulatorStop( 'textcodec_mbstring' );
        return $tmp;
    }

    /*!
     \return the byte length of \a $str as reported by the native strlen().
    */
    function strlenNone( $str )
    {
        return strlen( $str );
    }

    /*!
     \return the number of elements in the array \a $unicodeValues.
    */
    function strlenUnicode( $unicodeValues )
    {
        return count( $unicodeValues );
    }

    /*!
     \return the length of \a $str as reported by eZCodepage::strlen().

     The constructor does not load a codepage for every case that selects this
     method (codepage mapper pairs, or identical non-singlebyte charsets), so
     the input codepage is loaded here on first use when it is missing.
    */
    function strlenCodepage( $str )
    {
        if ( !is_object( $this->Codepage ) )
        {
            $this->initializeInputCodepage();
        }
        return $this->Codepage->strlen( $str );
    }

    /*!
     \return the length of \a $str as reported by eZUTF8Codec::strlen().
    */
    function strlenUTF8( $str )
    {
        $utf8_codec =& eZUTF8Codec::instance();
        return $utf8_codec->strlen( $str );
    }

    /*!
     \return the length of \a $str as reported by eZCodepage::strlenFromUTF8().
    */
    function strlenCodepageRev( $str )
    {
        return $this->Codepage->strlenFromUTF8( $str );
    }

    /*!
     \return the length of \a $str as reported by eZCodepageMapper::strlen().
    */
    function strlenCodepageMapper( $str )
    {
        return $this->CodepageMapper->strlen( $str );
    }

    /*!
     \return the length of \a $str as reported by mb_strlen() for the input charset.
    */
    function strlenMBString( $str )
    {
        return mb_strlen( $str, $this->InputCharsetCode );
    }

    /*!
     Returns the codec for the given charset pair, creating it when needed.

     \param $inputCharsetCode  charset of the strings to convert, \c false means the internal charset
     \param $outputCharsetCode charset to convert to, \c false means the internal charset
     \param $alwaysReturn      when \c false, \c null is returned instead of a codec if the pair is
                               marked with a false value in $GLOBALS["eZTextCodecCharsetCheck"] or
                               if both sides resolve to the same singlebyte charset

     Created codecs are kept in $GLOBALS["eZTextCodec-<input>-<output>"] and reused.
    */
    function &instance( $inputCharsetCode, $outputCharsetCode = false, $alwaysReturn = true )
    {
        // eZCharsetInfo is used below, make sure it is loaded before the first call.
        include_once( "lib/ezi18n/classes/ezcharsetinfo.php" );

        if ( $inputCharsetCode === false or $outputCharsetCode === false )
        {
            if ( isset( $GLOBALS['eZTextCodecInternalCharsetReal'] ) )
            {
                $internalCharset = $GLOBALS['eZTextCodecInternalCharsetReal'];
            }
            else
            {
                $internalCharset = eZTextCodec::internalCharset();
            }
        }

        if ( $inputCharsetCode === false )
        {
            $realInputCharsetCode = $inputCharsetCode = $internalCharset;
        }
        else
        {
            $realInputCharsetCode = eZCharsetInfo::realCharsetCode( $inputCharsetCode );
        }

        if ( $outputCharsetCode === false )
        {
            $realOutputCharsetCode = $outputCharsetCode = $internalCharset;
        }
        else
        {
            $realOutputCharsetCode = eZCharsetInfo::realCharsetCode( $outputCharsetCode );
        }

        // NOTE: nothing in this class stores a codec or a false marker in this table,
        // the checks below only react to values written elsewhere.
        $check =& $GLOBALS["eZTextCodecCharsetCheck"]["$realInputCharsetCode-$realOutputCharsetCode"];
        if ( !$alwaysReturn and isset( $check ) and !$check )
        {
            $check = null;
            return $check;
        }
        if ( isset( $check ) and is_object( $check ) )
        {
            return $check;
        }

        // Retry the resolution when it produced an empty value.
        if ( !$realInputCharsetCode )
        {
            $realInputCharsetCode = eZCharsetInfo::realCharsetCode( $inputCharsetCode );
        }
        if ( !$realOutputCharsetCode )
        {
            $realOutputCharsetCode = eZCharsetInfo::realCharsetCode( $outputCharsetCode );
        }
        $inputEncoding = eZCharsetInfo::characterEncodingScheme( $realInputCharsetCode, true );
        $outputEncoding = eZCharsetInfo::characterEncodingScheme( $realOutputCharsetCode, true );
        if ( !$alwaysReturn and
             $inputEncoding == 'singlebyte' and
             $inputEncoding == $outputEncoding and
             $realInputCharsetCode == $realOutputCharsetCode )
        {
            $check = null;
            return $check;
        }
        $codec =& $GLOBALS["eZTextCodec-$realInputCharsetCode-$realOutputCharsetCode"];
        // get_class() keeps the declared case on newer PHP versions and must not be
        // handed a non-object, so guard first and compare case-insensitively.
        if ( !is_object( $codec ) or
             strtolower( get_class( $codec ) ) != "eztextcodec" )
        {
            $codec = new eZTextCodec( $inputCharsetCode, $outputCharsetCode,
                                      $realInputCharsetCode, $realOutputCharsetCode,
                                      $inputEncoding, $outputEncoding );
        }
        return $codec;
    }

    /*!
     Replaces the charset settings kept in $GLOBALS and drops the values derived
     from the old ones.

     \param $settings array with the keys 'internal-charset', 'http-charset' and
                      'mbstring-extension'

     When mb_internal_encoding() exists it is called with the internal charset;
     errors from that call are suppressed.
    */
    function updateSettings( $settings )
    {
        unset( $GLOBALS['eZTextCodecInternalCharsetReal'] );
        unset( $GLOBALS['eZTextCodecHTTPCharsetReal'] );
        unset( $GLOBALS['eZTextCodecCharsetCheck'] );
        $GLOBALS['eZTextCodecInternalCharset'] = $settings['internal-charset'];
        $GLOBALS['eZTextCodecHTTPCharset'] = $settings['http-charset'];
        $GLOBALS['eZTextCodecMBStringExtension'] = $settings['mbstring-extension'];
        if ( function_exists( 'mb_internal_encoding' ) )
        {
            @mb_internal_encoding( $settings['internal-charset'] );
        }
    }

    /*!
     \return the resolved internal charset code.

     The charset is taken from $GLOBALS['eZTextCodecInternalCharset'] when set,
     otherwise from the setting CharacterSettings/Charset in i18n.ini. The
     resolved value is remembered in $GLOBALS['eZTextCodecInternalCharsetReal'].
    */
    function internalCharset()
    {
        $realCharset =& $GLOBALS['eZTextCodecInternalCharsetReal'];
        if ( !isset( $realCharset ) )
        {
            if ( !isset( $GLOBALS['eZTextCodecInternalCharset'] ) )
            {
                $i18n =& eZINI::instance( 'i18n.ini', '', false );
                $charsetCode = $i18n->variable( 'CharacterSettings', 'Charset' );
            }
            else
                $charsetCode = $GLOBALS['eZTextCodecInternalCharset'];
            include_once( "lib/ezi18n/classes/ezcharsetinfo.php" );
            $realCharset = eZCharsetInfo::realCharsetCode( $charsetCode );
        }
        return $realCharset;
    }

    /*!
     \return the resolved HTTP charset code.

     The charset is taken from $GLOBALS['eZTextCodecHTTPCharset']; when that is
     unset or empty the internal charset is used instead. The resolved value is
     remembered in $GLOBALS['eZTextCodecHTTPCharsetReal'].
    */
    function httpCharset()
    {
        $realCharset =& $GLOBALS['eZTextCodecHTTPCharsetReal'];
        if ( !isset( $realCharset ) )
        {
            $charset = '';
            if ( isset( $GLOBALS['eZTextCodecHTTPCharset'] ) )
                $charset = $GLOBALS['eZTextCodecHTTPCharset'];
            if ( $charset == '' )
            {
                if ( isset( $GLOBALS['eZTextCodecInternalCharsetReal'] ) )
                    $realCharset = $GLOBALS['eZTextCodecInternalCharsetReal'];
                else
                    $realCharset = eZTextCodec::internalCharset();
            }
            else
            {
                include_once( "lib/ezi18n/classes/ezcharsetinfo.php" );
                $realCharset = eZCharsetInfo::realCharsetCode( $charset );
            }
        }
        return $realCharset;
    }
}
00607
00608 ?>