|
eZ Publish
[4.0]
|
<?php
//
// Definition of eZCharTransform class
//
// Created on: <16-Jul-2004 15:54:21 amos>
//
// ## BEGIN COPYRIGHT, LICENSE AND WARRANTY NOTICE ##
// SOFTWARE NAME: eZ Publish
// SOFTWARE RELEASE: 4.0.x
// COPYRIGHT NOTICE: Copyright (C) 1999-2008 eZ Systems AS
// SOFTWARE LICENSE: GNU General Public License v2.0
// NOTICE: >
//   This program is free software; you can redistribute it and/or
//   modify it under the terms of version 2.0  of the GNU General
//   Public License as published by the Free Software Foundation.
//
//   This program is distributed in the hope that it will be useful,
//   but WITHOUT ANY WARRANTY; without even the implied warranty of
//   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//   GNU General Public License for more details.
//
//   You should have received a copy of version 2.0 of the GNU General
//   Public License along with this program; if not, write to the Free
//   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
//   MA 02110-1301, USA.
//
//
// ## END COPYRIGHT, LICENSE AND WARRANTY NOTICE ##
//

/*! \file ezchartransform.php
*/

/*!
  \class eZCharTransform ezchartransform.php
  \ingroup eZI18N
  \brief Performs rule based transformation of characters in a string

  The class builds a character mapping table with eZCodeMapper, applies it
  to the text with strtr() and optionally stores the table as a generated
  PHP cache file so later calls can simply include that file.

  \sa eZCodeMapper
*/

//include_once( 'lib/ezi18n/classes/eztextcodec.php' );
//include_once( 'lib/ezi18n/classes/ezcharsetinfo.php' );

class eZCharTransform
{
    /// The timestamp for when the format of the cache files were
    /// last changed. This must be updated when the format changes
    /// to invalidate existing cache files.
    /// 1101288452
    /// 30. Jan. 2007 - 1170165730
    /// 24. Apr. 2007 - 1177423380
    const CODE_DATE = 1177423380;

    /// Commands already parsed by groupCommands(), indexed by group name.
    /// Declared explicitly so the property is not created dynamically.
    public $GroupRules = array();

    /*!
     Constructor
    */
    function eZCharTransform()
    {
    }

    /*!
     Transforms the text according to the rules defined in \a $rule using character set \a $charset.
     \param $text The text string to be converted, currently Unicode arrays are not supported
     \param $rule Which transformation rule to use, can either be a string identifier or an array with identifiers.
     \param $charset Which charset to use when transforming, if \c false it will use current charset (i18n.ini).
     \param $useCache If \c true then it will use cache files for the mapping,
                      if not it will have to calculate them each time.
     \return The transformed text. An empty string is returned unchanged.
    */
    function transform( $text, $rule, $charset = false, $useCache = true )
    {
        if ( $text === '' )
        {
            return $text;
        }

        // The charset name must be resolved before the cache branch: it is part
        // of the cache file name and it is also needed by the mapper below when
        // caching is turned off (same order as in transformByGroup()).
        $charsetName = ( $charset === false ? eZTextCodec::internalCharset() : eZCharsetInfo::realCharsetCode( $charset ) );

        if ( $useCache )
        {
            // CRC32 is used for speed, MD5 would be more unique but is slower
            //include_once( 'lib/ezutils/classes/ezsys.php' );
            $key = eZSys::ezcrc32( 'Rule: ' . ( is_array( $rule ) ? implode( ',', $rule ) : $rule ) . '-' . $charset );
            $filepath = $this->cacheFilePath( 'rule-',
                                              '-' . $charsetName,
                                              $key );

            // Try to execute the code in the cache file, if it succeeds
            // the transformed text is returned right away
            $retText = $this->executeCacheFile( $text, $filepath );
            if ( $retText !== false )
            {
                return $retText;
            }
        }

        // Make sure we have a mapper
        $mapper = new eZCodeMapper();

        $mapper->loadTransformationFiles( $charsetName, false );

        // First generate a unicode based mapping table from the rules
        $unicodeTable = $mapper->generateMappingCode( $rule );
        unset( $unicodeTable[0] );
        // Then transform that to a table that works with the current charset
        // Any character not available in the current charset will be removed
        $charsetTable = $mapper->generateCharsetMappingTable( $unicodeTable, $charset );
        $transformationData = array( 'table' => $charsetTable );
        unset( $unicodeTable );

        if ( $useCache )
        {
            $extraCode = '';
            $this->storeCacheFile( $filepath, $transformationData,
                                   $extraCode,
                                   'Rule', $charsetName );
        }

        // Execute transformations
        return strtr( $text, $transformationData['table'] );
    }

    /*!
     Transforms the text according to the commands defined for the group \a $group using character set \a $charset.
     \param $text The text string to be converted, currently Unicode arrays are not supported
     \param $group Name of the transformation group, its commands are read from \c transform.ini (see groupCommands()).
     \param $charset Which charset to use when transforming, if \c false it will use current charset (i18n.ini).
     \param $useCache If \c true then it will use cache files for the tables,
                      if not it will have to calculate them each time.
     \return The transformed text, or \c false if the commands for the group could not be found.
    */
    function transformByGroup( $text, $group, $charset = false, $useCache = true )
    {
        if ( $text === '' )
        {
            return $text;
        }
        $charsetName = ( $charset === false ? eZTextCodec::internalCharset() : eZCharsetInfo::realCharsetCode( $charset ) );
        if ( $useCache )
        {
            // CRC32 is used for speed, MD5 would be more unique but is slower
            //include_once( 'lib/ezutils/classes/ezsys.php' );

            $keyText = 'Group:' . $group;
            $key = eZSys::ezcrc32( $keyText . '-' . $charset );
            $filepath = $this->cacheFilePath( 'g-' . $group . '-',
                                              '-' . $charsetName,
                                              $key );

            // Try to execute the code in the cache file, if it succeeds
            // the transformed text is returned right away
            $retText = $this->executeCacheFile( $text, $filepath );
            if ( $retText !== false )
            {
                return $retText;
            }
        }

        $commands = $this->groupCommands( $group );
        if ( $commands === false )
            return false;

        $mapper = new eZCodeMapper();

        $mapper->loadTransformationFiles( $charsetName, $group );

        // Collect the mapping rules of every command in the group
        $rules = array();
        foreach ( $commands as $command )
        {
            $rules = array_merge( $rules,
                                  $mapper->decodeCommand( $command['command'], $command['parameters'] ) );
        }

        // First generate a unicode based mapping table from the rules
        $unicodeTable = $mapper->generateMappingCode( $rules );
        unset( $unicodeTable[0] );
        // Then transform that to a table that works with the current charset
        // Any character not available in the current charset will be removed
        $charsetTable = $mapper->generateCharsetMappingTable( $unicodeTable, $charset );
        $transformationData = array( 'table' => $charsetTable );
        unset( $unicodeTable );

        if ( $useCache )
        {
            // Commands may contribute PHP code which is appended to the cache file
            $extraCode = '';
            foreach ( $commands as $command )
            {
                $code = $mapper->generateCommandCode( $command, $charsetName );
                if ( $code !== false )
                {
                    $extraCode .= $code . "\n";
                }
            }
            $this->storeCacheFile( $filepath, $transformationData,
                                   $extraCode,
                                   'Group:' . $group, $charsetName );
        }

        // Execute transformations
        $text = strtr( $text, $transformationData['table'] );

        // Execute custom code
        // NOTE(review): executeCommandCode() presumably modifies $text by
        // reference since its return value is not used - confirm in eZCodeMapper.
        foreach ( $commands as $command )
        {
            $mapper->executeCommandCode( $text, $command, $charsetName );
        }

        return $text;
    }

    /*!
     \private
     \static
     \return the path of the cached transformation tables.

     The path is remembered in \c $GLOBALS['eZCodeMapperCachePath'], when that
     entry is not set it becomes the system cache directory followed by \c /trans.
    */
    function cachedTransformationPath()
    {
        $dir =& $GLOBALS['eZCodeMapperCachePath'];
        if ( isset( $dir ) )
            return $dir;

        //include_once( 'lib/ezutils/classes/ezsys.php' );
        $sys = eZSys::instance();
        // $dir is a reference, so this assignment also fills the global entry
        $dir = $sys->cacheDirectory() . '/trans';
        return $dir;
    }

    /*!
     \private
     Finds all commands defined for group \a $group.
     The groups and their commands are defined in \c transform.ini.
     The parsed result is remembered per group in \c $this->GroupRules.

     \return An array with commands, or \c false if the group is not listed in
             the active groups or is missing from \c transform.ini.
             Each entry consists of:
             - command - Name of the command
             - parameters - Array with parameters for command
    */
    function groupCommands( $group )
    {
        // Reference into the cache, assigning to $rules below stores the result
        $rules =& $this->GroupRules[$group];
        if ( isset( $rules ) )
            return $rules;

        $ini = eZINI::instance( 'transform.ini' );
        $groups = $ini->variable( 'Transformation', 'Groups' );
        if ( !in_array( $group, $groups ) )
        {
            eZDebug::writeError( "Transformation group $group is not part of the active group list Groups in transform.ini",
                                 'eZCharTransform::groupCommands' );
            return false;
        }

        if ( !$ini->hasGroup( $group ) )
        {
            eZDebug::writeError( "Transformation group $group is missing in transform.ini",
                                 'eZCharTransform::groupCommands' );
            return false;
        }

        $rules = array();
        $ruleTexts = $ini->variable( $group, 'Commands' );
        foreach ( $ruleTexts as $ruleText )
        {
            // Accepts "name" or "name(param1,param2,...)", other texts are skipped
            if ( preg_match( "#^([a-zA-Z][a-zA-Z0-9_-]+)(\((.+)\))?$#", $ruleText, $matches ) )
            {
                $command = $matches[1];
                $parameters = array();
                if ( isset( $matches[2] ) )
                {
                    $parameters = explode( ',', $matches[3] );
                }
                $rules[] = array( 'command' => $command,
                                  'parameters' => $parameters );
            }
        }

        return $rules;
    }

    /*!
     Get cache file path. The cache directory is created if it does not exist.

     \param $prefix Text placed in front of the key in the file name
     \param $suffix Text placed after the key in the file name
     \param $key Numeric key, it is formatted as an unsigned integer

     \return cache file path.
    */
    function cacheFilePath( $prefix, $suffix, $key )
    {
        $path = eZCharTransform::cachedTransformationPath();
        if ( !file_exists( $path ) )
        {
            //include_once( 'lib/ezfile/classes/ezdir.php' );
            eZDir::mkdir( $path, false, true );
        }
        return $path . '/' . $prefix . sprintf( "%u", $key ) . $suffix . '.ctt.php'; // ctt=charset transform table
    }

    /*!
     \private
     Includes the cache file \a $filepath, which transforms \a $text, when the file is still valid.
     The file is considered stale if it is older than the cache file of \c transform.ini,
     older than CODE_DATE or older than \a $timestamp.

     \param $text The text that should be transformed
     \param $filepath The path of the cache file, see cacheFilePath().
     \param $timestamp A timestamp value which is matched against the cache file,
                       pass for instance the timestamp of the INI file.
     \return The transformed text or \c false if there is no usable cache file.
    */
    protected function executeCacheFile( $text, $filepath, $timestamp = false )
    {
        if ( file_exists( $filepath ) )
        {
            $time = filemtime( $filepath );
            $ini = eZINI::instance( 'transform.ini' );
            if ( $ini->CacheFile && file_exists( $ini->CacheFile ) && $time < filemtime( $ini->CacheFile ) )
            {
                return false;
            }
            if ( $time >= max( self::CODE_DATE, $timestamp ) )
            {
                // Execute the PHP file, the code in it transforms the local $text
                include "$filepath";
                return $text;
            }
        }
        return false;
    }

    /*!
     \private
     Stores the transformation data \a $transformationData in the cache file \a $filepath.
     The generated file assigns the data to \c $data, runs strtr() on \c $text
     and then contains \a $extraCode if that is non-empty.

     \param $filepath Path of the cache file to write
     \param $transformationData Array with the mapping table in the entry \c table
     \param $extraCode PHP code to append after the table lookup, can be empty
     \param $type Text written to the \c Type comment in the file
     \param $charsetName Text written to the \c Charset comment in the file
     \return The result of eZPHPCreator::store().
    */
    function storeCacheFile( $filepath, $transformationData, $extraCode, $type, $charsetName )
    {
        $file = basename( $filepath );
        $dir = dirname( $filepath );
        $php = new eZPHPCreator( $dir, $file );

        $php->addComment( "Cached transformation data" );
        $php->addComment( "Type: $type" );
        $php->addComment( "Charset: $charsetName" );
        $php->addComment( "Cached transformation data" );

        $php->addCodePiece( '$data = ' . eZCharTransform::varExport( $transformationData ) . ";\n" );
        $php->addCodePiece( "\$text = strtr( \$text, \$data['table'] );\n" );

        if ( $extraCode )
        {
            $php->addCodePiece( $extraCode );
        }

        return $php->store( true );
    }

    /*!
     \private
     Creates a text representation of the value \a $value which can
     be placed in files and be read back by a PHP parser as it was.
     This is a thin wrapper around PHP's var_export().

     \param $value The value to export
     \return The exported value as a string.
    */
    static function varExport( $value )
    {
        return var_export( $value, true );
    }

    /*!
     \private
     \static
     Creates a text representation of the value \a $value which can
     be placed in files and be read back by a PHP parser as it was.
     Meant as a replacement for PHP versions with broken var_export.
     The type of the values determines the output, it can be one of the following.
     - boolean, becomes \c true or \c false
     - null, becomes \c null
     - string, adds \ (backslash) to backslashes, double quotes, dollar signs and newlines.
               Then wraps the whole string in " (double quotes).
     - numeric, displays the value as-is.
     - object, creates a representation of an object creation if the object has \c serializeData implemented,
               otherwise the result is an empty string.
     - array, expands all value recursively using this function
     - anything else becomes \c null

     \param $column Determines the starting column in which the text will be placed.
                    This is used for expanding arrays and objects which can span multiple lines.
     \param $iteration The current iteration, starts at 0 and increases with 1 for each recursive call
    */
    static function varExportInternal( $value, $column = 0, $iteration = 0 )
    {

        if ( is_bool( $value ) )
            $text = ( $value ? 'true' : 'false' );
        else if ( is_null( $value ) )
            $text = 'null';
        else if ( is_string( $value ) )
        {
            $valueText = str_replace( array( "\\",
                                             "\"",
                                             "\$",
                                             "\n" ),
                                      array( "\\\\",
                                             "\\\"",
                                             "\\$",
                                             "\\n" ),
                                      $value );
            $text = "\"$valueText\"";
        }
        else if ( is_numeric( $value ) )
            $text = $value;
        else if ( is_object( $value ) )
        {
            $text = '';
            if ( method_exists( $value, 'serializedata' ) )
            {
                $serializeData = $value->serializeData();
                $className = $serializeData['class_name'];
                $text = "new $className(";

                $column += strlen( $text );
                $parameters = $serializeData['parameters'];
                $variables = $serializeData['variables'];

                $i = 0;
                foreach ( $parameters as $parameter )
                {
                    // Following parameters go on their own line, aligned with the first
                    if ( $i > 0 )
                    {
                        $text .= ",\n" . str_repeat( ' ', $column );
                    }
                    $variableName = $variables[$parameter];
                    $variableValue = $value->$variableName;
                    $keyText = " ";
                    $text .= $keyText . eZCharTransform::varExportInternal( $variableValue, $column + strlen( $keyText ), $iteration + 1 );
                    ++$i;
                }
                if ( $i > 0 )
                    $text .= ' ';

                $text .= ')';
            }
        }
        else if ( is_array( $value ) )
        {
            $text = 'array(';
            $column += strlen( $text );
            $valueKeys = array_keys( $value );
            // The array is indexed when its keys are exactly 0, 1, 2, ...
            // in which case the keys are left out of the output
            $isIndexed = true;
            for ( $i = 0; $i < count( $valueKeys ); ++$i )
            {
                if ( $i !== $valueKeys[$i] )
                {
                    $isIndexed = false;
                    break;
                }
            }
            $i = 0;
            foreach ( $valueKeys as $key )
            {
                // Following elements go on their own line, aligned with the first
                if ( $i > 0 )
                {
                    $text .= ",\n" . str_repeat( ' ', $column );
                }
                $element =& $value[$key];
                $keyText = ' ';
                if ( !$isIndexed )
                {
                    if ( is_int( $key ) )
                        $keyText = $key;
                    else
                        $keyText = "\"" . str_replace( array( "\\",
                                                              "\"",
                                                              "\n" ),
                                                       array( "\\\\",
                                                              "\\\"",
                                                              "\\n" ),
                                                       $key ) . "\"";
                    $keyText = " $keyText => ";
                }
                $text .= $keyText . eZCharTransform::varExportInternal( $element, $column + strlen( $keyText ), $iteration + 1 );
                ++$i;
            }
            if ( $i > 0 )
                $text .= ' ';
            $text .= ')';
        }
        else
            $text = 'null';
        return $text;
    }

    /*!
     \static
     Returns the current word separator, if none is found it will read from site.ini URLTranslator/WordSeparator
     The INI values \c dash, \c underscore and \c space are recognized, any other
     value gives a dash (which is then not remembered in the global entry).
     \sa setWordSeparator
    */
    static function wordSeparator()
    {
        if ( isset( $GLOBALS['eZCharTransform_wordSeparator'] ) )
        {
            return $GLOBALS['eZCharTransform_wordSeparator'];
        }
        else
        {
            $ini = eZINI::instance();
            $separator = strtolower( $ini->variable( "URLTranslator", "WordSeparator" ) );
            switch ( $separator )
            {
                case 'dash':
                    $separator = '-';
                    break;
                case 'underscore':
                    $separator = '_';
                    break;
                case 'space':
                    $separator = ' ';
                    break;
                default:
                    return '-';
            }
            $GLOBALS['eZCharTransform_wordSeparator'] = $separator;
            return $separator;
        }
    }

    /*!
     Sets the current word separator, set it to \c null to use default value.
     \param $char The separator, it is stored in \c $GLOBALS['eZCharTransform_wordSeparator'].
    */
    function setWordSeparator( $char )
    {
        $GLOBALS['eZCharTransform_wordSeparator'] = $char;
    }

    /*!
     \static
     Cleans up \a $text using the old style of url alias,
     lowercase only and underscores for separators.
     \param $text The text to clean up
     \param $charsetName Not used by this command
     \return The cleaned up text.
    */
    static function commandUrlCleanupCompat( $text, $charsetName )
    {
        // Old style of url alias with lowercase only and underscores for separators
        $text = strtolower( $text );
        $text = preg_replace( array( "#[^a-z0-9]+#",
                                     "#^_+|_+$#" ),
                              array( "_",
                                     "" ),
                              $text );
        return $text;
    }

    /*!
     \static
     Cleans up \a $text for use in a url, every run of characters other than
     a-z, A-Z, 0-9, underscore, exclamation mark, dot and dash is replaced
     with the current word separator.
     \param $text The text to clean up
     \param $charsetName Not used by this command
     \return The cleaned up text.
     \sa wordSeparator
    */
    static function commandUrlCleanup( $text, $charsetName )
    {
        $sep = eZCharTransform::wordSeparator();
        $sepQ = preg_quote( $sep );
        $text = preg_replace( array( "#[^a-zA-Z0-9_!.-]+#",
                                     "#^[.]+|[!.]+$#", // Remove dots at beginning/end
                                     "#\.\.+#", // Remove double dots
                                     "#[{$sepQ}]+#", // Turn multiple separators into one
                                     "#^[{$sepQ}]+|[{$sepQ}]+$#" ), // Strip separator from beginning/end
                              array( $sep,
                                     $sep,
                                     $sep,
                                     $sep,
                                     "" ),
                              $text );
        return $text;
    }

    /*!
     \static
     Cleans up \a $text for use in a url with IRI support, only some
     reserved characters are replaced with the current word separator.
     \param $text The text to clean up
     \param $charsetName Not used by this command
     \return The cleaned up text.
     \sa wordSeparator
    */
    static function commandUrlCleanupIRI( $text, $charsetName )
    {
        // With IRI support we keep all characters except some reserved ones,
        // they are space, ampersand, semi-colon, forward slash, colon, equal sign, question mark,
        // square brackets, parenthesis, plus.
        //
        // Note: Space is turned into a dash to make it easier for people to
        //       paste urls from the system and have the whole url recognized
        //       instead of being broken off
        $sep = eZCharTransform::wordSeparator();
        $sepQ = preg_quote( $sep );
        // Characters stripped from the beginning and end of the text
        $prepost = " ." . $sepQ;
        if ( $sep != "-" )
            $prepost .= "-";
        $text = preg_replace( array( "#[ \\\\%\#&;/:=?\[\]()+]+#",
                                     "#^[.]+|[!.]+$#", // Remove dots at beginning/end
                                     "#\.\.+#", // Remove double dots
                                     "#[{$sepQ}]+#", // Turn multiple separators into one
                                     "#^[{$prepost}]+|[{$prepost}]+$#" ),
                              array( $sep,
                                     $sep,
                                     $sep,
                                     $sep,
                                     "" ),
                              $text );
        return $text;
    }

    /*!
     \return The unique instance of the character transformer,
             it is kept in \c $GLOBALS['eZCharTransformInstance'].
    */
    static function instance()
    {
        $instance =& $GLOBALS['eZCharTransformInstance'];
        if ( !isset( $instance ) )
        {
            $instance = new eZCharTransform();
        }
        return $instance;
    }
}

?>