|
eZ Publish
[trunk]
|
<?php
/**
 * File containing the eZCharTransform class.
 *
 * @copyright Copyright (C) 1999-2012 eZ Systems AS. All rights reserved.
 * @license http://www.gnu.org/licenses/gpl-2.0.txt GNU General Public License v2
 * @version //autogentag//
 * @package lib
 */

/*!
  \class eZCharTransform ezchartransform.php
  \ingroup eZI18N
  \brief Performs rule based transformation of characters in a string

  A transformation is a character mapping table (applied with strtr())
  which is built by eZCodeMapper and, optionally, cached as a generated
  PHP file which is simply included on later requests.

  \sa eZCodeMapper
*/

class eZCharTransform
{
    /// The timestamp for when the format of the cache files were
    /// last changed. This must be updated when the format changes
    /// to invalidate existing cache files.
    /// 1101288452
    /// 30. Jan. 2007 - 1170165730
    /// 24. Apr. 2007 - 1177423380
    const CODE_DATE = 1177423380;

    /*!
     Constructor
    */
    function eZCharTransform()
    {
    }

    /*!
     Transforms the text according to the rules defined in \a $rule using character set \a $charset.
     \param $text The text string to be converted, currently Unicode arrays are not supported
     \param $rule Which transformation rule to use, can either be a string identifier or an array with identifiers.
     \param $charset Which charset to use when transforming, if \c false it will use current charset (i18n.ini).
     \param $useCache If \c true then it will use cache files for the mapping,
                      if not it will have to calculate them each time.
     \return The transformed text; an empty \a $text is returned unchanged.
    */
    function transform( $text, $rule, $charset = false, $useCache = true )
    {
        if ( $text === '' )
        {
            return $text;
        }

        // The charset name must be resolved before the cache branch: it is part of
        // the cache file name and it is also needed by the mapper when caching is
        // disabled or when the cache file is missing/outdated.
        $charsetName = ( $charset === false ? eZTextCodec::internalCharset() : eZCharsetInfo::realCharsetCode( $charset ) );

        if ( $useCache )
        {
            // CRC32 is used for speed, MD5 would be more unique but is slower
            $key = eZSys::ezcrc32( 'Rule: ' . ( is_array( $rule ) ? implode( ',', $rule ) : $rule ) . '-' . $charset );
            $filepath = $this->cacheFilePath( 'rule-',
                                              '-' . $charsetName,
                                              $key );

            // Try to execute the code in the cache file, if it succeeds
            // the transformed text is returned directly
            $retText = $this->executeCacheFile( $text, $filepath );
            if ( $retText !== false )
            {
                return $retText;
            }
        }

        // Make sure we have a mapper
        $mapper = new eZCodeMapper();

        $mapper->loadTransformationFiles( $charsetName, false );

        // First generate a unicode based mapping table from the rules
        $unicodeTable = $mapper->generateMappingCode( $rule );
        unset( $unicodeTable[0] );
        // Then transform that to a table that works with the current charset
        // Any character not available in the current charset will be removed
        $charsetTable = $mapper->generateCharsetMappingTable( $unicodeTable, $charset );
        $transformationData = array( 'table' => $charsetTable );
        unset( $unicodeTable );

        if ( $useCache )
        {
            $extraCode = '';
            $this->storeCacheFile( $filepath, $transformationData,
                                   $extraCode,
                                   'Rule', $charsetName );
        }

        // Execute transformations
        return strtr( $text, $transformationData['table'] );
    }

    /*!
     Transforms the text according to the commands defined for group \a $group using character set \a $charset.
     \param $text The text string to be converted, currently Unicode arrays are not supported
     \param $group Which transformation group to use, of which the rules will be applied.
     \param $charset Which charset to use when transforming, if \c false it will use current charset (i18n.ini).
     \param $useCache If \c true then it will use cache files for the tables,
                      if not it will have to calculate them each time.
     \return The transformed text, or \c false if the commands of \a $group could not be found.
    */
    function transformByGroup( $text, $group, $charset = false, $useCache = true )
    {
        if ( $text === '' )
        {
            return $text;
        }
        $charsetName = ( $charset === false ? eZTextCodec::internalCharset() : eZCharsetInfo::realCharsetCode( $charset ) );
        if ( $useCache )
        {
            // CRC32 is used for speed, MD5 would be more unique but is slower
            $keyText = 'Group:' . $group;
            $key = eZSys::ezcrc32( $keyText . '-' . $charset );
            $filepath = $this->cacheFilePath( 'g-' . $group . '-',
                                              '-' . $charsetName,
                                              $key );

            // Try to execute the code in the cache file, if it succeeds
            // the transformed text is returned directly
            $retText = $this->executeCacheFile( $text, $filepath );
            if ( $retText !== false )
            {
                return $retText;
            }
        }

        $commands = $this->groupCommands( $group );
        if ( $commands === false )
            return false;

        $mapper = new eZCodeMapper();

        $mapper->loadTransformationFiles( $charsetName, $group );

        $rules = array();
        foreach ( $commands as $command )
        {
            $rules = array_merge( $rules,
                                  $mapper->decodeCommand( $command['command'], $command['parameters'] ) );
        }

        // First generate a unicode based mapping table from the rules
        $unicodeTable = $mapper->generateMappingCode( $rules );
        unset( $unicodeTable[0] );
        // Then transform that to a table that works with the current charset
        // Any character not available in the current charset will be removed
        $charsetTable = $mapper->generateCharsetMappingTable( $unicodeTable, $charset );
        $transformationData = array( 'table' => $charsetTable );
        unset( $unicodeTable );

        if ( $useCache )
        {
            // Commands may contribute extra PHP code which is appended to the cache file
            $extraCode = '';
            foreach ( $commands as $command )
            {
                $code = $mapper->generateCommandCode( $command, $charsetName );
                if ( $code !== false )
                {
                    $extraCode .= $code . "\n";
                }
            }
            $this->storeCacheFile( $filepath, $transformationData,
                                   $extraCode,
                                   'Group:' . $group, $charsetName );
        }

        // Execute transformations
        $text = strtr( $text, $transformationData['table'] );

        // Execute custom code
        foreach ( $commands as $command )
        {
            $mapper->executeCommandCode( $text, $command, $charsetName );
        }

        return $text;
    }

    /*!
     \private
     \static
     \return the path of the cached transformation tables.

     The value is memoized in \c $GLOBALS['eZCodeMapperCachePath'].
    */
    function cachedTransformationPath()
    {
        $dir =& $GLOBALS['eZCodeMapperCachePath'];
        if ( isset( $dir ) )
            return $dir;

        $sys = eZSys::instance();
        $dir = $sys->cacheDirectory() . '/trans';
        return $dir;
    }

    /*!
     \private
     Finds all commands defined for group \a $group.
     The groups and their commands are defined in \c transform.ini.
     The result is memoized per group in \c $this->GroupRules.

     \return An array with commands, or \c false if the group is not active
             or not defined in \c transform.ini. Each entry consists of:
             - command - Name of the command
             - parameters - Array with parameters for command
    */
    function groupCommands( $group )
    {
        // Reference into the per-instance cache, assigning $rules below fills the cache
        $rules =& $this->GroupRules[$group];
        if ( isset( $rules ) )
            return $rules;

        $ini = eZINI::instance( 'transform.ini' );
        $groups = $ini->variable( 'Transformation', 'Groups' );
        if ( !in_array( $group, $groups ) )
        {
            eZDebug::writeError( "Transformation group $group is not part of the active group list Groups in transform.ini", __METHOD__ );
            return false;
        }

        if ( !$ini->hasGroup( $group ) )
        {
            eZDebug::writeError( "Transformation group $group is missing in transform.ini", __METHOD__ );
            return false;
        }

        $rules = array();
        $ruleTexts = $ini->variable( $group, 'Commands' );
        foreach ( $ruleTexts as $ruleText )
        {
            // A command looks like "name" or "name(param1,param2)"; other texts are skipped
            if ( preg_match( "#^([a-zA-Z][a-zA-Z0-9_-]+)(\((.+)\))?$#", $ruleText, $matches ) )
            {
                $command = $matches[1];
                $parameters = array();
                if ( isset( $matches[2] ) )
                {
                    $parameters = explode( ',', $matches[3] );
                }
                $rules[] = array( 'command' => $command,
                                  'parameters' => $parameters );
            }
        }

        return $rules;
    }

    /*!
     Get cache file path, the cache directory is created if it does not exist.

     \param $prefix Text placed before the key in the file name
     \param $suffix Text placed after the key in the file name
     \param $key Numeric key, it is formatted as an unsigned integer

     \return cache file path.
    */
    function cacheFilePath( $prefix, $suffix, $key )
    {
        $path = eZCharTransform::cachedTransformationPath();
        if ( !file_exists( $path ) )
        {
            eZDir::mkdir( $path, false, true );
        }
        return $path . '/' . $prefix . sprintf( "%u", $key ) . $suffix . '.ctt.php'; // ctt=charset transform table
    }

    /*!
     \private
     \param $text The text that should be transformed
     \param $filepath The filepath for the cache file
     \param $timestamp A timestamp value which is matched against the cache file,
                       pass for instance the timestamp of the INI file.

     \return The transformed text or \c false if there is no usable cache file.
    */
    protected function executeCacheFile( $text, $filepath, $timestamp = false )
    {
        if ( file_exists( $filepath ) )
        {
            $time = filemtime( $filepath );
            $ini = eZINI::instance( 'transform.ini' );
            // The cache file is outdated if the cache of transform.ini is newer
            if ( $ini->CacheFile && file_exists( $ini->CacheFile ) && $time < filemtime( $ini->CacheFile ) )
            {
                return false;
            }
            if ( $time >= max( self::CODE_DATE, $timestamp ) )
            {
                // Execute the PHP file, it runs in this scope and transforms $text
                include "$filepath";
                return $text;
            }
        }
        return false;
    }

    /*!
     \private
     Stores the mapping table found in \a $transformationData in the cache file \a $filepath.

     \param $filepath The filepath for the cache file
     \param $transformationData Array with the mapping table in the \c 'table' entry
     \param $extraCode PHP code which is appended after the table lookup, can be empty
     \param $type Text describing the type of transformation, written as a comment
     \param $charsetName Name of the charset, written as a comment
     \return The result of eZPHPCreator::store().
    */
    function storeCacheFile( $filepath, $transformationData, $extraCode, $type, $charsetName )
    {
        $file = basename( $filepath );
        $dir = dirname( $filepath );
        $php = new eZPHPCreator( $dir, $file );

        $php->addComment( "Cached transformation data" );
        $php->addComment( "Type: $type" );
        $php->addComment( "Charset: $charsetName" );
        $php->addComment( "Cached transformation data" );

        // The generated code transforms the variable $text of the including scope
        $php->addCodePiece( '$data = ' . eZCharTransform::varExport( $transformationData ) . ";\n" );
        $php->addCodePiece( "\$text = strtr( \$text, \$data['table'] );\n" );

        if ( $extraCode )
        {
            $php->addCodePiece( $extraCode );
        }

        return $php->store( true );
    }

    /*!
     \private
     Creates a text representation of the value \a $value which can
     be placed in files and be read back by a PHP parser as it was.
     \return The result of var_export() for \a $value.
    */
    static function varExport( $value )
    {
        return var_export( $value, true );
    }

    /*!
     \static
     Returns the current word separator, if none is found it will read from site.ini URLTranslator/WordSeparator
     The names \c dash, \c underscore and \c space are recognized, any other
     value gives a dash.
     \sa setWordSeparator
    */
    static function wordSeparator()
    {
        if ( isset( $GLOBALS['eZCharTransform_wordSeparator'] ) )
        {
            return $GLOBALS['eZCharTransform_wordSeparator'];
        }
        else
        {
            $ini = eZINI::instance();
            $separator = strtolower( $ini->variable( "URLTranslator", "WordSeparator" ) );
            switch ( $separator )
            {
                case 'dash':
                    $separator = '-';
                    break;
                case 'underscore':
                    $separator = '_';
                    break;
                case 'space':
                    $separator = ' ';
                    break;
                default:
                    // Unknown names give a dash, this value is not remembered
                    return '-';
            }
            $GLOBALS['eZCharTransform_wordSeparator'] = $separator;
            return $separator;
        }
    }

    /*!
     Sets the current word separator, set it to \c null to use default value.
    */
    function setWordSeparator( $char )
    {
        $GLOBALS['eZCharTransform_wordSeparator'] = $char;
    }

    /*!
     \static
     Cleans up \a $text the way old style url aliases were made,
     \a $charsetName is not used.
     \return The cleaned up text.
    */
    static function commandUrlCleanupCompat( $text, $charsetName )
    {
        // Old style of url alias with lowercase only and underscores for separators
        $text = strtolower( $text );
        $text = preg_replace( array( "#[^a-z0-9]+#",
                                     "#^_+|_+$#" ),
                              array( "_",
                                     "" ),
                              $text );
        return $text;
    }

    /*!
     \static
     Cleans up \a $text for use in urls, only letters a-z (both cases), digits,
     underscore, exclamation mark, dot and dash are kept, anything else is turned
     into the current word separator. \a $charsetName is not used.
     \return The cleaned up text.
     \sa wordSeparator
    */
    static function commandUrlCleanup( $text, $charsetName )
    {
        $sep  = eZCharTransform::wordSeparator();
        $sepQ = preg_quote( $sep );
        $text = preg_replace( array( "#[^a-zA-Z0-9_!.-]+#",
                                     "#^[.]+|[!.]+$#", # Remove dots at beginning/end
                                     "#\.\.+#", # Remove double dots
                                     "#[{$sepQ}]+#", # Turn multiple separators into one
                                     "#^[{$sepQ}]+|[{$sepQ}]+$#" ), # Strip separator from beginning/end
                              array( $sep,
                                     $sep,
                                     $sep,
                                     $sep,
                                     "" ),
                              $text );
        return $text;
    }

    /*!
     \static
     Cleans up \a $text for use in urls with IRI support, see the comment
     in the code for the characters which are replaced. \a $charsetName is not used.
     \return The cleaned up text.
     \sa wordSeparator
    */
    static function commandUrlCleanupIRI( $text, $charsetName )
    {
        // With IRI support we keep all characters except some reserved ones,
        // they are space, ampersand, semi-colon, forward slash, colon, equal sign, question mark,
        // square brackets, parenthesis, plus.
        //
        // Note: Space is turned into a dash to make it easier for people to
        //       paste urls from the system and have the whole url recognized
        //       instead of being broken off
        $sep  = eZCharTransform::wordSeparator();
        $sepQ = preg_quote( $sep );
        // Characters stripped from the beginning/end: space, dot, the separator and always the dash
        $prepost = " ." . $sepQ;
        if ( $sep != "-" )
            $prepost .= "-";
        $text = preg_replace( array( "#[ \\\\%\#&;/:=?\[\]()+]+#",
                                     "#^[.]+|[!.]+$#", # Remove dots at beginning/end
                                     "#\.\.+#", # Remove double dots
                                     "#[{$sepQ}]+#", # Turn multiple separators into one
                                     "#^[{$prepost}]+|[{$prepost}]+$#" ),
                              array( $sep,
                                     $sep,
                                     $sep,
                                     $sep,
                                     "" ),
                              $text );
        return $text;
    }

    /**
     * Returns a shared instance of the eZCharTransform class.
     *
     * The instance is kept in $GLOBALS['eZCharTransformInstance'].
     *
     * @return eZCharTransform
     */
    static function instance()
    {
        $instance =& $GLOBALS['eZCharTransformInstance'];
        if ( !isset( $instance ) )
        {
            $instance = new eZCharTransform();
        }
        return $instance;
    }
}