00001 <?php
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
// Type tags stored as element 0 of every entry in a transformation table.

// Direct mapping: a single source code maps to one or more destination codes.
define( 'EZ_CODEMAPPER_TYPE_DIRECT', 1 );
// Range (transpose) mapping: every code in a range is shifted by an offset.
define( 'EZ_CODEMAPPER_TYPE_RANGE', 2 );
// Replace mapping: every code in a range is replaced by the same destination value(s).
define( 'EZ_CODEMAPPER_TYPE_REPLACE', 3 );
00044
00045 class eZCodeMapper
00046 {
00047
00048
00049
/*!
 PHP 4 style constructor (method named after the class).
 Starts out with no transformation tables and no loaded transformation files.
*/
function eZCodeMapper()
{
    // The right-hand assignment runs first, so TransformationTables is created before TransformationFiles.
    $this->TransformationFiles = $this->TransformationTables = array();
}
00055
00056
00057
00058
/*!
 \param $identifier The rule identifier to look up.
 \return The transformation table stored for \a $identifier,
         or \c false if no table with that identifier has been loaded.
*/
function mappingTable( $identifier )
{
    return isset( $this->TransformationTables[$identifier] )
        ? $this->TransformationTables[$identifier]
        : false;
}
00065
00066
00067
00068
/*!
 \return An array with the identifiers of all transformation tables loaded so far.
*/
function ruleNames()
{
    $names = array();
    foreach ( $this->TransformationTables as $name => $unusedTable )
    {
        $names[] = $name;
    }
    return $names;
}
00073
00074
00075
00076
/*!
 Reports the error message \a $text, optionally prefixed with a file position.

 The message is sent to eZCLI::error() when a class named 'ezcli' exists,
 otherwise to eZDebug::writeError().

 \param $text     The message to report.
 \param $position Either \c false (no prefix) or an array with the keys
                  'file', 'from' => array( line, column ) and optionally
                  'to' => array( line, column ).
*/
function error( $text, $position = false )
{
    // Start with an empty prefix; previously $str was undefined when no position was passed.
    $str = '';
    if ( $position )
    {
        $str = $position['file'] . ':' . $position['from'][0] . ' C' . $position['from'][1];
        if ( isset( $position['to'] ) )
            $str .= ' -> L' . $position['to'][0] . ' C' . $position['to'][1];
        $str .= ':';
    }
    $str .= $text;

    if ( class_exists( 'ezcli' ) )
    {
        include_once( 'lib/ezutils/classes/ezcli.php' );
        $cli =& eZCLI::instance();
        $cli->error( $str );
    }
    else
    {
        eZDebug::writeError( $str, 'eZCodeMapper::error' );
    }
}
00098
00099
00100
00101
/*!
 Reports the warning message \a $text, optionally prefixed with a file position.

 The message is sent to eZCLI::warning() when a class named 'ezcli' exists,
 otherwise to eZDebug::writeWarning().

 \param $text     The message to report.
 \param $position Either \c false (no prefix) or an array with the keys
                  'file', 'from' => array( line, column ) and optionally
                  'to' => array( line, column ).
*/
function warning( $text, $position = false )
{
    // Start with an empty prefix; previously $str was undefined when no position was passed.
    $str = '';
    if ( $position )
    {
        $str = $position['file'] . ':' . $position['from'][0] . ' C' . $position['from'][1];
        if ( isset( $position['to'] ) )
            $str .= ' -> L' . $position['to'][0] . ' C' . $position['to'][1];
        $str .= ':';
    }
    $str .= $text;

    if ( class_exists( 'ezcli' ) )
    {
        include_once( 'lib/ezutils/classes/ezcli.php' );
        $cli =& eZCLI::instance();
        $cli->warning( $str );
    }
    else
    {
        eZDebug::writeWarning( $str, 'eZCodeMapper::warning' );
    }
}
00123
00124
00125
00126
/*!
 \param $name The transformation file name as registered by parseTransformationFile().
 \return \c true if \a $name is in the list of transformation files
         that have already been parsed, \c false otherwise.
*/
function isTranformationLoaded( $name )
{
    // Same loose comparison as in_array() without the strict flag.
    $foundAt = array_search( $name, $this->TransformationFiles );
    return $foundAt !== false;
}
00131
00132
00133
00134
00135
00136
00137
00138
00139
/*!
 Loads all transformation files configured in transform.ini that apply to
 the charset \a $currentCharset and the optional group \a $transformationGroup.

 The list of files and repositories starts with the [Transformation] group
 (Repository, Files, Extensions). Each entry in Transformation/Charsets has the
 form "charset;group"; when the charset matches \a $currentCharset the Files and
 Extensions of that INI group are added as well. The same is done for
 \a $transformationGroup unless it is \c false.

 Every file which is not already loaded is then looked up in each repository
 and parsed with parseTransformationFile() wherever it exists.

 \param $currentCharset      Charset code compared against the (resolved) charsets in transform.ini.
 \param $transformationGroup Name of an extra INI group to load, or \c false for none.
*/
function loadTransformationFiles( $currentCharset, $transformationGroup )
{
    include_once( 'lib/ezutils/classes/ezextension.php' );
    // eZCharsetInfo is used below; include it the same way parseTransformationFile() does
    // since nothing in this method otherwise guarantees that it has been loaded.
    include_once( 'lib/ezi18n/classes/ezcharsetinfo.php' );

    $ini =& eZINI::instance( 'transform.ini' );
    $repositoryList = array( $ini->variable( 'Transformation', 'Repository' ) );
    $files = $ini->variable( 'Transformation', 'Files' );
    $extensions = $ini->variable( 'Transformation', 'Extensions' );
    $repositoryList = array_merge( $repositoryList,
                                   eZExtension::expandedPathList( $extensions, 'transformations' ) );

    // Find the INI groups which are registered for the current charset
    $unicodeGroups = array();
    $charsets = $ini->variable( 'Transformation', 'Charsets' );
    foreach ( $charsets as $entry )
    {
        $parts = explode( ';', $entry, 2 );
        if ( count( $parts ) < 2 )
        {
            // An entry without a group part cannot select anything, report it and move on
            $this->warning( "Invalid charset entry '$entry' in transform.ini, expected 'charset;group'" );
            continue;
        }
        $charset = eZCharsetInfo::realCharsetCode( $parts[0] );
        $group = $parts[1];
        if ( $charset == $currentCharset and
             !in_array( $group, $unicodeGroups ) )
        {
            $unicodeGroups[] = $group;
        }
    }

    // Add the explicitly requested group, but only once. A duplicate would add the
    // same repositories twice and cause each file to be parsed twice.
    if ( $transformationGroup !== false and
         !in_array( $transformationGroup, $unicodeGroups ) )
    {
        $unicodeGroups[] = $transformationGroup;
    }

    // Collect files and extension repositories from the selected groups
    foreach ( $unicodeGroups as $unicodeGroup )
    {
        if ( !$ini->hasGroup( $unicodeGroup ) )
            continue;
        $files = array_merge( $files, $ini->variable( $unicodeGroup, 'Files' ) );
        $extensions = $ini->variable( $unicodeGroup, 'Extensions' );
        $repositoryList = array_merge( $repositoryList,
                                       eZExtension::expandedPathList( $extensions, 'transformations' ) );
    }

    foreach ( $files as $file )
    {
        // Only load files that are not loaded yet
        if ( $this->isTranformationLoaded( $file ) )
            continue;

        // The file is parsed from every repository in which it exists
        foreach ( $repositoryList as $repository )
        {
            $trFile = $repository . '/' . $file;
            if ( file_exists( $trFile ) )
            {
                $this->parseTransformationFile( $trFile, $file );
            }
        }
    }
}
00199
00200
00201
00202
00203
00204
/*!
 Parses the transformation file \a $filename and merges the rules it defines
 into the transformation tables of this object. \a $name is registered in the
 list of loaded transformation files.

 Syntax understood by the parser (as implemented below):
 - '#' starts a comment that lasts to the end of the line. The comment is
   stripped before any other parsing, so a '#' inside a quoted string also
   starts a comment.
 - A line of the form "identifier:" starts a new rule block. The identifier
   must match [a-zA-Z_-][a-zA-Z0-9_-]*.
 - Values are written as a quoted string ("..." with \" and \\ as escapes),
   as U+XXXX (four hex digits), as two hex digits, or as the keywords
   \c remove and \c keep.
 - "source = value ..."           direct mapping of one code to one or more values
 - "start - end = value ..."      replaces every code in the range with the values
 - "start - end + value"          transposes the range upwards ('-' for downwards)
 - "start - end % modulo + value" transpose which only affects every modulo'th code

 Lines which cannot be parsed are reported with warning() and skipped.

 \param $filename Path of the file to read.
 \param $name     Name under which the file is registered as loaded.
 \return \c false if the file could not be opened, otherwise nothing.
*/
function parseTransformationFile( $filename, $name )
{
    $tbl = array();

    $fd = fopen( $filename, "rb" );
    if ( !$fd )
    {
        $this->error( "Failed opening $filename" );
        return false;
    }

    $this->TransformationFiles[] = $name;

    include_once( 'lib/ezi18n/classes/eztextcodec.php' );
    include_once( 'lib/ezi18n/classes/ezcharsetinfo.php' );
    $this->ISOUnicodeCodec =& eZTextCodec::instance( 'iso-8859-1', 'unicode' );

    $buffer = '';
    $lineNum = 1;
    $hexValues = "0123456789abcdefABCDEF";
    $identifier = false;

    // The file is read in chunks. Each iteration first consumes all complete
    // lines found in the buffer and then appends the next chunk to it.
    while ( !feof( $fd ) or strlen( $buffer ) > 0 )
    {
        // Split the buffer into complete lines, the remainder stays in the buffer
        $lines = array();
        $startPos = 0;
        $eolPos = strpos( $buffer, "\n" );
        while ( $eolPos !== false )
        {
            $lines[] = array( 'text' => substr( $buffer, $startPos, $eolPos - $startPos ),
                              'line' => $lineNum );
            ++$lineNum;
            $startPos = $eolPos + 1;
            $eolPos = strpos( $buffer, "\n", $startPos );
        }
        if ( $startPos > 0 )
            $buffer = (string)substr( $buffer, $startPos );

        // At end of file the remainder is the last line. Without this a file that
        // does not end with a newline would keep the loop running forever.
        if ( feof( $fd ) and strlen( $buffer ) > 0 )
        {
            $lines[] = array( 'text' => $buffer,
                              'line' => $lineNum );
            ++$lineNum;
            $buffer = '';
        }

        foreach ( $lines as $lineData )
        {
            $line = $lineData['text'];
            $linePos = $lineData['line'];

            // Strip comments and skip lines that are empty afterwards
            $commentPos = strpos( $line, '#' );
            if ( $commentPos !== false )
            {
                $line = substr( $line, 0, $commentPos );
            }
            if ( strlen( trim( $line ) ) == 0 )
                continue;

            // A line ending in ':' starts a new rule block
            if ( preg_match( '#^(.+):[ \t]*$#', $line, $matches ) )
            {
                $identifier = $matches[1];
                if ( !preg_match( '#^[a-zA-Z_-][a-zA-Z0-9_-]*$#', $identifier ) )
                {
                    $this->warning( "Invalid identifier '$identifier', can only contain a-z, a-Z - and _",
                                    array( 'file' => $filename, 'from' => array( $linePos, strlen( $identifier ) ) ) );
                    $identifier = false;
                }
                continue;
            }
            if ( $identifier === false )
            {
                $this->warning( "No identifier defined yet, skipping: '" . $line . "'",
                                array( 'file' => $filename, 'from' => array( $linePos, 0 ) ) );
                continue;
            }

            // Result of parsing one rule line
            $sourceValue = false;
            $sourceEndValue = false;
            $destinationValues = false;
            $transposeValue = false;
            $transposeAdd = true;
            $moduloValue = 1;
            $type = false;

            // The line is parsed with a small state machine, the state tells
            // what kind of token is expected next.
            $state = 'source';
            $failed = false;
            $len = strlen( $line );
            $pos = 0;
            while ( $pos < $len )
            {
                // Skip whitespace between tokens
                while ( $pos < $len and
                        ( $line[$pos] == ' ' or
                          $line[$pos] == "\t" ) )
                {
                    ++$pos;
                }
                if ( $pos >= $len )
                    break;

                $char = $line[$pos];

                // Step 1: try to read a value token at the current position
                $unicodeData = false;
                if ( $char == '"' )
                {
                    // Find the end quote, quotes preceded by a backslash are part of the string
                    $delimiterPos = $pos;
                    while ( $delimiterPos < $len )
                    {
                        $delimiterPos = strpos( $line, '"', $delimiterPos + 1 );
                        if ( $delimiterPos === false or
                             $delimiterPos <= $pos + 1 or
                             $line[$delimiterPos - 1] != "\\" )
                            break;
                    }
                    if ( $delimiterPos === false )
                    {
                        $this->warning( "No end-quote found for line, skipping: '$line'",
                                        array( 'file' => $filename,
                                               'from' => array( $linePos, $pos ),
                                               'to' => array( $linePos, strlen( $line ) ) ) );
                        $failed = true;
                        break;
                    }
                    $str = str_replace( array( "\\\"", "\\\\" ),
                                        array( "\"", "\\" ),
                                        substr( $line, $pos + 1, $delimiterPos - $pos - 1 ) );
                    $pos = $delimiterPos + 1;
                    $unicodeData = array( 'value' => $str,
                                          'type' => 'string' );
                }
                else if ( $char == 'U' and
                          $pos + 1 < $len and
                          $line[$pos + 1] == '+' )
                {
                    // U+XXXX, exactly four hex digits are required
                    $hexPos = $pos + 2;
                    if ( $hexPos + 4 > $len )
                    {
                        $this->warning( "Found U+ value with " . ( 4 - ( $len - $hexPos ) ) . " missing hex numbers",
                                        array( 'file' => $filename,
                                               'from' => array( $linePos, $hexPos ) ) );
                        $failed = true;
                        break;
                    }
                    for ( $offset = 0; $offset < 4; ++$offset )
                    {
                        $hexChar = $line[$hexPos + $offset];
                        if ( $hexChar == ' ' or
                             $hexChar == "\t" )
                        {
                            $this->warning( "Found U+ value with " . ( 4 - $offset ) . " missing hex numbers",
                                            array( 'file' => $filename,
                                                   'from' => array( $linePos, $hexPos ),
                                                   'to' => array( $linePos, $hexPos + $offset ) ) );
                            $failed = true;
                            break;
                        }
                        if ( strpos( $hexValues, $hexChar ) === false )
                        {
                            $this->warning( "Found U+ value with invalid hex numbers ($hexChar)",
                                            array( 'file' => $filename,
                                                   'from' => array( $linePos, $hexPos ),
                                                   'to' => array( $linePos, $hexPos + $offset ) ) );
                            $failed = true;
                            break;
                        }
                    }
                    if ( $failed )
                        break;
                    $unicodeData = array( 'value' => hexdec( substr( $line, $hexPos, 4 ) ),
                                          'type' => 'unicode' );
                    $pos = $hexPos + 4;
                }
                else if ( strpos( $hexValues, $char ) !== false and
                          $pos + 1 < $len and
                          strpos( $hexValues, $line[$pos + 1] ) !== false )
                {
                    // Two hex digits. Both digits were verified by the condition above,
                    // so only these two characters are decoded (decoding four characters
                    // here would pull in digits of whatever follows, e.g. in "41=42").
                    $unicodeData = array( 'value' => hexdec( substr( $line, $pos, 2 ) ),
                                          'type' => 'ascii' );
                    $pos += 2;
                }
                else if ( substr( $line, $pos, 6 ) == 'remove' )
                {
                    $unicodeData = array( 'value' => false,
                                          'type' => 'remove' );
                    $pos += 6;
                }
                else if ( substr( $line, $pos, 4 ) == 'keep' )
                {
                    $unicodeData = array( 'value' => true,
                                          'type' => 'keep' );
                    $pos += 4;
                }

                // Step 2: feed the value, or the marker character, to the state machine.
                // $problem is set to a warning text when the token is not acceptable.
                $problem = false;
                if ( $unicodeData )
                {
                    // Single value slots only accept strings of one character
                    $isLongString = ( $unicodeData['type'] == 'string' and
                                      strlen( $unicodeData['value'] ) > 1 );
                    if ( $state == 'source' )
                    {
                        if ( $isLongString )
                        {
                            $problem = "Text string with more than one character cannot be used as input value '" . $unicodeData['value'] . "'";
                        }
                        else
                        {
                            $sourceValue = $this->extractUnicodeValue( $unicodeData );
                            $state = 'marker';
                        }
                    }
                    else if ( $state == 'marker' )
                    {
                        $problem = "Source value not expected, a source value has already been extracted at " . $line . "[" . $pos . "]";
                    }
                    else if ( $state == 'range_input' )
                    {
                        if ( $isLongString )
                        {
                            $problem = "Text string with more than one character cannot be used as range end value '" . $unicodeData['value'] . "'";
                        }
                        else
                        {
                            $sourceEndValue = $this->extractUnicodeValue( $unicodeData );
                            $state = 'range_marker_or_modulo';
                        }
                    }
                    else if ( $state == 'range_marker_or_modulo' or
                              $state == 'range_marker' )
                    {
                        $problem = "Range value not expected, a range value has already been extracted at " . $line . "[" . $pos . "]";
                    }
                    else if ( $state == 'map_input' or
                              $state == 'replace_input' )
                    {
                        if ( !is_array( $destinationValues ) )
                            $destinationValues = array();
                        $destinationValues = array_merge( $destinationValues,
                                                          $this->extractUnicodeValues( $unicodeData ) );
                        $type = ( $state == 'map_input' ) ? 'map' : 'replace';
                    }
                    else if ( $state == 'transpose_input' )
                    {
                        if ( $isLongString )
                        {
                            $problem = "Text string with more than one character cannot be used as transpose value '" . $unicodeData['value'] . "'";
                        }
                        else
                        {
                            $transposeValue = $this->extractUnicodeValue( $unicodeData );
                            $type = 'transpose';
                        }
                    }
                    else if ( $state == 'transpose_modulo' )
                    {
                        if ( $isLongString )
                        {
                            $problem = "Text string with more than one character cannot be used as transpose modulo value '" . $unicodeData['value'] . "'";
                        }
                        else
                        {
                            $moduloValue = $this->extractUnicodeValue( $unicodeData );
                            if ( $moduloValue == 0 )
                            {
                                $this->error( "Modulo value of 0 is not allowed, 1 will be used instead",
                                              array( 'file' => $filename,
                                                     'from' => array( $linePos, $pos ) ) );
                                // Actually use 1, as the message promises
                                $moduloValue = 1;
                            }
                            $state = 'range_marker';
                        }
                    }
                }
                else
                {
                    $isTransposeMarker = ( $char == '-' or $char == '+' );
                    if ( $state == 'source' )
                    {
                        if ( $char == '=' )
                            $problem = "Cannot use map marker $char without prior character value";
                        else if ( $isTransposeMarker )
                            $problem = "Cannot use range marker $char without prior character value";
                        else
                            $problem = "Unknown character '$char', expecting input value";
                    }
                    else if ( $state == 'marker' )
                    {
                        if ( $char == '=' )
                        {
                            $state = 'map_input';
                            ++$pos;
                        }
                        else if ( $char == '-' )
                        {
                            $state = 'range_input';
                            ++$pos;
                        }
                        else if ( $char == '+' )
                            $problem = "Cannot use range marker $char without prior character value";
                        else
                            $problem = "Unknown character '$char', expecting marker";
                    }
                    else if ( $state == 'range_input' )
                    {
                        // This state had no handler for non-value characters, which left
                        // $pos unchanged and made the parser loop forever on such lines.
                        $problem = "Unknown character '$char', expecting range end value";
                    }
                    else if ( $state == 'range_marker_or_modulo' or
                              $state == 'range_marker' )
                    {
                        if ( $state == 'range_marker_or_modulo' and
                             $char == '%' )
                        {
                            $state = 'transpose_modulo';
                            ++$pos;
                        }
                        else if ( $char == '=' )
                        {
                            $state = 'replace_input';
                            ++$pos;
                        }
                        else if ( $isTransposeMarker )
                        {
                            $transposeAdd = ( $char == '+' );
                            $state = 'transpose_input';
                            ++$pos;
                        }
                        else
                            $problem = "Unknown character '$char', expecting range end value";
                    }
                    else if ( $state == 'map_input' )
                    {
                        if ( $char == '=' )
                            $problem = "Duplicate mapping marker $char";
                        else if ( $isTransposeMarker )
                            $problem = "Already mapping values, cannot use range/transpose marker $char";
                        else
                            $problem = "Unknown character '$char', expecting output values";
                    }
                    else if ( $state == 'transpose_modulo' )
                    {
                        if ( $char == '%' )
                            $problem = "Modulo marker already used, cannot use $char";
                        else if ( $isTransposeMarker )
                            $problem = "Transpose marker $char used, but no modulo value has been found yet";
                        else
                            $problem = "Unknown character '$char', expecting modulo value";
                    }
                    else if ( $state == 'transpose_input' )
                    {
                        if ( $char == '=' )
                            $problem = "Already transposing, cannot use mapping marker $char";
                        else if ( $isTransposeMarker )
                            $problem = "Duplicate transpose marker $char";
                        else
                            $problem = "Unknown character '$char', expecting transpose value";
                    }
                    else if ( $state == 'replace_input' )
                    {
                        if ( $char == '=' )
                            $problem = "Already replacing, cannot use mapping marker $char";
                        else if ( $isTransposeMarker )
                            $problem = "Already replacing, cannot use transpose marker $char";
                        else
                            $problem = "Unknown character '$char', expecting replace value";
                    }
                }

                if ( $problem !== false )
                {
                    $this->warning( $problem,
                                    array( 'file' => $filename,
                                           'from' => array( $linePos, $pos ) ) );
                    $failed = true;
                    break;
                }
            }

            // Store the parsed rule in the table of the current identifier
            if ( !$failed and $identifier )
            {
                if ( !isset( $tbl[$identifier] ) )
                    $tbl[$identifier] = array();

                if ( $type == 'map' )
                {
                    $this->appendDirectMapping( $tbl[$identifier], $identifier, $sourceValue, $destinationValues );
                }
                else if ( $type == 'replace' )
                {
                    $this->appendReplaceMapping( $tbl[$identifier], $identifier, $sourceValue, $sourceEndValue, $destinationValues );
                }
                else if ( $type == 'transpose' )
                {
                    $this->appendTransposeMapping( $tbl[$identifier], $identifier, $sourceValue, $sourceEndValue, $transposeValue, $transposeAdd, $moduloValue );
                }
            }
        }

        // Read the next chunk and normalise all line endings to "\n"
        if ( !feof( $fd ) )
        {
            $buffer .= fread( $fd, 4096 );

            // A "\r" at the very end of the chunk may be the first half of a "\r\n"
            // pair. It is held back until more data has been read, otherwise the
            // pair would turn into two line endings and shift the line numbers.
            $heldBack = '';
            if ( !feof( $fd ) and substr( $buffer, -1 ) == "\r" )
            {
                $heldBack = "\r";
                $buffer = substr( $buffer, 0, -1 );
            }
            $buffer = preg_replace( "#(\r\n|\r|\n)#", "\n", $buffer ) . $heldBack;
        }
    }

    fclose( $fd );

    // Tables with an identifier that already exists are replaced by the new ones
    $this->TransformationTables = array_merge( $this->TransformationTables, $tbl );
}
00860
00861
00862
00863
00864
00865
00866
00867
00868
/*!
 Appends a direct mapping from \a $sourceValue to \a $destinationValues to \a $block.

 When the last entry of \a $block is a direct mapping with the same identifier
 the mapping is added to that entry, otherwise a new entry of the form
 array( EZ_CODEMAPPER_TYPE_DIRECT, array( source => destination ), identifier )
 is appended.

 \param $block             The list of table entries, modified in place.
 \param $identifier        Identifier of the rule block the mapping belongs to.
 \param $sourceValue       The code to map from, used as array key.
 \param $destinationValues Array of values to map to. An array with exactly one
                           element is stored as that element rather than as an array.
*/
function appendDirectMapping( &$block, $identifier, $sourceValue, $destinationValues )
{
    if ( count( $destinationValues ) == 1 )
        $destinationValues = array_pop( $destinationValues );

    $lastIndex = count( $block ) - 1;
    $canExtendLast = ( isset( $block[$lastIndex] ) and
                       $block[$lastIndex][0] == EZ_CODEMAPPER_TYPE_DIRECT and
                       $block[$lastIndex][2] == $identifier );
    if ( !$canExtendLast )
    {
        $block[] = array( EZ_CODEMAPPER_TYPE_DIRECT,
                          array( $sourceValue => $destinationValues ),
                          $identifier );
        return;
    }
    $block[$lastIndex][1][$sourceValue] = $destinationValues;
}
00888
00889
00890
00891
00892
00893
00894
00895
00896
00897
/*!
 Appends a replace mapping for the range \a $sourceValue to \a $sourceEndValue to \a $block.

 When the last entry of \a $block is a replace mapping with the same identifier
 the range is added to that entry, otherwise a new entry of the form
 array( EZ_CODEMAPPER_TYPE_REPLACE, array( array( start, end, destination ) ), identifier )
 is appended.

 \param $block             The list of table entries, modified in place.
 \param $identifier        Identifier of the rule block the mapping belongs to.
 \param $sourceValue       First code of the range.
 \param $sourceEndValue    Last code of the range.
 \param $destinationValues Array of replacement values. An array with exactly one
                           element is stored as that element rather than as an array.
*/
function appendReplaceMapping( &$block, $identifier, $sourceValue, $sourceEndValue, $destinationValues )
{
    if ( count( $destinationValues ) == 1 )
        $destinationValues = array_pop( $destinationValues );
    $rangeEntry = array( $sourceValue, $sourceEndValue, $destinationValues );

    $lastIndex = count( $block ) - 1;
    $canExtendLast = ( isset( $block[$lastIndex] ) and
                       $block[$lastIndex][0] == EZ_CODEMAPPER_TYPE_REPLACE and
                       $block[$lastIndex][2] == $identifier );
    if ( !$canExtendLast )
    {
        $block[] = array( EZ_CODEMAPPER_TYPE_REPLACE,
                          array( $rangeEntry ),
                          $identifier );
        return;
    }
    $block[$lastIndex][1][] = $rangeEntry;
}
00917
00918
00919
00920
00921
00922
00923
00924
00925
00926
00927
/*!
 Appends a transpose mapping for the range \a $sourceValue to \a $sourceEndValue to \a $block.

 When the last entry of \a $block is a range mapping with the same identifier
 the range is added to that entry, otherwise a new entry of the form
 array( EZ_CODEMAPPER_TYPE_RANGE, array( array( start, end, offset, modulo ) ), identifier )
 is appended.

 \param $block          The list of table entries, modified in place.
 \param $identifier     Identifier of the rule block the mapping belongs to.
 \param $sourceValue    First code of the range.
 \param $sourceEndValue Last code of the range.
 \param $transposeValue The amount to transpose with.
 \param $addValue       If true the offset is \a $transposeValue, otherwise its negation.
 \param $moduloValue    Stored as the fourth element of the range entry.
*/
function appendTransposeMapping( &$block, $identifier, $sourceValue, $sourceEndValue, $transposeValue, $addValue, $moduloValue )
{
    $offset = $addValue ? $transposeValue : -$transposeValue;
    $rangeEntry = array( $sourceValue, $sourceEndValue, $offset, $moduloValue );

    $lastIndex = count( $block ) - 1;
    $canExtendLast = ( isset( $block[$lastIndex] ) and
                       $block[$lastIndex][0] == EZ_CODEMAPPER_TYPE_RANGE and
                       $block[$lastIndex][2] == $identifier );
    if ( !$canExtendLast )
    {
        $block[] = array( EZ_CODEMAPPER_TYPE_RANGE,
                          array( $rangeEntry ),
                          $identifier );
        return;
    }
    $block[$lastIndex][1][] = $rangeEntry;
}
00945
00946
00947
00948
00949
/*!
 Extracts a single value from the parsed token \a $data.

 \param $data Array with the keys 'type' and 'value' as created by parseTransformationFile().
 \return Depending on the type:
         - 'string'  The first element of the list returned by the ISOUnicodeCodec
                     member for the first character of the string.
         - 'ascii', 'unicode'  The value as is.
         - 'remove'  \c false
         - 'keep'    \c true
         - anything else  \c null
*/
function extractUnicodeValue( $data )
{
    switch ( $data['type'] )
    {
        case 'string':
        {
            $converted = $this->ISOUnicodeCodec->convertString( $data['value'][0] );
            return $converted[0];
        }

        case 'ascii':
        case 'unicode':
            return $data['value'];

        case 'remove':
            return false;

        case 'keep':
            return true;
    }
    return null;
}
00976
00977
00978
00979
00980
/*!
 Extracts a list of values from the parsed token \a $data.

 \param $data Array with the keys 'type' and 'value' as created by parseTransformationFile().
 \return Depending on the type:
         - 'string'  The result of passing the whole string to the ISOUnicodeCodec member.
         - 'ascii', 'unicode'  An array with the value as only element.
         - 'remove'  array( false )
         - 'keep'    array( true )
         - anything else  An empty array.
*/
function extractUnicodeValues( $data )
{
    switch ( $data['type'] )
    {
        case 'string':
            return $this->ISOUnicodeCodec->convertString( $data['value'] );

        case 'ascii':
        case 'unicode':
            return array( $data['value'] );

        case 'remove':
            return array( false );

        case 'keep':
            return array( true );
    }
    return array();
}
01006
01007
01008
01009
01010
01011
01012
/*!
 Expands all references to other tables in \a $table.

 Entries which are strings are taken as identifiers of other tables, they are
 looked up with mappingTable(), expanded recursively and inserted in place of
 the string. All other entries are copied as they are. Identifiers for which
 no table (or an empty one) is found are reported with eZDebug::writeError()
 and left out.

 \param $table Array of table entries and/or table identifiers.
 \return The expanded list of table entries.
*/
function expandInheritance( $table )
{
    $expanded = array();
    foreach ( $table as $entry )
    {
        if ( !is_string( $entry ) )
        {
            $expanded[] = $entry;
            continue;
        }

        $identifier = $entry;
        $referenced = $this->mappingTable( $identifier );
        if ( $referenced )
        {
            $expanded = array_merge( $expanded, $this->expandInheritance( $referenced ) );
        }
        else
        {
            eZDebug::writeError( "Failed to fetch mapping table for identifier: '$identifier'" );
        }
    }
    return $expanded;
}
01039
01040
01041
01042
01043
01044
01045
01046
01047
/*!
 Turns \a $list into a flat list of ordinal values and maps them through \a $table.

 \param $table Mapping table passed on to mapOrdinals().
 \param $list  One of:
               - a string, each byte is turned into its ordinal value with ord()
               - a numeric value, used as it is
               - an array, each element is handled recursively by this method
               Any other kind of value contributes no ordinals.
 \return The collected ordinals after they have been passed through mapOrdinals().
*/
function ordinalValues( $table, $list )
{
    $collected = array();
    if ( is_array( $list ) )
    {
        foreach ( $list as $element )
        {
            $collected = array_merge( $collected, eZCodeMapper::ordinalValues( $table, $element ) );
        }
    }
    else if ( is_string( $list ) )
    {
        // Strings are checked before numeric values so that numeric
        // strings are split into bytes as well.
        $byteCount = strlen( $list );
        $index = 0;
        while ( $index < $byteCount )
        {
            $collected[] = ord( $list[$index] );
            ++$index;
        }
    }
    else if ( is_numeric( $list ) )
    {
        $collected[] = $list;
    }
    return eZCodeMapper::mapOrdinals( $table, $collected );
}
01073
01074
01075
01076
01077
01078
/*!
 Maps each of the values in \a $ordinals through \a $table.

 A value which is a key in \a $table is replaced by the table value, this is
 repeated for as long as the result is a scalar that is a key in the table.
 When the table value is an array its elements are mapped recursively and
 added to the result one by one.

 Boolean values are never looked up in the table. They are markers and using
 them as array keys would silently turn them into the codes 1 and 0.
 Following a chain also stops when it returns to a code it has already
 visited, e.g. for an identity mapping, which would otherwise loop forever.

 \param $table    Array which maps a code to a code, an array of codes or a boolean.
 \param $ordinals List of values to map.
 \return Flat list with the mapped values.
*/
function mapOrdinals( $table, $ordinals )
{
    $mappedOrdinals = array();
    foreach ( $ordinals as $ordinal )
    {
        // Codes already followed for this ordinal, used to detect cycles
        $visited = array();
        while ( !is_array( $ordinal ) and
                !is_bool( $ordinal ) and
                isset( $table[$ordinal] ) and
                !isset( $visited[$ordinal] ) )
        {
            $visited[$ordinal] = true;
            $ordinal = $table[$ordinal];
            if ( is_array( $ordinal ) )
            {
                $ordinal = eZCodeMapper::mapOrdinals( $table, $ordinal );
            }
        }
        if ( is_array( $ordinal ) )
            $mappedOrdinals = array_merge( $mappedOrdinals, $ordinal );
        else
            $mappedOrdinals[] = $ordinal;
    }
    return $mappedOrdinals;
}
01099
01100
01101
01102
01103
/*!
 Replaces every occurrence of \a $fromCode among the destination values of
 \a $unicodeMap with \a $toCode.

 Destination values which are arrays are rebuilt element by element, when
 \a $toCode is an array its elements are inserted in place of the matching
 element. Scalar destination values are replaced by \a $toCode as a whole.
 Boolean destination values are markers, not codes, and are never replaced.

 \param $unicodeMap The map to update, modified in place.
 \param $fromCode   The code to look for among the destination values.
 \param $toCode     The replacement, a single value or an array of values.
*/
function mapExistingCodes( &$unicodeMap, $fromCode, $toCode )
{
    foreach ( $unicodeMap as $from => $to )
    {
        if ( is_array( $to ) )
        {
            $newTo = array();
            foreach ( $to as $ordinal )
            {
                // The loose comparison would let true match any non-zero code, so skip booleans
                if ( is_bool( $ordinal ) or $ordinal != $fromCode )
                {
                    $newTo[] = $ordinal;
                }
                else if ( is_array( $toCode ) )
                {
                    $newTo = array_merge( $newTo, $toCode );
                }
                else
                {
                    // array_merge() only accepts arrays, a scalar replacement is appended directly
                    $newTo[] = $toCode;
                }
            }
            $unicodeMap[$from] = $newTo;
        }
        else if ( !is_bool( $to ) and $to == $fromCode )
        {
            $unicodeMap[$from] = $toCode;
        }
    }
}
01130
01131
01132
01133
01134
01135
01136
01137
01138
01139
/*!
 Generates a flat mapping table from the entries in \a $table.

 Each entry of \a $table is an array where element 0 is one of the
 EZ_CODEMAPPER_TYPE_* values and element 1 the data for that type, as created
 by appendDirectMapping(), appendTransposeMapping() and appendReplaceMapping().
 The entries are handled in order. A source code which already has a mapping is
 never overwritten by a later entry. Destination values are first mapped through
 the table built so far (ordinalValues()), and existing destinations equal to a
 newly mapped source code are rewritten with mapExistingCodes(), so that the
 resulting table can be applied in one pass.

 \param $table         List of table entries, see expandInheritance().
 \param $allowedRanges List of array( first, last ) ranges. When it is not empty
                       only source codes inside one of the ranges are mapped and
                       destination codes outside all ranges are dropped.
 \return \c false if \a $table is not an array, otherwise an array which maps
         each source code to a single value, an array of values or \c false.
*/
function generateSimpleMappingTable( $table, $allowedRanges )
{
    if ( !is_array( $table ) )
        return false;

    $hasRanges = ( count( $allowedRanges ) > 0 );
    $unicodeMap = array();
    foreach ( $table as $tableItem )
    {
        $type = $tableItem[0];
        $item = $tableItem[1];
        if ( $type == EZ_CODEMAPPER_TYPE_DIRECT )
        {
            foreach ( $item as $fromCode => $toCode )
            {
                $toCode = eZCodeMapper::ordinalValues( $unicodeMap, $toCode );
                if ( !$hasRanges )
                {
                    if ( count( $toCode ) == 1 )
                        $toCode = $toCode[0];

                    // Never overwrite an existing mapping
                    if ( isset( $unicodeMap[$fromCode] ) )
                        continue;

                    $unicodeMap[$fromCode] = $toCode;
                    eZCodeMapper::mapExistingCodes( $unicodeMap, $fromCode, $toCode );
                    continue;
                }

                // The source code must be inside one of the allowed ranges
                $allowed = false;
                foreach ( $allowedRanges as $allowedRange )
                {
                    if ( $fromCode >= $allowedRange[0] and
                         $fromCode <= $allowedRange[1] )
                    {
                        $allowed = true;
                        break;
                    }
                }
                if ( !$allowed )
                    continue;

                // Keep only the destination codes which are inside an allowed range.
                // This used to test the flag of the source code, which is always true
                // at this point, so no destination code was ever filtered out.
                $newToCodeList = array();
                foreach ( $toCode as $toOrdinal )
                {
                    if ( is_bool( $toOrdinal ) )
                    {
                        // Booleans are markers, not codes, and are always kept
                        $newToCodeList[] = $toOrdinal;
                        continue;
                    }
                    $toAllowed = false;
                    foreach ( $allowedRanges as $allowedRange )
                    {
                        if ( $toOrdinal >= $allowedRange[0] and
                             $toOrdinal <= $allowedRange[1] )
                        {
                            $toAllowed = true;
                            break;
                        }
                    }
                    if ( $toAllowed )
                    {
                        $newToCodeList[] = $toOrdinal;
                    }
                }

                // Without any destination left no mapping is made for the source code
                if ( count( $newToCodeList ) == 0 )
                    continue;
                $toCode = ( count( $newToCodeList ) == 1 ) ? $newToCodeList[0] : $newToCodeList;

                // Never overwrite an existing mapping
                if ( isset( $unicodeMap[$fromCode] ) )
                    continue;

                eZCodeMapper::mapExistingCodes( $unicodeMap, $fromCode, $toCode );
                $unicodeMap[$fromCode] = $toCode;
            }
        }
        else if ( $type == EZ_CODEMAPPER_TYPE_RANGE )
        {
            foreach ( $item as $rangeItem )
            {
                $start = $rangeItem[0];
                $stop = $rangeItem[1];
                if ( $start > $stop )
                {
                    // Ranges may be given in reverse order
                    $tmp = $stop;
                    $stop = $start;
                    $start = $tmp;
                }
                $add = $rangeItem[2];
                $modulo = $rangeItem[3];
                // A step of 0 would never reach the end of the range
                if ( $modulo == 0 )
                    $modulo = 1;

                for ( $i = $start; $i <= $stop; $i += $modulo )
                {
                    if ( $hasRanges )
                    {
                        $allowed = false;
                        foreach ( $allowedRanges as $allowedRange )
                        {
                            if ( $i >= $allowedRange[0] and
                                 $i <= $allowedRange[1] )
                            {
                                $allowed = true;
                                break;
                            }
                        }
                        if ( !$allowed )
                            continue;
                    }

                    $replace = eZCodeMapper::ordinalValues( $unicodeMap, $i + $add );
                    if ( !$hasRanges )
                    {
                        if ( count( $replace ) == 0 )
                            $replace = false;
                        else if ( count( $replace ) == 1 )
                            $replace = $replace[0];
                        eZCodeMapper::mapExistingCodes( $unicodeMap, $i, $replace );

                        // Never overwrite an existing mapping
                        if ( isset( $unicodeMap[$i] ) )
                            continue;

                        $unicodeMap[$i] = $replace;
                    }
                    else
                    {
                        // NOTE(review): the filtered list is ordered by range first and contains
                        // a code once per range it falls into, confirm that ranges never overlap.
                        $newReplace = array();
                        foreach ( $allowedRanges as $allowedRange )
                        {
                            foreach ( $replace as $replaceOrdinal )
                            {
                                if ( $replaceOrdinal >= $allowedRange[0] and
                                     $replaceOrdinal <= $allowedRange[1] )
                                {
                                    $newReplace[] = $replaceOrdinal;
                                }
                            }
                        }
                        if ( count( $newReplace ) == 0 )
                            $replace = false;
                        else if ( count( $newReplace ) == 1 )
                            $replace = $newReplace[0];
                        else
                            $replace = $newReplace;

                        // Never overwrite an existing mapping
                        if ( isset( $unicodeMap[$i] ) )
                            continue;

                        eZCodeMapper::mapExistingCodes( $unicodeMap, $i, $replace );
                        $unicodeMap[$i] = $replace;
                    }
                }
            }
        }
        else if ( $type == EZ_CODEMAPPER_TYPE_REPLACE )
        {
            foreach ( $item as $rangeItem )
            {
                $start = $rangeItem[0];
                $stop = $rangeItem[1];
                if ( $start > $stop )
                {
                    // Ranges may be given in reverse order
                    $tmp = $stop;
                    $stop = $start;
                    $start = $tmp;
                }
                $replace = eZCodeMapper::ordinalValues( $unicodeMap, $rangeItem[2] );
                if ( !$hasRanges )
                {
                    if ( count( $replace ) == 0 )
                        $replace = false;
                    else if ( count( $replace ) == 1 )
                        $replace = $replace[0];
                    for ( $i = $start; $i <= $stop; ++$i )
                    {
                        // Never overwrite an existing mapping
                        if ( isset( $unicodeMap[$i] ) )
                            continue;

                        eZCodeMapper::mapExistingCodes( $unicodeMap, $i, $replace );
                        $unicodeMap[$i] = $replace;
                    }
                }
                else
                {
                    // NOTE(review): the filtered list is ordered by range first and contains
                    // a code once per range it falls into, confirm that ranges never overlap.
                    $newReplace = array();
                    foreach ( $allowedRanges as $allowedRange )
                    {
                        foreach ( $replace as $replaceOrdinal )
                        {
                            if ( $replaceOrdinal >= $allowedRange[0] and
                                 $replaceOrdinal <= $allowedRange[1] )
                            {
                                $newReplace[] = $replaceOrdinal;
                            }
                        }
                    }
                    if ( count( $newReplace ) == 0 )
                        $replace = false;
                    else if ( count( $newReplace ) == 1 )
                        $replace = $newReplace[0];
                    else
                        $replace = $newReplace;

                    for ( $i = $start; $i <= $stop; ++$i )
                    {
                        $allowed = false;
                        foreach ( $allowedRanges as $allowedRange )
                        {
                            if ( $i >= $allowedRange[0] and
                                 $i <= $allowedRange[1] )
                            {
                                $allowed = true;
                                break;
                            }
                        }
                        if ( !$allowed )
                            continue;

                        // Never overwrite an existing mapping
                        if ( isset( $unicodeMap[$i] ) )
                            continue;

                        eZCodeMapper::mapExistingCodes( $unicodeMap, $i, $replace );
                        $unicodeMap[$i] = $replace;
                    }
                }
            }
        }
    }
    return $unicodeMap;
}
01397
01398
01399
01400
01401
01402
01403
01404
/**
 * Builds the flat, key-sorted mapping table for one or more rule identifiers.
 *
 * The identifiers are first resolved through expandInheritance() and the
 * result is flattened with generateSimpleMappingTable() using an empty
 * allowed-range list, i.e. without restricting the mapping to any range.
 * Note that, despite the method name, the value returned here is the
 * mapping table itself, not generated source code.
 *
 * @param mixed $identifier A single rule identifier or an array of them.
 * @return mixed The table produced by generateSimpleMappingTable(), sorted
 *               by key in ascending order.
 */
function generateMappingCode( $identifier )
{
    // Accept a single identifier as shorthand for a one-element list
    $identifiers = is_array( $identifier ) ? $identifier : array( $identifier );
    $expandedTable = $this->expandInheritance( $identifiers );

    // An empty list means "no range restriction" for the flattening step
    $noRangeRestriction = array();
    $flatTable = $this->generateSimpleMappingTable( $expandedTable, $noRangeRestriction );
    ksort( $flatTable );

    return $flatTable;
}
01417
01418
01419
01420
01421
01422
01423
/**
 * Converts a mapping table keyed on Unicode values into the equivalent
 * table for the character set $charset.
 *
 * Each key and each replacement of $unicodeTable is passed through a text
 * codec created for the 'unicode' -> $charset conversion; scalar
 * replacements are wrapped in a one-element array before conversion.
 *
 * @param array  $unicodeTable Map of match value => replacement, where the
 *                             replacement is either a single value or an
 *                             array of values.
 * @param string $charset      Name of the target character set.
 * @return array|false The converted table sorted by key in reverse order,
 *                     or false if no codec could be created for $charset.
 */
function generateCharsetMappingTable( $unicodeTable, $charset )
{
    include_once( 'lib/ezi18n/classes/eztextcodec.php' );

    $converter =& eZTextCodec::instance( 'unicode', $charset );
    if ( !$converter )
    {
        eZDebug::writeError( "Failed to create textcodec for charset '$charset'" );
        return false;
    }

    $localTable = array();
    foreach ( $unicodeTable as $unicodeMatch => $unicodeReplacement )
    {
        // The match is converted first, then the replacement
        $localMatch = $converter->convertString( array( $unicodeMatch ) );

        // The codec is always handed an array, so wrap scalar replacements
        $replacementValues = is_array( $unicodeReplacement )
            ? $unicodeReplacement
            : array( $unicodeReplacement );

        $localTable[$localMatch] = $converter->convertString( $replacementValues );
    }

    // NOTE(review): reverse key order is presumably wanted so that longer
    // sequences are tried before their prefixes - confirm against callers.
    krsort( $localTable );

    return $localTable;
}
01456
01457
01458
01459
01460
01461
01462
/**
 * Resolves a command and its parameters into the names of the registered
 * transformation rules that the command refers to.
 *
 * Three families of commands are recognised:
 *  - 'url_cleanup' and 'identifier_cleanup' never map to any rule.
 *  - Suffix commands ('normalize', 'lowercase', ...) select rules named
 *    "<parameter>_<command>"; without parameters every rule ending in
 *    "_<command>" is selected.
 *  - 'transform' and 'transliterate' select the rule named
 *    "<from><divider><to>" when exactly two parameters are given; without
 *    parameters every rule matching that shape is selected. Any other
 *    parameter count selects nothing.
 * An unknown command is reported through eZDebug and yields no rules.
 *
 * @param string $name       The command name.
 * @param array  $parameters The command parameters (may be empty).
 * @return array List of matching rule names, possibly empty.
 */
function decodeCommand( $name, $parameters )
{
    $availableRules = $this->ruleNames();
    $selected = array();

    // Commands implemented purely in code: there are no rule tables to pick
    if ( in_array( $name, array( 'url_cleanup', 'identifier_cleanup' ) ) )
    {
        return $selected;
    }

    // Commands whose rules are named "<something>_<command>"
    $suffixCommands = array( 'normalize',
                             'search_normalize',
                             'decompose',
                             'diacritical',
                             'lowercase',
                             'uppercase',
                             'search_cleanup' );
    if ( in_array( $name, $suffixCommands ) )
    {
        if ( count( $parameters ) == 0 )
        {
            // No parameters: take every rule carrying the command suffix
            $suffixPattern = '#_' . $name . '$#';
            foreach ( $availableRules as $candidate )
            {
                if ( preg_match( $suffixPattern, $candidate ) )
                    $selected[] = $candidate;
            }
            return $selected;
        }

        foreach ( $parameters as $prefix )
        {
            $candidate = $prefix . '_' . $name;
            if ( in_array( $candidate, $availableRules ) )
                $selected[] = $candidate;
        }
        return $selected;
    }

    // Commands whose rules are named "<from><divider><to>"
    if ( in_array( $name, array( 'transform', 'transliterate' ) ) )
    {
        $dividers = array( 'transform' => '_to_',
                           'transliterate' => '_transliterate_' );
        $divider = $dividers[$name];
        $parameterCount = count( $parameters );

        if ( $parameterCount == 0 )
        {
            // No parameters: take every rule of the "<from><divider><to>" shape
            $pairPattern = '#^[a-zA-Z][a-zA-Z0-9-]+' . $divider . '[a-zA-Z][a-zA-Z0-9-]+$#';
            foreach ( $availableRules as $candidate )
            {
                if ( preg_match( $pairPattern, $candidate ) )
                    $selected[] = $candidate;
            }
        }
        else if ( $parameterCount == 2 )
        {
            $candidate = $parameters[0] . $divider . $parameters[1];
            if ( in_array( $candidate, $availableRules ) )
                $selected[] = $candidate;
        }
        return $selected;
    }

    eZDebug::writeError( "Unknown command '$name'",
                         'eZCharTransform::decodeCommand' );
    return $selected;
}
01534
01535
01536
01537
01538
01539
01540
/**
 * Returns PHP source code, as a string, that performs the built-in command
 * $command on a variable named $text.
 *
 * The generated code is the textual counterpart of executeCommandCode() and
 * must stay in sync with it.
 *
 * Supported commands:
 *  - 'url_cleanup' / 'identifier_cleanup': lowercases the text, turns every
 *    character outside [a-z0-9_ ] into a space, turns spaces into
 *    underscores, collapses runs of underscores and trims one leading and
 *    one trailing underscore.
 *  - 'search_cleanup': for charsets not listed by nonCJKCharsets() the text
 *    is first split into overlapping two-character groups for the Unicode
 *    ranges tested below; then stray dots, percent signs not preceded by a
 *    digit, wildcards (unless enabled in the INI settings) and repeated
 *    whitespace are replaced by single spaces.
 *
 * @param array  $command     Command description; only the 'command' entry
 *                            is read here.
 * @param string $charsetName Name of the charset the code is generated for.
 * @return string|false The generated code, or false when the command has no
 *                      code-based implementation.
 */
function generateCommandCode( $command, $charsetName )
{
    if ( $command['command'] == 'url_cleanup' or
         $command['command'] == 'identifier_cleanup' )
    {
        $code = ( "\$text = strtolower( \$text );\n" .
                  "\$text = preg_replace( array( \"#[^a-z0-9_ ]#\",\n" .
                  " \"/ /\",\n" .
                  " \"/__+/\",\n" .
                  " \"/^_|_$/\" ),\n" .
                  " array( \" \",\n" .
                  " \"_\",\n" .
                  " \"_\",\n" .
                  " \"\" ),\n" .
                  " \$text );\n" );
        return $code;
    }
    else if ( $command['command'] == 'search_cleanup' )
    {
        $code = '';
        $nonCJKCharsets = $this->nonCJKCharsets();

        // Only charsets that are not known to be free of CJK characters
        // get the (more expensive) N-Gram splitting step.
        if ( !in_array( $charsetName, $nonCJKCharsets ) )
        {
            $code .= ( '// add N-Gram(N=2) chinese / japanese / korean multibyte characters' . "\n" .
                       'include_once( \'lib/ezi18n/classes/eztextcodec.php\' );' . "\n" .
                       '$codec =& eZTextCodec::instance( false, \'unicode\' );' . "\n" .
                       "\n" .
                       '$unicodeValueArray = $codec->convertString( $text );' . "\n" .
                       "\n" .
                       '$normalizedTextArray = array();' . "\n" .
                       '$bFlag = false;' . "\n" .
                       'foreach ( array_keys( $unicodeValueArray ) as $valueKey )' . "\n" .
                       '{' . "\n" .
                       ' // Check for word characters that should be broken up for search' . "\n" .
                       ' if ( ( $unicodeValueArray[$valueKey] >= 12289 and' . "\n" .
                       ' $unicodeValueArray[$valueKey] <= 12542 ) or' . "\n" .
                       ' ( $unicodeValueArray[$valueKey] >= 13312 and' . "\n" .
                       ' $unicodeValueArray[$valueKey] <= 40863 ) or' . "\n" .
                       ' ( $unicodeValueArray[$valueKey] >= 44032 and' . "\n" .
                       ' $unicodeValueArray[$valueKey] <= 55203 ) )' . "\n" .
                       ' {' . "\n" .
                       ' if ( $bFlag )' . "\n" .
                       ' {' . "\n" .
                       ' $normalizedTextArray[] = $unicodeValueArray[$valueKey];' . "\n" .
                       ' }' . "\n" .
                       ' $normalizedTextArray[] = 32; // A space' . "\n" .
                       ' $normalizedTextArray[] = $unicodeValueArray[$valueKey];' . "\n" .
                       ' $bFlag = true;' . "\n" .
                       ' }' . "\n" .
                       ' else' . "\n" .
                       ' {' . "\n" .
                       ' if ( $bFlag )' . "\n" .
                       ' {' . "\n" .
                       ' $normalizedTextArray[] = 32; // A space' . "\n" .
                       ' }' . "\n" .
                       ' $normalizedTextArray[] = $unicodeValueArray[$valueKey];' . "\n" .
                       ' $bFlag = false;' . "\n" .
                       ' }' . "\n" .
                       '}' . "\n" .
                       'if ( $bFlag )' . "\n" .
                       '{' . "\n" .
                       ' $normalizedTextArray[count($normalizedTextArray)-1]=32;' . "\n" .
                       '}' . "\n" .
                       '$revCodec =& eZTextCodec::instance( \'unicode\', false ); // false means use internal charset' . "\n" .
                       '$text = $revCodec->convertString( $normalizedTextArray );' . "\n" );
        }

        // The last pattern captures the non-digit character in front of a
        // percent sign. Its replacement must put that character back ("$1 "),
        // exactly as executeCommandCode() does; a plain " " would silently
        // drop the character preceding the percent sign.
        $code .= ( '$text = preg_replace( array( "#(\.){2,}#",' . "\n" .
                   ' "#^\.#",' . "\n" .
                   ' "#\s\.#",' . "\n" .
                   ' "#\.\s#",' . "\n" .
                   ' "#\.$#",' . "\n" .
                   ' "#([^0-9])%#" ),' . "\n" .
                   ' array( " ",' . "\n" .
                   ' " ",' . "\n" .
                   ' " ",' . "\n" .
                   ' " ",' . "\n" .
                   ' " ",' . "\n" .
                   ' "$1 " ),' . "\n" .
                   ' $text );' . "\n" .
                   '$ini =& eZINI::instance();' . "\n" .
                   'if ( $ini->variable( \'SearchSettings\', \'EnableWildcard\' ) != \'true\' )' . "\n" .
                   '{' . "\n" .
                   ' $text = str_replace( "*", " ", $text );' . "\n" .
                   '}' . "\n" .
                   '$charset = eZTextCodec::internalCharset();' . "\n" .
                   '$hasUTF8 = ( $charset == "utf-8" );' . "\n" .
                   "\n" .
                   'if ( $hasUTF8 )' . "\n" .
                   '{' . "\n" .
                   ' $text = preg_replace( "#(\s+)#u", " ", $text );' . "\n" .
                   '}' . "\n" .
                   'else' . "\n" .
                   '{' . "\n" .
                   ' $text = preg_replace( "#(\s+)#", " ", $text );' . "\n" .
                   '}' );

        return $code;
    }

    // Not a command that is implemented as generated code
    return false;
}
01641
01642
01643
01644
01645
01646
01647
/**
 * Applies the built-in command $command directly to $text.
 *
 * This is the runtime counterpart of generateCommandCode(): it performs the
 * same steps on the passed variable instead of emitting code for them.
 *
 * Supported commands:
 *  - 'url_cleanup' / 'identifier_cleanup': lowercases the text, turns every
 *    character outside [a-z0-9_ ] into a space, turns spaces into
 *    underscores, collapses runs of underscores and trims one leading and
 *    one trailing underscore.
 *  - 'search_cleanup': for charsets not listed by nonCJKCharsets() the text
 *    is first split into overlapping two-character groups for the Unicode
 *    ranges tested below; then stray dots, percent signs not preceded by a
 *    digit, wildcards (unless enabled in the INI settings) and repeated
 *    whitespace are replaced by single spaces.
 *
 * @param string $text        The text to clean up; modified in place.
 * @param array  $command     Command description; only the 'command' entry
 *                            is read here.
 * @param string $charsetName Name of the charset $text is encoded in.
 * @return bool True if the command was handled, false otherwise.
 */
function executeCommandCode( &$text, $command, $charsetName )
{
    $commandName = $command['command'];

    if ( in_array( $commandName, array( 'url_cleanup', 'identifier_cleanup' ) ) )
    {
        $patterns = array( "#[^a-z0-9_ ]#",
                           "/ /",
                           "/__+/",
                           "/^_|_$/" );
        $replacements = array( " ",
                               "_",
                               "_",
                               "" );
        $text = preg_replace( $patterns, $replacements, strtolower( $text ) );
        return true;
    }

    if ( $commandName != 'search_cleanup' )
    {
        return false;
    }

    if ( !in_array( $charsetName, $this->nonCJKCharsets() ) )
    {
        // Add N-Gram (N=2) splitting of chinese / japanese / korean characters
        include_once( 'lib/ezi18n/classes/eztextcodec.php' );
        $codec =& eZTextCodec::instance( false, 'unicode' );

        $unicodeValueArray = $codec->convertString( $text );

        $ngramOrdinals = array();
        $previousWasCJK = false;
        foreach ( array_keys( $unicodeValueArray ) as $valueKey )
        {
            $ordinal = $unicodeValueArray[$valueKey];

            // Characters in these ranges are broken up for search
            $isCJK = ( ( $ordinal >= 12289 and $ordinal <= 12542 ) or
                       ( $ordinal >= 13312 and $ordinal <= 40863 ) or
                       ( $ordinal >= 44032 and $ordinal <= 55203 ) );

            if ( $isCJK )
            {
                // Close the pair opened by the previous CJK character
                if ( $previousWasCJK )
                {
                    $ngramOrdinals[] = $ordinal;
                }
                $ngramOrdinals[] = 32; // A space
                $ngramOrdinals[] = $ordinal;
            }
            else
            {
                if ( $previousWasCJK )
                {
                    $ngramOrdinals[] = 32; // A space
                }
                $ngramOrdinals[] = $ordinal;
            }
            $previousWasCJK = $isCJK;
        }

        // A trailing CJK character opened a pair that is never completed;
        // overwrite it with a space.
        if ( $previousWasCJK )
        {
            $ngramOrdinals[ count( $ngramOrdinals ) - 1 ] = 32;
        }

        $revCodec =& eZTextCodec::instance( 'unicode', false ); // false means use internal charset
        $text = $revCodec->convertString( $ngramOrdinals );
    }

    // Blank out repeated / leading / trailing / isolated dots and any
    // percent sign that does not follow a digit (keeping the character
    // in front of it).
    $dotAndPercentPatterns = array( "#(\.){2,}#",
                                    "#^\.#",
                                    "#\s\.#",
                                    "#\.\s#",
                                    "#\.$#",
                                    "#([^0-9])%#" );
    $dotAndPercentReplacements = array( " ",
                                        " ",
                                        " ",
                                        " ",
                                        " ",
                                        "$1 " );
    $text = preg_replace( $dotAndPercentPatterns, $dotAndPercentReplacements, $text );

    $ini =& eZINI::instance();
    if ( $ini->variable( 'SearchSettings', 'EnableWildcard' ) != 'true' )
    {
        $text = str_replace( "*", " ", $text );
    }

    // Collapse whitespace, in UTF-8 mode when that is the internal charset
    $charset = eZTextCodec::internalCharset();
    $whitespacePattern = ( $charset == "utf-8" ) ? "#(\s+)#u" : "#(\s+)#";
    $text = preg_replace( $whitespacePattern, " ", $text );

    return true;
}
01751
01752
01753
01754
/**
 * Returns the names of the charsets for which the 'search_cleanup' command
 * skips its N-Gram splitting of chinese / japanese / korean characters.
 *
 * @return array List of lowercase charset names.
 */
function nonCJKCharsets()
{
    $codepages = array( 'cp437', 'cp737', 'cp775', 'cp850', 'cp852', 'cp855', 'cp857',
                        'cp860', 'cp861', 'cp862', 'cp863', 'cp864', 'cp865', 'cp866',
                        'cp869', 'cp874' );

    // There is no 'iso-8859-12' entry in this list
    $isoLatin = array( 'iso-8859-1', 'iso-8859-2', 'iso-8859-3', 'iso-8859-4', 'iso-8859-5',
                       'iso-8859-6', 'iso-8859-7', 'iso-8859-8', 'iso-8859-9', 'iso-8859-10',
                       'iso-8859-11', 'iso-8859-13', 'iso-8859-14', 'iso-8859-15' );

    $windows = array( 'windows-1250', 'windows-1251', 'windows-1252', 'windows-1253',
                      'windows-1254', 'windows-1255', 'windows-1256', 'windows-1257',
                      'windows-1258' );

    // array_merge() renumbers the lists consecutively, preserving this order
    return array_merge( array( 'adobe-standard-encoding' ),
                        $codepages,
                        array( 'dec-mcs', 'hp-roman8' ),
                        $isoLatin,
                        array( 'koi8-r', 'koi8-u', 'macintosh', 'next', 'us-ascii' ),
                        $windows );
}
01770
01771
01772 var $TransformationTables;
01773 var $TransformationFiles;
01774 var $ISOUnicodeCodec;
01775 }
01776
01777 ?>