00001 <?php
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044 include_once( 'lib/ezdiff/classes/ezdiffengine.php' );
00045 include_once( 'lib/ezdiff/classes/ezdiffmatrix.php' );
00046
00047 class eZDiffTextEngine extends eZDiffEngine
00048 {
00049 function eZDiffTextEngine()
00050 {
00051 }
00052
00053
00054
00055
00056
00057 function createDifferenceObject( $fromData, $toData )
00058 {
00059 include_once( 'lib/ezdiff/classes/eztextdiff.php' );
00060
00061 $pattern = array( '/[ ][ ]+/',
00062 '/ \n( \n)+/',
00063 '/^ /m',
00064 '/(\n){3,}/' );
00065 $replace = array( ' ',
00066 "\n",
00067 '',
00068 "\n\n" );
00069
00070 $old = preg_replace( $pattern, $replace, $fromData );
00071 $new = preg_replace( $pattern, $replace, $toData );
00072
00073 $oldArray = split( "\r\n", $old );
00074 $newArray = split( "\r\n", $new );
00075
00076 $oldSums = array();
00077 foreach( $oldArray as $paragraph )
00078 {
00079 $oldSums[] = crc32( $paragraph );
00080 }
00081
00082 $newSums = array();
00083 foreach( $newArray as $paragraph )
00084 {
00085 $newSums[] = crc32( $paragraph );
00086 }
00087
00088 $changes = new eZTextDiff();
00089
00090 $pre = $this->preProcess( $oldSums, $newSums );
00091 $out = $this->createOutput( $pre, $oldArray, $newArray );
00092
00093 $changes->setChanges( $out );
00094
00095 return $changes;
00096 }
00097
00098
00099
00100
00101 function createOutput( $arr, $oldArray, $newArray )
00102 {
00103 $diff = array();
00104
00105 foreach( $arr as $item )
00106 {
00107 $old = null;
00108 $new = null;
00109 foreach ( $item as $par => $state )
00110 {
00111 switch ( $state )
00112 {
00113 case 'unchanged':
00114 {
00115 $index = $par;
00116 }break;
00117
00118 case 'changed':
00119 {
00120 $old = $oldArray[$par];
00121 $new = $newArray[$par];
00122 }break;
00123
00124 case 'added':
00125 {
00126 $new = $newArray[$par];
00127 }break;
00128
00129 case 'removed':
00130 {
00131 $old = $oldArray[$par];
00132 }break;
00133 }
00134 }
00135
00136 if ( $old === null )
00137 {
00138 if ( $new === null )
00139 {
00140
00141 $text = $newArray[$index];
00142 $this->addNewLine( $text );
00143 $diff[] = array( 'unchanged' => $text,
00144 'status' => 0 );
00145 }
00146 else
00147 {
00148
00149 $text = $new;
00150 $this->addNewLine( $text );
00151 $diff[] = array( 'added' => $text,
00152 'status' => 2 );
00153 }
00154 }
00155 elseif ( $new === null )
00156 {
00157
00158 $text = $old;
00159 $this->addNewLine( $text );
00160 $diff[] = array( 'removed' => $text,
00161 'status' => 1 );
00162 }
00163 else
00164 {
00165
00166 $diffText = $this->buildDiff( explode( ' ', $old ), explode( ' ', $new ) );
00167 $size = count( $diffText ) - 1;
00168
00169 foreach( $diffText as $number => $change )
00170 {
00171 $state = $change['status'];
00172 switch( $state )
00173 {
00174 case '0':
00175 {
00176 if ( $number == $size )
00177 $this->addNewLine( $change['unchanged'] );
00178 }break;
00179
00180 case '1':
00181 {
00182 if ( $number == $size )
00183 $this->addNewLine( $change['removed'] );
00184 }break;
00185
00186 case '2':
00187 {
00188 if ( $number == $size )
00189 $this->addNewLine( $change['added'] );
00190 }break;
00191 }
00192 $diff[] = $change;
00193 }
00194 }
00195 }
00196 $output = $this->postProcessDiff( $diff );
00197 return $output;
00198 }
00199
00200
00201
00202
00203
00204 function addNewLine( &$text )
00205 {
00206 $text .= "\n";
00207 }
00208
00209
00210
00211
00212 function preProcess( $oldArray, $newArray )
00213 {
00214 $substr = $this->substringMatrix( $oldArray, $newArray );
00215
00216 $nOldWords = count( $oldArray );
00217 $nNewWords = count( $newArray );
00218
00219
00220
00221
00222
00223
00224
00225
00226 $strings = $this->substrings( $substr, $oldArray, $newArray );
00227
00228
00229 $mergedStrings = array();
00230 foreach ( $strings as $sstring )
00231 {
00232 $mergedStrings = $mergedStrings + $sstring;
00233 }
00234 unset( $strings );
00235
00236
00237 $offset = 0;
00238 $delOffset = 0;
00239 $internalOffset = 0;
00240 $merged = array();
00241
00242 $oldOffset = -1;
00243
00244 foreach ( $mergedStrings as $key => $wordArray )
00245 {
00246 if ( $oldOffset >= $wordArray['oldOffset'] )
00247 {
00248 continue;
00249 }
00250
00251 $oldOffset = $wordArray['oldOffset'];
00252
00253
00254 $nk = $internalOffset;
00255 while ( $key > $offset )
00256 {
00257 $merged[$nk] = array( $offset => 'added' );
00258 $nk++;
00259 $offset++;
00260 }
00261
00262
00263 $k = $internalOffset;
00264 while ( $oldOffset > $delOffset )
00265 {
00266 if ( $k < $nk )
00267 {
00268
00269 if ( array_key_exists( $delOffset, $merged[$k] ) )
00270 {
00271
00272 $merged[$k][$delOffset] = 'changed';
00273 }
00274 else
00275 {
00276 $merged[$k][$delOffset] = 'removed';
00277 }
00278 }
00279 else
00280 {
00281 $merged[$k] = array( $delOffset => 'removed' );
00282 }
00283 $k++;
00284 $delOffset++;
00285 }
00286
00287 $internalOffset = ($k > $nk) ? $k:$nk;
00288
00289
00290 $merged[$internalOffset] = array( $key => 'unchanged' );
00291 $internalOffset++;
00292 $delOffset++;
00293 $offset++;
00294 }
00295
00296
00297 $nk = $internalOffset;
00298 while ( $nNewWords > $offset )
00299 {
00300 $merged[$nk] = array( $offset => 'added' );
00301 $nk++;
00302 $offset++;
00303 }
00304
00305
00306 $k = $internalOffset;
00307 while ( $nOldWords > $delOffset )
00308 {
00309 if ( $k < $nk )
00310 {
00311 if ( array_key_exists( $delOffset, $merged[$k] ) )
00312 {
00313 $merged[$k][$delOffset] = 'changed';
00314 }
00315 else
00316 {
00317 $merged[$k][$delOffset] = 'removed';
00318 }
00319 }
00320 else
00321 {
00322 $merged[$k] = array( $delOffset => 'removed' );
00323 }
00324 $k++;
00325 $delOffset++;
00326 }
00327
00328 return $merged;
00329 }
00330
00331
00332
00333
00334
00335
00336 function buildDiff( $oldArray, $newArray )
00337 {
00338 $substr = $this->substringMatrix( $oldArray, $newArray );
00339
00340 $nOldWords = count( $oldArray );
00341 $nNewWords = count( $newArray );
00342
00343
00344
00345
00346
00347
00348
00349
00350 $strings = $this->substrings( $substr, $oldArray, $newArray );
00351 $len = count( $strings );
00352
00353
00354 $mergedStrings = array();
00355 foreach ( $strings as $sstring )
00356 {
00357 $mergedStrings = $mergedStrings + $sstring;
00358 }
00359 unset( $strings );
00360
00361
00362 $differences = array();
00363 $offset = 0;
00364 $delOffset = 0;
00365
00366 $oldOffset = -1;
00367
00368 foreach ( $mergedStrings as $key => $wordArray )
00369 {
00370 if ( $oldOffset >= $wordArray['oldOffset'] )
00371 {
00372 continue;
00373 }
00374
00375 $oldOffset = $wordArray['oldOffset'];
00376
00377
00378 while ( $key > $offset )
00379 {
00380 $differences[] = array( 'added' => $newArray[$offset],
00381 'status' => 2 );
00382 $offset++;
00383 }
00384
00385
00386 while ( $oldOffset > $delOffset )
00387 {
00388 $differences[] = array( 'removed' => $oldArray[$delOffset],
00389 'status' => 1 );
00390 $delOffset++;
00391 }
00392
00393
00394 $differences[] = array( 'unchanged' => $newArray[$key],
00395 'status' => 0 );
00396 $delOffset++;
00397 $offset++;
00398 }
00399
00400
00401 while ( $nNewWords > $offset )
00402 {
00403 $differences[] = array( 'added' => $newArray[$offset],
00404 'status' => 2 );
00405 $offset++;
00406 }
00407
00408
00409 while ( $nOldWords > $delOffset )
00410 {
00411 $differences[] = array( 'removed' => $oldArray[$delOffset],
00412 'status' => 1 );
00413 $delOffset++;
00414 }
00415
00416 $output = $this->postProcessDiff( $differences );
00417 return $output;
00418
00419 }
00420
00421
00422
00423
00424
00425 function postProcessDiff( $diffArray )
00426 {
00427 $string = "";
00428 $diff = array();
00429 $item = current( $diffArray );
00430
00431 $lastStatus = $item['status'];
00432
00433 $mode = array( 0 => 'unchanged',
00434 1 => 'removed',
00435 2 => 'added' );
00436
00437 while ( $item = current( $diffArray ) )
00438 {
00439 if ( $item['status'] != $lastStatus )
00440 {
00441 $diff[] = array( $mode[$lastStatus] => $string,
00442 'status' => $lastStatus );
00443 $lastStatus = $item['status'];
00444 $string ="";
00445 continue;
00446 }
00447
00448 $string .= $item[$mode[$lastStatus]] . " ";
00449 next( $diffArray );
00450 }
00451 if ( strlen( $string ) > 0 )
00452 {
00453 $diff[] = array( $mode[$lastStatus] => $string,
00454 'status' => $lastStatus );
00455 }
00456 return $diff;
00457 }
00458
00459
00460
00461
00462
00463
00464
00465 function substrings( $sub, $old, $new )
00466 {
00467 $matrix = $sub['lengthMatrix'];
00468 $val = $sub['maxLength'];
00469 $row = $sub['maxRow'];
00470 $col = $sub['maxCol'];
00471
00472 if ( $val == 0 )
00473 {
00474
00475 return array();
00476 }
00477
00478 $rows = count( $old );
00479 $cols = count( $new );
00480
00481 $lcsOffsets = $this->findLongestSubstringOffsets( $sub );
00482 $lcsPlacement = $this->substringPlacement( $lcsOffsets['newStart'], $lcsOffsets['newEnd'], $cols );
00483
00484 $substringSet = array();
00485
00486 $substringArray = $this->traceSubstring( $row, $col, $matrix, $val, $new );
00487 $substring = $substringArray['substring'];
00488 unset( $substringArray );
00489 $substringSet[] = array_reverse( $substring, true );
00490 $substring = array();
00491
00492
00493 if ( $lcsPlacement['hasTextLeft'] )
00494 {
00495 $row = $lcsOffsets['oldStart'];
00496 $col = $lcsOffsets['newStart'];
00497
00498 if ( $row != 0 )
00499 {
00500 $row--;
00501 }
00502
00503 if ( $col != 0 )
00504 {
00505 $col--;
00506 }
00507
00508 $done = false;
00509 $info = true;
00510 $prevRowUsed = -1;
00511
00512 while ( !$done && $info )
00513 {
00514 if ( $prevRowUsed == $row )
00515 {
00516 $done = true;
00517 continue;
00518 }
00519
00520 $info = $this->localSubstring( 'left', $row, $col, $rows, $cols, $matrix );
00521 $prevRowUsed = $row;
00522 if ( $info )
00523 {
00524 $substringArray = $this->traceSubstring( $info['remainRow'], $info['remainCol'], $matrix, $info['remainMax'], $new );
00525 $substring = $substringArray['substring'];
00526 array_unshift( $substringSet, array_reverse( $substring, true ) );
00527 $substring = array();
00528
00529 if ( $info['remainCol'] == 0 || $substringArray['lastCol'] == 0 )
00530 {
00531 $done = true;
00532 }
00533 else
00534 {
00535 $row = $substringArray['lastRow'];
00536 $col = $substringArray['lastCol'];
00537
00538 if ( $row != 0 )
00539 {
00540 $row--;
00541 }
00542 if ( $col != 0 )
00543 {
00544 $col--;
00545 }
00546 }
00547 unset( $substringArray );
00548 }
00549 }
00550 }
00551
00552
00553 if ( $lcsPlacement['hasTextRight'] )
00554 {
00555
00556 $row = $sub['maxRow'];
00557 $col = $sub['maxCol'];
00558
00559 if ( $row != $rows-1 )
00560 {
00561 $row++;
00562 }
00563
00564 if ( $col != $cols-1 )
00565 {
00566 $col++;
00567 }
00568
00569 $done = false;
00570 $info = true;
00571 $prevRowUsed = -1;
00572
00573 while( !$done && $info )
00574 {
00575 if ( $prevRowUsed == $row )
00576 {
00577 $done = true;
00578 continue;
00579 }
00580 $info = $this->localSubstring( 'right', $row, $col, $rows, $cols, $matrix );
00581 $prevRowUsed = $row;
00582 if ( $info )
00583 {
00584 $substringArray = $this->traceSubstring( $info['remainRow'], $info['remainCol'], $matrix, $info['remainMax'], $new );
00585 $substring = $substringArray['substring'];
00586 $substringSet[] = array_reverse( $substring, true );
00587 $substring = array();
00588
00589 if ( $info['remainCol'] == $cols-1 || $substringArray['lastRow'] == $cols-1 )
00590 {
00591 $done = true;
00592 }
00593 else
00594 {
00595 $row = $info['remainRow'];
00596 $col = $info['remainCol'];
00597
00598 if ( $row != $rows-1 )
00599 {
00600 $row++;
00601 }
00602
00603 if ( $col != $cols-1 )
00604 {
00605 $col++;
00606 }
00607 }
00608 unset( $substringArray );
00609 }
00610 }
00611 }
00612 return $substringSet;
00613 }
00614
00615
00616
00617
00618
00619
00620 function localSubstring( $direction, $row, $col, $rows, $cols, $matrix )
00621 {
00622 $colMax = 0;
00623 $prevColMax = 0;
00624 $colMaxRow = 0;
00625 $colMaxCol = 0;
00626
00627 $remainMax = 0;
00628 $remainRow = 0;
00629 $remainCol = 0;
00630
00631 switch( $direction )
00632 {
00633 case 'right':
00634 {
00635 for ( $j = $col; $j < $cols; $j++ )
00636 {
00637 $startRow = $row;
00638 $colMax = 0;
00639
00640 while ( $startRow < $rows )
00641 {
00642 $matVal = $matrix->get( $startRow, $j );
00643 if ( $matVal > $colMax )
00644 {
00645 $colMax = $matVal;
00646 $colMaxRow = $startRow;
00647 $colMaxCol = $j;
00648 }
00649 $startRow++;
00650 }
00651
00652 if ( $colMax > $remainMax )
00653 {
00654
00655 $remainMax = $colMax;
00656 $remainRow = $colMaxRow;
00657 $remainCol = $colMaxCol;
00658 }
00659
00660 if ( ( $prevColMax > 1 ) && ( $colMax < $prevColMax ) )
00661 {
00662 break 2;
00663 }
00664
00665 $prevColMax = $colMax;
00666 }
00667 }break;
00668
00669 case 'left':
00670 {
00671 for ( $j = $col; $j >= 0; $j-- )
00672 {
00673 $startRow = $row;
00674 $colMax = 0;
00675
00676 while ( $startRow >= 0 )
00677 {
00678 $matVal = $matrix->get( $startRow, $j );
00679 if ( $matVal > $colMax )
00680 {
00681 $colMax = $matVal;
00682 $colMaxRow = $startRow;
00683 $colMaxCol = $j;
00684 }
00685 $startRow--;
00686 }
00687 if ( $colMax > $remainMax )
00688 {
00689
00690 $remainMax = $colMax;
00691 $remainRow = $colMaxRow;
00692 $remainCol = $colMaxCol;
00693 }
00694 if ( ( $prevColMax > 1 ) && ( $colMax < $prevColMax ) )
00695 {
00696 break 2;
00697 }
00698
00699 $prevColMax = $colMax;
00700 }
00701 }break;
00702 }
00703
00704 if ( $remainMax > 0 )
00705 {
00706 return array( 'remainMax' => $remainMax,
00707 'remainRow' => $remainRow,
00708 'remainCol' => $remainCol );
00709 }
00710 else
00711 {
00712 return false;
00713 }
00714 }
00715
00716
00717
00718
00719
00720
00721
00722 function traceSubstring( $row, $col, $matrix, $val, $new )
00723 {
00724 $substring = array();
00725 while( $row >= 0 && $col >= 0 )
00726 {
00727 if ( $matrix->get( $row, $col ) == $val )
00728 {
00729 $substring[$col] = array( 'word' => $new[$col],
00730 'oldOffset' => $row );
00731
00732 if ( $val > 1 )
00733 {
00734 $val--;
00735 }
00736 else if ( $val == 1 )
00737 {
00738 break;
00739 }
00740
00741 $row--;
00742 $col--;
00743 if ( $row < 0 || $col < 0 )
00744 break;
00745 }
00746 }
00747 return array( 'substring' => $substring,
00748 'lastRow' => $row,
00749 'lastCol' => $col );
00750 }
00751
00752
00753
00754
00755
00756
00757
00758 function substringPlacement( $startOffset, $endOffset, $totalStringLength )
00759 {
00760 $leftText = false;
00761 $rightText = false;
00762
00763 if ( $startOffset > 0 )
00764 $leftText = true;
00765
00766 if ( $endOffset < $totalStringLength-1 )
00767 $rightText = true;
00768
00769 return array( 'hasTextLeft' => $leftText,
00770 'hasTextRight' => $rightText );
00771 }
00772
00773
00774
00775
00776
00777 function dumpMatrix( $arr, $rows, $cols )
00778 {
00779 for ( $i = 0; $i < $rows; $i++ )
00780 {
00781 for ( $j = 0; $j < $cols; $j++ )
00782 {
00783 print( $arr->get( $i, $j ) . " " );
00784 if ( $j == $cols-1 )
00785 print( "\n" );
00786 }
00787 }
00788 }
00789
00790
00791
00792
00793
00794
00795 function findLongestSubstringOffsets( $varArray )
00796 {
00797
00798
00799 $lengthMatrix = $varArray['lengthMatrix'];
00800 $max = $varArray['maxLength'];
00801 $len = $max;
00802 $maxRow = $varArray['maxRow'];
00803 $maxCol = $varArray['maxCol'];
00804 $newStart = 0;
00805 $newEnd = 0;
00806 $oldStart = 0;
00807 $oldEnd = 0;
00808
00809 while ( $len > 0 && $maxRow >= 0 && $maxCol >= 0)
00810 {
00811 $len = $lengthMatrix->get( $maxRow, $maxCol );
00812
00813 if ( $lengthMatrix->get( $maxRow, $maxCol ) == $max )
00814 {
00815 $newEnd = $maxCol;
00816 $oldEnd = $maxRow;
00817 }
00818
00819 if ( $lengthMatrix->get( $maxRow, $maxCol ) == 1 )
00820 {
00821 $newStart = $maxCol;
00822 $oldStart = $maxRow;
00823 }
00824
00825 $maxRow--;
00826 $maxCol--;
00827 }
00828 return array( 'newStart' => $newStart,
00829 'newEnd' => $newEnd,
00830 'oldStart' => $oldStart,
00831 'oldEnd' => $oldEnd );
00832 }
00833
00834
00835
00836
00837
00838
00839
00840
00841
00842
00843
00844
00845
00846
00847 function substringMatrix( $old, $new )
00848 {
00849 $maxLength = 0;
00850 $sizeOld = count( $old );
00851 $sizeNew = count( $new );
00852 $matrix = new eZDiffMatrix( $sizeOld, $sizeNew );
00853
00854 $maxC = 0;
00855 $maxR = 0;
00856
00857 for ( $row = 0; $row < $sizeOld; $row++ )
00858 {
00859 for ( $col = 0; $col < $sizeNew; $col++ )
00860 {
00861 if ( $old[$row] === $new[$col] )
00862 {
00863 if ( $row > 0 && $col > 0 )
00864 {
00865 $val = 1 + $matrix->get( $row-1, $col-1 );
00866 $matrix->set( $row, $col, $val );
00867 }
00868 else if ( $row > 0 && $col == 0 )
00869 {
00870 $val = 1 + $matrix->get( $row-1, $col );
00871 $matrix->set( $row, $col, $val );
00872 }
00873 else if ( $row == 0 && $col > 0 )
00874 {
00875 $val = 1 + $matrix->get( $row, $col-1 );
00876 $matrix->set( $row, $col, $val );
00877 }
00878 else if ( $row == 0 && $col == 0 )
00879 {
00880 $matrix->set( $row, $col, 1 );
00881 }
00882
00883 if ( $matrix->get( $row, $col ) > $maxLength )
00884 {
00885 $maxLength = $matrix->get( $row, $col );
00886 $maxR = $row;
00887 $maxC = $col;
00888 }
00889 }
00890 else
00891 {
00892 $matrix->set( $row, $col, 0 );
00893 }
00894 }
00895 }
00896 return array( 'maxLength' => $maxLength,
00897 'maxRow' => $maxR,
00898 'maxCol' => $maxC,
00899 'lengthMatrix' => $matrix );
00900 }
00901
00902
00903
00904
00905
00906
00907 function trimEmptyArrayElements( $array )
00908 {
00909 foreach( $array as $key => $value )
00910 {
00911 if ( empty( $value ) )
00912 {
00913 unset( $array[$key] );
00914 }
00915 }
00916
00917 return array_merge( $array );
00918 }
00919 }
00920
00921 ?>