eZ Publish  [4.0]
ezdifftextengine.php
Go to the documentation of this file.
00001 <?php
00002 //
00003 // Definition of eZDiffTextEngine class
00004 //
00005 // <creation-tag>
00006 //
00007 // ## BEGIN COPYRIGHT, LICENSE AND WARRANTY NOTICE ##
00008 // SOFTWARE NAME: eZ Publish
00009 // SOFTWARE RELEASE: 4.0.x
00010 // COPYRIGHT NOTICE: Copyright (C) 1999-2008 eZ Systems AS
00011 // SOFTWARE LICENSE: GNU General Public License v2.0
00012 // NOTICE: >
00013 //   This program is free software; you can redistribute it and/or
00014 //   modify it under the terms of version 2.0  of the GNU General
00015 //   Public License as published by the Free Software Foundation.
00016 //
00017 //   This program is distributed in the hope that it will be useful,
00018 //   but WITHOUT ANY WARRANTY; without even the implied warranty of
00019 //   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00020 //   GNU General Public License for more details.
00021 //
00022 //   You should have received a copy of version 2.0 of the GNU General
00023 //   Public License along with this program; if not, write to the Free
00024 //   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
00025 //   MA 02110-1301, USA.
00026 //
00027 //
00028 // ## END COPYRIGHT, LICENSE AND WARRANTY NOTICE ##
00029 //
00030 
00031 /*! \file ezdifftextengine.php
00032   eZDiffTextEngine class
00033 */
00034 
00035 /*!
00036   \class eZDiffTextEngine ezdifftextengine.php
00037   \ingroup eZDiff
00038   \brief eZDiff provides an access point the diff system
00039 
00040   The eZDiffEngine class is an abstract class, providing interface and shared code
00041   for the different available DiffEngine.
00042 */
00043 
00044 //include_once( 'lib/ezdiff/classes/ezdiffengine.php' );
00045 //include_once( 'lib/ezdiff/classes/ezdiffmatrix.php' );
00046 
00047 class eZDiffTextEngine extends eZDiffEngine
00048 {
00049     function eZDiffTextEngine()
00050     {
00051     }
00052 
00053     /*!
00054       This function calculates changes in plain text and creates an object to hold
00055       overview of changes.
00056     */
00057     function createDifferenceObject( $fromData, $toData )
00058     {
00059         //include_once( 'lib/ezdiff/classes/eztextdiff.php' );
00060 
00061         $pattern = array( '/[ ][ ]+/',
00062                           '/ \n( \n)+/',
00063                           '/^ /m',
00064                           '/(\n){3,}/' );
00065         $replace = array( ' ',
00066                           "\n",
00067                           '',
00068                           "\n\n" );
00069 
00070         $old = preg_replace( $pattern, $replace, $fromData );
00071         $new = preg_replace( $pattern, $replace, $toData );
00072 
00073         $oldArray = split( "\r\n", $old );
00074         $newArray = split( "\r\n", $new );
00075 
00076         $oldSums = array();
00077         foreach( $oldArray as $paragraph )
00078         {
00079             $oldSums[] = crc32( $paragraph );
00080         }
00081 
00082         $newSums = array();
00083         foreach( $newArray as $paragraph )
00084         {
00085             $newSums[] = crc32( $paragraph );
00086         }
00087 
00088         $changes = new eZTextDiff();
00089 
00090         $pre = $this->preProcess( $oldSums, $newSums );
00091         $out = $this->createOutput( $pre, $oldArray, $newArray );
00092 
00093         $changes->setChanges( $out );
00094 
00095         return $changes;
00096     }
00097 
00098     /*!
00099       This Method will create the differences array which is processed by the templates
00100     */
00101     function createOutput( $arr, $oldArray, $newArray )
00102     {
00103         $diff = array();
00104 
00105         foreach( $arr as $item )
00106         {
00107             $old = null;
00108             $new = null;
00109             foreach ( $item as $par => $state )
00110             {
00111                 switch ( $state )
00112                 {
00113                     case 'unchanged':
00114                     {
00115                         $index = $par;
00116                     }break;
00117 
00118                     case 'changed':
00119                     {
00120                         $old = $oldArray[$par];
00121                         $new = $newArray[$par];
00122                     }break;
00123 
00124                     case 'added':
00125                     {
00126                         $new = $newArray[$par];
00127                     }break;
00128 
00129                     case 'removed':
00130                     {
00131                         $old = $oldArray[$par];
00132                     }break;
00133                 }
00134             }
00135 
00136             if ( $old === null )
00137             {
00138                 if ( $new === null )
00139                 {
00140                     // unchanged paragraph
00141                     $text = $newArray[$index];
00142                     $this->addNewLine( $text );
00143                     $diff[] = array( 'unchanged' => $text,
00144                                      'status' => 0 );
00145                 }
00146                 else
00147                 {
00148                     // added paragraph
00149                     $text = $new;
00150                     $this->addNewLine( $text );
00151                     $diff[] = array( 'added' => $text,
00152                                      'status' => 2 );
00153                 }
00154             }
00155             elseif ( $new === null )
00156             {
00157                 // removed paragraph
00158                 $text = $old;
00159                 $this->addNewLine( $text );
00160                 $diff[] = array( 'removed' => $text,
00161                                  'status' => 1 );
00162             }
00163             else
00164             {
00165                 // changed paragraph
00166                 $diffText = $this->buildDiff( explode( ' ', $old ), explode( ' ', $new ) );
00167                 $size = count( $diffText ) - 1;
00168 
00169                 foreach( $diffText as $number => $change )
00170                 {
00171                     $state = $change['status'];
00172                     switch( $state )
00173                     {
00174                         case '0':
00175                         {
00176                             if ( $number == $size )
00177                                 $this->addNewLine( $change['unchanged'] );
00178                         }break;
00179 
00180                         case '1':
00181                         {
00182                             if ( $number == $size )
00183                                 $this->addNewLine( $change['removed'] );
00184                         }break;
00185 
00186                         case '2':
00187                         {
00188                             if ( $number == $size )
00189                                 $this->addNewLine( $change['added'] );
00190                         }break;
00191                     }
00192                     $diff[] = $change;
00193                 }
00194             }
00195         }
00196         $output = $this->postProcessDiff( $diff );
00197         return $output;
00198     }
00199 
00200     /*!
00201       \private
00202       This method will add a newline after unempty paragraphs
00203     */
00204     function addNewLine( &$text )
00205     {
00206         $text .= "\n";
00207     }
00208 
00209     /*!
00210       This method will determine which paragraphs which need to be diffed.
00211     */
00212     function preProcess( $oldArray, $newArray )
00213     {
00214         $substr = $this->substringMatrix( $oldArray, $newArray );
00215 
00216         $nOldWords = count( $oldArray );
00217         $nNewWords = count( $newArray );
00218 
00219         /*
00220         $tmp = $substr['lengthMatrix'];
00221         print( "<pre>" );
00222         $this->dumpMatrix( $tmp, $nOldWords, $nNewWords );
00223         print( "</pre>" );
00224         */
00225 
00226         $strings = $this->substrings( $substr, $oldArray, $newArray );
00227 
00228         //Merge detected paragraphs
00229         $mergedStrings = array();
00230         foreach ( $strings as $sstring )
00231         {
00232             $mergedStrings = $mergedStrings + $sstring;
00233         }
00234         unset( $strings );
00235 
00236         //Check for changes in lead & inner paragraphs
00237         $offset = 0;
00238         $delOffset = 0;
00239         $internalOffset = 0;
00240         $merged = array();
00241 
00242         $oldOffset = -1;
00243 
00244         foreach ( $mergedStrings as $key => $wordArray )
00245         {
00246             if ( $oldOffset >= $wordArray['oldOffset'] )
00247             {
00248                 continue;
00249             }
00250 
00251             $oldOffset = $wordArray['oldOffset'];
00252 
00253             //Check for inserted paragraphs
00254             $nk = $internalOffset;
00255             while ( $key > $offset )
00256             {
00257                 $merged[$nk] = array( $offset => 'added' );
00258                 $nk++;
00259                 $offset++;
00260             }
00261 
00262             //Check for removed paragraphs
00263             $k = $internalOffset;
00264             while ( $oldOffset > $delOffset )
00265             {
00266                 if ( $k < $nk )
00267                 {
00268                     // Paragraph is changed
00269                     if ( array_key_exists( $delOffset, $merged[$k] ) )
00270                     {
00271                         // Old & new paragraph places is the same
00272                         $merged[$k][$delOffset] = 'changed';
00273                     }
00274                     else
00275                     {
00276                         $merged[$k][$delOffset] = 'removed';
00277                     }
00278                 }
00279                 else
00280                 {
00281                     $merged[$k] = array( $delOffset => 'removed' );
00282                 }
00283                 $k++;
00284                 $delOffset++;
00285             }
00286 
00287             $internalOffset = ($k > $nk) ? $k:$nk;
00288 
00289             //The default state - unchanged paragraph
00290             $merged[$internalOffset] = array( $key => 'unchanged' );
00291             $internalOffset++;
00292             $delOffset++;
00293             $offset++;
00294         }
00295 
00296         //Check for appended paragraphs
00297         $nk = $internalOffset;
00298         while ( $nNewWords > $offset )
00299         {
00300             $merged[$nk] = array( $offset => 'added' );
00301             $nk++;
00302             $offset++;
00303         }
00304 
00305         //Check for end-deleted paragraphs
00306         $k = $internalOffset;
00307         while ( $nOldWords > $delOffset )
00308         {
00309             if ( $k < $nk )
00310             {
00311                 if ( array_key_exists( $delOffset, $merged[$k] ) )
00312                 {
00313                     $merged[$k][$delOffset] = 'changed';
00314                 }
00315                 else
00316                 {
00317                     $merged[$k][$delOffset] = 'removed';
00318                 }
00319             }
00320             else
00321             {
00322                 $merged[$k] = array( $delOffset => 'removed' );
00323             }
00324             $k++;
00325             $delOffset++;
00326         }
00327 
00328         return $merged;
00329     }
00330 
00331 
00332     /*!
00333       \private
00334       This method will contruct a diff output for plain text.
00335     */
00336     function buildDiff( $oldArray, $newArray )
00337     {
00338         $substr = $this->substringMatrix( $oldArray, $newArray );
00339 
00340         $nOldWords = count( $oldArray );
00341         $nNewWords = count( $newArray );
00342 
00343         /*
00344         $tmp = $substr['lengthMatrix'];
00345         print( "<pre>" );
00346         $this->dumpMatrix( $tmp, $nOldWords, $nNewWords );
00347         print( "</pre>" );
00348         */
00349 
00350         $strings = $this->substrings( $substr, $oldArray, $newArray );
00351         $len = count( $strings );
00352 
00353         //Merge detected substrings
00354         $mergedStrings = array();
00355         foreach ( $strings as $sstring )
00356         {
00357             $mergedStrings = $mergedStrings + $sstring;
00358         }
00359         unset( $strings );
00360 
00361         //Check for changes in lead & inner words
00362         $differences = array();
00363         $offset = 0;
00364         $delOffset = 0;
00365 
00366         $oldOffset = -1;
00367 
00368         foreach ( $mergedStrings as $key => $wordArray )
00369         {
00370             if ( $oldOffset >= $wordArray['oldOffset'] )
00371             {
00372                 continue;
00373             }
00374 
00375             $oldOffset = $wordArray['oldOffset'];
00376 
00377             // Added words
00378             while ( $key > $offset )
00379             {
00380                 $differences[] = array( 'added' => $newArray[$offset],
00381                                         'status' => 2 );
00382                 $offset++;
00383             }
00384 
00385             // Removed words
00386             while ( $oldOffset > $delOffset )
00387             {
00388                 $differences[] = array( 'removed' => $oldArray[$delOffset],
00389                                         'status' => 1 );
00390                 $delOffset++;
00391             }
00392 
00393             //The default state - unchanged paragraph
00394             $differences[] = array( 'unchanged' => $newArray[$key],
00395                                     'status' => 0 );
00396             $delOffset++;
00397             $offset++;
00398         }
00399 
00400         // Appended words
00401         while ( $nNewWords > $offset )
00402         {
00403             $differences[] = array( 'added' => $newArray[$offset],
00404                                     'status' => 2 );
00405             $offset++;
00406         }
00407 
00408         // Words, removed at the paragraph end
00409         while ( $nOldWords > $delOffset )
00410         {
00411             $differences[] = array( 'removed' => $oldArray[$delOffset],
00412                                     'status' => 1 );
00413             $delOffset++;
00414         }
00415 
00416         $output = $this->postProcessDiff( $differences );
00417         return $output;
00418 
00419     }
00420 
00421     /*!
00422       \private
00423       This method will chain together similar changes, in order to create a more connected output.
00424     */
00425     function postProcessDiff( $diffArray )
00426     {
00427         $string = "";
00428         $diff = array();
00429         $item = current( $diffArray );
00430 
00431         $lastStatus = $item['status'];
00432 
00433         $mode = array( 0 => 'unchanged',
00434                        1 => 'removed',
00435                        2 => 'added' );
00436 
00437         while ( $item = current( $diffArray ) )
00438         {
00439             if ( $item['status'] != $lastStatus )
00440             {
00441                 $diff[] = array( $mode[$lastStatus] => $string,
00442                                  'status' => $lastStatus );
00443                 $lastStatus = $item['status'];
00444                 $string ="";
00445                 continue;
00446             }
00447 
00448             $string .= $item[$mode[$lastStatus]] . " ";
00449             next( $diffArray );
00450         }
00451         if ( strlen( $string ) > 0 )
00452         {
00453             $diff[] = array( $mode[$lastStatus] => $string,
00454                              'status' => $lastStatus );
00455         }
00456         return $diff;
00457     }
00458 
00459 
00460     /*!
00461       \private
00462       This method will detect discontinuous substrings in the matrix.
00463       \return Array of substrings
00464     */
00465     function substrings( $sub, $old, $new )
00466     {
00467         $matrix = $sub['lengthMatrix'];
00468         $val = $sub['maxLength'];
00469         $row = $sub['maxRow'];
00470         $col = $sub['maxCol'];
00471 
00472         if ( $val == 0 )
00473         {
00474             //No common substrings were found
00475             return array();
00476         }
00477 
00478         $rows = count( $old );
00479         $cols = count( $new );
00480 
00481         $lcsOffsets = $this->findLongestSubstringOffsets( $sub );
00482         $lcsPlacement = $this->substringPlacement( $lcsOffsets['newStart'], $lcsOffsets['newEnd'], $cols );
00483 
00484         $substringSet = array();
00485 
00486         $substringArray = $this->traceSubstring( $row, $col, $matrix, $val, $new );
00487         $substring = $substringArray['substring'];
00488         unset( $substringArray );
00489         $substringSet[] = array_reverse( $substring, true );
00490         $substring = array();
00491 
00492         //Get more text
00493         if ( $lcsPlacement['hasTextLeft'] )
00494         {
00495             $row = $lcsOffsets['oldStart'];
00496             $col = $lcsOffsets['newStart'];
00497 
00498             if ( $row != 0 )
00499             {
00500                 $row--;
00501             }
00502 
00503             if ( $col != 0 )
00504             {
00505                 $col--;
00506             }
00507 
00508             $done = false;
00509             $info = true;
00510             $prevRowUsed = -1;
00511 
00512             while ( !$done && $info )
00513             {
00514                 if ( $prevRowUsed == $row )
00515                 {
00516                     $done = true;
00517                     continue;
00518                 }
00519 
00520                 $info = $this->localSubstring( 'left', $row, $col, $rows, $cols, $matrix );
00521                 $prevRowUsed = $row;
00522                 if ( $info )
00523                 {
00524                     $substringArray = $this->traceSubstring( $info['remainRow'], $info['remainCol'], $matrix, $info['remainMax'], $new );
00525                     $substring = $substringArray['substring'];
00526                     array_unshift( $substringSet, array_reverse( $substring, true ) );
00527                     $substring = array();
00528 
00529                     if ( $info['remainCol'] == 0 || $substringArray['lastCol'] == 0 )
00530                     {
00531                         $done = true;
00532                     }
00533                     else
00534                     {
00535                         $row = $substringArray['lastRow'];
00536                         $col = $substringArray['lastCol'];
00537 
00538                         if ( $row != 0 )
00539                         {
00540                             $row--;
00541                         }
00542                         if ( $col != 0 )
00543                         {
00544                             $col--;
00545                         }
00546                     }
00547                     unset( $substringArray );
00548                 }
00549             }
00550         }
00551 
00552 
00553         if ( $lcsPlacement['hasTextRight'] )
00554         {
00555             //reset search location in matrix
00556             $row = $sub['maxRow'];
00557             $col = $sub['maxCol'];
00558 
00559             if ( $row != $rows-1 )
00560             {
00561                 $row++;
00562             }
00563 
00564             if ( $col != $cols-1 )
00565             {
00566                 $col++;
00567             }
00568 
00569             $done = false;
00570             $info = true;
00571             $prevRowUsed = -1;
00572 
00573             while( !$done && $info )
00574             {
00575                 if ( $prevRowUsed == $row )
00576                 {
00577                     $done = true;
00578                     continue;
00579                 }
00580                 $info = $this->localSubstring( 'right', $row, $col, $rows, $cols, $matrix );
00581                 $prevRowUsed = $row;
00582                 if ( $info )
00583                 {
00584                     $substringArray = $this->traceSubstring( $info['remainRow'], $info['remainCol'], $matrix, $info['remainMax'], $new );
00585                     $substring = $substringArray['substring'];
00586                     $substringSet[] = array_reverse( $substring, true );
00587                     $substring = array();
00588 
00589                     if ( $info['remainCol'] == $cols-1 || $substringArray['lastRow'] == $cols-1 )
00590                     {
00591                         $done = true;
00592                     }
00593                     else
00594                     {
00595                         $row = $info['remainRow'];
00596                         $col = $info['remainCol'];
00597 
00598                         if ( $row != $rows-1 )
00599                         {
00600                             $row++;
00601                         }
00602 
00603                         if ( $col != $cols-1 )
00604                         {
00605                             $col++;
00606                         }
00607                     }
00608                     unset( $substringArray );
00609                 }
00610             }
00611         }
00612         return $substringSet;
00613     }
00614 
00615     /*!
00616       \private
00617       This method checks  a patch of the length matrix for the longest substring.
00618       Depending on the \a $direction a sub matrix is searched for valid substrings.
00619     */
00620     function localSubstring( $direction, $row, $col, $rows, $cols, $matrix )
00621     {
00622         $colMax = 0;
00623         $prevColMax = 0;
00624         $colMaxRow = 0;
00625         $colMaxCol = 0;
00626 
00627         $remainMax = 0;
00628         $remainRow = 0;
00629         $remainCol = 0;
00630 
00631         switch( $direction )
00632         {
00633             case 'right':
00634             {
00635                 for ( $j = $col; $j < $cols; $j++ )
00636                 {
00637                     $startRow = $row;
00638                     $colMax = 0;
00639 
00640                     while ( $startRow < $rows )
00641                     {
00642                         $matVal = $matrix->get( $startRow, $j );
00643                         if ( $matVal > $colMax )
00644                         {
00645                             $colMax = $matVal;
00646                             $colMaxRow = $startRow;
00647                             $colMaxCol = $j;
00648                         }
00649                         $startRow++;
00650                     }
00651 
00652                     if ( $colMax > $remainMax )
00653                     {
00654                         //Set best candidate thus far
00655                         $remainMax = $colMax;
00656                         $remainRow = $colMaxRow;
00657                         $remainCol = $colMaxCol;
00658                     }
00659 
00660                     if ( ( $prevColMax > 1 ) &&  ( $colMax < $prevColMax ) )
00661                     {
00662                         break 2;
00663                     }
00664 
00665                     $prevColMax = $colMax;
00666                 }
00667             }break;
00668 
00669             case 'left':
00670             {
00671                 for ( $j = $col; $j >= 0; $j-- )
00672                 {
00673                     $startRow = $row;
00674                     $colMax = 0;
00675 
00676                     while ( $startRow >= 0 )
00677                     {
00678                         $matVal = $matrix->get( $startRow, $j );
00679                         if ( $matVal > $colMax )
00680                         {
00681                             $colMax = $matVal;
00682                             $colMaxRow = $startRow;
00683                             $colMaxCol = $j;
00684                         }
00685                         $startRow--;
00686                     }
00687                     if ( $colMax > $remainMax )
00688                     {
00689                         //Set best candidate thus far
00690                         $remainMax = $colMax;
00691                         $remainRow = $colMaxRow;
00692                         $remainCol = $colMaxCol;
00693                     }
00694                     if ( ( $prevColMax > 1 ) && ( $colMax < $prevColMax ) )
00695                     {
00696                         break 2;
00697                     }
00698 
00699                     $prevColMax = $colMax;
00700                 }
00701             }break;
00702         }
00703 
00704         if ( $remainMax > 0 )
00705         {
00706             return array( 'remainMax' => $remainMax,
00707                           'remainRow' => $remainRow,
00708                           'remainCol' => $remainCol );
00709         }
00710         else
00711         {
00712             return false;
00713         }
00714     }
00715 
00716     /*!
00717       \private
00718       This method will backtrace found substrings, it will start from the endpoint of the
00719       string and work towards its start.
00720       \return Substring with endpoing at \a $row, \a $col
00721     */
00722     function traceSubstring( $row, $col, $matrix, $val, $new )
00723     {
00724         $substring = array();
00725         while( $row >= 0 && $col >= 0 )
00726         {
00727             if ( $matrix->get( $row, $col ) == $val )
00728             {
00729                 $substring[$col] = array( 'word' => $new[$col],
00730                                           'oldOffset' => $row );
00731 
00732                 if ( $val > 1 )
00733                 {
00734                     $val--;
00735                 }
00736                 else if ( $val == 1 )
00737                 {
00738                     break;
00739                 }
00740 
00741                 $row--;
00742                 $col--;
00743                 if ( $row < 0 || $col < 0 )
00744                     break;
00745             }
00746         }
00747         return array( 'substring' => $substring,
00748                       'lastRow' => $row,
00749                       'lastCol' => $col );
00750     }
00751 
00752     /*!
00753       \private
00754       This method will return an array with boolean values indicating whether
00755       the specified offsets \a $startOffset and \a $endOffset have additional
00756       text to either the left or right side.
00757     */
00758     function substringPlacement( $startOffset, $endOffset, $totalStringLength )
00759     {
00760         $leftText = false;
00761         $rightText = false;
00762 
00763         if ( $startOffset > 0 )
00764             $leftText = true;
00765 
00766         if ( $endOffset < $totalStringLength-1 )
00767             $rightText = true;
00768 
00769         return array( 'hasTextLeft' => $leftText,
00770                       'hasTextRight' => $rightText );
00771     }
00772 
00773     /*!
00774       \private
00775       Helper method to matrices.
00776     */
00777     function dumpMatrix( $arr, $rows, $cols )
00778     {
00779         for ( $i = 0; $i < $rows; $i++ )
00780         {
00781             for ( $j = 0; $j < $cols; $j++ )
00782             {
00783                 print( $arr->get( $i, $j ) . " " );
00784                 if ( $j == $cols-1 )
00785                     print( "\n" );
00786             }
00787         }
00788     }
00789 
00790     /*!
00791       \private
00792       This method calculates the offsets in the old and new text for the
00793       longest common substring.
00794     */
00795     function findLongestSubstringOffsets( $varArray )
00796     {
00797         //The columns yields the offsets in the new text.
00798         //The rows gives us the offsets in the old text.
00799         $lengthMatrix = $varArray['lengthMatrix'];
00800         $max = $varArray['maxLength'];
00801         $len = $max;
00802         $maxRow = $varArray['maxRow'];
00803         $maxCol = $varArray['maxCol'];
00804         $newStart = 0;
00805         $newEnd = 0;
00806         $oldStart = 0;
00807         $oldEnd = 0;
00808 
00809         while ( $len > 0 && $maxRow >= 0 && $maxCol >= 0)
00810         {
00811             $len = $lengthMatrix->get( $maxRow, $maxCol );
00812 
00813             if ( $lengthMatrix->get( $maxRow, $maxCol ) == $max )
00814             {
00815                 $newEnd = $maxCol;
00816                 $oldEnd = $maxRow;
00817             }
00818 
00819             if ( $lengthMatrix->get( $maxRow, $maxCol ) == 1 )
00820             {
00821                 $newStart = $maxCol;
00822                 $oldStart = $maxRow;
00823             }
00824 
00825             $maxRow--;
00826             $maxCol--;
00827         }
00828         return array( 'newStart' => $newStart,
00829                       'newEnd' => $newEnd,
00830                       'oldStart' => $oldStart,
00831                       'oldEnd' => $oldEnd );
00832     }
00833 
00834     /*!
00835       \private
00836       This function find the longest common substrings in \a $old and \a $new
00837       It will build a matrix consisting of the length of detected substrings.
00838 
00839       The function will build a structure like the following:
00840       array = ( 'maxLength' =>
00841                 'maxRow' =>
00842                 'maxCol' =>
00843                 'lengthMatrix' => )
00844 
00845       \return Matrix containing the length of longest common substring string and where it is found in the substring length matrix.
00846     */
00847     function substringMatrix( $old, $new )
00848     {
00849         $maxLength = 0;
00850         $sizeOld = count( $old );
00851         $sizeNew =  count( $new );
00852         $matrix = new eZDiffMatrix( $sizeOld, $sizeNew );
00853 
00854         $maxC = 0;
00855         $maxR = 0;
00856 
00857         for ( $row = 0; $row < $sizeOld; $row++ )
00858         {
00859             for ( $col = 0; $col < $sizeNew; $col++ )
00860             {
00861                 if ( $old[$row] === $new[$col] )
00862                 {
00863                     if ( $row > 0 && $col > 0 )
00864                     {
00865                         $val = 1 + $matrix->get( $row-1, $col-1 );
00866                         $matrix->set( $row, $col, $val );
00867                     }
00868                     else if ( $row > 0 && $col == 0 )
00869                     {
00870                         $val = 1 + $matrix->get( $row-1, $col );
00871                         $matrix->set( $row, $col, $val );
00872                     }
00873                     else if ( $row == 0 && $col > 0 )
00874                     {
00875                         $val = 1 + $matrix->get( $row, $col-1 );
00876                         $matrix->set( $row, $col, $val );
00877                     }
00878                     else if ( $row == 0 && $col == 0 )
00879                     {
00880                         $matrix->set( $row, $col, 1 );
00881                     }
00882 
00883                     if ( $matrix->get( $row, $col ) > $maxLength )
00884                     {
00885                         $maxLength = $matrix->get( $row, $col );
00886                         $maxR = $row;
00887                         $maxC = $col;
00888                     }
00889                 }
00890                 else
00891                 {
00892                     $matrix->set( $row, $col, 0 );
00893                 }
00894             }
00895         }
00896         return array( 'maxLength' => $maxLength,
00897                       'maxRow' => $maxR,
00898                       'maxCol' => $maxC,
00899                       'lengthMatrix' => $matrix );
00900     }
00901 
00902     /*!
00903       \private
00904       Removes empty elements from array
00905       \return array without empty elements
00906     */
00907     function trimEmptyArrayElements( $array )
00908     {
00909         foreach( $array as $key => $value )
00910         {
00911             if ( empty( $value ) )
00912             {
00913                 unset( $array[$key] );
00914             }
00915         }
00916         //Calls array_merge to reset the array keys.
00917         return array_merge( $array );
00918     }
00919 }
00920 
00921 ?>