eZ Publish  [4.0]
updateniceurls.php
Go to the documentation of this file.
00001 #!/usr/bin/env php
00002 <?php
00003 //
00004 // Definition of Updateniceurls class
00005 //
00006 // Created on: <03-Apr-2003 16:05:43 sp>
00007 //
00008 // ## BEGIN COPYRIGHT, LICENSE AND WARRANTY NOTICE ##
00009 // SOFTWARE NAME: eZ Publish
00010 // SOFTWARE RELEASE: 4.0.x
00011 // COPYRIGHT NOTICE: Copyright (C) 1999-2008 eZ Systems AS
00012 // SOFTWARE LICENSE: GNU General Public License v2.0
00013 // NOTICE: >
00014 //   This program is free software; you can redistribute it and/or
00015 //   modify it under the terms of version 2.0  of the GNU General
00016 //   Public License as published by the Free Software Foundation.
00017 //
00018 //   This program is distributed in the hope that it will be useful,
00019 //   but WITHOUT ANY WARRANTY; without even the implied warranty of
00020 //   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00021 //   GNU General Public License for more details.
00022 //
00023 //   You should have received a copy of version 2.0 of the GNU General
00024 //   Public License along with this program; if not, write to the Free
00025 //   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
00026 //   MA 02110-1301, USA.
00027 //
00028 //
00029 // ## END COPYRIGHT, LICENSE AND WARRANTY NOTICE ##
00030 //
00031 
00032 /*! \file updateniceurls.php
00033 */
00034 
// The script may run for a long time on large databases, so the PHP
// execution time limit is disabled.
set_time_limit ( 0 );

// Classes are resolved through the autoloader, explicit includes are not needed.
require 'autoload.php';

// Help text shown for the script.
$scriptDescription = "eZ Publish url-alias imported and updater.\n\n" .
                     "Will import urls from the older (3.9) system into the new, controlled by the --import* options.\n" .
                     "Will also update the url-alias entries from the content object nodes in the system, controlled by the --update-nodes option.\n" .
                     "The default behaviour is to update urls for content object nodes only\n" .
                     "\n" .
                     "updateniceurls.php";

$scriptSettings = array( 'description' => $scriptDescription,
                         'use-session' => true,
                         'use-modules' => true,
                         'use-extensions' => true );

$cli = eZCLI::instance();
$script = eZScript::instance( $scriptSettings );

$script->startup();
00053 
// Option specification handed to eZScript::getOptions().
// NOTE(review): a trailing ':' appears to mark options that carry a value and
// '|' appears to declare an alias - confirm against eZScript::getOptions().
$optionConfig = "[db-host:][db-user:][db-password:][db-database:][db-type:|db-driver:][sql]" .
                "[no-import]" .
                "[import][import-nodes][import-aliases][import-redirections][import-wildcards]" .
                "[no-update-nodes][update-nodes]" .
                "[verify-data][interactive]" .
                "[backup-tables:]" .
                "[column-width:][fetch-limit:]";

// Help text for each option, keyed by option name.
$optionHelp = array( 'db-host' => "Database host",
                     'db-user' => "Database user",
                     'db-password' => "Database password",
                     'db-database' => "Database name",
                     'db-driver' => "Database driver",
                     'db-type' => "Database driver, alias for --db-driver",
                     'sql' => "Display sql queries",

                     'no-import' => "Disables all import routines. To enable specific ones use the --import-* options.",
                     'import' => "Enables all import routines.",
                     'import-nodes' => "Enables importing of urls from the old node data.",
                     'import-aliases' => "Enables importing of old urls (system and customized).",
                     'import-redirections' => "Enables importing of urls which redirects to the correct url (ie. history).",
                     'import-wildcards' => "Enables importing of urls which redirects to the correct url using wildcards (ie. history).",

                     'no-update-nodes' => "Disable updating of the urls of content object nodes.",
                     'update-nodes' => "Enable updating of the urls of content object nodes.",

                     'verify-data' => "Verify the database after new data has been inserted, this should only be used for debugging.",
                     'interactive' => "Enables interactive mode for --verify-data,\nthis will halt execution when database errors occurs and allow for manual inspection.",
                     'backup-tables' => "Performs a backup of the ezurlalias and ezurlalias_ml tables after each stage is done (import or update),\nthe backup tables will use the original name but with the suffix supplied to this option.\nNote: Use only for debugging and only on MySQL.",

                     'column-width' => "The approximate width of the output block, defaults to 72.",
                     'fetch-limit' => "The number of items to fetch in one go, increasing it may reduce\ntotal time but will also increase memory usage, defaults to 200.",
                     );

$options = $script->getOptions( $optionConfig, "", $optionHelp );
$script->initialize();
00088 
// Pull the connection related options out of the parsed option list.
// Every value falls back to false when the option is missing or empty.
$dbUser = false;
if ( $options['db-user'] )
    $dbUser = $options['db-user'];

$dbPassword = false;
if ( $options['db-password'] )
    $dbPassword = $options['db-password'];

$dbHost = false;
if ( isset( $options['db-host'] ) && $options['db-host'] )
    $dbHost = $options['db-host'];

$dbName = false;
if ( $options['db-database'] )
    $dbName = $options['db-database'];

$dbImpl = false;
if ( $options['db-driver'] )
    $dbImpl = $options['db-driver'];

$showSQL = (bool)$options['sql'];

$siteAccess = false;
if ( $options['siteaccess'] )
    $siteAccess = $options['siteaccess'];

// Report which siteaccess is going to be used, when one was requested.
if ( $siteAccess )
{
    changeSiteAccessSetting( $siteAccess );
}
00100 
/*!
 Tells the user whether the siteaccess \a $siteAccess exists, by checking
 for the directory settings/siteaccess/<name>.

 \note The function only prints a notice, it does not change any setting itself.
       Nothing is printed when the global \c $isQuiet is true.
*/
function changeSiteAccessSetting( $siteAccess )
{
    global $isQuiet;
    $cli = eZCLI::instance();

    $message = file_exists( 'settings/siteaccess/' . $siteAccess )
        ? "Using siteaccess $siteAccess for nice url update"
        : "Siteaccess $siteAccess does not exist, using default siteaccess";

    if ( $isQuiet )
        return;

    $cli->notice( $message );
}
00116 
00117 //include_once( 'lib/ezdb/classes/ezdb.php' );
00118 //include_once( 'kernel/classes/ezcontentobjecttreenode.php' );
00119 //include_once( 'kernel/classes/ezurlwildcard.php' );
00120 
// Start with the default database connection.
// The class name is spelled eZDB like everywhere else in this file: PHP itself
// treats class names case-insensitively, but an autoloader receives the name
// as written and may look it up in a case-sensitive class map.
$db = eZDB::instance();

// When any connection option was given on the command line a dedicated
// connection is created and installed as the global instance.
if ( $dbHost or $dbName or $dbUser or $dbImpl )
{
    $params = array();
    if ( $dbHost !== false )
        $params['server'] = $dbHost;
    if ( $dbUser !== false )
    {
        $params['user'] = $dbUser;
        // A user given without a password gets an empty password,
        // --db-password overrides it below.
        $params['password'] = '';
    }
    if ( $dbPassword !== false )
        $params['password'] = $dbPassword;
    if ( $dbName !== false )
        $params['database'] = $dbName;
    // NOTE(review): the third argument presumably forces a new instance - confirm against eZDB::instance().
    $db = eZDB::instance( $dbImpl, $params, true );
    eZDB::setInstance( $db );
}

$db->setIsSQLOutputEnabled( $showSQL );

eZContentLanguage::setCronjobMode( true );
00145 
// Number of rows fetched per chunk. The option arrives as a string from the
// command line, so it is validated as a number and converted to an integer
// before it is compared or used.
$fetchLimit = 200;
if ( $options['fetch-limit'] !== null )
{
    $requestedFetchLimit = $options['fetch-limit'];
    $fetchLimit = (int)$requestedFetchLimit;
    if ( !is_numeric( $requestedFetchLimit ) || $fetchLimit <= 0 )
    {
        $script->shutdown( 1, "The --fetch-limit must be 1 or higher, tried with $requestedFetchLimit" );
    }
}

// Layout of the progress output: the status characters of one line are
// followed by a percentage and a time estimate.
$percentLength = 6;
$timeLength = 12;
$columnWidth = 72;
if ( $options['column-width'] !== null )
{
    $requestedColumnWidth = $options['column-width'];
    $columnWidth = (int)$requestedColumnWidth;
    if ( !is_numeric( $requestedColumnWidth ) || $columnWidth <= 0 )
    {
        $script->shutdown( 1, "The --column-width must be 1 or higher, tried with $requestedColumnWidth" );
    }
}
// Number of status characters per line; never smaller than the room taken
// by the percentage and time columns plus one.
$maxColumn = max( $columnWidth - $percentLength - $timeLength, $percentLength + $timeLength + 1 );
$totalChangedNodes = 0;
$totalNodeCount = 0;
00170 
// --verify-data turns on the database check after each stored entry,
// --interactive makes that check halt on errors.
$performVerification = $options['verify-data'] ? true : false;
$interactive = $options['interactive'] ? true : false;

// --backup-tables carries the suffix used for the backup table names,
// both variables stay false when the option is not given.
$backupTables = ( $options['backup-tables'] !== null ) ? $options['backup-tables'] : false;
$backupTableSuffix = $backupTables;
00190 
// Defaults: no import routine is enabled, node urls are updated.
$importNodes = $importOldAlias = $importOldAliasRedirections = $importOldAliasWildcard = false;
$updateNodeAlias = true;

// --no-import switches every import routine off, it is evaluated first so the
// options below can turn routines back on.
if ( $options['no-import'] )
{
    $importNodes = $importOldAlias = $importOldAliasRedirections = $importOldAliasWildcard = false;
}

// --import switches every import routine on.
if ( $options['import'] )
{
    $importNodes = $importOldAlias = $importOldAliasRedirections = $importOldAliasWildcard = true;
}

// The specific --import-* options enable single routines.
if ( $options['import-nodes'] )
    $importNodes = true;
if ( $options['import-aliases'] )
    $importOldAlias = true;
if ( $options['import-redirections'] )
    $importOldAliasRedirections = true;
if ( $options['import-wildcards'] )
    $importOldAliasWildcard = true;

// --update-nodes is evaluated last and therefore wins over --no-update-nodes.
if ( $options['no-update-nodes'] )
    $updateNodeAlias = false;
if ( $options['update-nodes'] )
    $updateNodeAlias = true;
00242 
/*!
 Prints one progress character and, once the current output line is full,
 a summary with the percentage done, the estimated time left and the item count.

 \param $statusCharacter Character to print for the current item, or \c false to print none.
 \param $startTime       Start time of the stage as returned by microtime( true ).
 \param $currentCount    Number of items handled so far.
 \param $totalCount      Expected total number of items.
 \param $currentColumn   Number of characters already printed on the current line.
 \return array( new column, new item count ) to be passed to the next call.

 \note A total of zero and a count that has passed the total are handled:
       the estimate is never negative and no division by zero takes place.
*/
function displayProgress( $statusCharacter, $startTime, $currentCount, $totalCount, $currentColumn )
{
    global $maxColumn;
    global $cli;

    if ( $statusCharacter !== false )
        $cli->output( $statusCharacter, false );

    if ( $currentColumn > $maxColumn )
    {
        $elapsed = microtime( true ) - $startTime;
        // Average time spent per item so far
        $relTime = $currentCount > 0 ? $elapsed / $currentCount : 0.0;
        // The count may pass the expected total, never report a negative estimate
        $remaining = max( 0, $totalCount - $currentCount );
        $totalTime = $relTime * (float)$remaining;
        $ratio = $totalCount > 0 ? ( $currentCount * 100.0 ) / $totalCount : 100.0;
        $percent = number_format( $ratio, 2 );

        $timeLeft = formatTime( $totalTime );

        $items = $currentCount . '/' . $totalCount;

        $cli->output( " " . $percent . "% " . $timeLeft . ' ' . $items );

        $currentColumn = 0;
    }
    else
    {
        ++$currentColumn;
    }
    ++$currentCount;
    flush();
    return array( $currentColumn, $currentCount );
}
00274 
/*!
 Formats a duration in seconds as text, e.g. "1h 2m 3s".

 Hours and minutes are left out when they are zero, seconds are always included.

 \param $totalTime Duration in seconds, may be a float.
 \return The formatted duration.

 \note The duration is truncated to whole seconds before the modulo operations,
       the modulo operator only works on integers and an implicit conversion
       of a fractional float is deprecated in newer PHP versions.
*/
function formatTime( $totalTime )
{
    $wholeSeconds = (int)$totalTime;
    $timeSeconds = $wholeSeconds % 60;
    $timeMinutes = (int)( $wholeSeconds / 60 ) % 60;
    $timeHours = (int)( $wholeSeconds / 3600 );
    $timeLeftArray = array();
    if ( $timeHours > 0 )
        $timeLeftArray[] = $timeHours . "h";
    if ( $timeMinutes > 0 )
        $timeLeftArray[] = $timeMinutes . "m";
    $timeLeftArray[] = $timeSeconds . "s";
    return implode( " ", $timeLeftArray );
}
00288 
/*!
 Fetches the language mask of the content object which the node \a $nodeID belongs to.

 \param $nodeID ID of the node, it is cast to an integer before it is placed in the query.
 \return The \c language_mask value of the first matching row, or \c false if no row matched.
*/
function fetchMaskByNodeID( $nodeID )
{
    $query = "SELECT language_mask FROM ezcontentobject, ezcontentobject_tree
            WHERE ezcontentobject.id = ezcontentobject_tree.contentobject_id
            AND   ezcontentobject_tree.node_id = " . (int)$nodeID;
    $result = eZDB::instance()->arrayQuery( $query );
    return count( $result ) > 0 ? $result[0]['language_mask'] : false;
}
00302 
/*!
 Checks whether bit 0 is set in the language mask of the object
 which the node \a $nodeID belongs to.

 \return \c true if the bit is set, \c false otherwise (also when no mask was found).
*/
function isAlwaysAvailable( $nodeID )
{
    $languageMask = fetchMaskByNodeID( $nodeID );
    return ( $languageMask & 1 ) > 0;
}
00310 
/*!
 Turns an old destination url into an action string.

 A destination of the form content/view/full/<number> becomes "eznode:<number>",
 everything else becomes "module:<destination>".

 \return array( action, always available flag ), the flag is looked up for nodes
         and is \c false for module actions.
*/
function decodeAction( $destination )
{
    if ( !preg_match( "#^content/view/full/([0-9]+)$#", $destination, $matches ) )
    {
        return array( 'module:' . $destination, false );
    }

    $nodeID = $matches[1];
    return array( 'eznode:' . $nodeID, isAlwaysAvailable( $nodeID ) );
}
00326 
/*!
 Extracts the node ID from a destination of the form content/view/full/<number>.

 \return The node ID as an integer, or \c null if the destination has another form.
*/
function decodeNodeID( $destination )
{
    $isNodeView = preg_match( "#^content/view/full/([0-9]+)$#", $destination, $matches );
    return $isNodeView ? (int)$matches[1] : null;
}
00335 
/*!
 Appends the message \a $msg, prefixed with a timestamp, to the file
 urlalias_error.log in the current working directory.

 Nothing is written when the file cannot be opened.

 \note The timestamp is created with date(), strftime() is deprecated in
       newer PHP versions. The month name is therefore always the English
       abbreviation, independent of the locale.
*/
function logError( $msg )
{
    $logFile = fopen( 'urlalias_error.log', "a" );
    if ( $logFile )
    {
        $time = date( "M d Y H:i:s" );
        $logMessage = "[ " . $time . " ] $msg\n";
        fwrite( $logFile, $logMessage );
        fclose( $logFile );
    }
}
00347 
/*!
 Appends a timestamped description of the call \a $func( \a $args ) to the
 file urlalias_store.log in the current working directory.

 The entry is only written when the global option list has a 'debug' entry,
 and nothing is written when the file cannot be opened.

 \param $res  Result of the call, currently not used.
 \param $func Name of the function which was called.
 \param $args Arguments the function was called with.

 \note The timestamp is created with date(), strftime() is deprecated in
       newer PHP versions.
*/
function logStore( $res, $func, $args )
{
    global $options;
    if ( !isset( $options['debug'] ) )
        return;

    $logFile = fopen( 'urlalias_store.log', "a" );
    if ( $logFile )
    {
        $time = date( "M d Y H:i:s" );
        $logMessage = "[ " . $time . " ] " . calltostring( $func, $args ) . "\n";
        fwrite( $logFile, $logMessage );
        fclose( $logFile );
    }
}
00363 
/*!
 Empties the log file \a $file so a new run starts with a clean log.

 A non-empty file is first copied to an archive named after the file with its
 modification time (YmdHis) appended. Files which do not exist are left alone.

 \note When the archive copy fails an error is reported and the log is kept
       as it is, instead of being emptied without a backup.
 \note The handle used to truncate the file is closed again.
*/
function resetLogFile( $file )
{
    global $cli;
    if ( !file_exists( $file ) )
        return;

    $s = stat( $file );
    if ( $s['size'] > 0 )
    {
        // date() gives the same digits as the deprecated strftime( "%Y%m%d%H%M%S" )
        $archive = $file . "." . date( "YmdHis", $s['mtime'] );
        if ( !copy( $file, $archive ) )
        {
            $cli->error( "Failed to archive log file $file to $archive, keeping the existing log" );
            return;
        }
        $cli->output( "Archived log file $file to $archive" );
    }

    // Opening in "w" mode truncates the file
    $handle = fopen( $file, "w" );
    if ( $handle )
        fclose( $handle );
}
00379 
/*!
 Resets the error log (urlalias_error.log) by handing it to resetLogFile().
*/
function resetErrorLog()
{
    $errorLogName = "urlalias_error.log";
    resetLogFile( $errorLogName );
}
00384 
/*!
 Resets the storage log (urlalias_store.log) by handing it to resetLogFile().
*/
function resetStorageLog()
{
    $storageLogName = "urlalias_store.log";
    resetLogFile( $storageLogName );
}
00389 
/*!
 Counts the rows in ezurlalias which are not yet imported and are neither
 wildcards nor forwarding entries.

 \return The \c count value reported by the database.
*/
function fetchHistoricURLCount()
{
    $query = 'SELECT count(*) AS count FROM ezurlalias
            WHERE is_imported = 0 AND is_wildcard = 0 AND forward_to_id = 0';
    $result = eZDB::instance()->arrayQuery( $query );
    $firstRow = $result[0];
    return $firstRow['count'];
}
00398 
/*!
 Counts the rows in ezurlalias which are not yet imported, are not wildcards
 and forward to another entry (forward_to_id is set).

 \return The \c count value reported by the database.
*/
function fetchHistoricRedirectionCount()
{
    $query = 'SELECT count(*) AS count FROM ezurlalias
            WHERE is_imported = 0 AND is_wildcard = 0 AND forward_to_id != 0';
    $result = eZDB::instance()->arrayQuery( $query );
    $firstRow = $result[0];
    return $firstRow['count'];
}
00407 
/*!
 Counts the wildcard rows in ezurlalias which are not yet imported.

 \return The \c count value reported by the database.
*/
function fetchHistoricWildcardCount()
{
    $query = 'SELECT count(*) AS count FROM ezurlalias
            WHERE is_imported = 0 AND is_wildcard != 0';
    $result = eZDB::instance()->arrayQuery( $query );
    $firstRow = $result[0];
    return $firstRow['count'];
}
00416 
/*!
 Fetches one chunk of not yet imported standard urls from ezurlalias.

 \param $offset     Offset of the first row to fetch.
 \param $fetchLimit Maximum number of rows to fetch.
 \return array( rows, offset for the next chunk ).
*/
function fetchHistoricURLChunk( $offset, $fetchLimit )
{
    $query = 'SELECT id, source_url, destination_url, is_internal FROM ezurlalias
            WHERE is_imported = 0 AND is_wildcard = 0 AND forward_to_id = 0';
    $window = array( 'offset' => $offset,
                     'limit' => $fetchLimit );
    $chunk = eZDB::instance()->arrayQuery( $query, $window );
    $nextOffset = $offset + count( $chunk );
    return array( $chunk, $nextOffset );
}
00427 
/*!
 Fetches one chunk of not yet imported forwarding urls from ezurlalias.

 \param $offset     Offset of the first row to fetch.
 \param $fetchLimit Maximum number of rows to fetch.
 \return array( rows, offset for the next chunk ).
*/
function fetchHistoricRedirectionChunk( $offset, $fetchLimit )
{
    $query = 'SELECT id, forward_to_id, source_url, destination_url FROM ezurlalias
            WHERE is_imported = 0 AND is_wildcard = 0 AND forward_to_id != 0';
    $window = array( 'offset' => $offset,
                     'limit' => $fetchLimit );
    $chunk = eZDB::instance()->arrayQuery( $query, $window );
    $nextOffset = $offset + count( $chunk );
    return array( $chunk, $nextOffset );
}
00438 
/*!
 Fetches one chunk of not yet imported wildcard urls from ezurlalias.

 \param $offset     Offset of the first row to fetch.
 \param $fetchLimit Maximum number of rows to fetch.
 \return array( rows, offset for the next chunk ).
*/
function fetchHistoricWildcardChunk( $offset, $fetchLimit )
{
    $query = 'SELECT id, is_wildcard, is_internal, source_url, destination_url
            FROM ezurlalias WHERE is_imported = 0 AND is_wildcard != 0';
    $window = array( 'offset' => $offset,
                     'limit' => $fetchLimit );
    $chunk = eZDB::instance()->arrayQuery( $query, $window );
    $nextOffset = $offset + count( $chunk );
    return array( $chunk, $nextOffset );
}
00449 
/*!
 Fetches the path identification string of the node \a $nodeID.

 \param $nodeID ID of the node. It is cast to an integer before it is placed
                in the query, the same way fetchMaskByNodeID() does it, so
                arbitrary text can never become part of the SQL.
 \return The \c path_identification_string of the node, or \c null if the node was not found.
*/
function fetchPathIdentificationString( $nodeID )
{
    $db = eZDB::instance();
    $sql = 'SELECT path_identification_string
            FROM ezcontentobject_tree WHERE node_id = ' . (int)$nodeID;
    $rows = $db->arrayQuery( $sql );
    if ( count( $rows ) > 0 )
        return $rows[0]['path_identification_string'];
    return null;
}
00460 
/*!
 Counts the content objects which have status 1.

 \return The \c count value reported by the database, or 0 if the query gave no rows.
*/
function fetchPathIdentificationStringCount()
{
    $query = 'SELECT count(*) AS count
            FROM ezcontentobject WHERE ezcontentobject.status = 1';
    $result = eZDB::instance()->arrayQuery( $query );
    return count( $result ) > 0 ? $result[0]['count'] : 0;
}
00471 
/*!
 Fetches the node data for one chunk of content objects with status 1.

 The object IDs of the chunk are fetched first, using \a $offset and \a $fetchLimit,
 then the path identification string, node ID and language mask of all nodes
 belonging to these objects are fetched.

 \return array( node rows, offset for the next chunk ), or \c false when no
         more objects were found. The offset advances by the number of
         objects, not by the number of node rows.
*/
function fetchPathIdentificationStringChunk( $offset, $fetchLimit )
{
    $db = eZDB::instance();

    $objectQuery = 'SELECT id
            FROM ezcontentobject WHERE ezcontentobject.status = 1';
    $objectRows = $db->arrayQuery( $objectQuery,
                                   array( 'offset' => $offset,
                                          'limit' => $fetchLimit ) );
    if ( count( $objectRows ) == 0 )
        return false;

    $objectCondition = createURLListCondition( $objectRows, 'contentobject_id', 'id' );
    $nodeQuery = 'SELECT path_identification_string, node_id, language_mask
            FROM ezcontentobject_tree, ezcontentobject WHERE contentobject_id = id AND (' . $objectCondition . ')';
    $nodeRows = $db->arrayQuery( $nodeQuery );

    return array( $nodeRows, $offset + count( $objectRows ) );
}
00488 
/*!
 Builds an SQL condition which matches the IDs found in \a $rows.

 The IDs are sorted and runs of consecutive IDs are turned into
 "(field BETWEEN a AND b)" expressions. IDs which are not part of a run are
 collected and handed to eZDB::generateSQLINStatement(). All parts are joined with OR.

 \param $rows     List of rows (arrays) to take the IDs from.
 \param $sqlField Name of the SQL field used in the condition.
 \param $fieldKey Key in each row which holds the ID, the value is cast to an integer.
 \return The condition string, or \c false if \a $rows is empty.
*/
function createURLListCondition( $rows, $sqlField = 'id', $fieldKey = 'id' )
{
    if ( count( $rows ) == 0 )
        return false;

    $sortedIDs = array();
    foreach ( $rows as $row )
    {
        $sortedIDs[] = (int)$row[$fieldKey];
    }
    sort( $sortedIDs );

    $ranges = array();
    $looseIDs = array();
    $total = count( $sortedIDs );
    $runStart = $sortedIDs[0];
    for ( $i = 0; $i < $total; ++$i )
    {
        $current = $sortedIDs[$i];
        $hasNext = ( $i + 1 < $total );
        // The run goes on as long as the next ID is the direct successor
        if ( $hasNext && $sortedIDs[$i + 1] == $current + 1 )
            continue;

        // The run ends here, store it as a range or as a single ID
        if ( $runStart != $current )
            $ranges[] = "({$sqlField} BETWEEN $runStart AND $current)";
        else
            $looseIDs[] = $current;

        if ( $hasNext )
            $runStart = $sortedIDs[$i + 1];
    }

    $condition = join( " OR ", $ranges );
    if ( count( $looseIDs ) > 0 )
    {
        if ( $condition != "" )
            $condition .= " OR ";
        $condition .= eZDB::instance()->generateSQLINStatement( $looseIDs, $sqlField );
    }
    return $condition;
}
00544 
/*!
 Deletes the rows from ezurlalias whose IDs are found in \a $rows.

 Nothing is done when \a $rows is empty.
*/
function removeURLList( $rows )
{
    if ( count( $rows ) == 0 )
        return;

    $condition = createURLListCondition( $rows );
    eZDB::instance()->query( "DELETE FROM ezurlalias WHERE $condition" );
}
00554 
/*!
 Sets is_imported to 1 for the rows in ezurlalias whose IDs are found in \a $rows.

 Nothing is done when \a $rows is empty.
*/
function markAsImported( $rows )
{
    if ( count( $rows ) == 0 )
        return;

    $condition = createURLListCondition( $rows );
    eZDB::instance()->query( "UPDATE ezurlalias SET is_imported = 1 WHERE $condition" );
}
00564 
/*!
 Creates a readable description of a function call for the log files.

 \param $func Name of the function.
 \param $args Array with the arguments, each one is rendered with var_export().
 \return "func(arg1, arg2, ...)", or \a $func unchanged when \a $args is not an array.
*/
function calltostring( $func, $args )
{
    if ( !is_array( $args ) )
        return $func;

    $rendered = array();
    foreach ( $args as $value )
    {
        $rendered[] = var_export( $value, true );
    }
    return $func . "(" . join( ", ", $rendered ) . ")";
}
00578 
/*!
 Writes an entry to the error log for a failed attempt to store a url-alias path.

 \param $res  Result of the store call. Callers also pass a result which is
              not an array (e.g. \c false), in that case the status in the
              message is left empty instead of reading an offset of a non-array.
 \param $func Name of the function which was called.
 \param $args Arguments the function was called with.
*/
function logStoreError( $res, $func, $args )
{
    $hasResult = is_array( $res );

    $status = '';
    if ( $hasResult && isset( $res['status'] ) )
        $status = $res['status'];

    $errmsg = "Failed (status: {$status}) to store the url-alias path when executing " . calltostring( $func, $args );
    if ( $hasResult && isset( $res['error_message'] ) )
        $errmsg .= ", error: " . $res['error_message'];
    logError( $errmsg );
}
00586 
/*!
 Verifies the url-alias data after the url \a $url with ID \a $id was imported.

 \param $result Progress character, passed on by reference to verifyDataInternal().
 \return The return value of verifyDataInternal().
*/
function verifyData( &$result, $url, $id )
{
    $context = "Importing the URL " . var_export( $url, true ) . " with ID $id";
    return verifyDataInternal( $result, $context );
}
00591 
/*!
 Verifies the url-alias data after the node \a $node was updated.

 \param $result Progress character, passed on by reference to verifyDataInternal().
 \param $node   Object whose \c node_id attribute is used in the error text.
 \return The return value of verifyDataInternal().
*/
function verifyNodeData( &$result, $node )
{
    $context = "Updating the node " . $node->attribute( 'node_id' );
    return verifyDataInternal( $result, $context );
}
00596 
/*!
 Checks ezurlalias_ml for elements whose parent does not exist.

 The check only runs when the global \c $performVerification is true and the
 database is MySQL, on other databases an error is printed and verification
 is switched off for the rest of the run.

 When a broken element is found the problem is logged and \a $result is set
 to "X". With the global \c $interactive set the error is also printed and
 the script waits for enter on STDIN.

 \param $result Progress character, changed to "X" on failure.
 \param $error  Start of the error text, describes what was done before the check.
*/
function verifyDataInternal( &$result, $error )
{
    global $interactive, $performVerification;
    global $cli;
    if ( !$performVerification )
        return;

    $db = eZDB::instance();
    if ( $db->databaseName() != 'mysql' )
    {
        // We only support MySQL for now
        $cli->error( "Can only perform verification on a MySQL database." );
        $performVerification = false;
        return;
    }

    $orphans = $db->arrayQuery( "SELECT a1.*, a2.link FROM ezurlalias_ml a1 LEFT JOIN ezurlalias_ml a2 ON a1.parent = a2.id WHERE a1.parent != 0 HAVING a2.link is null" );
    if ( count( $orphans ) == 0 )
        return;

    // Only the first broken element is reported
    $orphan = $orphans[0];
    $error .= " caused a URL alias element ({$orphan['text']} with ID {$orphan['id']}) to have an parent ID ({$orphan['parent']}) to a non-existing element.";
    logError( $error );
    $result = "X";

    if ( !$interactive )
        return;

    $cli->error( $error );
    echo "Execution halted, press enter to continue: ";
    fgets(STDIN);
}
00630 
/*!
 Copies the tables ezurlalias and ezurlalias_ml to backup tables.

 The backup tables are named <table><suffix>_<stage>, the suffix is taken
 from the global \c $backupTableSuffix. An existing backup table is dropped first.
 Nothing is done unless the global \c $backupTables is set and the database is MySQL.

 \param $stage Name of the stage which was just completed, it becomes part of the table name.
*/
function backupTables( $stage )
{
    global $backupTables, $backupTableSuffix, $cli;
    if ( !$backupTables )
        return;

    $db = eZDB::instance();
    // We only support MySQL for now
    if ( $db->databaseName() != 'mysql' )
        return;

    $sourceTables = array( 'ezurlalias', 'ezurlalias_ml' );
    foreach ( $sourceTables as $table )
    {
        $newTable = "{$table}{$backupTableSuffix}_{$stage}";
        $cli->output( "Backing up table $table to $newTable" );

        $statements = array( "DROP TABLE IF EXISTS $newTable",
                             "CREATE TABLE $newTable LIKE $table",
                             "INSERT INTO $newTable SELECT * FROM $table" );
        foreach ( $statements as $statement )
        {
            $db->query( $statement );
        }
    }
}
00650 
00651 
$cli->notice( "Note: any errors encountered will be logged to urlalias_error.log" );
$cli->notice( "Using fetch limit: " . $fetchLimit );

// Start the run with empty log files, existing ones are archived.
resetErrorLog();
resetStorageLog();

$globalStartTime = microtime( true );

// Move old historical elements to new table.
// The number of old urls is only looked up when an import routine is enabled.
$urlCount = 0;
$importRequested = ( $importNodes || $importOldAlias || $importOldAliasRedirections || $importOldAliasWildcard );
if ( $importRequested )
{
    $rows = $db->arrayQuery( 'SELECT count(*) AS count FROM ezurlalias' );
    $countRow = $rows[0];
    $urlCount = $countRow['count'];
}
00667 if ( $urlCount > 0 )
00668 {
00669     if ( $importNodes )
00670     {
00671         $cli->output( "Importing old node urls" );
00672 
00673         // First move standard urls
00674         $urlCount = fetchPathIdentificationStringCount();
00675         $cli->output( "Importing {$urlCount} " . $cli->stylize( 'emphasize', "node urls" ) );
00676         $column = $counter = $offset = 0;
00677         $urlImportStartTime = microtime( true );
00678         // First import from ezcontentobject_tree to get correct urls
00679         do
00680         {
00681             list( $rows, $offset ) = fetchPathIdentificationStringChunk( $offset, $fetchLimit );
00682             if ( !is_array( $rows ) )
00683             {
00684                 break;
00685             }
00686             $count = count( $rows );
00687             foreach ( $rows as $row )
00688             {
00689                 $nodeID = (int)$row['node_id'];
00690                 if ( $nodeID == 1 )
00691                     continue; // Skip the root node
00692                 $pathIdentificationString = $row['path_identification_string'];
00693                 $pathIdentificationString = eZURLAliasML::sanitizeURL( $pathIdentificationString, true );
00694                 $languageMask = $row['language_mask'];
00695                 $alwaysAvailable = $languageMask & 1;
00696                 $action = 'eznode:' . $nodeID;
00697                 $aliases = eZURLAliasML::fetchByPath( $pathIdentificationString );
00698                 if ( $aliases && $aliases[0]->attribute( 'action' ) != 'nop:' )
00699                 {
00700                     // It is already present, skip it
00701                     list( $column, $counter ) = displayProgress( 's', $urlImportStartTime, $counter, $urlCount, $column );
00702                     continue;
00703                 }
00704                 $res = eZURLAliasML::storePath( $pathIdentificationString, $action,
00705                                                 false, false, $alwaysAvailable, false,
00706                                                 false );
00707                 if ( !$res || $res['status'] !== true )
00708                 {
00709                     logStoreError( $res, "eZURLAliasML::storePath", array( $pathIdentificationString, $action, false, false, $alwaysAvailable, false, false ) );
00710                     list( $column, $counter ) = displayProgress( 'E', $urlImportStartTime, $counter, $urlCount, $column );
00711                     continue;
00712                 }
00713                 logStore( $res, "eZURLAliasML::storePath", array( $pathIdentificationString, $action, false, false, $alwaysAvailable, false, false ) );
00714                 list( $column, $counter ) = displayProgress( '.', $urlImportStartTime, $counter, $urlCount, $column );
00715             }
00716         } while ( $count > 0 );
00717         flush();
00718         if ( $column > 0 )
00719             $cli->output();
00720         backupTables( 'impnode' );
00721     }
00722 
00723     if ( $importOldAlias )
00724     {
00725         $cli->output( "Importing old url aliases" );
00726 
00727         // First move standard urls
00728         $urlCount = fetchHistoricURLCount();
00729         $cli->output( "Importing {$urlCount} " . $cli->stylize( 'emphasize', "standard urls" ) );
00730         $column = $counter = $offset = 0;
00731         $urlImportStartTime = microtime( true );
00732         // Then go over ezurlalias and make links to the real urls
00733         // Also import custom urls (non-node)
00734         do
00735         {
00736             list( $rows, $offset ) = fetchHistoricURLChunk( 0/*$offset*/, $fetchLimit );
00737             if ( !is_array( $rows ) )
00738             {
00739                 break;
00740             }
00741             $count = count( $rows );
00742             foreach ( $rows as $row )
00743             {
00744                 $source = $row['source_url'];
00745                 $linkID = false;
00746                 $source = eZURLAliasML::sanitizeURL( $source, true );
00747                 $destination = $row['destination_url'];
00748                 $aliasRedirects = true;
00749 
00750                 list( $action, $alwaysAvailable ) = decodeAction( $destination );
00751                 list( $actionType, $actionValue ) = explode( ":", $action, 2 );
00752                 $aliases = eZURLAliasML::fetchByAction( $actionType, $actionValue );
00753 
00754                 if ( $aliases && $actionType == 'eznode' )
00755                 {
00756                     // This is a user-entered URL so lets make it an alias of the found dupe.
00757                     $linkID = (int)$aliases[0]->attribute( 'id' );
00758                 }
00759                 else if ( $actionType == 'eznode' )
00760                 {
00761                     $query = "SELECT * FROM ezcontentobject_tree, ezcontentobject WHERE ezcontentobject_tree.contentobject_id = ezcontentobject.id AND ezcontentobject_tree.node_id = " . (int)$actionValue;
00762                     $tmprows = $db->arrayQuery( $query );
00763                     if ( count( $tmprows ) == 0 )
00764                     {
00765                         logError( "Found the alias " . var_export( $source, true ) . " with ID {$row['id']} which points to " . var_export( $action, true ) . " but that content-object/node does not exist in the database" );
00766                         list( $column, $counter ) = displayProgress( 's', $urlImportStartTime, $counter, $urlCount, $column );
00767                         continue;
00768                     }
00769                     if ( $tmprows[0]['status'] != 1 )
00770                     {
00771                         logError( "Found the alias " . var_export( $source, true ) . " with ID {$row['id']} which points to " . var_export( $action, true ) . " but that content-object/node is not currently published (status is {$tmprows[0]['status']})" );
00772                         list( $column, $counter ) = displayProgress( 's', $urlImportStartTime, $counter, $urlCount, $column );
00773                         continue;
00774                     }
00775                     $linkID = false;
00776                 }
00777                 else if ( $actionType == 'module' )
00778                 {
00779                     $linkID = true;
00780 
00781                     // Links that pointed to modules in the old system does not
00782                     // redirect. Make sure they won't redirect in the new system either.
00783                     $aliasRedirects = false;
00784                 }
00785 
00786                 $aliases = eZURLAliasML::fetchByPath( $source );
00787                 if ( $aliases )
00788                 {
00789                     if ( $aliases[0]->attribute( 'action' ) != $action )
00790                     {
00791                         logError( "Found the alias " . var_export( $source, true ) . " with ID {$row['id']} which points to " . var_export( $action, true ) . " but that URL already exists, however the existing URL has the action " . var_export( $aliases[0]->attribute( 'action' ), true ) );
00792                         list( $column, $counter ) = displayProgress( 'E', $urlImportStartTime, $counter, $urlCount, $column );
00793                         continue;
00794                     }
00795                     // The path already exists, do not import
00796                     list( $column, $counter ) = displayProgress( 's', $urlImportStartTime, $counter, $urlCount, $column );
00797                     continue;
00798                 }
00799                 $res = eZURLAliasML::storePath( $source, $action,
00800                                                 false, $linkID, $alwaysAvailable, false,
00801                                                 false, false, true, $aliasRedirects );
00802                 if ( !$res || $res['status'] !== true )
00803                 {
00804                     logStoreError( $res, "eZURLAliasML::storePath", array( $source, $action, false, $linkID, $alwaysAvailable, false, false, false, true, $aliasRedirects ) );
00805                     list( $column, $counter ) = displayProgress( 'E', $urlImportStartTime, $counter, $urlCount, $column );
00806                     continue;
00807                 }
00808                 logStore( $res, "eZURLAliasML::storePath", array( $source, $action, false, $linkID, $alwaysAvailable, false, false, false, true, $aliasRedirects ) );
00809                 $result = '.';
00810                 verifyData( $result, $source, $row['id'] );
00811                 list( $column, $counter ) = displayProgress( $result, $urlImportStartTime, $counter, $urlCount, $column );
00812             }
00813             markAsImported( $rows );
00814         } while ( $count > 0 );
00815         flush();
00816         if ( $column > 0 )
00817             $cli->output();
00818         backupTables( 'impalias' );
00819     }
00820 
00821     if ( $importOldAliasRedirections )
00822     {
00823         // Then redirect urls
              // Each old redirection row is resolved to the ID of an existing entry
              // in the new url-alias table ($linkID) and then stored through
              // eZURLAliasML::storePath() as a link to that entry. Rows that cannot
              // be resolved are logged with logError() and skipped.
00824         $urlCount = fetchHistoricRedirectionCount();
00825         $cli->output( "Importing {$urlCount} " . $cli->stylize( 'emphasize', "redirections" ) );
00826         $column = $counter = $offset = 0;
00827         $urlImportStartTime = microtime( true );
00828         do
00829         {
                  // The chunk is always requested from offset 0 and the returned $offset
                  // is not read again in this loop.
                  // NOTE(review): presumably markAsImported() keeps already processed
                  // rows out of the next chunk - confirm against fetchHistoricRedirectionChunk().
00830             list( $rows, $offset ) = fetchHistoricRedirectionChunk( 0, $fetchLimit );
00831             if ( !is_array( $rows ) )
00832             {
                      // A non-array result ends the import loop.
00833                 break;
00834             }
00835             $count = count( $rows );
00836             foreach ( $rows as $key => $row )
00837             {
                      // Sanitized copy of the old source URL; below it is only used in log messages,
                      // the path that gets stored is the raw $row['source_url'].
00838                 $forwardFromURL = $row['source_url'];
00839                 $forwardFromURL = eZURLAliasML::sanitizeURL( $forwardFromURL, true );
00840                 $forwardToID = (int)$row['forward_to_id'];
                      // Both stay false until a lookup below succeeds.
00841                 $redirectedSource = false;
00842                 $linkID = false;
00843                 list( $action, $alwaysAvailable ) = decodeAction( $row['destination_url'] );
00844                 list( $actionType, $actionValue ) = explode( ":", $action, 2 );
00845
                      // Step 1: look up the old entry this redirection forwards to.
00846                 $rows2 = $db->arrayQuery( "SELECT source_url FROM ezurlalias WHERE id = $forwardToID" );
00847                 if ( count( $rows2 ) != 0 )
00848                 {
00849                     $redirectedSource = $rows2[0]['source_url'];
00850                     $redirectedSource = eZURLAliasML::sanitizeURL( $redirectedSource, true );
00851                 }
00852                 if ( $redirectedSource === false )
00853                 {
00854                     // Forwarded item does not exist, try to find the action in the ml table
00855                     $aliases = eZURLAliasML::fetchByAction( $actionType, $actionValue );
00856                     if ( $aliases )
00857                     {
00858                         $linkID = (int)$aliases[0]->attribute( 'id' );
00859                     }
00860                 }
00861                 if ( $redirectedSource === false and $linkID === false )
00862                 {
00863                     // Did not find in ml table either, try to find one with same destination in old table
00864                     $rows2 = $db->arrayQuery( "SELECT source_url FROM ezurlalias WHERE destination_url = '" . $db->escapeString( $row['destination_url'] ) . "' AND forward_to_id = 0" );
00865                     if ( count( $rows2 ) == 0 )
00866                     {
00867                         // Did not find forwarded item, mark as error
00868                         logError( "Could not find urlalias entry with ID $forwardToID which was referenced by '{$forwardFromURL}' with ID " . $row['id'] );
00869                         list( $column, $counter ) = displayProgress( 'F', $urlImportStartTime, $counter, $urlCount, $column );
00870                         continue;
00871                     }
                          // NOTE(review): unlike the lookup by ID above, this source_url is not
                          // passed through sanitizeURL() - confirm that this is intended.
00872                     $redirectedSource = $rows2[0]['source_url'];
00873                 }
                      // Step 2: if no link target was found through the action, resolve the
                      // redirected source path to an entry in the new table.
00874                 if ( $linkID === false )
00875                 {
00876                     $elements = eZURLAliasML::fetchByPath( $redirectedSource );
00877                     if ( count( $elements ) != 0 )
00878                     {
00879                         $linkID = (int)$elements[0]->attribute( 'id' );
00880                     }
00881                 }
00882                 if ( $linkID === false )
00883                 {
00884                     // Redirected source does not exist, try to find the action in the ml table
00885                     $aliases = eZURLAliasML::fetchByAction( $actionType, $actionValue );
00886                     if ( $aliases )
00887                     {
00888                         $linkID = (int)$aliases[0]->attribute( 'id' );
00889                     }
00890                 }
00891                 if ( $linkID === false )
00892                 {
00893                     // Referenced url does not exist
00894                     logError( "The referenced path '$redirectedSource' can not be found among the new URL alias entries, old url entry is '{$forwardFromURL}' with ID " . $row['id'] );
00895                     list( $column, $counter ) = displayProgress( 'E', $urlImportStartTime, $counter, $urlCount, $column );
00896                     continue;
00897                 }
00898
00899                 // Fetch the ID of the element to redirect to.
                      // NOTE(review): decodeAction() was already called with the same
                      // destination_url at the top of this loop body.
00900                 $source      = $row['source_url'];
00901                 $destination = $row['destination_url'];
00902                 list( $action, $alwaysAvailable ) = decodeAction( $destination );
00903                 $res = eZURLAliasML::storePath( $source, $action,
00904                                                 false, $linkID, $alwaysAvailable, false,
00905                                                 true, true );
                      // Anything other than a result whose status is exactly true is treated as a failed store.
00906                 if ( !$res || $res['status'] !== true )
00907                 {
00908                     logStoreError( $res, "eZURLAliasML::storePath", array( $source, $action, false, $linkID, $alwaysAvailable, false, true, true ) );
00909                     list( $column, $counter ) = displayProgress( 'E', $urlImportStartTime, $counter, $urlCount, $column );
00910                     continue;
00911                 }
00912                 logStore( $res, "eZURLAliasML::storePath", array( $source, $action, false, $linkID, $alwaysAvailable, false, true, true ) );
                      // verifyData() receives the progress character and the stored path;
                      // NOTE(review): presumably it may replace $result when verification fails - confirm.
00913                 $result = '.';
00914                 verifyData( $result, $source, $row['id'] );
00915                 list( $column, $counter ) = displayProgress( $result, $urlImportStartTime, $counter, $urlCount, $column );
00916             }
                  // The whole chunk is handed to markAsImported(), including rows that were
                  // skipped with an error above.
00917             markAsImported( $rows );
                  // The loop stops once a chunk comes back empty.
00918         } while ( $count > 0 );
00919         flush();
              // Emit an empty line of output when the progress column is not at 0.
00920         if ( $column > 0 )
00921             $cli->output();
00922         backupTables( 'impredir' );
00923     }
00924 
00925     if ( $importOldAliasWildcard )
00926     {
00927         // Then the wildcard changes
              // Imports the wildcard rows of the old ezurlalias table. Wildcards made
              // by a user are stored as eZURLWildcard objects; the remaining ones are
              // turned into url-alias entries that link the wildcard's source prefix
              // to an existing entry in the new url-alias table.
00928         $urlCount = fetchHistoricWildcardCount();
00929         $cli->output( "Importing {$urlCount} " . $cli->stylize( 'emphasize', "wildcards" ) );
00930         $column = $counter = $offset = 0;
00931         $urlImportStartTime = microtime( true );
00932         do
00933         {
                  // The chunk is always requested from offset 0; the returned $offset is
                  // not read again in this loop.
00934             list( $rows, $offset ) = fetchHistoricWildcardChunk( 0, $fetchLimit );
00935             if ( !is_array( $rows ) )
00936             {
00937                 break;
00938             }
00939             $count = count( $rows );
00940             foreach ( $rows as $key => $row )
00941             {
00942                 $wildcardType        = (int)$row['is_wildcard']; // 1 is forward, 2 is direct (alias) for now they are both treated as forwarding/redirect
00943                 $sourceWildcard      = $row['source_url'];
00944                 $sourceWildcard = eZURLAliasML::sanitizeURL( $sourceWildcard, true );
00945                 $destinationWildcard = $row['destination_url'];
00946                 $destinationWildcard = eZURLAliasML::sanitizeURL( $destinationWildcard, true );
00947                 if ( $row['is_wildcard'] && $row['is_internal'] != 1 )
00948                 {
00949                     // If the wildcard is made by a user we import using the new wildcard system.
00950                     $row['type'] = (int)$row['is_wildcard'];
00951
00952                     $wildcard = new eZURLWildcard( $row );
00953                     $wildcard->store();
00954                     list( $column, $counter ) = displayProgress( '.', $urlImportStartTime, $counter, $urlCount, $column );
00955                     continue;
00956                 }
00957
                      // Validation loop: the source must end in '*' and the destination in
                      // '{1}'; the text before those markers becomes $fromPath and $toPath.
                      // The loop is left through 'break' when no old wildcard has
                      // '<toPath>/*' as its source, or through 'continue 2' on invalid data.
                      // NOTE(review): when such a chained wildcard is found, its destination
                      // prefix is assigned to $sourceWildcard while $destinationWildcard stays
                      // unchanged, so the next pass re-checks that prefix against the '*'
                      // pattern - confirm that this chain-following does what was intended.
00958                 while ( true )
00959                 {
00960                     // Validate the wildcards
00961                     if ( !preg_match( "#^(.*)\*$#", $sourceWildcard, $matches ) )
00962                     {
00963                         logError( "Invalid source wildcard '$sourceWildcard', item is skipped, URL entry ID is " . $row['id'] );
00964                         list( $column, $counter ) = displayProgress( 'E', $urlImportStartTime, $counter, $urlCount, $column );
00965                         continue 2;
00966                     }
00967                     $fromPath = $matches[1];
00968                     $fromPath = eZURLAliasML::sanitizeURL( $fromPath, true );
00969                     if ( !preg_match( "#^(.*)\{1\}$#", $destinationWildcard, $matches ) )
00970                     {
00971                         logError( "Invalid destination wildcard '$destinationWildcard', item is skipped, URL entry ID is " . $row['id'] );
00972                         list( $column, $counter ) = displayProgress( 'E', $urlImportStartTime, $counter, $urlCount, $column );
00973                         continue 2;
00974                     }
00975                     $toPath = $matches[1];
00976                     $toPath = eZURLAliasML::sanitizeURL( $toPath, true );
00977
00978                     $newWildcard = $toPath . '/*';
00979                     $newWildcardSQL = $db->escapeString( $newWildcard );
00980                     $query = "SELECT * FROM ezurlalias WHERE source_url = '{$newWildcardSQL}' AND is_wildcard=1";
00981                     $rowsw = $db->arrayQuery( $query );
00982                     if ( count( $rowsw ) == 0 )
00983                     {
00984                         // The redirection has stopped, we can use the destination
00985                         break;
00986                     }
00987                     $newSourceWildcard = $rowsw[0]['destination_url'];
00988                     if ( !preg_match( "#^(.*)\{1\}$#", $newSourceWildcard, $matches ) )
00989                     {
                              // Report the string that actually failed the check, which is the
                              // destination of the chained row, not the current row's destination.
00990                         logError( "Invalid destination wildcard '$newSourceWildcard', item is skipped, URL entry ID is " . $rowsw[0]['id'] );
00991                         list( $column, $counter ) = displayProgress( 'E', $urlImportStartTime, $counter, $urlCount, $column );
00992                         continue 2;
00993                     }
00994                     $newSourceWildcard = $matches[1];
00995                     $sourceWildcard = $newSourceWildcard;
00996                 }
00997
                      // If the old table has a plain alias (no wildcard, no forwarding) for the
                      // target path, use its action to find the matching new entry and take
                      // that entry's path as the target instead.
00998                 $toPathSQL = $db->escapeString( $toPath );
00999                 $query = "SELECT * FROM ezurlalias WHERE source_url = '{$toPathSQL}' AND is_wildcard = 0 AND forward_to_id = 0";
01000                 $rowsw = $db->arrayQuery( $query );
01001                 if ( count( $rowsw ) > 0 )
01002                 {
01003                     list( $action, $alwaysAvailable ) = decodeAction( $rowsw[0]['destination_url'] );
01004                     list( $actionType, $actionValue ) = explode( ":", $action, 2 );
01005                     $elements = eZURLAliasML::fetchByAction( $actionType, $actionValue );
01006                     if ( $elements )
01007                     {
01008                         $toPath = $elements[0]->getPath();
01009                     }
01010                 }
01011
01012                 $elements = eZURLAliasML::fetchByPath( $toPath );
01013                 if ( count( $elements ) == 0 )
01014                 {
01015                     // Referenced url does not exist
01016                     logError( "The referenced path '$toPath' can not be found among the new URL alias entries, url entry ID is " . $row['id'] );
01017                     list( $column, $counter ) = displayProgress( 'E', $urlImportStartTime, $counter, $urlCount, $column );
01018                     continue;
01019                 }
01020                 // Fetch the ID of the element to redirect to.
01021                 $linkID = $elements[0]->attribute( 'id' );
01022                 $action = $elements[0]->attribute( 'action' );
01023                 if ( $action == 'nop:' )
01024                 {
01025                     // Cannot redirect to nops
01026                     logError( "The referenced path '$toPath' with ID " . $elements[0]->attribute( 'id' ) . " is a 'nop:' entry and cannot be used" );
01027                     list( $column, $counter ) = displayProgress( 'E', $urlImportStartTime, $counter, $urlCount, $column );
01028                     continue;
01029                 }
                      // Bit 0 of the target entry's lang_mask is passed on as the always-available flag.
01030                 $alwaysAvailable = ($elements[0]->attribute( 'lang_mask' ) & 1);
01031                 $res = eZURLAliasML::storePath( $fromPath, $action,
01032                                                 false, $linkID, $alwaysAvailable );
                      // Status 3 is reported as "path already exists". The boolean success value
                      // must be excluded first because true == 3 holds under loose comparison,
                      // and an empty result is left to the generic store-error branch below.
01033                 if ( $res && $res['status'] !== true && $res['status'] == 3 )
01034                 {
01035                     logError( "The wildcard url " . var_export( $fromPath, true ) . " cannot be created since the path already exists" );
01036                     list( $column, $counter ) = displayProgress( 's', $urlImportStartTime, $counter, $urlCount, $column );
01037                     continue;
01038                 }
01039                 if ( !$res || $res['status'] !== true )
01040                 {
01041                     logStoreError( $res, "eZURLAliasML::storePath", array( $fromPath, $action, false, $linkID, $alwaysAvailable ) );
01042                     list( $column, $counter ) = displayProgress( 'E', $urlImportStartTime, $counter, $urlCount, $column );
01043                     continue;
01044                 }
01045                 logStore( $res, "eZURLAliasML::storePath", array( $fromPath, $action, false, $linkID, $alwaysAvailable ) );
01046                 $result = '.';
                      // Verify the path that was stored above; $source is never assigned in this section.
01047                 verifyData( $result, $fromPath, $row['id'] );
01048                 list( $column, $counter ) = displayProgress( $result, $urlImportStartTime, $counter, $urlCount, $column );
01049             }
                  // The whole chunk is handed to markAsImported(), including skipped rows.
01050             markAsImported( $rows );
01051         } while ( $count > 0 );
01052         flush();
01053         if ( $column > 0 )
01054             $cli->output();
01055         backupTables( 'impwcard' );
01056     }
01057 
01058 //    $cli->output( "Removing urlalias data which have been imported" );
01059 //    $db = eZDB::instance();
01060 //    $db->query( "DELETE FROM ezurlalias WHERE is_imported = 1" ); // Removing all aliases which have been imported
01061 
01062     // Count the rows of the old table that are still flagged as not imported.
01063     $leftover = $db->arrayQuery( "SELECT count(*) AS count FROM ezurlalias WHERE is_imported = 0" );
01064     $remaining = $leftover[0]['count'];
01065     if ( $remaining > 0 )
01066         $cli->output( "There are $remaining remaining URL aliases in the old ezurlalias table, manual cleanup is needed." );
01067
01068     // Clear the 'urlalias' cache whenever the wildcard import was requested.
01069     if ( $importOldAliasWildcard )
01070     {
01071         $cli->output( "Removing old wildcard caches" );
01072         eZCache::clearByID( 'urlalias' );
01073     }
01074
01075     $cli->output( "Import completed" );
01076
01077     $importElapsed = microtime( true ) - $globalStartTime;
01078     $cli->output( "Import time taken: " . $cli->stylize( 'emphasize', formatTime( $importElapsed ) ) );
01079 }
01080 
01081 if ( $updateNodeAlias )
01082 {
          // Calls updateSubTreePath() on every depth-1 node and on every node below
          // it, fetching each subtree in chunks of $fetchLimit nodes and printing one
          // progress character per node.
01083     $nodeGlobalStartTime = microtime( true );
01084     // Start updating nodes
01085     $topLevelNodesArray = $db->arrayQuery( 'SELECT node_id FROM ezcontentobject_tree WHERE depth = 1 ORDER BY node_id' );
01086
01087     foreach ( array_keys( $topLevelNodesArray ) as $key )
01088     {
01089         $topLevelNodeID = $topLevelNodesArray[$key]['node_id'];
01090         $rootNode = eZContentObjectTreeNode::fetch( $topLevelNodeID );
              // A node that cannot be fetched must not abort the whole run with a
              // fatal error on the method call below; report it and move on.
              if ( !( $rootNode instanceof eZContentObjectTreeNode ) )
              {
                  $cli->error( "Could not fetch top-level node with ID $topLevelNodeID, skipping its subtree" );
                  continue;
              }
01091         if ( $rootNode->updateSubTreePath() )
01092             ++$totalChangedNodes;
01093         $done = false;
01094         $offset = 0;
01095         $counter = 0;
01096         $column = 0;
01097         $changedNodes = 0;
01098         $nodeCount = $rootNode->subTreeCount( array( 'Limitation' => array(),
01099                                                      'IgnoreVisibility' => true ) );
              // The + 1 presumably accounts for the top-level node itself, which was
              // updated separately above.
01100         $totalNodeCount += $nodeCount + 1;
01101         $cli->output( "Starting updates for " . $cli->stylize( 'mark', $rootNode->attribute( 'name' ) ) . ", $nodeCount nodes" );
01102         $nodeStartTime = microtime( true );
01103         while ( !$done )
01104         {
01105             $nodeList = $rootNode->subTree( array( 'Offset' => $offset,
01106                                                     'Limit' => $fetchLimit,
01107                                                     'IgnoreVisibility' => true,
01108                                                     'Limitation' => array() ) );
01109             foreach ( array_keys( $nodeList ) as $key )
01110             {
01111                 $node = $nodeList[ $key ];
01112                 $hasChanged = $node->updateSubTreePath();
01113                 if ( $hasChanged )
01114                 {
01115                     ++$changedNodes;
01116                     ++$totalChangedNodes;
01117                 }
                      // The return value of updateSubTreePath() is used as an index into
                      // the progress characters; any value without a matching index
                      // falls back to '.'.
01118                 $changeCharacters = array( '.', '+', '*' );
01119                 $changeCharacter = '.';
01120                 if ( isset( $changeCharacters[$hasChanged] ) )
01121                     $changeCharacter = $changeCharacters[$hasChanged];
01122                 verifyNodeData( $changeCharacter, $node );
01123                 list( $column, $counter ) = displayProgress( $changeCharacter, $nodeStartTime, $counter, $nodeCount, $column );
01124             }
                  // An empty chunk means the subtree has been fully processed.
01125             if ( count( $nodeList ) == 0 )
01126                 $done = true;
01127             unset( $nodeList );
01128             $offset += $fetchLimit;
                  // Drop cached content objects between chunks (presumably to bound memory use).
01129             eZContentObject::clearCache();
01130         }
01131         flush();
01132         if ( $column > 0 )
01133             $cli->output();
01134         $cli->output( "Updated " . $cli->stylize( 'emphasize', "$changedNodes/$nodeCount" ) . " for " . $cli->stylize( 'mark', $rootNode->attribute( 'name' ) ) );
01135         $cli->output();
01136         backupTables( 'node_' . strtolower( $rootNode->attribute( 'name' ) ) );
01137     }
01138
01139     $cli->output();
01140     $cli->output( "Total update " . $cli->stylize( 'emphasize', "$totalChangedNodes/$totalNodeCount" ) );
01141     $cli->output( "Node time taken: " . $cli->stylize( 'emphasize', formatTime( microtime( true ) - $nodeGlobalStartTime ) ) );
01142 }
01143 
01144 
01145 $totalElapsed = microtime( true ) - $globalStartTime; // time elapsed since $globalStartTime was taken
01146 $cli->output( "Total time taken: " . $cli->stylize( 'emphasize', formatTime( $totalElapsed ) ) );
01147 $script->shutdown();
01148 
01149 ?>