eZ Publish  [trunk]
ezstaticcache.php
Go to the documentation of this file.
00001 <?php
00002 /**
00003  * File containing the eZStaticCache class
00004  *
00005  * @copyright Copyright (C) 1999-2012 eZ Systems AS. All rights reserved.
00006  * @license http://www.gnu.org/licenses/gpl-2.0.txt GNU General Public License v2
00007  * @version //autogentag//
00008  * @package kernel
00009  */
00010 
/**
 * The eZStaticCache class manages the static cache system.
 *
 * This class can be used to generate static cache files usable
 * by the static cache system.
 *
 * Generating static cache is done by instantiating the class and then
 * calling generateCache(). For example:
 *
 * <code>
 * $staticCache = new eZStaticCache();
 * $staticCache->generateCache();
 * </code>
 *
 * To generate the URLs that must always be updated call generateAlwaysUpdatedCache().
 *
 * Unless a method is called with $delay = false, cache files are not written
 * immediately: a 'store' action is recorded instead and carried out when
 * eZStaticCache::executeActions() is called.
 *
 * @package kernel
 */
class eZStaticCache implements ezpStaticCache
{
    /**
     * User-Agent string sent when fetching the pages to cache.
     */
    const USER_AGENT = 'eZ Publish static cache generator';

    /**
     * Actions recorded by addAction() and processed by executeActions().
     * Each entry is array( string $action, array $parameters ).
     *
     * @var array
     */
    private static $actionList = array();

    /**
     * The name of the host to fetch HTML data from.
     *
     * @deprecated deprecated since version 4.4, site.ini.[SiteSettings].SiteURL is used instead
     * @var string
     */
    private $hostName;

    /**
     * The base path for the directory where static files are placed.
     *
     * @var string
     */
    private $staticStorageDir;

    /**
     * The maximum depth of URLs that will be cached.
     *
     * @var int
     */
    private $maxCacheDepth;

    /**
     * Array of URLs to cache.
     *
     * @var array(int=>string)
     */
    private $cachedURLArray = array();

    /**
     * An array with siteaccesses names that will be cached.
     *
     * @var array(int=>string)
     */
    private $cachedSiteAccesses = array();

    /**
     * An array with URLs that is to always be updated.
     *
     * @var array(int=>string)
     */
    private $alwaysUpdate;

    /**
     * Initialises the static cache object with settings from staticcache.ini.
     */
    public function __construct()
    {
        $ini = eZINI::instance( 'staticcache.ini' );
        $this->hostName = $ini->variable( 'CacheSettings', 'HostName' );
        $this->staticStorageDir = $ini->variable( 'CacheSettings', 'StaticStorageDir' );
        $this->maxCacheDepth = $ini->variable( 'CacheSettings', 'MaxCacheDepth' );
        $this->cachedURLArray = $ini->variable( 'CacheSettings', 'CachedURLArray' );
        $this->cachedSiteAccesses = $ini->variable( 'CacheSettings', 'CachedSiteAccesses' );
        $this->alwaysUpdate = $ini->variable( 'CacheSettings', 'AlwaysUpdateArray' );
    }

    /**
     * Getter method for {@link eZStaticCache::$hostName}
     *
     * @deprecated deprecated since version 4.4
     * @return string The currently configured host-name.
     */
    public function hostName()
    {
        return $this->hostName;
    }

    /**
     * Getter method for {@link eZStaticCache::$staticStorageDir}
     *
     * @return string The currently configured storage directory for the static cache.
     */
    public function storageDirectory()
    {
        return $this->staticStorageDir;
    }

    /**
     * Getter method for {@link eZStaticCache::$maxCacheDepth}
     *
     * @return int The maximum depth in the url which will be cached.
     */
    public function maxCacheDepth()
    {
        return $this->maxCacheDepth;
    }

    /**
     * Getter method for {@link eZStaticCache::$cachedSiteAccesses}
     *
     * @return array An array with site-access names that should be cached.
     */
    public function cachedSiteAccesses()
    {
        return $this->cachedSiteAccesses;
    }

    /**
     * Getter method for {@link eZStaticCache::$cachedURLArray}
     *
     * @return array An array with URLs that is to be cached statically, the URLs may contain wildcards.
     */
    public function cachedURLArray()
    {
        return $this->cachedURLArray;
    }

    /**
     * Getter method for {@link eZStaticCache::$alwaysUpdate}
     *
     * These URLs are configured with AlwaysUpdateArray in staticcache.ini.
     *
     * @see eZStaticCache::generateAlwaysUpdatedCache()
     * @return array An array with URLs that is to always be updated.
     */
    public function alwaysUpdateURLArray()
    {
        return $this->alwaysUpdate;
    }

    /**
     * Generates the caches for all URLs that must always be generated.
     *
     * Existing cache files are always replaced (no "skip existing" check).
     *
     * @param bool $quiet If true then the function will not output anything.
     * @param eZCLI|false $cli The eZCLI object or false if no output can be done.
     * @param bool $delay If true the store is recorded as an action for
     *                    executeActions() instead of being performed immediately.
     */
    public function generateAlwaysUpdatedCache( $quiet = false, $cli = false, $delay = true )
    {
        $verbose = ( !$quiet and $cli );

        foreach ( $this->alwaysUpdate as $uri )
        {
            if ( $verbose )
                $cli->output( "caching: $uri ", false );
            $this->storeCache( $uri, $this->staticStorageDir, array(), false, $delay );
            if ( $verbose )
                $cli->output( "done" );
        }
    }

    /**
     * Generates caches for all the urls of nodes in $nodeList.
     *
     * The associative array must have one of these entries:
     * - node_id - ID of the node
     * - path_identification_string - The path_identification_string from the node table, is used to fetch the node ID if node_id is missing.
     *
     * Entries for which no node ID can be determined are logged and skipped.
     *
     * @param array $nodeList An array with node entries, each entry is either the node ID or an associative array.
     */
    public function generateNodeListCache( $nodeList )
    {
        foreach ( $nodeList as $uri )
        {
            if ( !is_array( $uri ) )
            {
                $nodeID = (int)$uri;
            }
            else if ( isset( $uri['node_id'] ) )
            {
                $nodeID = (int)$uri['node_id'];
            }
            else
            {
                // var_export() must be told to return the dump, otherwise it is printed
                // to the output and the message only receives null.
                eZDebug::writeError( "node_id is not set for uri entry " . var_export( $uri, true ) . ", will need to perform extra query to get node_id" );

                if ( !isset( $uri['path_identification_string'] ) )
                {
                    eZDebug::writeError( "Neither node_id nor path_identification_string is set for uri entry, skipping it", 'Static Cache' );
                    continue;
                }

                $node = eZContentObjectTreeNode::fetchByURLPath( $uri['path_identification_string'] );
                if ( !is_object( $node ) )
                {
                    eZDebug::writeError( "Could not find a node for path " . $uri['path_identification_string'] . ", skipping it", 'Static Cache' );
                    continue;
                }
                $nodeID = (int)$node->attribute( 'node_id' );
            }

            $elements = eZURLAliasML::fetchByAction( 'eznode', $nodeID, true, true, true );
            foreach ( $elements as $element )
            {
                $this->cacheURL( '/' . $element->getPath() );
            }
        }
    }

    /**
     * Generates the static cache from the configured INI settings.
     *
     * Plain URLs are cached directly. URLs containing a wildcard (*) are expanded
     * level by level through the URL alias tree until no more children are found.
     *
     * @param bool $force If true then it will create all static caches even if it is not outdated.
     * @param bool $quiet If true then the function will not output anything.
     * @param eZCLI|false $cli The eZCLI object or false if no output can be done.
     * @param bool $delay If true the stores are recorded as actions for
     *                    executeActions() instead of being performed immediately.
     */
    public function generateCache( $force = false, $quiet = false, $cli = false, $delay = true )
    {
        $verbose = ( !$quiet and $cli );

        // This contains parent elements which must be checked to find new urls and put them in $generateList
        // Each entry contains either:
        // - url - Url of parent
        // - glob - A glob string to filter direct children based on name
        // - org_url - The original url which was requested
        // or:
        // - parent_id - The element ID of the parent, used to quickly fetch the children
        $parentList = array();
        // A list of urls which must be generated, each entry is a string with the url
        $generateList = array();
        foreach ( $this->cachedURLArray() as $url )
        {
            if ( strpos( $url, '*' ) === false )
            {
                $generateList[] = $url;
                continue;
            }

            $queryURL = ltrim( str_replace( '*', '', $url ), '/' );
            $dir = dirname( $queryURL );
            if ( $dir == '.' )
                $dir = '';
            $parentList[] = array( 'url' => $dir,
                                   'glob' => basename( $queryURL ),
                                   'org_url' => $url );
        }

        // As long as we have urls to generate or parents to check we loop
        while ( count( $generateList ) > 0 || count( $parentList ) > 0 )
        {
            // First generate single urls
            foreach ( $generateList as $generateURL )
            {
                if ( $verbose )
                    $cli->output( "caching: $generateURL ", false );
                $this->cacheURL( $generateURL, false, !$force, $delay );
                if ( $verbose )
                    $cli->output( "done" );
            }
            $generateList = array();

            // Then check for more data
            $newParentList = array();
            foreach ( $parentList as $parentURL )
            {
                if ( isset( $parentURL['parent_id'] ) )
                {
                    $elements = eZURLAliasML::fetchByParentID( $parentURL['parent_id'], true, true, false );
                }
                else
                {
                    if ( $verbose and $parentURL['glob'] )
                        $cli->output( "wildcard cache: " . $parentURL['url'] . '/' . $parentURL['glob'] . "*" );
                    $elements = eZURLAliasML::fetchByPath( $parentURL['url'], $parentURL['glob'] );
                }

                // Every child is cached in the next round and becomes a parent to inspect itself
                foreach ( $elements as $element )
                {
                    $generateList[] = '/' . $element->getPath();
                    $newParentList[] = array( 'parent_id' => $element->attribute( 'id' ) );
                }
            }
            $parentList = $newParentList;
        }
    }

    /**
     * Generates the caches for the url $url using the currently configured storageDirectory().
     *
     * The URL is only cached when it has fewer slashes than the configured maximum cache
     * depth and matches one of the configured cached URLs (exactly, or by prefix for
     * entries containing a wildcard).
     *
     * @param string $url The URL to cache, e.g /news
     * @param int|false $nodeID The ID of the node to cache, if supplied it will also cache content/view/full/xxx.
     * @param bool $skipExisting If true it will not unlink existing cache files.
     * @param bool $delay If true the store is recorded as an action for
     *                    executeActions() instead of being performed immediately.
     * @return bool True if the URL was accepted for caching, false otherwise.
     */
    public function cacheURL( $url, $nodeID = false, $skipExisting = false, $delay = true )
    {
        // Check if URL should be cached
        if ( substr_count( $url, "/" ) >= $this->maxCacheDepth )
            return false;

        $doCacheURL = false;
        foreach ( $this->cachedURLArray as $cacheURL )
        {
            if ( $url == $cacheURL )
            {
                $doCacheURL = true;
                break;
            }

            // Wildcard entries match when the URL starts with the entry stripped of '*'
            if ( strpos( $cacheURL, '*' ) !== false
                 && strpos( $url, str_replace( '*', '', $cacheURL ) ) === 0 )
            {
                $doCacheURL = true;
                break;
            }
        }

        if ( !$doCacheURL )
        {
            return false;
        }

        $alternativeLocations = $nodeID ? array( "/content/view/full/$nodeID" ) : array();
        $this->storeCache( $url, $this->staticStorageDir, $alternativeLocations, $skipExisting, $delay );

        return true;
    }

    /**
     * Stores the static cache for $url and hostname defined in site.ini.[SiteSettings].SiteURL for cached siteaccess
     * by fetching the web page using {@link eZHTTPTool::getDataByURL()} and storing the fetched HTML data.
     *
     * @param string $url The URL to cache, e.g /news
     * @param string $staticStorageDir The base directory for storing cache files.
     * @param array $alternativeStaticLocations Additional URLs under which the same content is stored.
     * @param bool $skipUnlink If true it will not unlink existing cache files.
     * @param bool $delay If true a 'store' action is recorded for executeActions()
     *                    instead of fetching and writing the content immediately.
     */
    private function storeCache( $url, $staticStorageDir, $alternativeStaticLocations = array(), $skipUnlink = false, $delay = true )
    {
        foreach ( $this->cachedSiteAccesses as $cachedSiteAccess )
        {
            foreach ( $this->buildCacheDirPath( $cachedSiteAccess ) as $dirPart )
            {
                $dir = $dirPart['dir'];
                $siteURL = $dirPart['site_url'];

                $cacheFiles = array( $this->buildCacheFilename( $staticStorageDir, $dir . $url ) );
                foreach ( $alternativeStaticLocations as $location )
                {
                    $cacheFiles[] = $this->buildCacheFilename( $staticStorageDir, $dir . $location );
                }

                // staticcache.ini.[CacheSettings].HostName has been deprecated since version 4.4
                // (will be removed in a future version); when it is not set the hostname is read
                // from site.ini.[SiteSettings].SiteURL per siteaccess defined in
                // staticcache.ini.[CacheSettings].CachedSiteAccesses
                if ( $this->hostName )
                {
                    $fileName = "http://{$this->hostName}{$dir}{$url}";
                }
                else
                {
                    $fileName = "http://{$siteURL}{$url}";
                }

                // Store new content
                $content = false;
                foreach ( $cacheFiles as $file )
                {
                    if ( $skipUnlink && file_exists( $file ) )
                    {
                        continue;
                    }

                    if ( $delay )
                    {
                        $this->addAction( 'store', array( $file, $fileName ) );
                        continue;
                    }

                    // Generate content, if required
                    if ( $content === false )
                    {
                        if ( eZHTTPTool::getDataByURL( $fileName, true, self::USER_AGENT ) )
                            $content = eZHTTPTool::getDataByURL( $fileName, false, self::USER_AGENT );
                    }

                    if ( $content === false )
                    {
                        eZDebug::writeError( "Could not grab content (from $fileName), is the hostname correct and Apache running?", 'Static Cache' );
                    }
                    else
                    {
                        self::storeCachedFile( $file, $content );
                    }
                }
            }
        }
    }

    /**
     * Generates a full path to the cache file (index.html) based on the input parameters.
     *
     * @param string $staticStorageDir The storage for cache files.
     * @param string $url The URL for the current item, e.g /news
     * @return string The full path to the cache file (index.html).
     */
    private function buildCacheFilename( $staticStorageDir, $url )
    {
        // Collapse runs of slashes produced by joining the parts
        return preg_replace( '#//+#', '/', "{$staticStorageDir}{$url}/index.html" );
    }

    /**
     * Generates a cache directory parts including path, siteaccess name, site URL
     * depending on the match order type.
     *
     * One or more parts are produced for every entry of
     * site.ini.[SiteAccessSettings].MatchOrder.
     *
     * @param string $siteAccess
     * @return array List of parts as returned by buildCacheDirPart().
     */
    private function buildCacheDirPath( $siteAccess )
    {
        $dirParts = array();

        $ini = eZINI::instance();

        $matchOrderArray = $ini->variableArray( 'SiteAccessSettings', 'MatchOrder' );

        foreach ( $matchOrderArray as $matchOrderItem )
        {
            switch ( $matchOrderItem )
            {
                case 'host_uri':
                    foreach ( $ini->variable( 'SiteAccessSettings', 'HostUriMatchMapItems' ) as $hostUriMatchMapItem )
                    {
                        // Items are read as host;uri;siteaccess
                        $parts = explode( ';', $hostUriMatchMapItem );

                        if ( isset( $parts[2] ) && $parts[2] === $siteAccess )
                        {
                            $dirParts[] = $this->buildCacheDirPart( ( $parts[0] ? '/' . $parts[0] : '' ) .
                                                                    ( $parts[1] ? '/' . $parts[1] : '' ), $siteAccess );
                        }
                    }
                    break;
                case 'host':
                    foreach ( $ini->variable( 'SiteAccessSettings', 'HostMatchMapItems' ) as $hostMatchMapItem )
                    {
                        // Items are read as host;siteaccess
                        $parts = explode( ';', $hostMatchMapItem );

                        if ( isset( $parts[1] ) && $parts[1] === $siteAccess )
                        {
                            $dirParts[] = $this->buildCacheDirPart( ( $parts[0] ? '/' . $parts[0] : '' ), $siteAccess );
                        }
                    }
                    break;
                default:
                    $dirParts[] = $this->buildCacheDirPart( '/' . $siteAccess, $siteAccess );
                    break;
            }
        }

        return $dirParts;
    }

    /**
     * A helper method used to create directory parts array
     *
     * @param string $dir
     * @param string $siteAccess
     * @return array Array with the keys 'dir', 'access_name' and 'site_url'.
     */
    private function buildCacheDirPart( $dir, $siteAccess )
    {
        $siteINI = eZSiteAccess::getIni( $siteAccess, 'site.ini' );

        return array( 'dir' => $dir,
                      'access_name' => $siteAccess,
                      'site_url' => $siteINI->variable( 'SiteSettings', 'SiteURL' ) );
    }

    /**
     * Stores the cache file $file with contents $content.
     * Takes care of setting proper permissions on the new file.
     *
     * The content is first written to a uniquely named temporary file next to
     * $file which is then renamed to $file.
     *
     * @param string $file
     * @param string $content
     */
    public static function storeCachedFile( $file, $content )
    {
        $dir = dirname( $file );
        if ( !is_dir( $dir ) )
        {
            eZDir::mkdir( $dir, false, true );
        }

        $oldumask = umask( 0 );

        $tmpFileName = $file . '.' . md5( $file . uniqid( "ezp" . getmypid(), true ) );

        // Remove files, this might be necessary for Windows
        @unlink( $tmpFileName );

        // Write the new cache file with the data attached
        $fp = fopen( $tmpFileName, 'w' );
        if ( $fp )
        {
            $appendGeneratedTime = eZINI::instance( 'staticcache.ini' )->variable( 'CacheSettings', 'AppendGeneratedTime' ) === 'true';
            $comment = $appendGeneratedTime ? "<!-- Generated: " . date( 'Y-m-d H:i:s' ) . " -->\n\n" : null;

            fwrite( $fp, $content . $comment );
            fclose( $fp );
            eZFile::rename( $tmpFileName, $file, false, eZFile::CLEAN_ON_FAILURE | eZFile::APPEND_DEBUG_ON_FAILURE );

            $perm = eZINI::instance()->variable( 'FileSettings', 'StorageFilePermissions' );
            chmod( $file, octdec( $perm ) );
        }
        else
        {
            // Do not fail silently when the temporary file cannot be created
            eZDebug::writeError( "Could not open $tmpFileName for writing, static cache file $file was not stored", 'Static Cache' );
        }

        umask( $oldumask );
    }

    /**
     * Removes the static cache file (index.html) and its directory if it exists.
     * The directory path is based upon the URL $url and the configured static storage dir.
     *
     * Failures of either removal are suppressed.
     *
     * @param string $url The URL for the current item, e.g /news
     */
    public function removeURL( $url )
    {
        $dir = eZDir::path( array( $this->staticStorageDir, $url ) );

        @unlink( $dir . "/index.html" );
        @rmdir( $dir );
    }

    /**
     * This function adds an action to the list that is used at the end of the
     * request to remove and regenerate static cache files.
     *
     * @param string $action
     * @param array $parameters
     */
    private function addAction( $action, $parameters )
    {
        self::$actionList[] = array( $action, $parameters );
    }

    /**
     * This function goes over the list of recorded actions and executes them.
     *
     * For 'store' actions: when staticcache.ini.[CacheSettings].CronjobCacheClear is
     * 'enabled' the action is inserted into the ezpending_actions table, otherwise the
     * source URL is fetched (once per source) and written to the destination file.
     * Each destination is handled at most once. The action list is emptied afterwards.
     */
    public static function executeActions()
    {
        if ( empty( self::$actionList ) )
        {
            return;
        }

        $fileContentCache = array();
        $doneDestList = array();

        $ini = eZINI::instance( 'staticcache.ini' );
        $clearByCronjob = ( $ini->variable( 'CacheSettings', 'CronjobCacheClear' ) == 'enabled' );

        // The database is only needed when the actual work is handed over to the cronjob
        $db = $clearByCronjob ? eZDB::instance() : null;

        foreach ( self::$actionList as $actionEntry )
        {
            list( $action, $parameters ) = $actionEntry;

            switch ( $action )
            {
                case 'store':
                    list( $destination, $source ) = $parameters;

                    // Skip destinations which were already handled, moving on to the next action
                    if ( isset( $doneDestList[$destination] ) )
                        continue 2;

                    if ( $clearByCronjob )
                    {
                        $param = $db->escapeString( $destination . ',' . $source );
                        $db->query( 'INSERT INTO ezpending_actions( action, param ) VALUES ( \'static_store\', \''. $param . '\' )' );
                        $doneDestList[$destination] = 1;
                    }
                    else
                    {
                        if ( !isset( $fileContentCache[$source] ) )
                        {
                            if ( eZHTTPTool::getDataByURL( $source, true, self::USER_AGENT ) )
                                $fileContentCache[$source] = eZHTTPTool::getDataByURL( $source, false, self::USER_AGENT );
                            else
                                $fileContentCache[$source] = false;
                        }

                        if ( $fileContentCache[$source] === false )
                        {
                            eZDebug::writeError( "Could not grab content (from $source), is the hostname correct and Apache running?", 'Static Cache' );
                        }
                        else
                        {
                            self::storeCachedFile( $destination, $fileContentCache[$source] );
                            $doneDestList[$destination] = 1;
                        }
                    }
                    break;
            }
        }
        self::$actionList = array();
    }
}
00636 
00637 ?>