eZ Publish  [4.0]
ezstaticcache.php
Go to the documentation of this file.
00001 <?php
00002 //
00003 // Definition of eZStaticCache class
00004 //
00005 // Created on: <12-Jan-2005 10:29:21 dr>
00006 //
00007 // ## BEGIN COPYRIGHT, LICENSE AND WARRANTY NOTICE ##
00008 // SOFTWARE NAME: eZ Publish
00009 // SOFTWARE RELEASE: 4.0.x
00010 // COPYRIGHT NOTICE: Copyright (C) 1999-2008 eZ Systems AS
00011 // SOFTWARE LICENSE: GNU General Public License v2.0
00012 // NOTICE: >
00013 //   This program is free software; you can redistribute it and/or
00014 //   modify it under the terms of version 2.0  of the GNU General
00015 //   Public License as published by the Free Software Foundation.
00016 //
00017 //   This program is distributed in the hope that it will be useful,
00018 //   but WITHOUT ANY WARRANTY; without even the implied warranty of
00019 //   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00020 //   GNU General Public License for more details.
00021 //
00022 //   You should have received a copy of version 2.0 of the GNU General
00023 //   Public License along with this program; if not, write to the Free
00024 //   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
00025 //   MA 02110-1301, USA.
00026 //
00027 //
00028 // ## END COPYRIGHT, LICENSE AND WARRANTY NOTICE ##
00029 //
00030 
00031 /*! \file ezstaticcache.php
00032 */
00033 
00034 /*!
00035   \class eZStaticCache ezstaticcache.php
00036   \brief Manages the static cache system.
00037 
00038   This class can be used to generate static cache files usable
00039   by the static cache system.
00040 
00041   Generating static cache is done by instantiating the class and then
00042   calling generateCache(). For example:
00043   \code
00044   $staticCache = new eZStaticCache();
00045   $staticCache->generateCache();
00046   \endcode
00047 
00048   To generate the URLs that must always be updated call generateAlwaysUpdatedCache()
00049 
00050 */
00051 
00052 //include_once( 'lib/ezutils/classes/ezini.php' );
00053 //include_once( 'kernel/classes/ezurlaliasml.php' );
00054 
class eZStaticCache
{
    /// Identifier passed to eZHTTPTool::getDataByURL() when fetching pages for the cache.
    const USER_AGENT = 'eZ Publish static cache generator';

    /*!
     Initialises the static cache object with the settings found in the
     \c CacheSettings group of \c staticcache.ini.
    */
    public function __construct()
    {
        $ini = eZINI::instance( 'staticcache.ini' );
        $this->HostName = $ini->variable( 'CacheSettings', 'HostName' );
        $this->StaticStorageDir = $ini->variable( 'CacheSettings', 'StaticStorageDir' );
        $this->MaxCacheDepth = $ini->variable( 'CacheSettings', 'MaxCacheDepth' );
        $this->CachedURLArray = $ini->variable( 'CacheSettings', 'CachedURLArray' );
        $this->CachedSiteAccesses = $ini->variable( 'CacheSettings', 'CachedSiteAccesses' );
        $this->AlwaysUpdate = $ini->variable( 'CacheSettings', 'AlwaysUpdateArray' );
    }

    /*!
     \deprecated PHP 4 style constructor, use __construct() instead.

     PHP 8 no longer treats a method named after its class as a constructor,
     so the initialisation lives in __construct(). This method is only kept for
     code which calls \c eZStaticCache() explicitly.
    */
    public function eZStaticCache()
    {
        // self:: (not $this->) so that a subclass constructor which forwards
        // to this method cannot recurse into itself.
        self::__construct();
    }

    /*!
     \return The currently configured host-name.
    */
    public function hostName()
    {
        return $this->HostName;
    }

    /*!
     \return The currently configured storage directory for the static cache.
    */
    public function storageDirectory()
    {
        return $this->StaticStorageDir;
    }

    /*!
     \return The maximum depth in the url which will be cached.
    */
    public function maxCacheDepth()
    {
        return $this->MaxCacheDepth;
    }

    /*!
     \return An array with site-access names that should be cached.
    */
    public function cachedSiteAccesses()
    {
        return $this->CachedSiteAccesses;
    }

    /*!
     \return An array with URLs that are to be cached statically, the URLs may contain wildcards.
    */
    public function cachedURLArray()
    {
        return $this->CachedURLArray;
    }

    /*!
     \return An array with URLs that are to always be updated.
     \note These URLs are configured with \c AlwaysUpdateArray in \c staticcache.ini.
     \sa generateAlwaysUpdatedCache()
    */
    public function alwaysUpdateURLArray()
    {
        return $this->AlwaysUpdate;
    }

    /*!
     Generates the caches for all URLs that must always be generated.

     \param $quiet If \c true then the function will not output anything.
     \param $cli   The object used for output (its \c output() method is called)
                   or \c false if no output can be done.
     \param $delay If \c true the store operations are only recorded with addAction()
                   and carried out by executeActions(), otherwise the pages are
                   fetched and written immediately.

     \sa alwaysUpdateURLArray().
    */
    public function generateAlwaysUpdatedCache( $quiet = false, $cli = false, $delay = true )
    {
        $hostname = $this->HostName;
        $staticStorageDir = $this->StaticStorageDir;
        $verbose = ( !$quiet && $cli );

        foreach ( $this->AlwaysUpdate as $uri )
        {
            if ( $verbose )
                $cli->output( "caching: $uri ", false );
            $this->storeCache( $uri, $hostname, $staticStorageDir, array(), false, $delay );
            if ( $verbose )
                $cli->output( "done" );
        }
    }

    /*!
     Generates caches for all the urls of nodes in \a $nodeList.

     \param $nodeList An array with node entries, each entry is either the node ID
                      or an associative array. The associative array must have one
                      of these entries:
                      - node_id - ID of the node
                      - path_identification_string - Used to fetch the node (and thereby
                        its ID) when node_id is missing.

     Entries for which no node ID can be determined are reported with
     eZDebug::writeError() and skipped.
    */
    public function generateNodeListCache( $nodeList )
    {
        foreach ( $nodeList as $uri )
        {
            if ( is_array( $uri ) )
            {
                if ( isset( $uri['node_id'] ) )
                {
                    $nodeID = (int)$uri['node_id'];
                }
                else
                {
                    // var_export() needs its second argument to return the dump,
                    // without it the dump is printed and the message gets nothing.
                    eZDebug::writeError( "node_id is not set for uri entry " . var_export( $uri, true ) . ", will need to perform extra query to get node_id" );
                    if ( !isset( $uri['path_identification_string'] ) )
                    {
                        eZDebug::writeError( "uri entry has neither node_id nor path_identification_string, skipping it", 'Static Cache' );
                        continue;
                    }
                    $node = eZContentObjectTreeNode::fetchByURLPath( $uri['path_identification_string'] );
                    if ( !is_object( $node ) )
                    {
                        eZDebug::writeError( "Could not fetch node for path '" . $uri['path_identification_string'] . "', skipping it", 'Static Cache' );
                        continue;
                    }
                    $nodeID = (int)$node->attribute( 'node_id' );
                }
            }
            else
            {
                $nodeID = (int)$uri;
            }

            $elements = eZURLAliasML::fetchByAction( 'eznode', $nodeID, true, true, true );
            foreach ( $elements as $element )
            {
                $this->cacheURL( '/' . $element->getPath() );
            }
        }
    }

    /*!
     Generates the static cache from the configured INI settings.

     URLs without a wildcard are cached directly. For URLs containing \c * the
     matching URL alias elements are looked up and then all their children,
     level by level; every URL found is handed to cacheURL().

     \param $force If \c true then it will create all static caches even if it is not outdated.
     \param $quiet If \c true then the function will not output anything.
     \param $cli The eZCLI object or \c false if no output can be done.
     \param $delay Passed on to cacheURL(): if \c true the store operations are only
                   recorded and carried out later by executeActions().
    */
    public function generateCache( $force = false, $quiet = false, $cli = false, $delay = true )
    {
        $verbose = ( !$quiet && $cli );

        // This contains parent elements which must be checked to find new urls and put them in $generateList
        // Each entry contains either:
        // - url - Url of parent
        // - glob - A glob string to filter direct children based on name
        // - org_url - The original url which was requested
        // or:
        // - parent_id - The element ID of the parent, used to fetch the children directly
        $parentList = array();
        // A list of urls which must be generated, each entry is a string with the url
        $generateList = array();
        foreach ( $this->cachedURLArray() as $url )
        {
            if ( strpos( $url, '*' ) === false )
            {
                $generateList[] = $url;
                continue;
            }

            $queryURL = ltrim( str_replace( '*', '', $url ), '/' );
            $dir = dirname( $queryURL );
            if ( $dir == '.' )
                $dir = '';
            $parentList[] = array( 'url' => $dir,
                                   'glob' => basename( $queryURL ),
                                   'org_url' => $url );
        }

        // As long as we have urls to generate or parents to check we loop
        while ( count( $generateList ) > 0 || count( $parentList ) > 0 )
        {
            // First generate single urls
            foreach ( $generateList as $generateURL )
            {
                if ( $verbose )
                    $cli->output( "caching: $generateURL ", false );
                $this->cacheURL( $generateURL, false, !$force, $delay );
                if ( $verbose )
                    $cli->output( "done" );
            }
            $generateList = array();

            // Then check for more data
            $newParentList = array();
            foreach ( $parentList as $parentURL )
            {
                if ( isset( $parentURL['parent_id'] ) )
                {
                    $elements = eZURLAliasML::fetchByParentID( $parentURL['parent_id'], true, true, false );
                }
                else
                {
                    if ( $verbose and $parentURL['glob'] )
                        $cli->output( "wildcard cache: " . $parentURL['url'] . '/' . $parentURL['glob'] . "*" );
                    $elements = eZURLAliasML::fetchByPath( $parentURL['url'], $parentURL['glob'], true, true );
                }

                // Every element found is cached in the next round and becomes
                // a parent whose children are looked up in that same round.
                foreach ( $elements as $element )
                {
                    $generateList[] = '/' . $element->getPath();
                    $newParentList[] = array( 'parent_id' => $element->attribute( 'id' ) );
                }
            }
            $parentList = $newParentList;
        }
    }

    /*!
     \private
     Generates the caches for the url \a $url using the currently configured hostName() and storageDirectory().

     The URL is only cached when it contains fewer slashes than maxCacheDepth() and
     it is equal to, or starts with the non-wildcard part of, an entry in cachedURLArray().

     \param $url The URL to cache, e.g \c /news
     \param $nodeID The ID of the node to cache, if supplied it will also cache content/view/full/xxx.
     \param $skipExisting If \c true cache files which already exist are left untouched.
     \param $delay If \c true the store operation is only recorded with addAction(),
                   otherwise the page is fetched and written immediately.
     \return \c true if the URL was handed to storeCache(), \c false if it was rejected.
    */
    public function cacheURL( $url, $nodeID = false, $skipExisting = false, $delay = true )
    {
        // Check if URL should be cached
        if ( substr_count( $url, "/" ) >= $this->MaxCacheDepth )
            return false;

        $doCacheURL = false;
        foreach ( $this->CachedURLArray as $cacheURL )
        {
            if ( $url == $cacheURL )
            {
                $doCacheURL = true;
                break;
            }
            if ( strpos( $cacheURL, '*' ) !== false
                 && strpos( $url, str_replace( '*', '', $cacheURL ) ) === 0 )
            {
                $doCacheURL = true;
                break;
            }
        }

        if ( !$doCacheURL )
        {
            return false;
        }

        $alternativeLocations = $nodeID ? array( "/content/view/full/$nodeID" ) : array();
        $this->storeCache( $url, $this->HostName, $this->StaticStorageDir, $alternativeLocations, $skipExisting, $delay );

        return true;
    }

    /*!
     \private
     Stores the static cache for \a $url and \a $hostname by fetching the web page with
     eZHTTPTool::getDataByURL() and storing the fetched data, once for every configured
     site access (see cachedSiteAccesses()).

     The page is fetched at most once per site access; the same content is written to the
     cache file of \a $url and to the cache files of all \a $alternativeStaticLocations.

     \param $url The URL to cache, e.g \c /news
     \param $hostname The name of the host which serves web pages dynamically, see hostName().
     \param $staticStorageDir The base directory for storing cache files, see storageDirectory().
     \param $alternativeStaticLocations An array with additional URLs under which the content of \a $url should also be stored.
     \param $skipUnlink If \c true cache files which already exist are left untouched.
     \param $delay If \c true the store operations are only recorded with addAction()
                   and carried out by executeActions().
    */
    public function storeCache( $url, $hostname, $staticStorageDir, $alternativeStaticLocations = array(), $skipUnlink = false, $delay = true )
    {
        if ( is_array( $this->CachedSiteAccesses ) and count( $this->CachedSiteAccesses ) )
        {
            $dirs = array();
            foreach ( $this->CachedSiteAccesses as $siteAccess )
            {
                $dirs[] = '/' . $siteAccess;
            }
        }
        else
        {
            $dirs = array( '' );
        }
        $http = eZHTTPTool::instance();

        foreach ( $dirs as $dir )
        {
            if ( !is_dir( $staticStorageDir . $dir ) )
            {
                eZDir::mkdir( $staticStorageDir . $dir, false, true );
            }

            $cacheFiles = array( $this->buildCacheFilename( $staticStorageDir, $dir . $url ) );
            foreach ( $alternativeStaticLocations as $location )
            {
                $cacheFiles[] = $this->buildCacheFilename( $staticStorageDir, $dir . $location );
            }

            // All cache files of one site access share the same source URL
            $fileName = "http://$hostname$dir$url";
            $content = false;
            // Remember that a fetch was attempted so a failing host is not
            // contacted again for every single cache file.
            $fetched = false;

            /* Store new content */
            foreach ( $cacheFiles as $file )
            {
                if ( $skipUnlink && file_exists( $file ) )
                {
                    continue;
                }

                if ( $delay )
                {
                    $this->addAction( 'store', array( $file, $fileName ) );
                    continue;
                }

                /* Generate content, if required */
                if ( !$fetched )
                {
                    $content = $http->getDataByURL( $fileName, false, self::USER_AGENT );
                    $fetched = true;
                }

                if ( $content === false )
                {
                    eZDebug::writeNotice( "Could not grab content (from $fileName), is the hostname correct and Apache running?",
                                          'Static Cache' );
                }
                else
                {
                    self::storeCachedFile( $file, $content );
                }
            }
        }
    }

    /*!
     \private
     \param $staticStorageDir The storage for cache files.
     \param $url The URL for the current item, e.g \c /news
     \return The full path to the cache file (index.html) based on the input parameters,
             with runs of slashes collapsed into a single slash.
    */
    public function buildCacheFilename( $staticStorageDir, $url )
    {
        return preg_replace( '#//+#', '/', "{$staticStorageDir}{$url}/index.html" );
    }

    /*!
     \private
     \static
     Stores the cache file \a $file with contents \a $content followed by a
     "Generated" HTML comment holding the current date and time.

     The data is written to a temporary file next to \a $file which is then renamed
     to \a $file; the permissions are taken from \c StorageFilePermissions in the
     \c FileSettings group. If the temporary file cannot be opened or written
     completely the existing \a $file is left untouched and an error is logged.

     \param $file    The full path of the cache file.
     \param $content The page content to store.
    */
    public static function storeCachedFile( $file, $content )
    {
        $dir = dirname( $file );
        if ( !is_dir( $dir ) )
        {
            eZDir::mkdir( $dir, false, true );
        }

        $oldumask = umask( 0 );

        $tmpFileName = $file . '.' . md5( $file . uniqid( "ezp" . getmypid(), true ) );

        /* Remove files, this might be necessary for Windows */
        @unlink( $tmpFileName );

        /* Write the new cache file with the data attached */
        $data = $content . '<!-- Generated: ' . date( 'Y-m-d H:i:s' ) . " -->\n\n";
        $fp = fopen( $tmpFileName, 'w' );
        if ( $fp )
        {
            $written = fwrite( $fp, $data );
            fclose( $fp );

            if ( $written === strlen( $data ) )
            {
                eZFile::rename( $tmpFileName, $file );

                $perm = eZINI::instance()->variable( 'FileSettings', 'StorageFilePermissions' );
                chmod( $file, octdec( $perm ) );
            }
            else
            {
                // Never replace a good cache file with a truncated one
                @unlink( $tmpFileName );
                eZDebug::writeError( "Could not write all data to $tmpFileName, $file was not updated", 'Static Cache' );
            }
        }
        else
        {
            eZDebug::writeError( "Could not open $tmpFileName for writing, $file was not updated", 'Static Cache' );
        }

        umask( $oldumask );
    }

    /*!
     Removes the static cache file (index.html) and its directory if it exists.
     The directory path is based upon the URL \a $url and the configured static storage dir.
     Failures (e.g. missing file or non-empty directory) are silently ignored.

     \param $url The URL for the current item, e.g \c /news
     \note Only the path directly below the storage directory is used, no site access
           name is added here; presumably callers include it in \a $url when
           site accesses are cached - verify against the callers.
    */
    public function removeURL( $url )
    {
        $dir = eZDir::path( array( $this->StaticStorageDir, $url ) );

        @unlink( $dir . "/index.html" );
        @rmdir( $dir );
    }

    /*!
     \private
     This function adds an action to the list that is used at the end of the
     request to remove and regenerate static cache files.

     \param $action     The name of the action, executeActions() handles \c 'store'.
     \param $parameters The parameters of the action, for \c 'store' an array with
                        the destination file and the source URL.
    */
    public function addAction( $action, $parameters )
    {
        if ( !isset( $GLOBALS['eZStaticCache-ActionList'] ) )
        {
            $GLOBALS['eZStaticCache-ActionList'] = array();
        }
        $GLOBALS['eZStaticCache-ActionList'][] = array( $action, $parameters );
    }

    /*!
     \static
     This function goes over the list of recorded actions and executes them,
     the list is emptied afterwards.

     For \c 'store' actions each destination file is handled only once. If
     \c CronjobCacheClear in \c staticcache.ini is \c enabled the action is inserted
     into the \c ezpending_actions table instead of being performed; otherwise the
     source URL is fetched (once per URL) and written with storeCachedFile().
    */
    public static function executeActions()
    {
        if ( !isset( $GLOBALS['eZStaticCache-ActionList'] ) )
        {
            return;
        }

        $fileContentCache = array();
        $doneDestList = array();

        $ini = eZINI::instance( 'staticcache.ini' );
        $clearByCronjob = ( $ini->variable( 'CacheSettings', 'CronjobCacheClear' ) == 'enabled' );

        if ( $clearByCronjob )
        {
            $db = eZDB::instance();
        }

        $http = eZHTTPTool::instance();

        foreach ( $GLOBALS['eZStaticCache-ActionList'] as $entry )
        {
            list( $action, $parameters ) = $entry;

            switch ( $action )
            {
                case 'store':
                    list( $destination, $source ) = $parameters;

                    if ( isset( $doneDestList[$destination] ) )
                        continue 2;

                    if ( $clearByCronjob )
                    {
                        $param = $db->escapeString( $destination . ',' . $source );
                        $db->query( 'INSERT INTO ezpending_actions( action, param ) VALUES ( \'static_store\', \'' . $param . '\' )' );
                        $doneDestList[$destination] = 1;
                    }
                    else
                    {
                        // isset() is also true for a stored false, so a source
                        // which failed once is not fetched again.
                        if ( !isset( $fileContentCache[$source] ) )
                        {
                            $fileContentCache[$source] = $http->getDataByURL( $source, false, self::USER_AGENT );
                        }
                        if ( $fileContentCache[$source] === false )
                        {
                            eZDebug::writeNotice( "Could not grab content (from $source), is the hostname correct and Apache running?", 'Static Cache' );
                        }
                        else
                        {
                            self::storeCachedFile( $destination, $fileContentCache[$source] );
                            $doneDestList[$destination] = 1;
                        }
                    }
                    break;
            }
        }
        $GLOBALS['eZStaticCache-ActionList'] = array();
    }

    /// \privatesection
    /// The name of the host to fetch HTML data from.
    public $HostName;
    /// The base path for the directory where static files are placed.
    public $StaticStorageDir;
    /// \deprecated Never assigned by this class, the storage directory is kept in $StaticStorageDir.
    ///             Only retained so that existing code reading it keeps working.
    public $StaticStorage;
    /// The maximum depth of URLs that will be cached.
    public $MaxCacheDepth;
    /// Array of URLs to cache.
    public $CachedURLArray;
    /// Array of site-access names to cache.
    public $CachedSiteAccesses;
    /// An array with URLs that is to always be updated.
    public $AlwaysUpdate;
}
00539 
00540 ?>