|
eZ Publish
[trunk]
|
<?php
/**
 * File containing the eZStaticCache class
 *
 * @copyright Copyright (C) 1999-2012 eZ Systems AS. All rights reserved.
 * @license http://www.gnu.org/licenses/gpl-2.0.txt GNU General Public License v2
 * @version //autogentag//
 * @package kernel
 */

/**
 * The eZStaticCache class manages the static cache system.
 *
 * This class can be used to generate static cache files usable
 * by the static cache system.
 *
 * Generating static cache is done by instantiating the class and then
 * calling generateCache(). For example:
 *
 * <code>
 * $staticCache = new eZStaticCache();
 * $staticCache->generateCache();
 * </code>
 *
 * To generate the URLs that must always be updated call generateAlwaysUpdatedCache()
 *
 * @package kernel
 */
class eZStaticCache implements ezpStaticCache
{
    /**
     * User-Agent string sent when fetching pages to cache.
     */
    const USER_AGENT = 'eZ Publish static cache generator';

    /**
     * Actions recorded by addAction() and processed by executeActions().
     * Each entry is array( string $action, array $parameters ).
     *
     * @var array
     */
    private static $actionList = array();

    /**
     * The name of the host to fetch HTML data from.
     *
     * @deprecated deprecated since version 4.4, site.ini.[SiteSettings].SiteURL is used instead
     * @var string
     */
    private $hostName;

    /**
     * The base path for the directory where static files are placed.
     *
     * Use storageDirectory() to read it from outside the class.
     *
     * @var string
     */
    private $staticStorageDir;

    /**
     * The maximum depth of URLs that will be cached.
     *
     * @var int
     */
    private $maxCacheDepth;

    /**
     * Array of URLs to cache.
     *
     * @var array(int=>string)
     */
    private $cachedURLArray = array();

    /**
     * An array with siteaccesses names that will be cached.
     *
     * @var array(int=>string)
     */
    private $cachedSiteAccesses = array();

    /**
     * An array with URLs that is to always be updated.
     *
     * @var array(int=>string)
     */
    private $alwaysUpdate;

    /**
     * Initialises the static cache object with settings from staticcache.ini.
     */
    public function __construct()
    {
        $ini = eZINI::instance( 'staticcache.ini' );
        $this->hostName = $ini->variable( 'CacheSettings', 'HostName' );
        $this->staticStorageDir = $ini->variable( 'CacheSettings', 'StaticStorageDir' );
        $this->maxCacheDepth = $ini->variable( 'CacheSettings', 'MaxCacheDepth' );
        $this->cachedURLArray = $ini->variable( 'CacheSettings', 'CachedURLArray' );
        $this->cachedSiteAccesses = $ini->variable( 'CacheSettings', 'CachedSiteAccesses' );
        $this->alwaysUpdate = $ini->variable( 'CacheSettings', 'AlwaysUpdateArray' );
    }

    /**
     * Getter method for {@link eZStaticCache::$hostName}
     *
     * @deprecated deprecated since version 4.4
     * @return string The currently configured host-name.
     */
    public function hostName()
    {
        return $this->hostName;
    }

    /**
     * Getter method for {@link eZStaticCache::$staticStorageDir}
     *
     * @return string The currently configured storage directory for the static cache.
     */
    public function storageDirectory()
    {
        return $this->staticStorageDir;
    }

    /**
     * Getter method for {@link eZStaticCache::$maxCacheDepth}
     *
     * @return int The maximum depth in the url which will be cached.
     */
    public function maxCacheDepth()
    {
        return $this->maxCacheDepth;
    }

    /**
     * Getter method for {@link eZStaticCache::$cachedSiteAccesses}
     *
     * @return array An array with site-access names that should be cached.
     */
    public function cachedSiteAccesses()
    {
        return $this->cachedSiteAccesses;
    }

    /**
     * Getter method for {@link eZStaticCache::$cachedURLArray}
     *
     * @return array An array with URLs that is to be cached statically, the URLs may contain wildcards.
     */
    public function cachedURLArray()
    {
        return $this->cachedURLArray;
    }

    /**
     * Getter method for {@link eZStaticCache::$alwaysUpdate}
     *
     * These URLs are configured with AlwaysUpdateArray in staticcache.ini.
     *
     * @see eZStaticCache::generateAlwaysUpdatedCache()
     * @return array An array with URLs that is to always be updated.
     */
    public function alwaysUpdateURLArray()
    {
        return $this->alwaysUpdate;
    }

    /**
     * Generates the caches for all URLs that must always be generated.
     *
     * @param bool $quiet If true then the function will not output anything.
     * @param eZCLI|false $cli The eZCLI object or false if no output can be done.
     * @param bool $delay If true the store operations are queued with addAction()
     *                    instead of being performed immediately.
     */
    public function generateAlwaysUpdatedCache( $quiet = false, $cli = false, $delay = true )
    {
        $verbose = ( !$quiet and $cli );

        foreach ( $this->alwaysUpdate as $uri )
        {
            if ( $verbose )
                $cli->output( "caching: $uri ", false );
            $this->storeCache( $uri, $this->staticStorageDir, array(), false, $delay );
            if ( $verbose )
                $cli->output( "done" );
        }
    }

    /**
     * Generates caches for all the urls of nodes in $nodeList.
     *
     * The associative array must have one of these entries:
     * - node_id - ID of the node
     * - path_identification_string - The path_identification_string from the node table, is used to fetch the node ID if node_id is missing.
     *
     * Entries for which no node ID can be determined are skipped.
     *
     * @param array $nodeList An array with node entries, each entry is either the node ID or an associative array.
     */
    public function generateNodeListCache( $nodeList )
    {
        foreach ( $nodeList as $uri )
        {
            if ( is_array( $uri ) )
            {
                if ( isset( $uri['node_id'] ) )
                {
                    $nodeID = (int)$uri['node_id'];
                }
                else
                {
                    // var_export() must be told to return the dump, otherwise it prints it
                    // and the message ends up with an empty placeholder.
                    eZDebug::writeError( "node_id is not set for uri entry " . var_export( $uri, true ) . ", will need to perform extra query to get node_id" );

                    if ( !isset( $uri['path_identification_string'] ) )
                    {
                        continue;
                    }

                    $node = eZContentObjectTreeNode::fetchByURLPath( $uri['path_identification_string'] );
                    if ( !( $node instanceof eZContentObjectTreeNode ) )
                    {
                        // Nothing to cache when the path does not resolve to a node
                        continue;
                    }
                    $nodeID = (int)$node->attribute( 'node_id' );
                }
            }
            else
            {
                $nodeID = (int)$uri;
            }

            $elements = eZURLAliasML::fetchByAction( 'eznode', $nodeID, true, true, true );
            foreach ( $elements as $element )
            {
                $this->cacheURL( '/' . $element->getPath() );
            }
        }
    }

    /**
     * Generates the static cache from the configured INI settings.
     *
     * @param bool $force If true then it will create all static caches even if it is not outdated.
     * @param bool $quiet If true then the function will not output anything.
     * @param eZCLI|false $cli The eZCLI object or false if no output can be done.
     * @param bool $delay If true the store operations are queued with addAction()
     *                    instead of being performed immediately.
     */
    public function generateCache( $force = false, $quiet = false, $cli = false, $delay = true )
    {
        $verbose = ( !$quiet and $cli );

        // This contains parent elements which must be checked to find new urls and put them in $generateList
        // Each entry contains:
        // - url - Url of parent
        // - glob - A glob string to filter direct children based on name
        // - org_url - The original url which was requested
        // - parent_id - The element ID of the parent (optional)
        // The parent_id will be used to quickly fetch the children, if not it will use the url
        $parentList = array();
        // A list of urls which must be generated, each entry is a string with the url
        $generateList = array();
        foreach ( $this->cachedURLArray() as $url )
        {
            if ( strpos( $url, '*' ) === false )
            {
                $generateList[] = $url;
                continue;
            }

            $queryURL = ltrim( str_replace( '*', '', $url ), '/' );
            $dir = dirname( $queryURL );
            if ( $dir == '.' )
                $dir = '';
            $parentList[] = array( 'url' => $dir,
                                   'glob' => basename( $queryURL ),
                                   'org_url' => $url );
        }

        // As long as we have urls to generate or parents to check we loop
        while ( count( $generateList ) > 0 || count( $parentList ) > 0 )
        {
            // First generate single urls
            foreach ( $generateList as $generateURL )
            {
                if ( $verbose )
                    $cli->output( "caching: $generateURL ", false );
                $this->cacheURL( $generateURL, false, !$force, $delay );
                if ( $verbose )
                    $cli->output( "done" );
            }
            $generateList = array();

            // Then check for more data
            $newParentList = array();
            foreach ( $parentList as $parentURL )
            {
                if ( isset( $parentURL['parent_id'] ) )
                {
                    $elements = eZURLAliasML::fetchByParentID( $parentURL['parent_id'], true, true, false );
                }
                else
                {
                    if ( $verbose and $parentURL['glob'] )
                        $cli->output( "wildcard cache: " . $parentURL['url'] . '/' . $parentURL['glob'] . "*" );
                    $elements = eZURLAliasML::fetchByPath( $parentURL['url'], $parentURL['glob'] );
                }

                // Every child found is cached in the next round and becomes a parent to inspect afterwards
                foreach ( $elements as $element )
                {
                    $generateList[] = '/' . $element->getPath();
                    $newParentList[] = array( 'parent_id' => $element->attribute( 'id' ) );
                }
            }
            $parentList = $newParentList;
        }
    }

    /**
     * Generates the caches for the url $url using the currently configured storageDirectory().
     *
     * The URL is only cached when it is not deeper than the configured maximum depth and
     * matches one of the configured cached URLs (exactly, or as prefix for wildcard entries).
     *
     * @param string $url The URL to cache, e.g /news
     * @param int|false $nodeID The ID of the node to cache, if supplied it will also cache content/view/full/xxx.
     * @param bool $skipExisting If true it will not unlink existing cache files.
     * @param bool $delay If true the store operations are queued with addAction()
     *                    instead of being performed immediately.
     * @return bool True if the URL was accepted for caching, false otherwise.
     */
    public function cacheURL( $url, $nodeID = false, $skipExisting = false, $delay = true )
    {
        // Check if URL should be cached
        if ( substr_count( $url, "/" ) >= $this->maxCacheDepth )
            return false;

        $doCacheURL = false;
        foreach ( $this->cachedURLArray as $cacheURL )
        {
            if ( $url == $cacheURL )
            {
                $doCacheURL = true;
                break;
            }

            if ( strpos( $cacheURL, '*' ) !== false
                 && strpos( $url, str_replace( '*', '', $cacheURL ) ) === 0 )
            {
                $doCacheURL = true;
                break;
            }
        }

        if ( !$doCacheURL )
        {
            return false;
        }

        $alternativeLocations = $nodeID ? array( "/content/view/full/$nodeID" ) : array();
        $this->storeCache( $url, $this->staticStorageDir, $alternativeLocations, $skipExisting, $delay );

        return true;
    }

    /**
     * Stores the static cache for $url and hostname defined in site.ini.[SiteSettings].SiteURL for cached siteaccess
     * by fetching the web page using {@link eZHTTPTool::getDataByURL()} and storing the fetched HTML data.
     *
     * @param string $url The URL to cache, e.g /news
     * @param string $staticStorageDir The base directory for storing cache files.
     * @param array $alternativeStaticLocations Additional URLs under which the same content is stored.
     * @param bool $skipUnlink If true, cache files that already exist are left untouched.
     * @param bool $delay If true the store operations are queued with addAction()
     *                    instead of being performed immediately.
     */
    private function storeCache( $url, $staticStorageDir, $alternativeStaticLocations = array(), $skipUnlink = false, $delay = true )
    {
        foreach ( $this->cachedSiteAccesses as $cachedSiteAccess )
        {
            foreach ( $this->buildCacheDirPath( $cachedSiteAccess ) as $dirPart )
            {
                $dir = $dirPart['dir'];
                $siteURL = $dirPart['site_url'];

                $cacheFiles = array( $this->buildCacheFilename( $staticStorageDir, $dir . $url ) );
                foreach ( $alternativeStaticLocations as $location )
                {
                    $cacheFiles[] = $this->buildCacheFilename( $staticStorageDir, $dir . $location );
                }

                // staticcache.ini.[CacheSettings].HostName has been deprecated since version 4.4
                // hostname is read from site.ini.[SiteSettings].SiteURL per siteaccess
                // defined in staticcache.ini.[CacheSettings].CachedSiteAccesses
                if ( $this->hostName )
                {
                    // Deprecated since 4.4, will be removed in future version
                    $fileName = "http://{$this->hostName}{$dir}{$url}";
                }
                else
                {
                    $fileName = "http://{$siteURL}{$url}";
                }

                // Store new content
                $content = false;
                foreach ( $cacheFiles as $file )
                {
                    if ( $skipUnlink && file_exists( $file ) )
                    {
                        continue;
                    }

                    if ( $delay )
                    {
                        $this->addAction( 'store', array( $file, $fileName ) );
                        continue;
                    }

                    // Generate content, if required
                    if ( $content === false )
                    {
                        if ( eZHTTPTool::getDataByURL( $fileName, true, eZStaticCache::USER_AGENT ) )
                            $content = eZHTTPTool::getDataByURL( $fileName, false, eZStaticCache::USER_AGENT );
                    }

                    if ( $content === false )
                    {
                        eZDebug::writeError( "Could not grab content (from $fileName), is the hostname correct and Apache running?", 'Static Cache' );
                    }
                    else
                    {
                        eZStaticCache::storeCachedFile( $file, $content );
                    }
                }
            }
        }
    }

    /**
     * Generates a full path to the cache file (index.html) based on the input parameters.
     *
     * @param string $staticStorageDir The storage for cache files.
     * @param string $url The URL for the current item, e.g /news
     * @return string The full path to the cache file (index.html).
     */
    private function buildCacheFilename( $staticStorageDir, $url )
    {
        // Collapse any run of slashes produced by the concatenation into a single one
        return preg_replace( '#//+#', '/', "{$staticStorageDir}{$url}/index.html" );
    }

    /**
     * Generates a cache directory parts including path, siteaccess name, site URL
     * depending on the match order type.
     *
     * One entry is produced per matching map item for the 'host_uri' and 'host' match
     * types, and one "/<siteaccess>" entry for every other match type in MatchOrder.
     *
     * @param string $siteAccess
     * @return array List of arrays as returned by buildCacheDirPart().
     */
    private function buildCacheDirPath( $siteAccess )
    {
        $dirParts = array();

        $ini = eZINI::instance();

        $matchOrderArray = $ini->variableArray( 'SiteAccessSettings', 'MatchOrder' );

        foreach ( $matchOrderArray as $matchOrderItem )
        {
            switch ( $matchOrderItem )
            {
                case 'host_uri':
                    foreach ( $ini->variable( 'SiteAccessSettings', 'HostUriMatchMapItems' ) as $hostUriMatchMapItem )
                    {
                        $parts = explode( ';', $hostUriMatchMapItem );

                        // Malformed items (fewer than three parts) cannot match any siteaccess
                        if ( isset( $parts[2] ) && $parts[2] === $siteAccess )
                        {
                            $dirParts[] = $this->buildCacheDirPart( ( $parts[0] ? '/' . $parts[0] : '' ) .
                                                                    ( $parts[1] ? '/' . $parts[1] : '' ), $siteAccess );
                        }
                    }
                    break;
                case 'host':
                    foreach ( $ini->variable( 'SiteAccessSettings', 'HostMatchMapItems' ) as $hostMatchMapItem )
                    {
                        $parts = explode( ';', $hostMatchMapItem );

                        // Malformed items (fewer than two parts) cannot match any siteaccess
                        if ( isset( $parts[1] ) && $parts[1] === $siteAccess )
                        {
                            $dirParts[] = $this->buildCacheDirPart( ( $parts[0] ? '/' . $parts[0] : '' ), $siteAccess );
                        }
                    }
                    break;
                default:
                    $dirParts[] = $this->buildCacheDirPart( '/' . $siteAccess, $siteAccess );
                    break;
            }
        }

        return $dirParts;
    }

    /**
     * A helper method used to create directory parts array
     *
     * @param string $dir
     * @param string $siteAccess
     * @return array Array with the keys 'dir', 'access_name' and 'site_url', the latter read
     *               from site.ini.[SiteSettings].SiteURL of the given siteaccess.
     */
    private function buildCacheDirPart( $dir, $siteAccess )
    {
        return array( 'dir' => $dir,
                      'access_name' => $siteAccess,
                      'site_url' => eZSiteAccess::getIni( $siteAccess, 'site.ini' )->variable( 'SiteSettings', 'SiteURL' ) );
    }

    /**
     * Stores the cache file $file with contents $content.
     * Takes care of setting proper permissions on the new file.
     *
     * The content is first written to a uniquely named temporary file next to $file
     * and then renamed over $file.
     *
     * @param string $file
     * @param string $content
     */
    public static function storeCachedFile( $file, $content )
    {
        $dir = dirname( $file );
        if ( !is_dir( $dir ) )
        {
            eZDir::mkdir( $dir, false, true );
        }

        $oldumask = umask( 0 );

        $tmpFileName = $file . '.' . md5( $file . uniqid( "ezp" . getmypid(), true ) );

        // Remove files, this might be necessary for Windows
        @unlink( $tmpFileName );

        // Write the new cache file with the data attached
        $fp = fopen( $tmpFileName, 'w' );
        if ( $fp )
        {
            $appendGeneratedTime = eZINI::instance( 'staticcache.ini' )->variable( 'CacheSettings', 'AppendGeneratedTime' ) === 'true';
            $comment = $appendGeneratedTime ? "<!-- Generated: " . date( 'Y-m-d H:i:s' ) . " -->\n\n" : '';

            fwrite( $fp, $content . $comment );
            fclose( $fp );
            eZFile::rename( $tmpFileName, $file, false, eZFile::CLEAN_ON_FAILURE | eZFile::APPEND_DEBUG_ON_FAILURE );

            $perm = eZINI::instance()->variable( 'FileSettings', 'StorageFilePermissions' );
            chmod( $file, octdec( $perm ) );
        }
        else
        {
            eZDebug::writeError( "Could not open temporary file $tmpFileName for writing, static cache file $file was not stored", 'Static Cache' );
        }

        umask( $oldumask );
    }

    /**
     * Removes the static cache file (index.html) and its directory if it exists.
     * The directory path is based upon the URL $url and the configured static storage dir.
     *
     * @param string $url The URL for the current item, e.g /news
     */
    public function removeURL( $url )
    {
        $dir = eZDir::path( array( $this->staticStorageDir, $url ) );

        // Best effort: the file or directory may not exist
        @unlink( $dir . "/index.html" );
        @rmdir( $dir );
    }

    /**
     * This function adds an action to the list that is used at the end of the
     * request to remove and regenerate static cache files.
     *
     * @param string $action
     * @param array $parameters
     */
    private function addAction( $action, $parameters )
    {
        self::$actionList[] = array( $action, $parameters );
    }

    /**
     * This function goes over the list of recorded actions and executes them.
     *
     * For 'store' actions the page is either fetched and written immediately, or, when
     * staticcache.ini.[CacheSettings].CronjobCacheClear is 'enabled', recorded in the
     * ezpending_actions table. The list of recorded actions is emptied afterwards.
     */
    public static function executeActions()
    {
        if ( empty( self::$actionList ) )
        {
            return;
        }

        // Content already fetched during this run, indexed by source URL (false on failure)
        $fileContentCache = array();
        // Destinations already handled, used to skip duplicate actions
        $doneDestList = array();

        $ini = eZINI::instance( 'staticcache.ini' );
        $clearByCronjob = ( $ini->variable( 'CacheSettings', 'CronjobCacheClear' ) == 'enabled' );

        if ( $clearByCronjob )
        {
            $db = eZDB::instance();
        }

        foreach ( self::$actionList as $actionEntry )
        {
            list( $action, $parameters ) = $actionEntry;

            switch ( $action )
            {
                case 'store':
                    list( $destination, $source ) = $parameters;

                    if ( isset( $doneDestList[$destination] ) )
                        continue 2;

                    if ( $clearByCronjob )
                    {
                        $param = $db->escapeString( $destination . ',' . $source );
                        $db->query( 'INSERT INTO ezpending_actions( action, param ) VALUES ( \'static_store\', \''. $param . '\' )' );
                        $doneDestList[$destination] = 1;
                    }
                    else
                    {
                        if ( !isset( $fileContentCache[$source] ) )
                        {
                            if ( eZHTTPTool::getDataByURL( $source, true, eZStaticCache::USER_AGENT ) )
                                $fileContentCache[$source] = eZHTTPTool::getDataByURL( $source, false, eZStaticCache::USER_AGENT );
                            else
                                $fileContentCache[$source] = false;
                        }

                        if ( $fileContentCache[$source] === false )
                        {
                            eZDebug::writeError( "Could not grab content (from $source), is the hostname correct and Apache running?", 'Static Cache' );
                        }
                        else
                        {
                            eZStaticCache::storeCachedFile( $destination, $fileContentCache[$source] );
                            $doneDestList[$destination] = 1;
                        }
                    }
                    break;
            }
        }
        self::$actionList = array();
    }
}