eZ Publish  [trunk]
ezurlwildcard.php
Go to the documentation of this file.
00001 <?php
00002 /**
00003  * File containing the eZURLWildcard class.
00004  *
00005  * @copyright Copyright (C) 1999-2012 eZ Systems AS. All rights reserved.
00006  * @license http://www.gnu.org/licenses/gpl-2.0.txt GNU General Public License v2
00007  * @version //autogentag//
00008  * @package kernel
00009  */
00010 
00011 /*!
00012   \class eZURLWildcard ezurlwildcard.php
00013   \brief Handles URL alias wildcards in eZ Publish
00014 
00015   \private
00016 */
00017 
00018 class eZURLWildcard extends eZPersistentObject
00019 {
00020     /**
00021      * Max number of wildcard entries per cache file
00022      * @var int
00023      */
00024     const WILDCARDS_PER_CACHE_FILE = 100;
00025 
00026     /**
00027      * Wildcards types
00028      * @var int
00029      */
00030     const TYPE_NONE = 0;
00031     const TYPE_FORWARD = 1;
00032     const TYPE_DIRECT = 2;
00033 
00034     /**
00035      * ExpiryHandler key
00036      * @var string
00037      */
00038     const CACHE_SIGNATURE = 'urlalias-wildcard';
00039 
00040     /**
00041      * Cluster file handler instances of cache files
00042      * @var array(eZClusterFileHandlerInterface)
00043      */
00044     protected static $cacheFiles = array();
00045 
00046     /**
00047      * Wildcards index local cache
00048      * @var array
00049      */
00050     protected static $wildcardsIndex = null;
00051 
00052     /**
00053      * Initializes a new URL alias persistent object
00054      * @param array $row
00055      */
00056     public function eZURLWildcard( $row )
00057     {
00058         $this->eZPersistentObject( $row );
00059     }
00060 
00061     public static function definition()
00062     {
00063         static $definition = array( "fields" => array( "id" => array( 'name' => 'ID',
00064                                                         'datatype' => 'integer',
00065                                                         'default' => 0,
00066                                                         'required' => true ),
00067                                          "source_url" => array( 'name' => "SourceURL",
00068                                                                 'datatype' => 'string',
00069                                                                 'default' => '',
00070                                                                 'required' => true ),
00071                                          "destination_url" => array( 'name' => "DestinationURL",
00072                                                                 'datatype' => 'string',
00073                                                                 'default' => '',
00074                                                                 'required' => true ),
00075                                          "type" => array( 'name' => "Type",
00076                                                           'datatype' => 'integer',
00077                                                           'default' => '0',
00078                                                           'required' => true ) ),
00079                       "keys" => array( "id" ),
00080                       'function_attributes' => array(),
00081                       "increment_key" => "id",
00082                       "class_name" => "eZURLWildcard",
00083                       "name" => "ezurlwildcard" );
00084         return $definition;
00085     }
00086 
00087     /**
00088      * Converts the url wildcard object to an associative array with the attribute
00089      * names as array keys and the values as array values
00090      * @return array
00091      */
00092     public function asArray()
00093     {
00094         return array( 'id' => $this->attribute( 'id' ),
00095                       'source_url' => $this->attribute( 'source_url' ),
00096                       'destination_url' => $this->attribute( 'destination_url' ),
00097                       'type' => $this->attribute( 'type' ) );
00098     }
00099 
00100     /**
00101      * Stores the eZURLWildcard persistent object
00102      */
00103     public function store( $fieldFilters = null )
00104     {
00105         eZPersistentObject::store( $fieldFilters );
00106     }
00107 
00108     /**
00109      * Removes a wildcard based on a source_url.
00110      * The URL should be provided without the /* prefix:
00111      * foobar will remove the wildcard with source_url = foobar/*
00112      * @param string $baseURL URL prefix matched against destination_url
00113      * @return void
00114      */
00115     static function cleanup( $baseURL )
00116     {
00117         $db = eZDB::instance();
00118         $baseURLText = $db->escapeString( $baseURL . "/*" );
00119         $sql = "DELETE FROM ezurlwildcard
00120                 WHERE source_url = '$baseURLText'";
00121         $db->query( $sql );
00122         self::expireCache();
00123     }
00124 
00125     /**
00126      * Removes all the wildcards
00127      * @return void
00128      */
00129     public static function removeAll()
00130     {
00131         eZPersistentObject::removeObject( self::definition() );
00132         self::expireCache();
00133     }
00134 
00135     /**
00136      * Removes wildcards based on an ID list
00137      * @param array $idList array of numerical ID
00138      * @return void
00139      */
00140     public static function removeByIDs( $idList )
00141     {
00142         if ( !is_array( $idList ) )
00143             return;
00144 
00145         while ( count( $idList ) > 0 )
00146         {
00147             // remove by portion of 100 rows.
00148             $ids = array_splice( $idList, 0, 100 );
00149 
00150             $conditions = array( 'id' => array( $ids ) );
00151 
00152             eZPersistentObject::removeObject( self::definition(),
00153                                               $conditions );
00154         }
00155     }
00156 
00157     /**
00158      * Fetch a wildcard by numerical ID
00159      * @param int $id
00160      * @param bool $asObject
00161      * @return eZURLWildcard null if no match was found
00162      */
00163     public static function fetch( $id, $asObject = true )
00164     {
00165         return eZPersistentObject::fetchObject( self::definition(),
00166                                                 null,
00167                                                 array( "id" => $id ),
00168                                                 $asObject );
00169     }
00170 
00171     /**
00172      * Fetches a wildcard by source url
00173      * @param string $url Source URL
00174      * @param bool $asObject
00175      * @return eZURLWildcard Null if no match was found
00176      */
00177     public static function fetchBySourceURL( $url, $asObject = true )
00178     {
00179         return eZPersistentObject::fetchObject( self::definition(),
00180                                                 null,
00181                                                 array( "source_url" => $url ),
00182                                                 $asObject );
00183     }
00184 
00185     /**
00186      * Fetches the list of URL wildcards. By defaults, fetches all the wildcards
00187      * @param int $offset Offset to limit the list from
00188      * @param int $limit Limit to the number of fetched items
00189      * @param bool $asObject
00190      * @return array[eZURLWildcard]
00191      */
00192     public static function fetchList( $offset = false, $limit = false, $asObject = true )
00193     {
00194         return eZPersistentObject::fetchObjectList( self::definition(),
00195                                                     null,
00196                                                     null,
00197                                                     null,
00198                                                     array( 'offset' => $offset, 'length' => $limit ),
00199                                                     $asObject );
00200     }
00201 
00202     /**
00203      * Returns the number of wildcards in the database without any filtering
00204      * @return int Number of wildcards in the database
00205      */
00206     public static function fetchListCount()
00207     {
00208         $rows = eZPersistentObject::fetchObjectList( self::definition(),
00209                                                      array(),
00210                                                      null,
00211                                                      false,
00212                                                      null,
00213                                                      false, false,
00214                                                      array( array( 'operation' => 'count( * )',
00215                                                                    'name' => 'count' ) ) );
00216         return $rows[0]['count'];
00217     }
00218 
00219     /**
00220      * Transforms the URI if there exists an alias for it.
00221      *
00222      * @param eZURI|string $uri
00223      * @return mixed The translated URI if the resource has moved, or true|false
00224      *               if translation was (un)successful
00225      */
00226     public static function translate( &$uri )
00227     {
00228         $result = false;
00229 
00230         // get uri string
00231         $uriString = ( $uri instanceof eZURI ) ? $uri->elements() : $uri;
00232         $uriString = eZURLAliasML::cleanURL( $uriString );
00233 
00234         eZDebugSetting::writeDebug( 'kernel-urltranslator', "input uriString: '$uriString'", __METHOD__ );
00235 
00236         if ( !$wildcards = self::wildcardsIndex() )
00237         {
00238             eZDebugSetting::writeDebug( 'kernel-urltranslator', "no match callbacks", __METHOD__ );
00239             return false;
00240         }
00241 
00242         $ini = eZINI::instance();
00243         $iteration = $ini->variable( 'URLTranslator', 'MaximumWildcardIterations' );
00244 
00245         eZDebugSetting::writeDebug( 'kernel-urltranslator', "MaximumWildcardIterations: '$iteration'", __METHOD__ );
00246 
00247         // translate
00248         $urlTranslated = false;
00249         while ( !$urlTranslated && $iteration >= 0 )
00250         {
00251             foreach ( $wildcards as $wildcardNum => $wildcard )
00252             {
00253                 if ( preg_match( $wildcard, $uriString ) )
00254                 {
00255                     eZDebugSetting::writeDebug( 'kernel-urltranslator', "matched with: '$wildcard'", __METHOD__ );
00256 
00257                     // get new $uriString from wildcard
00258                     self::translateWithCache( $wildcardNum, $uriString, $wildcardInfo, $wildcard );
00259 
00260                     eZDebugSetting::writeDebug( 'kernel-urltranslator', "new uri string: '$uriString'", __METHOD__ );
00261 
00262                     // optimization: don't try further translation if wildcard type is 'forward'
00263                     if ( $wildcardInfo['type'] == self::TYPE_FORWARD )
00264                     {
00265                         $urlTranslated = true;
00266                         break;
00267                     }
00268 
00269                     // try to tranlsate
00270                     if ( $urlTranslated = eZURLAliasML::translate( $uriString ) )
00271                     {
00272                         // success
00273                         eZDebugSetting::writeDebug( 'kernel-urltranslator', "uri is translated to '$uriString' with result '$urlTranslated'", __METHOD__ );
00274                         break;
00275                     }
00276 
00277                     eZDebugSetting::writeDebug( 'kernel-urltranslator', "uri is not translated, trying another wildcard", __METHOD__ );
00278 
00279                     // translation failed. Try to match new $uriString with another wildcard.
00280                     --$iteration;
00281                     continue 2;
00282                 }
00283             }
00284 
00285             // we here if non of the wildcards is matched
00286             break;
00287         }
00288 
00289         // check translation result
00290         // NOTE: 'eZURLAliasML::translate'(see above) can return 'true', 'false' or new url(in case of 'error/301').
00291         //       $urlTranslated can also be 'false' if no wildcard is matched.
00292         if ( $urlTranslated )
00293         {
00294             // check wildcard type and set appropriate $result and $uriString
00295             $wildcardType = $wildcardInfo['type'];
00296 
00297             eZDebugSetting::writeDebug( 'kernel-urltranslator', "wildcard type: $wildcardType", __METHOD__ );
00298 
00299             switch ( $wildcardType )
00300             {
00301                 case self::TYPE_FORWARD:
00302                 {
00303                     // do redirect:
00304                     //   => set $result to translated uri
00305                     //   => set uri string to a MOVED PERMANENTLY HTTP code
00306                     $result = $uriString;
00307                     $uriString = 'error/301';
00308                 }
00309                 break;
00310 
00311                 default:
00312                 {
00313                     eZDebug::writeError( 'Invalid wildcard type.', __METHOD__ );
00314                     // no break, using eZURLWildcard::TYPE_DIRECT as fallback
00315                 }
00316                 case self::TYPE_DIRECT:
00317                 {
00318                     $result = $urlTranslated;
00319                     // $uriString already has correct value
00320                     break;
00321                 }
00322             }
00323         }
00324         else
00325         {
00326             // we are here if:
00327             // - input url is not matched with any wildcard;
00328             // - url is matched with wildcard and:
00329             //   - points to module
00330             //   - invalide url
00331             eZDebugSetting::writeDebug( 'kernel-urltranslator', "wildcard is not translated", __METHOD__ );
00332             $result = false;
00333         }
00334 
00335         // set value back to $uri
00336         if ( $uri instanceof eZURI )
00337         {
00338             $uri->setURIString( $uriString, false );
00339         }
00340         else
00341         {
00342             $uri = $uriString;
00343         }
00344 
00345         eZDebugSetting::writeDebug( 'kernel-urltranslator', "finished with url '$uriString' and result '$result'", __METHOD__ );
00346 
00347         return $result;
00348     }
00349 
00350     /**
00351      * Returns an array with information on the wildcard cache
00352      * The array containst the following keys
00353      * - dir - The directory for the cache
00354      * - file - The base filename for the caches
00355      * - path - The entire path (including filename) for the cache
00356      * - keys - Array with key values which is used to uniquely identify the cache
00357      * @return array
00358      */
00359     protected static function cacheInfo()
00360     {
00361         static $cacheInfo = null;
00362 
00363         if ( $cacheInfo == null )
00364         {
00365             $cacheDir = eZSys::cacheDirectory();
00366             $ini = eZINI::instance();
00367             $keys = array( 'implementation' => $ini->variable( 'DatabaseSettings', 'DatabaseImplementation' ),
00368                            'server' => $ini->variable( 'DatabaseSettings', 'Server' ),
00369                            'database' => $ini->variable( 'DatabaseSettings', 'Database' ) );
00370             $wildcardKey = md5( implode( "\n", $keys ) );
00371             $wildcardCacheDir = "$cacheDir/wildcard";
00372             $wildcardCacheFile = "wildcard_$wildcardKey";
00373             $wildcardCachePath = "$wildcardCacheDir/$wildcardCacheFile";
00374             $cacheInfo = array( 'dir' => $wildcardCacheDir,
00375                                 'file' => $wildcardCacheFile,
00376                                 'path' => $wildcardCachePath,
00377                                 'keys' => $keys );
00378         }
00379 
00380         return $cacheInfo;
00381     }
00382 
00383     /**
00384      * Sets the various cache information to the parameters.
00385      * @private
00386      */
00387     protected static function cacheInfoDirectories( &$wildcardCacheDir, &$wildcardCacheFile, &$wildcardCachePath, &$wildcardKeys )
00388     {
00389         $info = self::cacheInfo();
00390         $wildcardCacheDir = $info['dir'];
00391         $wildcardCacheFile = $info['file'];
00392         $wildcardCachePath = $info['path'];
00393         $wildcardKeys = $info['keys'];
00394     }
00395 
00396     /**
00397      * Expires the wildcard cache. This causes the wildcard cache to be
00398      * regenerated on the next page load.
00399      * @return void
00400      */
00401     public static function expireCache()
00402     {
00403         $handler = eZExpiryHandler::instance();
00404         $handler->setTimestamp( self::CACHE_SIGNATURE, time() );
00405         $handler->store();
00406 
00407         self::$wildcardsIndex = null;
00408     }
00409 
00410     /**
00411      * Returns the expiry timestamp for wildcard cache from eZExpiryHandler
00412      * @return int|bool the timestamp if set, false otherwise
00413      */
00414     protected static function expiryTimestamp()
00415     {
00416         $handler = eZExpiryHandler::instance();
00417         if ( $handler->hasTimestamp( self::CACHE_SIGNATURE ) )
00418         {
00419             $ret = $handler->timestamp( self::CACHE_SIGNATURE );
00420         }
00421         else
00422         {
00423             $ret = false;
00424         }
00425         return $ret;
00426     }
00427 
00428     /**
00429      * Checks if the wildcard cache is expired
00430      *
00431      * @param int $timestamp Timestamp expiry should be checked against
00432      *
00433      * @return bool true if cache is expired
00434      * @deprecated since 4.2.0
00435      */
00436     public static function isCacheExpired( $timestamp )
00437     {
00438         return ( self::expiryTimestamp() > $timestamp );
00439     }
00440 
00441     /**
00442      * Assign function names to input variables. Generates the wildcard cache if
00443      * expired.
00444      *
00445      * @param $regexpArrayCallback function to get an array of regexps
00446      *
00447      * @return array The wildcards index, as an array of regexps
00448      */
00449     protected static function wildcardsIndex()
00450     {
00451         if ( self::$wildcardsIndex === null )
00452         {
00453             $cacheIndexFile = self::loadCacheFile();
00454 
00455             // if NULL is returned, the cache doesn't exist or isn't valid
00456             $wildcardsIndex = $cacheIndexFile->processFile( array( __CLASS__, 'fetchCacheFile' ), self::expiryTimestamp() );
00457             if ( $wildcardsIndex === null )
00458             {
00459                 // This will generate and return the index, and store the cache
00460                 // files for the different wildcards for later use
00461                 $wildcardsIndex = self::createWildcardsIndex();
00462             }
00463         }
00464 
00465         return $wildcardsIndex;
00466     }
00467 
00468     /**
00469      * Create the wildcard cache
00470      *
00471      * The wildcard caches are splitted between several files:
00472      *   'wildcard_<md5>_index.php': contains regexps for wildcards
00473      *   'wildcard_<md5>_0.php',
00474      *   'wildcard_<md5>_1.php',
00475      *   ...
00476      *   'wildcard_<md5>_N.php': contains cached wildcards.
00477      * Each file has info about eZURLWildcard::WILDCARDS_PER_CACHE_FILE wildcards.
00478      * @return void
00479      */
00480     protected static function createWildcardsIndex()
00481     {
00482         self::cacheInfoDirectories( $wildcardCacheDir, $wildcardCacheFile, $wildcardCachePath, $wildcardKeys );
00483         if ( !file_exists( $wildcardCacheDir ) )
00484         {
00485             eZDir::mkdir( $wildcardCacheDir, false, true );
00486         }
00487 
00488         // Index file (wildcard_md5_index.php)
00489         $wildcardsIndex = array();
00490 
00491         $limit = self::WILDCARDS_PER_CACHE_FILE;
00492         $offset = 0;
00493         $cacheFilesCount = 0;
00494         $wildcardNum = 0;
00495         while( 1 )
00496         {
00497             $wildcards = self::fetchList( $offset, $limit, false );
00498             if ( count( $wildcards ) === 0 )
00499             {
00500                 break;
00501             }
00502 
00503             // sub cache file (wildcard_md5_<i>.php)
00504             $wildcardDetails = array();
00505             $currentSubCacheFile = self::loadCacheFile( $cacheFilesCount );
00506             foreach ( $wildcards as $wildcard )
00507             {
00508                 $wildcardsIndex[] = self::matchRegexpCode( $wildcard );
00509                 $wildcardDetails[$wildcardNum] = self::matchReplaceCode( $wildcard );
00510 
00511                 ++$wildcardNum;
00512             }
00513             $binaryData = "<" . "?php\nreturn ". var_export( $wildcardDetails, true ) . ";\n?" . ">\n";
00514             $currentSubCacheFile->storeContents( $binaryData, "wildcard-cache-$cacheFilesCount", 'php', true );
00515 
00516             $offset += $limit;
00517             ++$cacheFilesCount;
00518         }
00519 
00520         $indexCacheFile = self::loadCacheFile();
00521         $indexBinaryData = "<" . "?php\nreturn ". var_export( $wildcardsIndex, true ) . ";\n?" . ">\n";
00522         $indexCacheFile->storeContents( $indexBinaryData, "wildcard-cache-index", 'php', true );
00523 
00524         return $wildcardsIndex;
00525         // end index cache file
00526     }
00527 
00528     /**
00529      * Transforms the source-url of a wildcard to a preg_match compatible expression
00530      * Example: foo/* will be converted to #^foo/(.*)$#
00531      *
00532      * @param array $wildcard wildcard data with a source_url key
00533      *
00534      * @return string preg_match compatible string
00535      */
00536     protected static function matchRegexpCode( $wildcard )
00537     {
00538         $matchWilcard = $wildcard['source_url'];
00539         $matchWilcardList = explode( "*", $matchWilcard );
00540         $regexpList = array();
00541         foreach ( $matchWilcardList as $matchWilcardItem )
00542         {
00543             $regexpList[] = preg_quote( $matchWilcardItem, '#' );
00544         }
00545         $matchRegexp = implode( '(.*)', $regexpList );
00546 
00547         return "#^$matchRegexp#i";
00548     }
00549 
00550     /**
00551      * Converts the destination-url of a wildcard to a preg_replace compatible
00552      * expression.
00553      * Example: foobar/{1} will be converted to ...
00554      * @todo fix the example
00555      *
00556      * @param array $wildcard Wildcard array with a destination_url key
00557      *
00558      * @return string match/replace PHP Code
00559      *
00560      * @todo Try to replace the eval'd code with a preg_replace expression
00561      */
00562     protected static function matchReplaceCode( $wildcard )
00563     {
00564         $return = array();
00565 
00566         $replaceWildcardList = preg_split( "#{([0-9]+)}#", $wildcard['destination_url'], false, PREG_SPLIT_DELIM_CAPTURE );
00567 
00568         $replaceArray = array();
00569         foreach ( $replaceWildcardList as $index => $replaceWildcardItem )
00570         {
00571             // even values are placeholders
00572             if ( ( $index % 2 ) == 0 )
00573             {
00574                 $replaceArray[] = $replaceWildcardItem;
00575             }
00576             else
00577             {
00578                 $replaceArray[] = "\${$replaceWildcardItem}";
00579             }
00580         }
00581         $replaceCode = implode( '', $replaceArray );
00582 
00583         $return['uri'] = $replaceCode;
00584         $return['info'] = $wildcard;
00585 
00586         return $return;
00587     }
00588 
00589     /**
00590      * The callback loads appropriate cache file for wildcard $wildcardNum,
00591      * extracts wildcard info and 'replace' url from cache.
00592      *
00593      * The wildcard number (not a wildcard id) is used here in order to load
00594      * the appropriate cache file.
00595      *
00596      * If it's needed to fetch wildcard from db, use eZURLWildcard::fetchList
00597      * with offset = $wildcardNum and $limit = 1.
00598      *
00599      * @param int $wildcardNum
00600      * @param eZURI|string $uri
00601      * @param mixed $wildcardInfo
00602      * @param mixed $matches
00603      *
00604      * @return bool
00605      *
00606      * @todo make private, this method isn't used anywhere else
00607      */
00608     protected static function translateWithCache( $wildcardNum, &$uri, &$wildcardInfo, $matchRegexp )
00609     {
00610         eZDebugSetting::writeDebug( 'kernel-urltranslator', "wildcardNum = $wildcardNum, uri = $uri", __METHOD__ );
00611 
00612         $cacheFileNum = (int) ( $wildcardNum / self::WILDCARDS_PER_CACHE_FILE );
00613 
00614         eZDebugSetting::writeDebug( 'kernel-urltranslator', "cacheFileNum = $cacheFileNum", __METHOD__ );
00615 
00616         $cacheFile = self::loadCacheFile( $cacheFileNum );
00617         $wildcardsInfos = $cacheFile->processFile( array( __CLASS__, 'fetchCacheFile' ) );
00618 
00619         if ( !isset( $wildcardsInfos[$wildcardNum] ) )
00620         {
00621             eZDebug::writeError( "An error occured: the requested wildcard couldn't be found", __METHOD__ );
00622             return false;
00623         }
00624 
00625         // @todo Try to replace this with a preg_replace in order to get rid of eval()
00626         $replaceRegexp = $wildcardsInfos[$wildcardNum]['uri'];
00627         $uri = preg_replace( $matchRegexp, $replaceRegexp, $uri );
00628         $wildcardInfo = $wildcardsInfos[$wildcardNum]['info'];
00629 
00630         eZDebugSetting::writeDebug( 'kernel-urltranslator', "found wildcard: " . var_export( $wildcardInfo, true ), __METHOD__ );
00631 
00632         return true;
00633     }
00634 
00635     /**
00636      * Loads and returns the cluster handler instance for the requested cache file.
00637      * The instance will be returned even if the file doesn't exist
00638      *
00639      * @param $cacheID Cache file number. Will load the index if not provided.
00640      *
00641      * @return eZClusterFileHandlerInterface
00642      */
00643     protected static function loadCacheFile( $cacheID = 'index' )
00644     {
00645         if ( isset( self::$cacheFiles[$cacheID] ) )
00646         {
00647             return self::$cacheFiles[$cacheID];
00648         }
00649 
00650         $info = self::cacheInfo();
00651         $cacheFileName = $info['path'] . '_' . $cacheID . '.php';
00652 
00653         self::$cacheFiles[$cacheID] = eZClusterFileHandler::instance( $cacheFileName );
00654         return self::$cacheFiles[$cacheID];
00655     }
00656 
00657     /**
00658      * Includes a wildcard cache file and returns its return value
00659      * This method is used as a callback by eZClusterFileHandler::processFile
00660      *
00661      * @param string $filepath
00662      *
00663      * @return array
00664      */
00665     public static function fetchCacheFile( $filepath )
00666     {
00667         return include( $filepath );
00668     }
00669 
00670     /**
00671      * Checks if $uriString exists as a wildcard
00672      *
00673      * @param string $uriString
00674      * @return boolean
00675      */
00676     public static function wildcardExists( $uriString )
00677     {
00678         $wildcardIndex = self::wildcardsIndex();
00679         $uriString = self::matchRegexpCode( array( 'source_url' => $uriString ) );
00680         if ( in_array( $uriString, $wildcardIndex ) )
00681         {
00682             return true;
00683         }
00684 
00685         return false;
00686     }
00687 }
00688 
00689 ?>