eZ Publish  [4.0]
linkcheck.php
Go to the documentation of this file.
00001 <?php
00002 //
00003 // Cronjob script that checks the validity of the links stored as eZURL objects
00004 //
00005 // Created on: <07-Jul-2003 10:06:19 wy>
00006 //
00007 // ## BEGIN COPYRIGHT, LICENSE AND WARRANTY NOTICE ##
00008 // SOFTWARE NAME: eZ Publish
00009 // SOFTWARE RELEASE: 4.0.x
00010 // COPYRIGHT NOTICE: Copyright (C) 1999-2008 eZ Systems AS
00011 // SOFTWARE LICENSE: GNU General Public License v2.0
00012 // NOTICE: >
00013 //   This program is free software; you can redistribute it and/or
00014 //   modify it under the terms of version 2.0  of the GNU General
00015 //   Public License as published by the Free Software Foundation.
00016 //
00017 //   This program is distributed in the hope that it will be useful,
00018 //   but WITHOUT ANY WARRANTY; without even the implied warranty of
00019 //   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00020 //   GNU General Public License for more details.
00021 //
00022 //   You should have received a copy of version 2.0 of the GNU General
00023 //   Public License along with this program; if not, write to the Free
00024 //   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
00025 //   MA 02110-1301, USA.
00026 //
00027 //
00028 // ## END COPYRIGHT, LICENSE AND WARRANTY NOTICE ##
00029 //
00030 
00031 /*! \file linkcheck.php
00032 */
00033 //include_once( 'kernel/classes/datatypes/ezurl/ezurl.php' );
00034 //include_once( "lib/ezutils/classes/ezini.php" );
00035 //include_once( "lib/ezutils/classes/ezhttptool.php" );
00036 
// Link check cronjob.
//
// Fetches the list of URLs from eZURL (restricted with 'only_published'),
// tests each one and records the outcome:
//   - mailto:           the mail host must have an MX record (skipped on win32)
//   - http:/ftp:/file:  the URL must be retrievable through eZHTTPTool
//   - https:            not checked, only reported
//   - anything else     treated as a site-internal link: it must either
//                       translate as an URL alias or be readable below one
//                       of the configured SiteURL entries of cronjob.ini
// eZURL::setIsValid() is only called when the stored flag actually changes,
// eZURL::setLastChecked() is called for every link.
//
// $cli and $isQuiet are not defined in this file; they are expected to be
// supplied by the script that includes it.

if ( !$isQuiet )
    $cli->output( "Checking link ..." );

$cronjobIni = eZINI::instance( 'cronjob.ini' );
$siteURLs = $cronjobIni->variable( 'linkCheckSettings', 'SiteURL' );
$linkList = eZURL::fetchList( array( 'only_published' => true ) );
foreach ( $linkList as $link )
{
    $linkID = $link->attribute( 'id' );
    $url = $link->attribute( 'url' );
    $isValid = $link->attribute( 'is_valid' );

    // Written without a newline; every branch below must finish the line.
    $cli->output( "check-" . $cli->stylize( 'emphasize', $url ) . " ", false );

    // true = link works, false = link is broken, null = link could not be checked
    $checkResult = null;
    $skipMessage = '';

    if ( preg_match( "/^(https?|ftp|file|mailto):/i", $url, $schemeMatch ) )
    {
        $scheme = strtolower( $schemeMatch[1] );
        if ( $scheme == 'mailto' )
        {
            if ( eZSys::osType() != 'win32' )
            {
                // Reduce "mailto:user@host?subject=..." to the bare host name.
                $address = trim( preg_replace( "/^mailto:(.+)/i", "\\1", $url ) );
                $addressParts = explode( '@', $address );
                $host = '';
                if ( count( $addressParts ) > 1 )
                {
                    $hostParts = explode( '?', $addressParts[1] );
                    $host = $hostParts[0];
                }
                // An address without a host part can never be delivered to,
                // so it is reported as invalid without asking the resolver.
                $checkResult = ( $host != '' && checkdnsrr( $host, "MX" ) );
            }
            else
            {
                $skipMessage = "Couldn't check mailto protocol on this platform";
            }
        }
        else if ( $scheme == 'https' )
        {
            $skipMessage = "Couldn't check https protocol";
        }
        else
        {
            // http, ftp and file
            $checkResult = (bool)eZHTTPTool::getDataByURL( $url, true, 'eZ Publish Link Validator' );
        }
    }
    else
    {
        $checkResult = (bool)eZURLAliasML::translate( $url );

        if ( !$checkResult )
        {
            // Not a known URL alias; check if it is a valid internal link by
            // trying to open it below each configured site URL.
            foreach ( $siteURLs as $siteURL )
            {
                // Strip one trailing slash so the join below does not double it.
                $siteURL = preg_replace( "/\/$/", "", $siteURL );
                // Failures are expected here, hence the suppressed warning.
                $fp = @fopen( $siteURL . "/" . $url, "r" );
                if ( $fp )
                {
                    fclose( $fp );
                    $checkResult = true;
                    // One reachable site is enough, no need to probe the rest.
                    break;
                }
            }
        }
    }

    if ( $checkResult === null )
    {
        $cli->output( $skipMessage );
    }
    else if ( $checkResult )
    {
        if ( !$isValid )
            eZURL::setIsValid( $linkID, true );
        $cli->output( $cli->stylize( 'success', "valid" ) );
    }
    else
    {
        if ( $isValid )
            eZURL::setIsValid( $linkID, false );
        $cli->output( $cli->stylize( 'warning', "invalid" ) );
    }

    eZURL::setLastChecked( $linkID );
}

if ( !$isQuiet )
    $cli->output( "All links have been checked!" );
00143 
00144 ?>