eZ Publish  [4.0]
rssimport.php
Go to the documentation of this file.
00001 <?php
00002 //
00003 // Created on: <24-Sep-2003 16:09:21 sp>
00004 //
00005 // ## BEGIN COPYRIGHT, LICENSE AND WARRANTY NOTICE ##
00006 // SOFTWARE NAME: eZ Publish
00007 // SOFTWARE RELEASE: 4.0.x
00008 // COPYRIGHT NOTICE: Copyright (C) 1999-2008 eZ Systems AS
00009 // SOFTWARE LICENSE: GNU General Public License v2.0
00010 // NOTICE: >
00011 //   This program is free software; you can redistribute it and/or
00012 //   modify it under the terms of version 2.0  of the GNU General
00013 //   Public License as published by the Free Software Foundation.
00014 //
00015 //   This program is distributed in the hope that it will be useful,
00016 //   but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 //   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018 //   GNU General Public License for more details.
00019 //
00020 //   You should have received a copy of version 2.0 of the GNU General
00021 //   Public License along with this program; if not, write to the Free
00022 //   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
00023 //   MA 02110-1301, USA.
00024 //
00025 //
00026 // ## END COPYRIGHT, LICENSE AND WARRANTY NOTICE ##
00027 //
00028 
00029 /*! \file rssimport.php
00030 */
00031 
00032 //include_once( 'kernel/classes/ezrssimport.php' );
00033 //include_once( 'kernel/classes/ezcontentclass.php' );
00034 //include_once( 'kernel/classes/ezcontentobject.php' );
00035 //include_once( 'kernel/classes/ezpersistentobject.php' );
00036 //include_once( 'kernel/classes/ezcontentobjecttreenode.php' );
00037 //include_once( 'kernel/classes/ezcontentobjectversion.php' );
00038 //include_once( 'lib/ezutils/classes/ezoperationhandler.php' );
00039 //include_once( "lib/ezdb/classes/ezdb.php" );
00040 //include_once( "lib/ezutils/classes/ezhttptool.php" );
00041 
00042 //For ezUser, we would make this the ezUser class id but otherwise just pick and choose.
00043 
00044 //fetch this class
// Fetch every RSS import that is currently active.
$rssImportArray = eZRSSImport::fetchActiveList();

// Process each active import in turn. A failure in one import is reported
// (unless running quietly) and the loop moves on to the next import.
// NOTE(review): $cli and $isQuiet are not defined in this file; presumably the
// script that includes this cronjob part provides them - confirm.
foreach ( $rssImportArray as $rssImport )
{
    $rssSource = $rssImport->attribute( 'url' );
    $rssImportName = $rssImport->attribute( 'name' );

    if ( !$isQuiet )
    {
        $cli->output( 'RSSImport '.$rssImportName.': Starting.' );
    }

    // Download the feed
    $xmlData = eZHTTPTool::getDataByURL( $rssSource, false, 'eZ Publish RSS Import' );
    if ( $xmlData === false )
    {
        if ( !$isQuiet )
        {
            $cli->output( 'RSSImport '.$rssImportName.': Failed to open RSS feed file: '.$rssSource );
        }
        continue;
    }

    // Create DomDocument from http data. An empty body is rejected up front:
    // depending on the PHP version loadXML() warns or throws when handed an
    // empty string, and either way it can never be a valid feed.
    $domDocument = new DOMDocument( '1.0', 'utf-8' );
    $success = ( $xmlData !== '' ) && $domDocument->loadXML( $xmlData );

    if ( !$success )
    {
        if ( !$isQuiet )
        {
            $cli->output( 'RSSImport '.$rssImportName.': Invalid RSS document.' );
        }
        continue;
    }

    $root = $domDocument->documentElement;

    // Classify the feed once. A missing or unrecognised version attribute is
    // handled as RSS 1.0; the 0.91 / 0.92 / 2.0 family keeps its own version
    // string and is parsed by the RSS 2.0 importer.
    $feedVersion = $root->getAttribute( 'version' );
    switch( $feedVersion )
    {
        case '1.0':
        default:
        {
            $version = '1.0';
            $useChannelItems = false;
        } break;

        case '0.91':
        case '0.92':
        case '2.0':
        {
            $version = $feedVersion;
            $useChannelItems = true;
        } break;
    }

    // The feed must match the version the import was configured for
    $importDescription = $rssImport->importDescription();
    if ( $version != $importDescription['rss_version'] )
    {
        if ( !$isQuiet )
        {
            $cli->output( 'RSSImport '.$rssImportName.': Invalid RSS version missmatch. Please reconfigure import.' );
        }
        continue;
    }

    // Dispatch on the classification made above
    if ( $useChannelItems )
    {
        rssImport2( $root, $rssImport, $cli );
    }
    else
    {
        rssImport1( $root, $rssImport, $cli );
    }
}

// Run the static cache actions once, after all imports have been processed
eZStaticCache::executeActions();
00130 
/*!
  Import all items of an RSS 1.0 feed.

  The <item> elements are collected from the document root, and the first
  <channel> element is handed to every item import alongside the item itself.
  Unless running quietly, a summary line with the number of added objects is
  written when the feed has been processed.

  \param root      DOM root element of the feed
  \param rssImport RSS Import item
  \param cli       CLI object used for output
*/
function rssImport1( $root, $rssImport, $cli )
{
    global $isQuiet;

    $channel = $root->getElementsByTagName( 'channel' )->item( 0 );
    $feedItems = $root->getElementsByTagName( 'item' );

    // Each call reports 1 for an added object and 0 otherwise
    $importedCount = 0;
    foreach ( $feedItems as $feedItem )
    {
        $importedCount = $importedCount + importRSSItem( $feedItem, $rssImport, $cli, $channel );
    }

    if ( $isQuiet )
    {
        return;
    }

    $cli->output( 'RSSImport '.$rssImport->attribute( 'name' ).': End. '.$importedCount.' objects added' );
}
00160 
/*!
  Parse RSS 2.0 feed (also used for versions 0.91 and 0.92)

  Items are read from inside the first <channel> element. A document without
  a <channel> element is reported as invalid and nothing is imported, so the
  caller can carry on with the next import instead of the script dying on a
  method call on null.

  \param root      DOM root node
  \param rssImport RSS Import item
  \param cli       CLI object used for output
*/
function rssImport2( $root, $rssImport, $cli )
{
    global $isQuiet;

    $channel = $root->getElementsByTagName( 'channel' )->item( 0 );

    // item( 0 ) yields null when the feed has no <channel> element
    if ( !is_object( $channel ) )
    {
        if ( !$isQuiet )
        {
            $cli->output( 'RSSImport '.$rssImport->attribute( 'name' ).': Invalid RSS document.' );
        }
        return;
    }

    $addCount = 0;

    // Loop through all items in RSS feed
    foreach ( $channel->getElementsByTagName( 'item' ) as $item )
    {
        $addCount += importRSSItem( $item, $rssImport, $cli, $channel );
    }

    if ( !$isQuiet )
    {
        $cli->output( 'RSSImport '.$rssImport->attribute( 'name' ).': End. '.$addCount.' objects added' );
    }
}
00189 
/*!
 Import specific rss item into content tree

 The item is identified by the md5 sum of its link text or, when there is no
 link text, of its guid text. An item whose identifier was already imported
 by this RSS import is skipped. Otherwise an object of the configured class
 is created below the destination node, filled from the feed according to the
 import description, published, and finally given the modified / published
 timestamps taken from the feed.

 \param item      RSS item xml element
 \param rssImport RSS Import object
 \param cli       CLI object used for output
 \param channel   RSS channel xml element, source for 'channel' mappings

 \return 1 if object added, 0 if not
*/
function importRSSItem( $item, $rssImport, $cli, $channel )
{
    global $isQuiet;

    $rssImportID = $rssImport->attribute( 'id' );
    $rssImportName = $rssImport->attribute( 'name' );
    $rssOwnerID = $rssImport->attribute( 'object_owner_id' ); // Get owner user id
    $parentContentObjectTreeNode = eZContentObjectTreeNode::fetch( $rssImport->attribute( 'destination_node_id' ) ); // Get parent treenode object

    if ( $parentContentObjectTreeNode == null )
    {
        if ( !$isQuiet )
        {
            $cli->output( 'RSSImport '.$rssImportName.': Destination tree node seems to be unavailable' );
        }
        return 0;
    }

    $parentContentObject = $parentContentObjectTreeNode->attribute( 'object' ); // Get parent content object
    $titleElement = $item->getElementsByTagName( 'title' )->item( 0 );
    $title = is_object( $titleElement ) ? $titleElement->textContent : '';

    // Test for link or guid as unique identifier. Either element may be
    // absent, in which case item( 0 ) yields null, so both are checked before
    // their text is read.
    $linkElement = $item->getElementsByTagName( 'link' )->item( 0 );
    $guidElement = $item->getElementsByTagName( 'guid' )->item( 0 );
    $linkText = is_object( $linkElement ) ? $linkElement->textContent : '';
    $guidText = is_object( $guidElement ) ? $guidElement->textContent : '';
    if ( $linkText )
    {
        $uniqueIdentifier = $linkText;
    }
    elseif ( $guidText )
    {
        $uniqueIdentifier = $guidText;
    }
    else
    {
        if ( !$isQuiet )
        {
            $cli->output( 'RSSImport '.$rssImportName.': Item has no unique identifier. RSS guid or link missing.' );
        }
        return 0;
    }
    $md5Sum = md5( $uniqueIdentifier );
    $remoteID = 'RSSImport_'.$rssImportID.'_'.$md5Sum;

    // Try to fetch an object whose remote id matches this import and item.
    $existingObject = eZPersistentObject::fetchObject( eZContentObject::definition(), null,
                                                       array( 'remote_id' => $remoteID ) );

    // if object exists, continue to next import item
    if ( $existingObject != null )
    {
        if ( !$isQuiet )
        {
            $cli->output( 'RSSImport '.$rssImportName.': Object ( ' . $existingObject->attribute( 'id' ) . ' ) with URL: '.$uniqueIdentifier.' already exists' );
        }
        unset( $existingObject ); // delete object to preserve memory
        return 0;
    }

    // Fetch class, and create ezcontentobject from it.
    $contentClass = eZContentClass::fetch( $rssImport->attribute( 'class_id' ) );
    if ( !is_object( $contentClass ) )
    {
        if ( !$isQuiet )
        {
            $cli->output( 'RSSImport '.$rssImportName.': Content class seems to be unavailable' );
        }
        return 0;
    }

    // Instantiate the object with user $rssOwnerID and use section id from parent. And store it.
    $contentObject = $contentClass->instantiate( $rssOwnerID, $parentContentObject->attribute( 'section_id' ) );

    $db = eZDB::instance();
    $db->begin();
    $contentObject->store();
    $contentObjectID = $contentObject->attribute( 'id' );

    // Create node assignment
    $nodeAssignment = eZNodeAssignment::create( array( 'contentobject_id' => $contentObjectID,
                                                       'contentobject_version' => $contentObject->attribute( 'current_version' ),
                                                       'is_main' => 1,
                                                       'parent_node' => $parentContentObjectTreeNode->attribute( 'node_id' ) ) );
    $nodeAssignment->store();

    $version = $contentObject->version( 1 );
    $version->setAttribute( 'status', eZContentObjectVersion::STATUS_DRAFT );
    $version->store();

    // Get object attributes, and set their values and store them.
    $dataMap = $contentObject->dataMap();
    $importDescription = $rssImport->importDescription();

    // Set content object attribute values.
    $classAttributeList = $contentClass->fetchAttributes();
    foreach( $classAttributeList as $classAttribute )
    {
        $classAttributeID = $classAttribute->attribute( 'id' );
        if ( !isset( $importDescription['class_attributes'][$classAttributeID] ) )
        {
            continue;
        }

        // '-1' marks a class attribute that is not mapped to the feed
        $classAttributeDefinition = $importDescription['class_attributes'][$classAttributeID];
        if ( $classAttributeDefinition == '-1' )
        {
            continue;
        }

        $importDescriptionArray = explode( ' - ', $classAttributeDefinition );
        // NOTE(review): explode() always returns at least one element, so this
        // guard cannot fire as written. It is kept unchanged because
        // tightening it would alter which definitions abort the loop.
        if ( count( $importDescriptionArray ) < 1 )
        {
            $cli->output( 'RSSImport '.$rssImportName.': Invalid import definition. Please redit.' );
            break;
        }

        // The first part names the source node, the rest is the path below it
        $elementType = array_shift( $importDescriptionArray );
        switch( $elementType )
        {
            case 'item':
            {
                $domNode = $item;
            } break;

            case 'channel':
            {
                $domNode = $channel;
            } break;

            default:
            {
                $domNode = null;
            } break;
        }

        // Skip unknown source types, a missing channel node and attributes
        // that are not present in the data map, rather than calling methods
        // on null.
        $attributeIdentifier = $classAttribute->attribute( 'identifier' );
        if ( !is_object( $domNode ) || !isset( $dataMap[$attributeIdentifier] ) )
        {
            continue;
        }

        setObjectAttributeValue( $dataMap[$attributeIdentifier],
                                 recursiveFindRSSElementValue( $importDescriptionArray,
                                                               $domNode ) );
    }

    $contentObject->setAttribute( 'remote_id', $remoteID );
    $contentObject->store();
    $db->commit();

    // Publish new object. The user id is sent to make sure any workflow
    // requiring the user id has access to it.
    $operationResult = eZOperationHandler::execute( 'content', 'publish', array( 'object_id' => $contentObject->attribute( 'id' ),
                                                                                 'version' => 1,
                                                                                 'user_id' => $rssOwnerID ) );

    if ( !isset( $operationResult['status'] ) || $operationResult['status'] != eZModuleOperationInfo::STATUS_CONTINUE )
    {
        if ( isset( $operationResult['result'] ) && isset( $operationResult['result']['content'] ) )
            $failReason = $operationResult['result']['content'];
        else
            $failReason = "unknown error";
        $cli->error( "Publishing failed: $failReason" );
        unset( $failReason );
    }

    // Re-fetch the object after the publish operation before touching its
    // timestamps.
    $db->begin();
    unset( $contentObject );
    unset( $version );
    $contentObject = eZContentObject::fetch( $contentObjectID );
    $version = $contentObject->attribute( 'current' );

    // Set object Attributes like modified and published timestamps
    $objectAttributeDescription = isset( $importDescription['object_attributes'] ) ? $importDescription['object_attributes'] : array();
    foreach( $objectAttributeDescription as $identifier => $objectAttributeDefinition )
    {
        if ( $objectAttributeDefinition == '-1' )
        {
            continue;
        }

        // Only these two identifiers are handled. The object's 'published'
        // time is stored as 'created' on the version.
        switch( $identifier )
        {
            case 'modified':
            {
                $versionAttributeName = 'modified';
            } break;

            case 'published':
            {
                $versionAttributeName = 'created';
            } break;

            default:
            {
                $versionAttributeName = false;
            } break;
        }
        if ( $versionAttributeName === false )
        {
            continue;
        }

        $importDescriptionArray = explode( ' - ', $objectAttributeDefinition );
        $elementType = array_shift( $importDescriptionArray );

        // Anything that is not explicitly 'channel' is read from the item
        $domNode = ( $elementType == 'channel' ) ? $channel : $item;
        if ( !is_object( $domNode ) )
        {
            continue;
        }

        $dateTime = recursiveFindRSSElementValue( $importDescriptionArray,
                                                  $domNode );
        if ( !$dateTime )
        {
            continue;
        }

        // strtotime() returns false for text it cannot parse; leave the
        // timestamps untouched in that case instead of storing false.
        $timestamp = strtotime( $dateTime );
        if ( $timestamp === false )
        {
            continue;
        }

        $contentObject->setAttribute( $identifier, $timestamp );
        $version->setAttribute( $versionAttributeName, $timestamp );
    }
    $version->store();
    $contentObject->store();
    $db->commit();

    if ( !$isQuiet )
    {
        $cli->output( 'RSSImport '.$rssImportName.': Object created; ' . $title );
    }

    return 1;
}
00411 
/*!
 Resolve an import path against a DOM node and return the value it points to.

 The path is a flat list of ( type, name ) pairs. The type 'elements' selects
 the first descendant element called name: when it is the last pair the text
 content of that element is returned, otherwise the remaining pairs are
 resolved against that element. The type 'attributes' returns the attribute
 called name of the current node.

 \param importDescriptionArray path as described above
 \param xmlDomNode             DOM node the path is resolved against

 \return the text found, or false when the path is not an array, is
         incomplete, uses an unknown type, or an element on the path is missing
*/
function recursiveFindRSSElementValue( $importDescriptionArray, $xmlDomNode )
{
    // A usable path holds at least one ( type, name ) pair, and there must be
    // a node to resolve it against.
    if ( !is_array( $importDescriptionArray ) ||
         count( $importDescriptionArray ) < 2 ||
         !is_object( $xmlDomNode ) )
    {
        return false;
    }

    $valueType = array_shift( $importDescriptionArray );
    $valueName = array_shift( $importDescriptionArray );

    switch( $valueType )
    {
        case 'elements':
        {
            $element = $xmlDomNode->getElementsByTagName( $valueName )->item( 0 );
            if ( !is_object( $element ) )
            {
                // Element missing: nothing to read and nothing to descend into
                return false;
            }

            if ( count( $importDescriptionArray ) == 0 )
            {
                return $element->textContent;
            }

            return recursiveFindRSSElementValue( $importDescriptionArray, $element );
        }

        case 'attributes':
        {
            return $xmlDomNode->getAttribute( $valueName );
        }
    }

    // Unknown type tag
    return false;
}
00446 
/*!
 Store a value taken from the feed in a content object attribute.

 How the value is written depends on the datatype of the attribute:
 'ezxmltext' goes through setEZXMLAttribute(), 'ezurl' uses setContent(),
 'ezkeyword' is wrapped in an eZKeyword object, and every other datatype gets
 the value as its 'data_text'. The attribute is stored afterwards.

 Nothing is done when there is no value ( false or null ) or when no
 attribute object was supplied.

 \param objectAttribute content object attribute to fill
 \param value           value found in the feed, false or null if none
*/
function setObjectAttributeValue( $objectAttribute, $value )
{
    // No value found in the feed, or no attribute to put it in
    if ( $value === false || $value === null || !is_object( $objectAttribute ) )
    {
        return;
    }

    $dataType = $objectAttribute->attribute( 'data_type_string' );
    switch( $dataType )
    {
        case 'ezxmltext':
        {
            setEZXMLAttribute( $objectAttribute, $value );
        } break;

        case 'ezurl':
        {
            $objectAttribute->setContent( $value );
        } break;

        case 'ezkeyword':
        {
            $keyword = new eZKeyword();
            $keyword->initializeKeyword( $value );
            $objectAttribute->setContent( $keyword );
        } break;

        default:
        {
            $objectAttribute->setAttribute( 'data_text', $value );
        } break;
    }

    $objectAttribute->store();
}
00483 
/*!
 Convert a text value to eZ XML and store it in an XML text attribute.

 Carriage returns and newlines are dropped and tabs become single spaces
 before the text is handed to the simplified XML input parser. When the
 parser does not return a document object, 'Error in xml parsing' is written
 to the CLI and the attribute is left as it was.

 \param attribute      content object attribute to fill
 \param attributeValue text to convert
 \param link           not used by this function
*/
function setEZXMLAttribute( $attribute, $attributeValue, $link = false )
{
    $xmlParser = new eZSimplifiedXMLInputParser( $attribute->attribute( "contentobject_id" ), false, 0, false );

    // Replacements are applied in the listed order
    $normalizedText = str_replace( array( "\r", "\n", "\t" ),
                                   array( '', '', ' ' ),
                                   $attributeValue );

    $parsedDocument = $xmlParser->process( $normalizedText );
    if ( is_object( $parsedDocument ) )
    {
        $attribute->setAttribute( 'data_text', eZXMLTextType::domString( $parsedDocument ) );
        $attribute->store();
        return;
    }

    eZCLI::instance()->output( 'Error in xml parsing' );
}
00506 
00507 ?>