eZ Publish  [4.0]
ezsimplifiedxmlinputparser.php
Go to the documentation of this file.
00001 <?php
00002 //
00003 // Definition of eZSimplifiedXMLInputParser class
00004 //
00005 // Created on: <27-Mar-2006 15:28:39 ks>
00006 //
00007 // ## BEGIN COPYRIGHT, LICENSE AND WARRANTY NOTICE ##
00008 // SOFTWARE NAME: eZ Publish
00009 // SOFTWARE RELEASE: 4.0.x
00010 // COPYRIGHT NOTICE: Copyright (C) 1999-2008 eZ Systems AS
00011 // SOFTWARE LICENSE: GNU General Public License v2.0
00012 // NOTICE: >
00013 //   This program is free software; you can redistribute it and/or
00014 //   modify it under the terms of version 2.0  of the GNU General
00015 //   Public License as published by the Free Software Foundation.
00016 //
00017 //   This program is distributed in the hope that it will be useful,
00018 //   but WITHOUT ANY WARRANTY; without even the implied warranty of
00019 //   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00020 //   GNU General Public License for more details.
00021 //
00022 //   You should have received a copy of version 2.0 of the GNU General
00023 //   Public License along with this program; if not, write to the Free
00024 //   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
00025 //   MA 02110-1301, USA.
00026 //
00027 //
00028 // ## END COPYRIGHT, LICENSE AND WARRANTY NOTICE ##
00029 //
00030 
00031 // if ( !class_exists( 'eZXMLInputParser' ) )
00032     //include_once( 'kernel/classes/datatypes/ezxmltext/ezxmlinputparser.php' );
00033 
00034 class eZSimplifiedXMLInputParser extends eZXMLInputParser
00035 {
00036     public $InputTags = array(
00037         'b'       => array( 'name' => 'strong' ),
00038         'bold'    => array( 'name' => 'strong' ),
00039         'i'       => array( 'name' => 'emphasize' ),
00040         'em'      => array( 'name' => 'emphasize' ),
00041         'h'       => array( 'name' => 'header' ),
00042         'p'       => array( 'name' => 'paragraph' ),
00043         'para'    => array( 'name' => 'paragraph' ),
00044         'br'      => array( 'name' => 'br',
00045                             'noChildren' => true ),
00046         'a'       => array( 'name' => 'link' ),
00047         );
00048 
00049     public $OutputTags = array(
00050         'section'   => array(),
00051 
00052         'embed'     => array( //'parsingHandler' => 'breakInlineFlow',
00053                               'structHandler' => 'appendLineParagraph',
00054                               'publishHandler' => 'publishHandlerEmbed',
00055                               'attributes' => array( 'id' => 'xhtml:id' ),
00056                               'requiredInputAttributes' => array( 'href' ) ),
00057 
00058         'embed-inline'     => array( //'parsingHandler' => 'breakInlineFlow',
00059                               'structHandler' => 'appendLineParagraph',
00060                               'publishHandler' => 'publishHandlerEmbed',
00061                               'attributes' => array( 'id' => 'xhtml:id' ),
00062                               'requiredInputAttributes' => array( 'href' ) ),
00063 
00064         'object'    => array( //'parsingHandler' => 'breakInlineFlow',
00065                               'structHandler' => 'appendLineParagraph',
00066                               'publishHandler' => 'publishHandlerObject',
00067                               'attributes' => array( 'href' => 'image:ezurl_href',
00068                                                      'target' => 'image:ezurl_target',
00069                                                      'ezurl_href' => 'image:ezurl_href',
00070                                                      'ezurl_id' => 'image:ezurl_id',
00071                                                      'ezurl_target' => 'image:ezurl_target' ),
00072                               'requiredInputAttributes' => array( 'id' ) ),
00073 
00074         'table'     => array( 'structHandler' => 'appendParagraph' ),
00075 
00076         'tr'        => array(),
00077 
00078         'td'        => array( 'attributes' => array( 'width' => 'xhtml:width',
00079                                                      'colspan' => 'xhtml:colspan',
00080                                                      'rowspan' => 'xhtml:rowspan' ) ),
00081 
00082         'th'        => array( 'attributes' => array( 'width' => 'xhtml:width',
00083                                                      'colspan' => 'xhtml:colspan',
00084                                                      'rowspan' => 'xhtml:rowspan' ) ),
00085 
00086         'ol'        => array( 'structHandler' => 'structHandlerLists' ),
00087 
00088         'ul'        => array( 'structHandler' => 'structHandlerLists' ),
00089 
00090         'li'        => array( 'autoCloseOn' => array( 'li' ) ),
00091 
00092         'header'    => array( 'autoCloseOn' => array( 'paragraph' ),
00093                               'structHandler' => 'structHandlerHeader' ),
00094 
00095         'paragraph' => array( 'autoCloseOn' => array( 'paragraph' ),
00096                               'publishHandler' => 'publishHandlerParagraph' ),
00097 
00098         'line'      => array(),
00099 
00100         'br'        => array( 'parsingHandler' => 'breakInlineFlow',
00101                               'structHandler' => 'structHandlerBr',
00102                               'attributes' => false ),
00103 
00104         'literal'   => array( 'parsingHandler' => 'parsingHandlerLiteral',
00105                               'structHandler' => 'appendParagraph' ),
00106 
00107         'strong'    => array( 'structHandler' => 'appendLineParagraph' ),
00108 
00109         'emphasize' => array( 'structHandler' => 'appendLineParagraph' ),
00110 
00111         'link'      => array( 'structHandler' => 'appendLineParagraph',
00112                               'publishHandler' => 'publishHandlerLink',
00113                               'attributes' => array( 'title' => 'xhtml:title',
00114                                                      'id' => 'xhtml:id' ),
00115                               'requiredInputAttributes' => array( 'href' ) ),
00116 
00117         'anchor'    => array( 'structHandler' => 'appendLineParagraph' ),
00118 
00119         'custom'    => array( 'structHandler' => 'structHandlerCustom',
00120                               'publishHandler' => 'publishHandlerCustom',
00121                               'requiredInputAttributes' => array( 'name' ) ),
00122 
00123         '#text'     => array( 'structHandler' => 'structHandlerText' )
00124         );
00125 
00126     function eZSimplifiedXMLInputParser( $contentObjectID, $validateErrorLevel = eZXMLInputParser::ERROR_ALL, $detectErrorLevel = eZXMLInputParser::ERROR_ALL,
00127                                          $parseLineBreaks = false, $removeDefaultAttrs = false )
00128     {
00129         $this->contentObjectID = $contentObjectID;
00130         $this->eZXMLInputParser( $validateErrorLevel, $detectErrorLevel, $parseLineBreaks, $removeDefaultAttrs );
00131     }
00132 
00133     /*
00134         Parsing Handlers (called at pass 1)
00135     */
00136     function parsingHandlerLiteral( $element, &$param )
00137     {
00138         $ret = null;
00139         $data = $param[0];
00140         $pos =& $param[1];
00141 
00142         $tablePos = strpos( $data, '</literal>', $pos );
00143         if ( $tablePos === false )
00144         {
00145             $tablePos = strpos( $data, '</LITERAL>', $pos );
00146         }
00147 
00148         if ( $tablePos === false )
00149         {
00150             return $ret;
00151         }
00152 
00153         $text = substr( $data, $pos, $tablePos - $pos );
00154 
00155         $textNode = $this->Document->createTextNode( $text );
00156         $element->appendChild( $textNode );
00157 
00158         $pos = $tablePos + strlen( '</literal>' );
00159         $ret = false;
00160 
00161         return $ret;
00162     }
00163 
00164     function breakInlineFlow( $element, $param )
00165     {
00166         // Breaks the flow of inline tags. Used for non-inline tags caught within inline.
00167         // Works for tags with no children only.
00168         $ret = null;
00169         $data =& $param[0];
00170         $pos =& $param[1];
00171         $tagBeginPos = $param[2];
00172         $parent = $element->parentNode;
00173 
00174         $wholeTagString = substr( $data, $tagBeginPos, $pos - $tagBeginPos );
00175 
00176         if ( $parent &&
00177              $this->XMLSchema->isInline( $parent ) )
00178         {
00179             $insertData = '';
00180             $currentParent = $parent;
00181             // Close all parent tags
00182             end( $this->ParentStack );
00183             do
00184             {
00185                 $stackData = current( $this->ParentStack );
00186                 $currentParentName = $stackData[0];
00187                 $insertData .= "</$currentParentName>";
00188                 $currentParent->setAttributeNS( 'http://ez.no/namespaces/ezpublish3/temporary/', 'tmp:new-element', 'true' );
00189                 $currentParent = $currentParent->parentNode;
00190                 prev( $this->ParentStack );
00191             }
00192             while( $this->XMLSchema->isInline( $currentParent ) );
00193 
00194             $insertData .= $wholeTagString;
00195 
00196             $currentParent = $parent;
00197             end( $this->ParentStack );
00198             $appendData = '';
00199             do
00200             {
00201                 $stackData = current( $this->ParentStack );
00202                 $currentParentName = $stackData[0];
00203                 $currentParentAttrString = '';
00204                 if ( $stackData[2] )
00205                 {
00206                     $currentParentAttrString = ' ' . $stackData[2];
00207                 }
00208                 $currentParentAttrString .= " tmp:new-element='true'";
00209                 $appendData = "<$currentParentName$currentParentAttrString>" . $appendData;
00210                 $currentParent = $currentParent->parentNode;
00211                 prev( $this->ParentStack );
00212             }
00213             while( $this->XMLSchema->isInline( $currentParent ) );
00214 
00215             $insertData .= $appendData;
00216 
00217             $data = $insertData . substr( $data, $pos );
00218             $pos = 0;
00219             $element = $parent->removeChild( $element );
00220             $ret = false;
00221         }
00222 
00223         return $ret;
00224     }
00225 
00226 
00227     /*
00228         Structure handlers. (called at pass 2)
00229     */
00230     // Structure handler for inline nodes.
00231     function appendLineParagraph( $element, $newParent )
00232     {
00233         eZDebugSetting::writeDebug( 'kernel-datatype-ezxmltext', $newParent, 'eZSimplifiedXMLInputParser::appendLineParagraph new parent' );
00234         $ret = array();
00235         $parent = $element->parentNode;
00236         if ( !$parent instanceof DOMElement )
00237         {
00238             return $ret;
00239         }
00240 
00241         $parentName = $parent->nodeName;
00242         $newParentName = $newParent != null ? $newParent->nodeName : '';
00243 
00244         // Correct structure by adding <line> and <paragraph> tags.
00245         if ( $parentName == 'line' || $this->XMLSchema->isInline( $parent ) )
00246         {
00247             return $ret;
00248         }
00249 
00250         if ( $newParentName == 'line' )
00251         {
00252             $element = $parent->removeChild( $element );
00253             $newParent->appendChild( $element );
00254             $newLine = $newParent;
00255             $ret['result'] = $newParent;
00256         }
00257         elseif ( $parentName == 'paragraph' )
00258         {
00259             $newLine = $this->createAndPublishElement( 'line', $ret );
00260             $element = $parent->replaceChild( $newLine, $element );
00261             $newLine->appendChild( $element );
00262             $ret['result'] = $newLine;
00263         }
00264         elseif ( $newParentName == 'paragraph' )
00265         {
00266             $newLine = $this->createAndPublishElement( 'line', $ret );
00267             $element = $parent->removeChild( $element );
00268             $newParent->appendChild( $newLine );
00269             $newLine->appendChild( $element );
00270             $ret['result'] = $newLine;
00271         }
00272         elseif ( $this->XMLSchema->check( $parent, 'paragraph' ) )
00273         {
00274             $newLine = $this->createAndPublishElement( 'line', $ret );
00275             $newPara = $this->createAndPublishElement( 'paragraph', $ret );
00276             $element = $parent->replaceChild( $newPara, $element );
00277             $newPara->appendChild( $newLine );
00278             $newLine->appendChild( $element );
00279             $ret['result'] = $newLine;
00280         }
00281 
00282         return $ret;
00283     }
00284 
00285     // Structure handler for temporary <br> elements
00286     function structHandlerBr( $element, $newParent )
00287     {
00288         $ret = array();
00289         $ret['result'] = $newParent;
00290         $parent = $element->parentNode;
00291 
00292         $next = $element->nextSibling;
00293 
00294         if ( $element->getAttribute( 'ignore' ) != 'true' &&
00295              $next &&
00296              $next->nodeName == 'br' )
00297         {
00298             if ( $this->XMLSchema->check( $parent, 'paragraph' ) )
00299             {
00300                 if ( !$newParent )
00301                 {
00302                     // create paragraph in case of the first empty paragraph
00303                     $newPara = $this->createAndPublishElement( 'paragraph', $ret );
00304                     $parent->replaceChild( $newPara, $element );
00305                 }
00306                 elseif ( $newParent->nodeName == 'paragraph' ||
00307                          $newParent->nodeName == 'line' )
00308                 {
00309                     // break paragraph or line flow
00310                     unset( $ret );
00311                     $ret = array();
00312 
00313                     // Do not process next <br> tag
00314                     $next->setAttribute( 'ignore', 'true' );
00315 
00316                     // create paragraph in case of the last empty paragraph (not inside section)
00317                     $nextToNext = $next->nextSibling;
00318                     $tmp = $parent;
00319                     while( !$nextToNext && $tmp && $tmp->nodeName == 'section' )
00320                     {
00321                         $nextToNext = $tmp->nextSibling;
00322                         $tmp = $tmp->parentNode;
00323                     }
00324                     if ( !$nextToNext )
00325                     {
00326                         $newPara = $this->createAndPublishElement( 'paragraph', $ret );
00327                         $parent->replaceChild( $newPara, $element );
00328                     }
00329                 }
00330             }
00331         }
00332         else
00333         {
00334             if ( $newParent && $newParent->nodeName == 'line' )
00335             {
00336                 $ret['result'] = $newParent->parentNode;
00337             }
00338         }
00339 
00340         // Trim spaces used for tag indenting
00341         if ( $next && $next->nodeType == XML_TEXT_NODE && !trim( $next->textContent ) )
00342         {
00343             $nextToNext = $next->nextSibling;
00344             if ( !$nextToNext || $nextToNext->nodeName != 'br' )
00345             {
00346                 $next = $parent->removeChild( $next );
00347             }
00348         }
00349         return $ret;
00350     }
00351 
00352     // Structure handler for in-paragraph nodes.
00353     function appendParagraph( $element, &$newParent )
00354     {
00355         $ret = array();
00356         $parent = $element->parentNode;
00357         if ( !$parent )
00358         {
00359             return $ret;
00360         }
00361 
00362         $parentName = $parent->nodeName;
00363 
00364         if ( $parentName != 'paragraph' )
00365         {
00366             if ( $newParent && $newParent->nodeName == 'paragraph' )
00367             {
00368                 $element = $parent->removeChild( $element );
00369                 $newParent->appendChild( $element );
00370                 $ret['result'] = $newParent;
00371             }
00372             elseif ( $newParent && $newParent->parentNode && $newParent->parentNode->nodeName == 'paragraph' )
00373             {
00374                 $para = $newParent->parentNode;
00375                 $element = $parent->removeChild( $element );
00376                 $para->appendChild( $element );
00377                 $ret['result'] = $newParent->parentNode;
00378             }
00379             elseif ( $this->XMLSchema->check( $parentName, 'paragraph' ) )
00380             {
00381                 $newPara = $this->createAndPublishElement( 'paragraph', $ret );
00382                 $parent->replaceChild( $newPara, $element );
00383                 $newPara->appendChild( $element );
00384                 $ret['result'] = $newPara;
00385             }
00386         }
00387         return $ret;
00388     }
00389 
00390     // Structure handler for 'header' tag.
00391     function structHandlerHeader( $element, &$param )
00392     {
00393         $ret = null;
00394         $parent = $element->parentNode;
00395         $level = $element->getAttribute( 'level' );
00396         if ( !$level )
00397         {
00398             $level = 1;
00399         }
00400 
00401         $element->removeAttribute( 'level' );
00402         if ( $level )
00403         {
00404             $sectionLevel = -1;
00405             $current = $element;
00406             while( $current->parentNode )
00407             {
00408                 $current = $current->parentNode;
00409                 if ( $current->nodeName == 'section' )
00410                 {
00411                     $sectionLevel++;
00412                 }
00413                 else
00414                 {
00415                     if ( $current->nodeName == 'td' )
00416                     {
00417                         $sectionLevel++;
00418                         break;
00419                     }
00420                 }
00421             }
00422             if ( $level > $sectionLevel )
00423             {
00424                 if ( $this->StrictHeaders &&
00425                      $level - $sectionLevel > 1 )
00426                 {
00427                     $this->handleError( eZXMLInputParser::ERROR_SCHEMA,
00428                                         ezi18n( 'kernel/classes/datatypes/ezxmltext', "Incorrect headers nesting" ) );
00429                 }
00430 
00431                 $newParent = $parent;
00432                 for ( $i = $sectionLevel; $i < $level; $i++ )
00433                 {
00434                    $newSection = $this->Document->createElement( 'section' );
00435                    if ( $i == $sectionLevel )
00436                    {
00437                        $newSection = $newParent->insertBefore( $newSection, $element );
00438                    }
00439                    else
00440                    {
00441                        $newParent->appendChild( $newSection );
00442                    }
00443 
00444                    $newParent = $newSection;
00445                    unset( $newSection );
00446                 }
00447                 $elementToMove = $element;
00448                 while( $elementToMove &&
00449                        $elementToMove->nodeName != 'section' )
00450                 {
00451                     $next = $elementToMove->nextSibling;
00452                     $elementToMove = $parent->removeChild( $elementToMove );
00453                     $newParent->appendChild( $elementToMove );
00454                     $elementToMove = $next;
00455 
00456                     if ( $elementToMove && $elementToMove->nodeName == 'header' )
00457                     {
00458                         // in the case of non-strict headers
00459                         $headerLevel = $elementToMove->getAttribute( 'level' );
00460                         if ( $level - $sectionLevel > 1 )
00461                         {
00462                             if ( $headerLevel == $level )
00463                             {
00464                                 $newParent2 = $this->Document->createElement( 'section' );
00465                                 $newParent->parentNode->appendChild( $newParent2 );
00466                                 $newParent = $newParent2;
00467                             }
00468                             elseif ( $headerLevel < $level )
00469                             {
00470                                 break;
00471                             }
00472                         }
00473                         else
00474                         {
00475                             if ( $headerLevel <= $level )
00476                             {
00477                                 break;
00478                             }
00479                         }
00480                     }
00481                 }
00482             }
00483             elseif ( $level < $sectionLevel )
00484             {
00485                 $newLevel = $sectionLevel + 1;
00486                 $current = $element;
00487                 while( $level < $newLevel )
00488                 {
00489                     $current = $current->parentNode;
00490                     if ( $current->nodeName == 'section' )
00491                     {
00492                         $newLevel--;
00493                     }
00494                 }
00495                 $elementToMove = $element;
00496                 while( $elementToMove &&
00497                        $elementToMove->nodeName != 'section' )
00498                 {
00499                     $next = $elementToMove->nextSibling;
00500                     $elementToMove = $parent->removeChild( $elementToMove );
00501                     $current->appendChild( $elementToMove );
00502                     $elementToMove = $next;
00503 
00504                     if ( $elementToMove->nodeName == 'header' &&
00505                          $elementToMove->getAttribute( 'level' ) <= $level )
00506                     {
00507                         break;
00508                     }
00509                 }
00510             }
00511         }
00512         return $ret;
00513     }
00514 
00515     // Structure handler for 'custom' tag.
00516     function structHandlerCustom( $element, &$params )
00517     {
00518         $ret = null;
00519         if ( $this->XMLSchema->isInline( $element ) )
00520         {
00521             $ret = $this->appendLineParagraph( $element, $params );
00522         }
00523         else
00524         {
00525             $ret = $this->appendParagraph( $element, $params );
00526         }
00527         return $ret;
00528     }
00529 
00530     // Structure handler for 'ul' and 'ol' tags.
00531     function structHandlerLists( $element, &$params )
00532     {
00533         $ret = array();
00534         $parent = $element->parentNode;
00535         $parentName = $parent->nodeName;
00536 
00537         if ( $parentName == 'paragraph' )
00538         {
00539             return $ret;
00540         }
00541 
00542         // If we are inside a list
00543         if ( $parentName == 'ol' || $parentName == 'ul' )
00544         {
00545             // If previous 'li' doesn't exist, create it,
00546             // else append to the previous 'li' element.
00547             $prev = $element->previousSibling;
00548             if ( !$prev )
00549             {
00550                 $li = $this->Document->createElement( 'li' );
00551                 $li = $parent->insertBefore( $li, $element );
00552                 $element = $parent->removeChild( $element );
00553                 $li->appendChild( $element );
00554             }
00555             else
00556             {
00557                 $lastChild = $prev->lastChild;
00558                 if ( $lastChild->nodeName != 'paragraph' )
00559                 {
00560                     $para = $this->Document->createElement( 'paragraph' );
00561                     $element = $parent->removeChild( $element );
00562                     $prev->appendChild( $element );
00563                     $ret['result'] = $para;
00564                 }
00565                 else
00566                 {
00567                     $element = $parent->removeChild( $element );
00568                     $lastChild->appendChild( $element );
00569                     $ret['result'] = $lastChild;
00570                 }
00571                 return $ret;
00572             }
00573         }
00574         if ( $parentName == 'li' )
00575         {
00576             $prev = $element->previousSibling;
00577             if ( $prev )
00578             {
00579                 $element = $parent->removeChild( $element );
00580                 $prev->appendChild( $element );
00581                 $ret['result'] = $prev;
00582                 return $ret;
00583             }
00584         }
00585         $ret = $this->appendParagraph( $element, $params );
00586 
00587         return $ret;
00588     }
00589 
00590     // Structure handler for #text
00591     function structHandlerText( $element, &$newParent )
00592     {
00593         $ret = null;
00594         $parent = $element->parentNode;
00595         if ( !$parent )
00596         {
00597             return $ret;
00598         }
00599 
00600         // Remove empty text elements
00601         if ( $element->textContent == '' )
00602         {
00603             $element = $parent->removeChild( $element );
00604             return $ret;
00605         }
00606 
00607         $ret = $this->appendLineParagraph( $element, $newParent );
00608 
00609         // Left trim spaces:
00610         if ( $this->TrimSpaces )
00611         {
00612             $trim = false;
00613             $currentElement = $element;
00614 
00615             // Check if it is the first element in line
00616             do
00617             {
00618                 $prev = $currentElement->previousSibling;
00619                 if ( $prev )
00620                 {
00621                     break;
00622                 }
00623 
00624                 $currentElement = $currentElement->parentNode;
00625 
00626                 if ( $currentElement instanceof DOMElement &&
00627                      ( $currentElement->nodeName == 'line' ||
00628                        $currentElement->nodeName == 'paragraph' ) )
00629                 {
00630                     $trim = true;
00631                     break;
00632                 }
00633 
00634             } while ( $currentElement instanceof DOMElement );
00635 
00636             if ( $trim )
00637             {
00638                 // Trim and remove if empty
00639                 $element->textContent = ltrim( $element->textContent );
00640                 if ( $element->textContent == '' )
00641                 {
00642                     $parent = $element->parentNode;
00643                     $element = $parent->removeChild( $element );
00644                 }
00645             }
00646         }
00647 
00648         return $ret;
00649     }
00650 
00651     /*
00652         Publish handlers. (called at pass 2)
00653     */
00654     // Publish handler for 'paragraph' element.
00655     function publishHandlerParagraph( $element, &$params )
00656     {
00657         $ret = null;
00658         // Removes single line tag
00659         $line = $element->lastChild;
00660         if ( $element->childNodes->length == 1 && $line->nodeName == 'line' )
00661         {
00662             $lineChildren = array();
00663             $lineChildNodes = $line->childNodes;
00664             foreach ( $lineChildNodes as $lineChildNode )
00665             {
00666                 $lineChildren[] = $lineChildNode;
00667             }
00668 
00669             $line = $element->removeChild( $line );
00670             foreach ( $lineChildren as $lineChild )
00671             {
00672                 $element->appendChild( $lineChild );
00673             }
00674         }
00675 
00676         return $ret;
00677     }
00678 
00679     // Publish handler for 'link' element.
00680     function publishHandlerLink( $element, &$params )
00681     {
00682         $ret = null;
00683 
00684         $href = $element->getAttribute( 'href' );
00685 
00686         if ( $href )
00687         {
00688             if ( ereg( "^ezobject://[0-9]+(#.*)?$", $href ) )
00689             {
00690                 $url = strtok( $href, '#' );
00691                 $anchorName = strtok( '#' );
00692                 $objectID = substr( strrchr( $url, "/" ), 1 );
00693                 $element->setAttribute( 'object_id', $objectID );
00694 
00695                  if ( !in_array( $objectID, $this->linkedObjectIDArray ) )
00696                  {
00697                     $this->linkedObjectIDArray[] = $objectID;
00698                 }
00699             }
00700             elseif ( ereg( "^eznode://.+(#.*)?$" , $href ) )
00701             {
00702                 $objectID = null;
00703                 $url = strtok( $href, '#' );
00704                 $anchorName = strtok( '#' );
00705                 $nodePath = substr( strchr( $url, "/" ), 2 );
00706                 if ( ereg( "^[0-9]+$", $nodePath ) )
00707                 {
00708                     $nodeID = $nodePath;
00709                     $node = eZContentObjectTreeNode::fetch( $nodeID, false, false );
00710                     if ( !$node )
00711                     {
00712                         $this->handleError( eZXMLInputParser::ERROR_DATA,
00713                                             ezi18n( 'kernel/classes/datatypes/ezxmltext', "Node '%1' does not exist.", '', array( $nodeID ) ) );
00714                     }
00715                     else
00716                     {
00717                         $objectID = $node['contentobject_id'];
00718                     }
00719                 }
00720                 else
00721                 {
00722                     $node = eZContentObjectTreeNode::fetchByURLPath( $nodePath, false );
00723                     if ( !$node )
00724                     {
00725                         $this->handleError( eZXMLInputParser::ERROR_DATA,
00726                                             ezi18n( 'kernel/classes/datatypes/ezxmltext', "Node '%1' does not exist.", '', array( $nodePath ) ) );
00727                     }
00728                     else
00729                     {
00730                         $nodeID = $node['node_id'];
00731                         $objectID = $node['contentobject_id'];
00732                     }
00733                     $element->setAttribute( 'show_path', 'true' );
00734                 }
00735                 $element->setAttribute( 'node_id', $nodeID );
00736 
00737                 if ( $objectID && !in_array( $objectID, $this->linkedObjectIDArray ) )
00738                 {
00739                     $this->linkedObjectIDArray[] = $objectID;
00740                 }
00741             }
00742             elseif ( ereg( "^#.*$" , $href ) )
00743             {
00744                 $anchorName = substr( $href, 1 );
00745             }
00746             else
00747             {
00748                 //washing href. single and double quotes replaced with their urlencoded form
00749                 $href = str_replace( array('\'','"'), array('%27','%22'), $href );
00750 
00751                 $temp = explode( '#', $href );
00752                 $url = $temp[0];
00753                 if ( isset( $temp[1] ) )
00754                 {
00755                     $anchorName = $temp[1];
00756                 }
00757 
00758                 if ( $url )
00759                 {
00760                     // Protection from XSS attack
00761                     if ( preg_match( "/^(java|vb)script:.*/i" , $url ) )
00762                     {
00763                         $this->handleError( eZXMLInputParser::ERROR_DATA,
00764                                             ezi18n( 'kernel/classes/datatypes/ezxmltext', "Using scripts in links is not allowed, link '%1' has been removed", '', array( $url ) ) );
00765 
00766                         $element->removeAttribute( 'href' );
00767                         return $ret;
00768 
00769                     }
00770                     // Check mail address validity
00771                     //include_once( 'lib/ezutils/classes/ezmail.php' );
00772                     if ( preg_match( "/^mailto:(.*)/i" , $url, $mailAddr ) &&
00773                          !eZMail::validate( $mailAddr[1] ) )
00774                     {
00775                         $this->handleError( eZXMLInputParser::ERROR_DATA,
00776                                             ezi18n( 'kernel/classes/datatypes/ezxmltext', "Invalid e-mail address: '%1'", '' , array( $mailAddr[1] ) ) );
00777 
00778                         $element->removeAttribute( 'href' );
00779                         return $ret;
00780                     }
00781                     // Store urlID instead of href
00782                     $urlID = $this->convertHrefToID( $url );
00783                     if ( $urlID )
00784                     {
00785                         $urlIDAttributeName = 'url_id';
00786 
00787                         $element->setAttribute( $urlIDAttributeName, $urlID );
00788                     }
00789                 }
00790             }
00791 
00792             if ( isset( $anchorName ) && $anchorName )
00793             {
00794                 $element->setAttribute( 'anchor_name', $anchorName );
00795             }
00796 
00797             $element->removeAttribute( 'href' );
00798         }
00799 
00800         return $ret;
00801     }
00802 
/**
 * Registers a link target through eZURL::registerURL() and remembers the resulting ID.
 *
 * "&amp;" entities in the href are turned back into plain "&" before registration.
 * The ID is appended to $this->urlIDArray only if it is not already present there.
 *
 * @param string $href Link target to register.
 * @return mixed Whatever eZURL::registerURL() returned for the normalized href.
 */
function convertHrefToID( $href )
{
    $registeredID = eZURL::registerURL( str_replace( "&amp;", "&", $href ) );

    $alreadyCollected = in_array( $registeredID, $this->urlIDArray );
    if ( !$alreadyCollected )
    {
        $this->urlIDArray[] = $registeredID;
    }

    return $registeredID;
}
00816 
/**
 * Publish handler for the 'embed' element.
 *
 * Reads the element's 'href' attribute and converts it:
 *  - "ezobject://<digits>"  sets 'object_id';
 *  - "eznode://<digits>"    sets 'node_id' after fetching the node by ID;
 *  - "eznode://<path>"      sets 'node_id' and 'show_path' = 'true' after fetching the node by URL path.
 * The referenced object ID is appended to $this->relatedObjectIDArray (once).
 * Any other non-empty href is rejected: the input is flagged invalid and a message is stored.
 *
 * The 'href' attribute is removed on every path. On error paths the method returns
 * early, i.e. before convertCustomAttributes() is applied to the element.
 *
 * @param object $element Element being published; used through getAttribute()/setAttribute()/
 *                        removeAttribute() (presumably a DOMElement - confirm against caller).
 * @param array  $params  Handler parameters; not used by this handler.
 * @return null Always null.
 */
function publishHandlerEmbed( $element, &$params )
{
    $ret = null;

    $href = $element->getAttribute( 'href' );
    // Washing href: single and double quotes are replaced with their urlencoded form.
    $href = str_replace( array('\'','"'), array('%27','%22'), $href );

    if ( $href != null )
    {
        // preg_match() is used instead of ereg(), which was deprecated in PHP 5.3 and
        // removed in PHP 7. The 'D' modifier keeps '$' anchored at the very end of the
        // subject and 's' lets '.' match newlines, mirroring the POSIX patterns used before.
        $match = array();
        if ( preg_match( '#^ezobject://([0-9]+)$#D', $href, $match ) )
        {
            $objectID = $match[1];

            // protection from self-embedding
            if ( $objectID == $this->contentObjectID )
            {
                $this->handleError( eZXMLInputParser::ERROR_DATA,
                                    ezi18n( 'kernel/classes/datatypes/ezxmltext', 'Object %1 can not be embeded to itself.', '', array( $objectID ) ) );

                $element->removeAttribute( 'href' );
                return $ret;
            }

            $element->setAttribute( 'object_id', $objectID );

            if ( !in_array( $objectID, $this->relatedObjectIDArray ) )
            {
                $this->relatedObjectIDArray[] = $objectID;
            }
        }
        elseif ( preg_match( '#^eznode://(.+)$#sD', $href, $match ) )
        {
            // Everything after "eznode://" is either a numeric node ID or a URL path.
            $nodePath = $match[1];

            if ( preg_match( '/^[0-9]+$/D', $nodePath ) )
            {
                $nodeID = $nodePath;
                $node = eZContentObjectTreeNode::fetch( $nodeID, false, false );
                if ( !$node )
                {
                    $this->handleError( eZXMLInputParser::ERROR_DATA,
                                        ezi18n( 'kernel/classes/datatypes/ezxmltext', "Node '%1' does not exist.", '', array( $nodeID ) ) );

                    $element->removeAttribute( 'href' );
                    return $ret;
                }
            }
            else
            {
                $node = eZContentObjectTreeNode::fetchByURLPath( $nodePath, false );
                if ( !$node )
                {
                    $this->handleError( eZXMLInputParser::ERROR_DATA,
                                        ezi18n( 'kernel/classes/datatypes/ezxmltext', 'Node \'%1\' does not exist.', '', array( $nodePath ) ) );

                    $element->removeAttribute( 'href' );
                    return $ret;
                }
                $nodeID = $node['node_id'];
                $element->setAttribute( 'show_path', 'true' );
            }

            $element->setAttribute( 'node_id', $nodeID );
            $objectID = $node['contentobject_id'];

            // protection from self-embedding
            if ( $objectID == $this->contentObjectID )
            {
                $this->handleError( eZXMLInputParser::ERROR_DATA,
                                    ezi18n( 'kernel/classes/datatypes/ezxmltext', 'Object %1 can not be embeded to itself.', '', array( $objectID ) ) );

                $element->removeAttribute( 'href' );
                return $ret;
            }

            if ( !in_array( $objectID, $this->relatedObjectIDArray ) )
            {
                $this->relatedObjectIDArray[] = $objectID;
            }
        }
        else
        {
            // Unsupported protocol: flag the input as invalid and keep a message for the user.
            $this->isInputValid = false;
            $this->Messages[] = ezi18n( 'kernel/classes/datatypes', 'Invalid reference in &lt;embed&gt; tag. Note that <embed> tag supports only \'eznode\' and \'ezobject\' protocols.' );
            $element->removeAttribute( 'href' );
            return $ret;
        }
    }

    $element->removeAttribute( 'href' );
    $this->convertCustomAttributes( $element );
    return $ret;
}
00912 
/**
 * Publish handler for the 'object' element.
 *
 * Records the element's 'id' in $this->relatedObjectIDArray (once), unless it equals
 * $this->contentObjectID, in which case the input is flagged invalid, a message is
 * stored and the method returns without touching the element.
 *
 * If the element carries an 'ezurl_href' attribute in the 'image' namespace, the URL is
 * registered through eZURL::registerURL(), stored as 'image:ezurl_id' and the href
 * attribute is removed. The resulting (or pre-existing) URL ID is collected in
 * $this->urlIDArray. Finally the remaining attributes go through convertCustomAttributes().
 *
 * @param object $element Element being published (presumably a DOMElement - confirm against caller).
 * @param array  $params  Handler parameters; not used by this handler.
 * @return null Always null.
 */
function publishHandlerObject( $element, &$params )
{
    $ret = null;

    $objectID = $element->getAttribute( 'id' );
    // protection from self-embedding
    if ( $objectID == $this->contentObjectID )
    {
        $this->isInputValid = false;
        $this->Messages[] = ezi18n( 'kernel/classes/datatypes',
                                    'Object %1 can not be embeded to itself.', false, array( $objectID ) );
        return $ret;
    }

    if ( !in_array( $objectID, $this->relatedObjectIDArray ) )
    {
        $this->relatedObjectIDArray[] = $objectID;
    }

    // If there are any image object with links.
    $href = $element->getAttributeNS( $this->Namespaces['image'], 'ezurl_href' );
    // Washing href: single and double quotes inside the url are replaced with their urlencoded form.
    $href = str_replace( array('\'','"'), array('%27','%22'), $href );

    $urlID = $element->getAttributeNS( $this->Namespaces['image'], 'ezurl_id' );

    if ( $href != null )
    {
        // A freshly registered href takes precedence over any 'ezurl_id' already on the element.
        $urlID = eZURL::registerURL( $href );
        $element->setAttributeNS( $this->Namespaces['image'], 'image:ezurl_id', $urlID );
        $element->removeAttributeNS( $this->Namespaces['image'], 'ezurl_href' );
    }

    // Collect each URL ID only once, the same way convertHrefToID() does.
    if ( $urlID != null && !in_array( $urlID, $this->urlIDArray ) )
    {
        $this->urlIDArray[] = $urlID;
    }

    $this->convertCustomAttributes( $element );

    return $ret;
}
00956 
/**
 * Publish handler for the 'custom' element.
 *
 * The 'inline' attribute is removed before the remaining attributes are handed to
 * convertCustomAttributes(), so the conversion never sees it.
 *
 * @param object $element Element being published.
 * @param array  $params  Handler parameters; not used by this handler.
 * @return null Always null.
 */
function publishHandlerCustom( $element, &$params )
{
    $element->removeAttribute( 'inline' );
    $this->convertCustomAttributes( $element );

    return null;
}
00967 
/**
 * Moves every non-schema attribute of the element into the 'custom' namespace.
 *
 * An attribute is converted when it has no namespace prefix and its name is not in the
 * list returned by $this->XMLSchema->attributes( $element ). Converted attributes are
 * re-created as "custom:<name>" with the same value and the original attribute node is removed.
 *
 * @param object $element Element whose attributes are converted in place.
 */
function convertCustomAttributes( $element )
{
    $knownAttrs = $this->XMLSchema->attributes( $element );
    $attrList = $element->attributes;

    // Walk the list from the last item to the first: attributes are added and removed
    // while iterating, and going backwards keeps the not-yet-visited indexes stable.
    $index = $attrList->length;
    while ( --$index >= 0 )
    {
        $attrNode = $attrList->item( $index );
        if ( $attrNode->prefix || in_array( $attrNode->nodeName, $knownAttrs ) )
        {
            continue;
        }

        $value = $element->getAttribute( $attrNode->name );
        $element->setAttributeNS( $this->Namespaces['custom'], 'custom:' . $attrNode->name, $value );
        $element->removeAttributeNode( $attrNode );
    }
}
00983 
/**
 * Accessor for the object IDs collected by the 'embed' and 'object' publish handlers.
 *
 * @return array Current contents of $this->relatedObjectIDArray.
 */
function getRelatedObjectIDArray()
{
    return $this->relatedObjectIDArray;
}
00988 
/**
 * Accessor for the object IDs collected while resolving link hrefs.
 *
 * @return array Current contents of $this->linkedObjectIDArray.
 */
function getLinkedObjectIDArray()
{
    return $this->linkedObjectIDArray;
}
00993 
/**
 * Accessor for the URL IDs collected during publishing.
 *
 * @return array Current contents of $this->urlIDArray.
 */
function getUrlIDArray()
{
    return $this->urlIDArray;
}
00998 
// URL IDs returned by eZURL::registerURL(); filled by convertHrefToID() and publishHandlerObject().
public $urlIDArray = array();
// IDs of objects referenced through 'embed' and 'object' elements.
public $relatedObjectIDArray = array();
// IDs of objects collected while resolving link hrefs.
public $linkedObjectIDArray = array();

// ID of the object being edited; embedded object IDs are compared against it
// for self-embedding protection.
public $contentObjectID = 0;
01005 }
01006 ?>