|
eZ Publish
[4.0]
|
00001 <?php 00002 // 00003 // Definition of eZXMLInputParser class 00004 // 00005 // Created on: <27-Mar-2006 15:28:39 ks> 00006 // 00007 // ## BEGIN COPYRIGHT, LICENSE AND WARRANTY NOTICE ## 00008 // SOFTWARE NAME: eZ Publish 00009 // SOFTWARE RELEASE: 4.0.x 00010 // COPYRIGHT NOTICE: Copyright (C) 1999-2008 eZ Systems AS 00011 // SOFTWARE LICENSE: GNU General Public License v2.0 00012 // NOTICE: > 00013 // This program is free software; you can redistribute it and/or 00014 // modify it under the terms of version 2.0 of the GNU General 00015 // Public License as published by the Free Software Foundation. 00016 // 00017 // This program is distributed in the hope that it will be useful, 00018 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00019 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00020 // GNU General Public License for more details. 00021 // 00022 // You should have received a copy of version 2.0 of the GNU General 00023 // Public License along with this program; if not, write to the Free 00024 // Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 00025 // MA 02110-1301, USA. 00026 // 00027 // 00028 // ## END COPYRIGHT, LICENSE AND WARRANTY NOTICE ## 00029 // 00030 00031 /* 00032 Base class for the input parser. 00033 The goal of the parser is XML/HTML analyzing, fixing and transforming. 00034 The input is processed in 2 passes: 00035 - 1st pass: Parsing input, check for syntax errors, build DOM tree. 00036 - 2nd pass: Walking through DOM tree, checking validity by XML schema, 00037 calling tag handlers to transform the tree. 00038 00039 Both passes are controlled by the arrays described below and user handler functions. 
00040 00041 */ 00042 00043 // if ( !class_exists( 'eZXMLSchema' ) ) // AS 21-09-2007: commented out because of include_once being commented out 00044 //include_once( 'kernel/classes/datatypes/ezxmltext/ezxmlschema.php' ); 00045 00046 class eZXMLInputParser 00047 { 00048 /// \deprecated (back-compatibility) 00049 const SHOW_NO_ERRORS = 0; 00050 const SHOW_SCHEMA_ERRORS = 1; 00051 const SHOW_ALL_ERRORS = 2; 00052 00053 /// Use these constants for error types 00054 const ERROR_NONE = 0; 00055 const ERROR_SYNTAX = 4; 00056 const ERROR_SCHEMA = 8; 00057 const ERROR_DATA = 16; 00058 const ERROR_ALL = 28; // 4+8+16 00059 00060 /* $InputTags array contains properties of elements that come from the input. 00061 00062 Each array element describes a tag that comes from the input. Arrays index is 00063 a tag's name. Each element is an array that may contain the following members: 00064 00065 'name' - a string representing a new name of the tag, 00066 'nameHandler' - a name of the function that returns new tag name. Function format: 00067 function tagNameHandler( $tagName, &$attributes ) 00068 00069 If no of those elements are defined the original tag's name is used. 00070 00071 'noChildren' - boolean value that determines if this tag could have child tags, 00072 default value is false. 00073 00074 Example: 00075 00076 public $InputTags = array( 00077 00078 'original-name' => array( 'name' => 'new-name' ), 00079 00080 'original-name2' => array( 'nameHandler' => 'tagNameHandler', 00081 'noChildren' => true ), 00082 00083 ... 00084 00085 ); 00086 */ 00087 00088 public $InputTags = array(); 00089 00090 /* 00091 $OutputTags array contains properties of elements that are produced in the output. 00092 Each array element describes a tag presented in the output. Arrays index is 00093 a tag's name. Each element is an array that may contain the following members: 00094 00095 'parsingHandler' - "Parsing handler" called at parse pass 1 before processing tag's children. 
00096 'initHandler' - "Init handler" called at pass 2 before proccessing tag's children. 00097 'structHandler' - "Structure handler" called at pass 2 after proccessing tag's children, 00098 but before schema validity check. It can be used to implement structure 00099 transformations. 00100 'publishHandler' - "Publish handler" called at pass 2 after schema validity check, so it is called 00101 in case the element has it's guaranteed place in the DOM tree. 00102 00103 'attributes' - an array that describes attributes transformations. Array's index is the 00104 original name of an attribute, and the value is the new name. 00105 00106 'requiredInputAttributes' - attributes that are required in the input tag. If they are not presented 00107 it raises invalid input flag. 00108 00109 Example: 00110 00111 public $OutputTags = array( 00112 00113 'custom' => array( 'parsingHandler' => 'parsingHandlerCustom', 00114 'initHandler' => 'initHandlerCustom', 00115 'structHandler' => 'structHandlerCustom', 00116 'publishHandler' => 'publishHandlerCustom', 00117 'attributes' => array( 'title' => 'name' ) ), 00118 00119 ... 00120 ); 00121 00122 */ 00123 00124 public $OutputTags = array(); 00125 00126 public $Namespaces = array( 'image' => 'http://ez.no/namespaces/ezpublish3/image/', 00127 'xhtml' => 'http://ez.no/namespaces/ezpublish3/xhtml/', 00128 'custom' => 'http://ez.no/namespaces/ezpublish3/custom/', 00129 'tmp' => 'http://ez.no/namespaces/ezpublish3/temporary/' ); 00130 00131 /*! 00132 00133 The constructor. 00134 00135 \param $validate 00136 \param $validateErrorLevel Determines types of errors that break input processing 00137 It's possible to combine any error types, by creating a bitmask of EZ_XMLINPUTPARSER_ERROR_* constants. 00138 \c true value means that all errors defined by $detectErrorLevel parameter will break further processing 00139 \param $detectErrorLevel Determines types of errors that will be detected and added to error log ($Messages). 
*/
function __construct( $validateErrorLevel = self::ERROR_NONE, $detectErrorLevel = self::ERROR_NONE, $parseLineBreaks = false,
                      $removeDefaultAttrs = false )
{
    // Back-compatibility fixes: translate the deprecated SHOW_* values
    // into the corresponding ERROR_* bitmasks.
    if ( $detectErrorLevel === self::SHOW_SCHEMA_ERRORS )
    {
        $detectErrorLevel = self::ERROR_SCHEMA;
    }
    elseif ( $detectErrorLevel === self::SHOW_ALL_ERRORS )
    {
        $detectErrorLevel = self::ERROR_ALL;
    }

    // Boolean values are still accepted for $validateErrorLevel:
    // false disables validation, true validates everything that is detected.
    if ( $validateErrorLevel === false )
    {
        $validateErrorLevel = self::ERROR_NONE;
    }
    elseif ( $validateErrorLevel === true )
    {
        $validateErrorLevel = $detectErrorLevel;
    }

    $this->ValidateErrorLevel = $validateErrorLevel;
    $this->DetectErrorLevel = $detectErrorLevel;

    $this->RemoveDefaultAttrs = $removeDefaultAttrs;
    $this->ParseLineBreaks = $parseLineBreaks;

    $this->XMLSchema = eZXMLSchema::instance();

    //include_once( 'lib/version.php' );
    $this->eZPublishVersion = eZPublishSDK::majorVersion() + eZPublishSDK::minorVersion() * 0.1;

    // Optional input settings: each property is only assigned when the
    // corresponding variable exists in ezxml.ini.
    $ini = eZINI::instance( 'ezxml.ini' );
    if ( $ini->hasVariable( 'InputSettings', 'TrimSpaces' ) )
    {
        $trimSpaces = $ini->variable( 'InputSettings', 'TrimSpaces' );
        $this->TrimSpaces = $trimSpaces == 'true';
    }

    if ( $ini->hasVariable( 'InputSettings', 'AllowMultipleSpaces' ) )
    {
        $allowMultipleSpaces = $ini->variable( 'InputSettings', 'AllowMultipleSpaces' );
        $this->AllowMultipleSpaces = $allowMultipleSpaces == 'true';
    }

    if ( $ini->hasVariable( 'InputSettings', 'AllowNumericEntities' ) )
    {
        $allowNumericEntities = $ini->variable( 'InputSettings', 'AllowNumericEntities' );
        $this->AllowNumericEntities = $allowNumericEntities == 'true';
    }

    $contentIni = eZINI::instance( 'content.ini' );
    $useStrictHeaderRule = $contentIni->variable( 'header', 'UseStrictHeaderRule' );
    $this->StrictHeaders = $useStrictHeaderRule == 'true';
}

/*!
 \deprecated PHP 4 style constructor name.

 Kept as a regular method so that any code invoking
 $this->eZXMLInputParser( ... ) by name keeps working; it simply forwards
 its arguments to __construct(). PHP 8 no longer treats a method named
 after the class as a constructor, so the initialisation itself lives in
 __construct().
*/
function eZXMLInputParser( $validateErrorLevel = self::ERROR_NONE, $detectErrorLevel = self::ERROR_NONE, $parseLineBreaks = false,
                           $removeDefaultAttrs = false )
{
    $this->__construct( $validateErrorLevel, $detectErrorLevel, $parseLineBreaks, $removeDefaultAttrs );
}

/// \public
/// Sets the name of the class instantiated for $this->Document.
function setDOMDocumentClass( $DOMDocumentClass )
{
    $this->DOMDocumentClass = $DOMDocumentClass;
}

/// \public
/// Enables or disables turning "\n" in the input into <br/> elements.
function setParseLineBreaks( $value )
{
    $this->ParseLineBreaks = $value;
}

/// \public
/// Enables or disables removal of attributes that carry their schema default value.
function setRemoveDefaultAttrs( $value )
{
    $this->RemoveDefaultAttrs = $value;
}

/// \public
/// Creates the root <section> element (and the document itself if it does
/// not exist yet), declares the image, xhtml and custom namespaces on it
/// and returns the document.
function createRootNode()
{
    if ( !$this->Document )
    {
        $this->Document = new $this->DOMDocumentClass( '1.0', 'utf-8' );
    }

    // Creating root section with namespaces definitions
    $mainSection = $this->Document->createElement( 'section' );
    $this->Document->appendChild( $mainSection );
    foreach( array( 'image', 'xhtml', 'custom' ) as $prefix )
    {
        $mainSection->setAttributeNS( 'http://www.w3.org/2000/xmlns/', 'xmlns:' . $prefix, $this->Namespaces[$prefix] );
    }
    return $this->Document;
}

/*!
00236 \public 00237 Call this function to process your input 00238 */ 00239 function process( $text, $createRootNode = true ) 00240 { 00241 $text = str_replace( "\r", '', $text); 00242 $text = str_replace( "\t", ' ', $text); 00243 if ( !$this->ParseLineBreaks ) 00244 { 00245 $text = str_replace( "\n", '', $text); 00246 } 00247 00248 $this->Document = new $this->DOMDocumentClass( '1.0', 'utf-8' ); 00249 00250 if ( $createRootNode ) 00251 { 00252 $this->createRootNode(); 00253 } 00254 00255 // Perform pass 1 00256 // Parsing the source string 00257 $this->performPass1( $text ); 00258 00259 //$this->Document->formatOutput = true; 00260 eZDebugSetting::writeDebug( 'kernel-datatype-ezxmltext', $this->Document->saveXML(), 'XML after pass 1' ); 00261 00262 if ( $this->QuitProcess ) 00263 { 00264 return false; 00265 } 00266 00267 // Perform pass 2 00268 $this->performPass2(); 00269 00270 //$this->Document->formatOutput = true; 00271 eZDebugSetting::writeDebug( 'kernel-datatype-ezxmltext', $this->Document->saveXML(), 'XML after pass 2' ); 00272 00273 if ( $this->QuitProcess ) 00274 { 00275 return false; 00276 } 00277 00278 return $this->Document; 00279 } 00280 00281 /* 00282 \public 00283 Pass 1: Parsing the source HTML string. 00284 */ 00285 00286 function performPass1( &$data ) 00287 { 00288 $ret = true; 00289 $pos = 0; 00290 00291 if ( $this->Document->documentElement ) 00292 { 00293 do 00294 { 00295 $this->parseTag( $data, $pos, $this->Document->documentElement ); 00296 if ( $this->QuitProcess ) 00297 { 00298 $ret = false; 00299 break; 00300 } 00301 00302 } 00303 while( $pos < strlen( $data ) ); 00304 } 00305 else 00306 { 00307 $tmp = null; 00308 $this->parseTag( $data, $pos, $tmp ); 00309 if ( $this->QuitProcess ) 00310 { 00311 $ret = false; 00312 } 00313 } 00314 return $ret; 00315 } 00316 00317 // The main recursive function for pass 1 00318 00319 function parseTag( &$data, &$pos, &$parent ) 00320 { 00321 // Find tag, determine it's type, name and attributes. 
00322 $initialPos = $pos; 00323 00324 if ( $pos >= strlen( $data ) ) 00325 { 00326 return true; 00327 } 00328 $tagBeginPos = strpos( $data, '<', $pos ); 00329 00330 if ( $this->ParseLineBreaks ) 00331 { 00332 // Regard line break as a start tag position 00333 $lineBreakPos = strpos( $data, "\n", $pos ); 00334 if ( $lineBreakPos !== false ) 00335 { 00336 $tagBeginPos = $tagBeginPos === false ? $lineBreakPos : min( $tagBeginPos, $lineBreakPos ); 00337 } 00338 } 00339 00340 $tagName = ''; 00341 $attributes = null; 00342 // If it doesn't begin with '<' then its a text node. 00343 if ( $tagBeginPos != $pos || $tagBeginPos === false ) 00344 { 00345 $pos = $initialPos; 00346 $tagName = $newTagName = '#text'; 00347 $noChildren = true; 00348 00349 if ( !$tagBeginPos ) 00350 { 00351 $tagBeginPos = strlen( $data ); 00352 } 00353 00354 $textContent = substr( $data, $pos, $tagBeginPos - $pos ); 00355 00356 $textContent = $this->washText( $textContent ); 00357 00358 $pos = $tagBeginPos; 00359 if ( $textContent === '' ) 00360 { 00361 return false; 00362 } 00363 } 00364 // Process closing tag. 
00365 elseif ( $data[$tagBeginPos] == '<' && $tagBeginPos + 1 < strlen( $data ) && 00366 $data[$tagBeginPos + 1] == '/' ) 00367 { 00368 $tagEndPos = strpos( $data, '>', $tagBeginPos + 1 ); 00369 if ( $tagEndPos === false ) 00370 { 00371 $pos = $tagBeginPos + 1; 00372 00373 $this->handleError( self::ERROR_SYNTAX, ezi18n( 'kernel/classes/datatypes/ezxmltext', 'Wrong closing tag' ) ); 00374 return false; 00375 } 00376 00377 $pos = $tagEndPos + 1; 00378 $closedTagName = strtolower( trim( substr( $data, $tagBeginPos + 2, $tagEndPos - $tagBeginPos - 2 ) ) ); 00379 00380 // Find matching tag in ParentStack array 00381 $firstLoop = true; 00382 for( $i = count( $this->ParentStack ) - 1; $i >= 0; $i-- ) 00383 { 00384 $parentNames = $this->ParentStack[$i]; 00385 if ( $parentNames[0] == $closedTagName ) 00386 { 00387 array_pop( $this->ParentStack ); 00388 if ( !$firstLoop ) 00389 { 00390 $pos = $tagBeginPos; 00391 return true; 00392 } 00393 // If newTagName was '' we don't break children loop 00394 elseif ( $parentNames[1] !== '' ) 00395 { 00396 return true; 00397 } 00398 else 00399 { 00400 return false; 00401 } 00402 } 00403 $firstLoop = false; 00404 } 00405 00406 $this->handleError( self::ERROR_SYNTAX, ezi18n( 'kernel/classes/datatypes/ezxmltext', 'Wrong closing tag : </%1>.', false, array( $closedTagName ) ) ); 00407 00408 return false; 00409 } 00410 // Insert <br/> instead of linebreaks 00411 elseif ( $this->ParseLineBreaks && $data[$tagBeginPos] == "\n" ) 00412 { 00413 $newTagName = 'br'; 00414 $noChildren = true; 00415 $pos = $tagBeginPos + 1; 00416 } 00417 // Regular tag: get tag's name and attributes. 
00418 else 00419 { 00420 $tagEndPos = strpos( $data, '>', $tagBeginPos ); 00421 if ( $tagEndPos === false ) 00422 { 00423 $pos = $tagBeginPos + 1; 00424 00425 $this->handleError( self::ERROR_SYNTAX, ezi18n( 'kernel/classes/datatypes/ezxmltext', 'Wrong opening tag' ) ); 00426 return false; 00427 } 00428 00429 $pos = $tagEndPos + 1; 00430 $tagString = substr( $data, $tagBeginPos + 1, $tagEndPos - $tagBeginPos - 1 ); 00431 // Check for final backslash 00432 $noChildren = substr( $tagString, -1, 1 ) == '/' ? true : false; 00433 // Remove final backslash and spaces 00434 $tagString = preg_replace( "/\s*\/$/", "", $tagString ); 00435 00436 $firstSpacePos = strpos( $tagString, ' ' ); 00437 if ( $firstSpacePos === false ) 00438 { 00439 $tagName = strtolower( trim( $tagString ) ); 00440 $attributeString = ''; 00441 } 00442 else 00443 { 00444 $tagName = strtolower( substr( $tagString, 0, $firstSpacePos ) ); 00445 $attributeString = substr( $tagString, $firstSpacePos + 1 ); 00446 $attributeString = trim( $attributeString ); 00447 // Parse attribute string 00448 if ( $attributeString ) 00449 { 00450 $attributes = $this->parseAttributes( $attributeString ); 00451 } 00452 } 00453 00454 // Determine tag's name 00455 if ( isset( $this->InputTags[$tagName] ) ) 00456 { 00457 $thisInputTag = $this->InputTags[$tagName]; 00458 00459 if ( isset( $thisInputTag['name'] ) ) 00460 { 00461 $newTagName = $thisInputTag['name']; 00462 } 00463 else 00464 { 00465 $newTagName = $this->callInputHandler( 'nameHandler', $tagName, $attributes ); 00466 } 00467 } 00468 else 00469 { 00470 if ( $this->XMLSchema->exists( $tagName ) ) 00471 { 00472 $newTagName = $tagName; 00473 } 00474 else 00475 { 00476 $this->handleError( self::ERROR_SYNTAX, ezi18n( 'kernel/classes/datatypes/ezxmltext', 'Unknown tag: <%1>.', false, array( $tagName ) ) ); 00477 return false; 00478 } 00479 } 00480 00481 // Check 'noChildren' property 00482 if ( isset( $thisInputTag['noChildren'] ) ) 00483 { 00484 $noChildren = true; 00485 } 
00486 00487 $thisOutputTag = isset( $this->OutputTags[$newTagName] ) ? $this->OutputTags[$newTagName] : null; 00488 00489 // Implementation of 'autoCloseOn' rule ( Handling of unclosed tags, ex.: <p>, <li> ) 00490 if ( isset( $thisOutputTag['autoCloseOn'] ) && 00491 $parent && 00492 $parent->parentNode instanceof DOMElement && 00493 in_array( $parent->nodeName, $thisOutputTag['autoCloseOn'] ) ) 00494 { 00495 // Wrong nesting: auto-close parent and try to re-parse this tag at higher level 00496 array_pop( $this->ParentStack ); 00497 $pos = $tagBeginPos; 00498 return true; 00499 } 00500 00501 // Append to parent stack 00502 if ( !$noChildren && $newTagName !== false ) 00503 { 00504 $this->ParentStack[] = array( $tagName, $newTagName, $attributeString ); 00505 } 00506 00507 if ( !$newTagName ) 00508 { 00509 // If $newTagName is an empty string then it's not a error 00510 if ( $newTagName === false ) 00511 $this->handleError( self::ERROR_SYNTAX, ezi18n( 'kernel/classes/datatypes/ezxmltext', "Can't convert tag's name: <%1>.", false, array( $tagName ) ) ); 00512 00513 return false; 00514 } 00515 00516 // wordmatch.ini support 00517 if ( $attributeString ) 00518 { 00519 $attributes = $this->wordMatchSupport( $newTagName, $attributes, $attributeString ); 00520 } 00521 } 00522 00523 // Create text or normal node. 00524 if ( $newTagName == '#text' ) 00525 { 00526 $element = $this->Document->createTextNode( $textContent ); 00527 } 00528 else 00529 { 00530 $element = $this->Document->createElement( $newTagName ); 00531 } 00532 00533 if ( $attributes ) 00534 { 00535 $this->setAttributes( $element, $attributes ); 00536 } 00537 00538 // Append element as a child or set it as root if there is no parent. 
if ( $parent )
{
    $parent->appendChild( $element );
}
else
{
    $this->Document->appendChild( $element );
}

$params = array();
$params[] =& $data;
$params[] =& $pos;
$params[] =& $tagBeginPos;
$result = $this->callOutputHandler( 'parsingHandler', $element, $params );

if ( $result === false )
{
    // This tag is already parsed in handler
    if ( !$noChildren )
    {
        array_pop( $this->ParentStack );
    }
    return false;
}

if ( $this->QuitProcess )
{
    return false;
}

// Process children
if ( !$noChildren )
{
    do
    {
        $parseResult = $this->parseTag( $data, $pos, $element );
        if ( $this->QuitProcess )
        {
            return false;
        }
    }
    while( $parseResult !== true );
}

return false;
}

/*
    Helper functions for pass 1
*/

/*!
 Splits the attribute part of a tag into an array of name => value pairs.

 Single-quoted and unquoted values are first normalised to double-quoted
 ones, then the string is split on spaces that follow a closing double
 quote. Attribute names are lower-cased; attributes with an empty name or
 an empty value are skipped.

 \param $attributeString the tag's source text after the tag name
 \return array of attribute values indexed by lower-cased attribute name
*/
function parseAttributes( $attributeString )
{
    // The replacements below are plain back-reference substitutions. The
    // previous implementation used the /e (eval) modifier for the same
    // purpose; that modifier no longer exists in PHP 7+, and it also ran
    // addslashes() on the captured groups, which left stray backslashes in
    // front of double quotes inside single-quoted values.
    // NOTE(review): inside the character class, ':-_' is a range (':' to
    // '_'), not three literal characters - kept as is, confirm intent.

    // Convert single quotes to double quotes
    $attributeString = preg_replace( "/ +([a-zA-Z0-9:-_#\-]+) *\='(.*?)'/", ' $1="$2"', ' ' . $attributeString );

    // Convert no quotes to double quotes and remove extra spaces
    $attributeString = preg_replace( "/ +([a-zA-Z0-9:-_#\-]+) *\= *([^\s'\"]+)/", ' $1="$2" ', $attributeString );

    // Split by quotes followed by spaces
    $attributeArray = preg_split( "#(?<=\") +#", $attributeString );

    $attributes = array();
    foreach( $attributeArray as $attrStr )
    {
        if ( !$attrStr || strlen( $attrStr ) < 4 )
        {
            continue;
        }

        // A chunk without '="' carries no value and is ignored.
        $nameAndValue = preg_split( "/ *= *\"/", $attrStr );
        if ( count( $nameAndValue ) < 2 )
        {
            continue;
        }
        list( $attrName, $attrValue ) = $nameAndValue;

        $attrName = strtolower( trim( $attrName ) );
        if ( !$attrName )
        {
            continue;
        }

        // Drop the closing double quote.
        $attrValue = substr( $attrValue, 0, -1 );
        if ( $attrValue === '' || $attrValue === false )
        {
            continue;
        }

        $attributes[$attrName] = $attrValue;
    }

    return $attributes;
}

/*!
 Copies the parsed input attributes onto $element.

 Attribute names are translated through the 'attributes' map of the
 element's $OutputTags entry when there is one. A 'class' value that is not
 in the schema's class list for the element is reported as ERROR_DATA and
 skipped. Prefixed names are set with the namespace registered in
 $Namespaces; an unknown prefix only produces a debug warning. Finally every
 name listed in 'requiredInputAttributes' that is missing from the input is
 reported as ERROR_SCHEMA.

 \param $element    the DOM element created for the tag
 \param $attributes array of attribute values indexed by input attribute name
*/
function setAttributes( $element, $attributes )
{
    // Not every element has an $OutputTags entry (parseTag() guards the same
    // lookup with isset()), so avoid an undefined index notice here.
    $thisOutputTag = isset( $this->OutputTags[$element->nodeName] ) ? $this->OutputTags[$element->nodeName] : null;

    foreach( $attributes as $key => $value )
    {
        // Convert attribute names
        if ( isset( $thisOutputTag['attributes'] ) &&
             isset( $thisOutputTag['attributes'][$key] ) )
        {
            $qualifiedName = $thisOutputTag['attributes'][$key];
        }
        else
        {
            $qualifiedName = $key;
        }

        // Filter classes
        if ( $qualifiedName == 'class' )
        {
            $classesList = $this->XMLSchema->getClassesList( $element->nodeName );
            if ( !in_array( $value, $classesList ) )
            {
                $this->handleError( self::ERROR_DATA,
                                    ezi18n( 'kernel/classes/datatypes/ezxmltext', "Class '%1' is not allowed for element <%2> (check content.ini).",
                                    false, array( $value, $element->nodeName ) ) );
                continue;
            }
        }

        // Create attribute nodes
        if ( $qualifiedName )
        {
            if ( strpos( $qualifiedName, ':' ) )
            {
                list( $prefix, $name ) = explode( ':', $qualifiedName );
                if ( isset( $this->Namespaces[$prefix] ) )
                {
                    $URI = $this->Namespaces[$prefix];
                    $element->setAttributeNS( $URI, $qualifiedName, $value );
                }
                else
                {
                    eZDebug::writeWarning( "No namespace defined for prefix '$prefix'.", 'eZXML input parser' );
                }
            }
            else
            {
                $element->setAttribute( $qualifiedName, $value );
            }
        }
    }

    // Check for required attrs are present
    if ( isset( $this->OutputTags[$element->nodeName]['requiredInputAttributes'] ) )
    {
        foreach( $this->OutputTags[$element->nodeName]['requiredInputAttributes'] as $reqAttrName )
        {
            $presented = false;
            foreach( $attributes as $key => $value )
            {
                if ( $key == $reqAttrName )
                {
                    $presented = true;
                    break;
                }
            }
            if ( !$presented )
            {
                $this->handleError( self::ERROR_SCHEMA,
                                    ezi18n( 'kernel/classes/datatypes/ezxmltext', "Required attribute '%1' is not presented in tag <%2>.",
                                    false, array( $reqAttrName, $element->nodeName ) ) );
            }
        }
    }
}

/*!
 Prepares the content of a text node: decodes the basic character entities,
 converts numeric entities unless AllowNumericEntities is set, and collapses
 runs of spaces into one space unless AllowMultipleSpaces is set.

 \param $textContent raw text taken from the input
 \return the washed text
*/
function washText( $textContent )
{
    $textContent = $this->entitiesDecode( $textContent );

    if ( !$this->AllowNumericEntities )
    {
        $textContent = $this->convertNumericEntities( $textContent );
    }

    if ( !$this->AllowMultipleSpaces )
    {
        $textContent = preg_replace( "/ {2,}/", " ", $textContent );
    }

    return $textContent;
}

/*!
 Replaces the character entities &#039;, &gt;, &lt;, &apos;, &quot;, &amp;
 and &nbsp; (in that order) with the characters they stand for. &nbsp; is
 turned into a regular space.

 \param $text text that may contain the entities listed above
 \return the text with those entities replaced
*/
function entitiesDecode( $text )
{
    $text = str_replace( '&#039;', "'", $text );

    $text = str_replace( '&gt;', '>', $text );
    $text = str_replace( '&lt;', '<', $text );
    $text = str_replace( '&apos;', "'", $text );
    $text = str_replace( '&quot;', '"', $text );
    $text = str_replace( '&amp;', '&', $text );
    $text = str_replace( '&nbsp;', ' ', $text );
    return $text;
}
function convertNumericEntities( $text )
{
    /*
     Replaces every "&#<code>;" sequence in $text with the result of passing
     <code> to the 'unicode' text codec; all other text is copied unchanged.
     A "&#" that is not followed by a ';' anywhere later in the text is kept
     literally. Texts shorter than 4 bytes cannot hold an entity and are
     returned as they are.

     \param $text text that may contain numeric character entities
     \return the text with the numeric entities converted
    */
    $length = strlen( $text );
    if ( $length < 4 )
    {
        return $text;
    }
    // Convert other HTML entities to the current charset characters.
    //include_once( 'lib/ezi18n/classes/eztextcodec.php' );
    $codec = eZTextCodec::instance( 'unicode', false );
    $pos = 0;
    $domString = "";
    while ( $pos < $length )
    {
        $entityPos = strpos( $text, '&#', $pos );
        if ( $entityPos === false )
        {
            // No further entity: copy the remainder, including a single
            // trailing character, and stop.
            $domString .= substr( $text, $pos );
            break;
        }

        // Copy the plain text in front of the entity.
        $domString .= substr( $text, $pos, $entityPos - $pos );

        $endPos = strpos( $text, ';', $entityPos + 2 );
        if ( $endPos === false )
        {
            // Unterminated entity: keep the "&#" marker as plain text.
            $domString .= '&#';
            $pos = $entityPos + 2;
            continue;
        }

        $code = substr( $text, $entityPos + 2, $endPos - ( $entityPos + 2 ) );
        $domString .= $codec->convertString( array( $code ) );

        $pos = $endPos + 1;
    }
    return $domString;
}

/*!
 Returns modified attributes parameter

 Looks up the 'MatchString' variable of the $newTagName block in
 wordmatch.ini. For every entry whose value, used as a case-insensitive
 regular expression, matches $attributeString, the 'class' attribute is set
 to the entry's key and the 'style' attribute is removed. When several
 entries match, the last one determines the class.

 \param $newTagName      output tag name, used as the ini block name
 \param $attributes      parsed attributes of the tag
 \param $attributeString unparsed attribute source of the tag
*/
protected function wordMatchSupport( $newTagName, $attributes, $attributeString )
{
    $ini = eZINI::instance( 'wordmatch.ini' );
    if ( $ini->hasVariable( $newTagName, 'MatchString' ) )
    {
        $matchArray = $ini->variable( $newTagName, 'MatchString' );
        if ( $matchArray )
        {
            foreach ( array_keys( $matchArray ) as $key )
            {
                $matchString = $matchArray[$key];
                if ( preg_match( "/$matchString/i", $attributeString ) )
                {
                    $attributes['class'] = $key;
                    unset( $attributes['style'] );
                }
            }
        }
    }
    return $attributes;
}


/*!
 \public
 Pass 2: Process the tree, run handlers, rebuild and validate.
00810 */ 00811 00812 function performPass2() 00813 { 00814 $tmp = null; 00815 00816 $this->processSubtree( $this->Document->documentElement, $tmp ); 00817 } 00818 00819 // main recursive function for pass 2 00820 00821 function processSubtree( $element, &$lastHandlerResult ) 00822 { 00823 $ret = null; 00824 $tmp = null; 00825 00826 // Call "Init handler" 00827 $this->callOutputHandler( 'initHandler', $element, $tmp ); 00828 00829 // Process children 00830 if ( $element->hasChildNodes() ) 00831 { 00832 // Make another copy of children to save primary structure 00833 $childNodes = $element->childNodes; 00834 $childrenCount = $childNodes->length; 00835 00836 // we can not loop directly over the childNodes property, because this will change while we are working on it's parent's children 00837 $children = array(); 00838 foreach ( $childNodes as $childNode ) 00839 { 00840 $children[] = $childNode; 00841 } 00842 00843 $lastResult = null; 00844 $newElements = array(); 00845 foreach ( $children as $child ) 00846 { 00847 eZDebugSetting::writeDebug( 'kernel-datatype-ezxmltext', 'processing children, current child: ' . $child->nodeName ); 00848 $childReturn = $this->processSubtree( $child, $lastResult ); 00849 00850 unset( $lastResult ); 00851 if ( isset( $childReturn['result'] ) ) 00852 { 00853 eZDebugSetting::writeDebug( 'kernel-datatype-ezxmltext', 'return result is set for child ' . $child->nodeName ); 00854 $lastResult = $childReturn['result']; 00855 } 00856 00857 if ( isset( $childReturn['new_elements'] ) ) 00858 { 00859 $newElements = array_merge( $newElements, $childReturn['new_elements'] ); 00860 } 00861 00862 if ( $this->QuitProcess ) 00863 { 00864 return $ret; 00865 } 00866 } 00867 00868 eZDebugSetting::writeDebug( 'kernel-datatype-ezxmltext', $this->Document->saveXML(), 'XML before processNewElements for element ' . 
$element->nodeName ); 00869 // process elements created in children handlers 00870 $this->processNewElements( $newElements ); 00871 eZDebugSetting::writeDebug( 'kernel-datatype-ezxmltext', $this->Document->saveXML(), 'XML after processNewElements for element ' . $element->nodeName ); 00872 } 00873 00874 // Call "Structure handler" 00875 eZDebugSetting::writeDebug( 'kernel-datatype-ezxmltext', $this->Document->saveXML(), 'XML before callOutputHandler structHandler for element ' . $element->nodeName ); 00876 $ret = $this->callOutputHandler( 'structHandler', $element, $lastHandlerResult ); 00877 eZDebugSetting::writeDebug( 'kernel-datatype-ezxmltext', $this->Document->saveXML(), 'XML after callOutputHandler structHandler for element ' . $element->nodeName ); 00878 eZDebugSetting::writeDebug( 'kernel-datatype-ezxmltext', $ret, 'return value of callOutputHandler structHandler for element ' . $element->nodeName ); 00879 00880 // Process by schema (check if element is allowed to exist) 00881 if ( !$this->processBySchemaPresence( $element ) ) 00882 { 00883 eZDebugSetting::writeDebug( 'kernel-datatype-ezxmltext', $this->Document->saveXML(), 'XML after processBySchemaPresence for element ' . $element->nodeName ); 00884 return $ret; 00885 } 00886 00887 eZDebugSetting::writeDebug( 'kernel-datatype-ezxmltext', $this->Document->saveXML(), 'XML after processBySchemaPresence for element ' . $element->nodeName ); 00888 00889 // Process by schema (check place in the tree) 00890 if ( !$this->processBySchemaTree( $element ) ) 00891 { 00892 eZDebugSetting::writeDebug( 'kernel-datatype-ezxmltext', $this->Document->saveXML(), 'XML after processBySchemaTree for element ' . $element->nodeName ); 00893 return $ret; 00894 } 00895 00896 eZDebugSetting::writeDebug( 'kernel-datatype-ezxmltext', $this->Document->saveXML(), 'XML after processBySchemaTree for element ' . 
$element->nodeName ); 00897 00898 00899 $tmp = null; 00900 // Call "Publish handler" 00901 $this->callOutputHandler( 'publishHandler', $element, $tmp ); 00902 00903 // Process attributes according to the schema 00904 if ( $element->hasAttributes() ) 00905 { 00906 if ( !$this->XMLSchema->hasAttributes( $element ) ) 00907 { 00908 eZXMLInputParser::removeAllAttributes( $element ); 00909 } 00910 else 00911 { 00912 $this->processAttributesBySchema( $element ); 00913 } 00914 } 00915 return $ret; 00916 } 00917 /* 00918 Helper functions for pass 2 00919 */ 00920 00921 /*! 00922 Removes all attribute nodes from element node $element 00923 */ 00924 function removeAllAttributes( DOMElement $element ) 00925 { 00926 $attribs = $element->attributes; 00927 for ( $i = $attribs->length - 1; $i >= 0; $i-- ) 00928 { 00929 $element->removeAttributeNode( $attribs->item( $i ) ); 00930 } 00931 } 00932 00933 // Check if the element is allowed to exist in this document and remove it if not. 00934 function processBySchemaPresence( $element ) 00935 { 00936 $parent = $element->parentNode; 00937 if ( $parent instanceof DOMElement ) 00938 { 00939 // If this is a foreign element, remove it 00940 if ( !$this->XMLSchema->exists( $element ) ) 00941 { 00942 if ( $element->nodeName == 'custom' ) 00943 { 00944 $this->handleError( self::ERROR_SCHEMA, 00945 ezi18n( 'kernel/classes/datatypes/ezxmltext', "Custom tag '%1' is not allowed.", 00946 false, array( $element->getAttribute( 'name' ) ) ) ); 00947 } 00948 $element = $parent->removeChild( $element ); 00949 return false; 00950 } 00951 00952 // Delete if children required and no children 00953 // If this is an auto-added element, then do not throw error 00954 00955 if ( $element->nodeType == XML_ELEMENT_NODE && ( $this->XMLSchema->childrenRequired( $element ) || $element->getAttribute( 'children_required' ) ) 00956 && !$element->hasChildNodes() ) 00957 { 00958 $element = $parent->removeChild( $element ); 00959 if ( !$element->getAttributeNS( 
'http://ez.no/namespaces/ezpublish3/temporary/', 'new-element' ) )
                {
                    $this->handleError( self::ERROR_SCHEMA, ezi18n( 'kernel/classes/datatypes/ezxmltext', "<%1> tag can't be empty.",
                                        false, array( $element->nodeName ) ) );
                    return false;
                }
            }
        }
        // TODO: break processing of any node that doesn't have parent
        //       and is not a root node.
        elseif ( $element->nodeName != 'section' )
        {
            return false;
        }
        return true;
    }

    /*
     Check that the element has a correct position in the tree and fix it if not.

     The node is checked against its parent with $this->XMLSchema->check().
     When the check fails, the node is taken out of the tree by fixSubtree()
     (its children are re-attached to the parent where the schema allows it).
     A schema error is reported only when the check result is strictly false;
     any other falsy result leads to silent removal.

     \param $element DOM node to check.
     \return true if the node may stay where it is, false if it was removed
             or must not be processed any further.
    */
    function processBySchemaTree( $element )
    {
        $parent = $element->parentNode;

        if ( $parent instanceof DOMElement )
        {
            $schemaCheckResult = $this->XMLSchema->check( $parent, $element );
            if ( !$schemaCheckResult )
            {
                if ( $schemaCheckResult === false )
                {
                    // Remove indenting spaces.
                    // The comparison is done against '' explicitly: a plain truthiness
                    // test would also treat the text "0" as whitespace and drop it
                    // without reporting any error.
                    if ( $element->nodeType == XML_TEXT_NODE && trim( $element->textContent ) === '' )
                    {
                        $element = $parent->removeChild( $element );
                        return false;
                    }

                    $elementName = $element->nodeType == XML_ELEMENT_NODE ? '<' . $element->nodeName . '>' : $element->nodeName;
                    $this->handleError( self::ERROR_SCHEMA, ezi18n( 'kernel/classes/datatypes/ezxmltext', "%1 is not allowed to be a child of <%2>.",
                                        false, array( $elementName, $parent->nodeName ) ) );
                }
                $this->fixSubtree( $element, $element );
                return false;
            }
        }
        // TODO: break processing of any node that doesn't have parent
        //       and is not a root node.
        elseif ( $element->nodeName != 'section' )
        {
            return false;
        }
        return true;
    }

    /*
     Remove only nodes that don't match schema (recursively).

     Every child of $element is moved in front of $mainChild (so it becomes a
     child of $mainChild's parent). A moved child that is itself not allowed
     there is unwrapped the same way. Finally $element is removed from its parent.

     \param $element   node to unwrap and remove.
     \param $mainChild node in front of which the surviving children are inserted;
                       the initial caller passes the same node for both parameters.
    */
    function fixSubtree( $element, $mainChild )
    {
        $parent = $element->parentNode;
        $mainParent = $mainChild->parentNode;
        while ( $element->hasChildNodes() )
        {
            $child = $element->firstChild;

            $child = $element->removeChild( $child );
            $child = $mainParent->insertBefore( $child, $mainChild );

            if ( !$this->XMLSchema->check( $mainParent, $child ) )
            {
                $this->fixSubtree( $child, $mainChild );
            }
        }
        $parent->removeChild( $element );
    }

    /*
     Remove attributes of $element that don't match the schema.

     - attributes with the 'tmp' prefix are always removed, no error is reported;
     - attributes listed by the schema (plain or custom) are kept;
     - an attribute whose name is a known custom attribute but lacks the 'custom'
       prefix is re-created in the custom namespace and the original is removed;
     - any other attribute is removed and reported as a schema error;
     - if $this->RemoveDefaultAttrs is set, allowed attributes whose value equals
       the schema default are removed as well.

     \param $element DOM element whose attributes are processed.
    */
    function processAttributesBySchema( $element )
    {
        $schemaAttributes = $this->XMLSchema->attributes( $element );
        $schemaCustomAttributes = $this->XMLSchema->customAttributes( $element );

        $attributes = $element->attributes;

        // Walk backwards: attributes are removed while iterating.
        for ( $i = $attributes->length - 1; $i >= 0; $i-- )
        {
            $attr = $attributes->item( $i );
            if ( $attr->prefix == 'tmp' )
            {
                $element->removeAttributeNode( $attr );
                continue;
            }

            $allowed = false;
            $removeAttr = false;

            $fullName = $attr->prefix ? $attr->prefix . ':' . $attr->localName : $attr->nodeName;

            // check for allowed custom attributes (3.9)
            if ( $attr->prefix == 'custom' && in_array( $attr->localName, $schemaCustomAttributes ) )
            {
                $allowed = true;
            }
            else
            {
                if ( in_array( $fullName, $schemaAttributes ) )
                {
                    $allowed = true;
                }
                elseif ( in_array( $fullName, $schemaCustomAttributes ) )
                {
                    // add 'custom' prefix if it is not given
                    $allowed = true;
                    $removeAttr = true;
                    $element->setAttributeNS( $this->Namespaces['custom'], 'custom:' . $fullName, $attr->value );
                }
            }

            if ( !$allowed )
            {
                $removeAttr = true;
                $this->handleError( self::ERROR_SCHEMA,
                                    ezi18n( 'kernel/classes/datatypes/ezxmltext', "Attribute '%1' is not allowed in <%2> element.",
                                            false, array( $fullName, $element->nodeName ) ) );
            }
            elseif ( $this->RemoveDefaultAttrs )
            {
                // Remove attributes having default values
                // NOTE(review): loose comparison is kept on purpose; what attrDefaultValue()
                // returns for attributes without a default is not visible here - confirm
                // before tightening it to ===.
                $default = $this->XMLSchema->attrDefaultValue( $element->nodeName, $fullName );
                if ( $attr->value == $default )
                {
                    $removeAttr = true;
                }
            }

            if ( $removeAttr )
            {
                $element->removeAttributeNode( $attr );
            }
        }
    }

    /*
     Call a handler registered for an input tag in $this->InputTags.

     \param $handlerName key inside the tag's $InputTags entry that holds the method name.
     \param $tagName     name of the input tag.
     \param $attributes  tag attributes, passed to the handler by reference.
     \return the handler's return value, or null if the tag has no such handler
             or the handler is not callable (a debug warning is written in that case).
    */
    function callInputHandler( $handlerName, $tagName, &$attributes )
    {
        $result = null;

        // A tag without an entry in $InputTags simply has no handlers; checking the
        // nested key with isset() avoids an undefined-index notice for such tags.
        if ( !isset( $this->InputTags[$tagName][$handlerName] ) )
        {
            return $result;
        }

        $thisInputTag = $this->InputTags[$tagName];
        if ( is_callable( array( $this, $thisInputTag[$handlerName] ) ) )
        {
            $result = call_user_func_array( array( $this, $thisInputTag[$handlerName] ),
                                            array( $tagName, &$attributes ) );
        }
        else
        {
            eZDebug::writeWarning( "'$handlerName' input handler for tag <$tagName> doesn't exist: '" . $thisInputTag[$handlerName] . "'.", 'eZXML input parser' );
        }

        return $result;
    }

    /*
     Call a handler registered for an output tag in $this->OutputTags.

     \param $handlerName key inside the tag's $OutputTags entry that holds the method name.
     \param $element     DOM node the handler is called for; its nodeName selects the entry.
     \param $params      handler parameters, passed to the handler by reference.
     \return the handler's return value, or null if the tag has no such handler
             or the handler is not callable (a debug warning is written in that case).
    */
    function callOutputHandler( $handlerName, $element, &$params )
    {
        $result = null;

        // Nodes whose name has no entry in $OutputTags have no handlers; the nested
        // isset() avoids an undefined-index notice for them.
        if ( !isset( $this->OutputTags[$element->nodeName][$handlerName] ) )
        {
            return $result;
        }

        $thisOutputTag = $this->OutputTags[$element->nodeName];
        if ( is_callable( array( $this, $thisOutputTag[$handlerName] ) ) )
        {
            $result = call_user_func_array( array( $this, $thisOutputTag[$handlerName] ),
                                            array( $element, &$params ) );
        }
        else
        {
            eZDebug::writeWarning( "'$handlerName' output handler for tag <$element->nodeName> doesn't exist: '" . $thisOutputTag[$handlerName] . "'.", 'eZXML input parser' );
        }

        return $result;
    }

    /*
     Creates new element and adds it to array for further post-processing.
     Use this function if you need to process newly created element (check it by schema
     and call 'structure' and 'publish' handlers).

     The element is created in $this->Document and marked with a temporary
     'tmp:new-element' attribute.

     \param $elementName name of the element to create.
     \param $ret         array passed by reference; the element is appended to
                         $ret['new_elements'] (the key is created if missing).
     \return the created element.
    */
    function createAndPublishElement( $elementName, &$ret )
    {
        $element = $this->Document->createElement( $elementName );
        $element->setAttributeNS( 'http://ez.no/namespaces/ezpublish3/temporary/', 'tmp:new-element', 'true' );

        if ( !isset( $ret['new_elements'] ) )
        {
            $ret['new_elements'] = array();
        }

        $ret['new_elements'][] = $element;
        return $element;
    }

    /*
     Call handlers for newly created elements.

     For every element the following steps are run in order; a failing schema
     step stops the processing of that element only:
     processBySchemaPresence(), 'structHandler', processBySchemaTree(),
     'publishHandler', attribute clean-up by schema.

     \param $createdElements array of DOM elements, e.g. the 'new_elements' list
                             filled by createAndPublishElement().
    */
    function processNewElements( $createdElements )
    {
        foreach ( $createdElements as $element )
        {
            eZDebugSetting::writeDebug( 'kernel-datatype-ezxmltext', 'processing new element ' . $element->nodeName );
            $tmp = null;

            // The document is dumped to the debug output after each stage,
            // regardless of the stage's result.
            $isPresenceValid = $this->processBySchemaPresence( $element );
            eZDebugSetting::writeDebug( 'kernel-datatype-ezxmltext', $this->Document->saveXML(), 'xml string after processBySchemaPresence for new element ' . $element->nodeName );
            if ( !$isPresenceValid )
            {
                continue;
            }

            // Call "Structure handler"
            $this->callOutputHandler( 'structHandler', $element, $tmp );

            $isPositionValid = $this->processBySchemaTree( $element );
            eZDebugSetting::writeDebug( 'kernel-datatype-ezxmltext', $this->Document->saveXML(), 'xml string after processBySchemaTree for new element ' . $element->nodeName );
            if ( !$isPositionValid )
            {
                continue;
            }

            $tmp2 = null;
            // Call "Publish handler"
            $this->callOutputHandler( 'publishHandler', $element, $tmp2 );
            eZDebugSetting::writeDebug( 'kernel-datatype-ezxmltext', $this->Document->saveXML(), 'xml string after callOutputHandler publishHandler for new element ' . $element->nodeName );

            // Process attributes according to the schema
            if ( $element->hasAttributes() )
            {
                if ( !$this->XMLSchema->hasAttributes( $element ) )
                {
                    eZXMLInputParser::removeAllAttributes( $element );
                }
                else
                {
                    $this->processAttributesBySchema( $element );
                }
            }
        }
    }

    /// \public
    /// \return array of error messages collected by handleError().
    function getMessages()
    {
        return $this->Messages;
    }

    /// \public
    /// \return false if handleError() has flagged the input as invalid, true otherwise.
    function isValid()
    {
        return $this->IsInputValid;
    }

    /*
     Register an error of the given type.

     \param $type    one of the ERROR_* constants; tested as a bit against the
                     configured error levels.
     \param $message message text; stored only if non-empty and only when $type
                     matches $this->DetectErrorLevel.

     If $type matches $this->DetectErrorLevel the input is flagged invalid and the
     message is stored. If $type matches $this->ValidateErrorLevel the input is
     flagged invalid and $this->QuitProcess is set.
    */
    function handleError( $type, $message )
    {
        if ( $type & $this->DetectErrorLevel )
        {
            $this->IsInputValid = false;
            if ( $message )
            {
                $this->Messages[] = $message;
            }
        }

        if ( $type & $this->ValidateErrorLevel )
        {
            $this->IsInputValid = false;
            $this->QuitProcess = true;
        }
    }

    // Name of the DOM document class. PHP resolves class names case-insensitively,
    // so this value refers to DOMDocument.
    public $DOMDocumentClass = 'DOMDOcument';

    // Schema object used for all validity checks (check(), attributes(), ...).
    public $XMLSchema;
    // DOM document being built and processed.
    public $Document = null;
    // Error messages collected by handleError().
    public $Messages = array();
    public $eZPublishVersion;

    public $ParentStack = array();

    // Bitmask of ERROR_* types that stop the processing (see handleError()).
    public $ValidateErrorLevel;
    // Bitmask of ERROR_* types that are recorded as messages (see handleError()).
    public $DetectErrorLevel;

    // Set to false by handleError() when a detected or validated error occurs.
    public $IsInputValid = true;
    // Set to true by handleError() for errors matching $ValidateErrorLevel.
    public $QuitProcess = false;

    // options that depend on settings
    public $TrimSpaces = true;
    public $AllowMultipleSpaces = false;
    public $AllowNumericEntities = false;
    public $StrictHeaders = false;

    // options that depend on parameters passed
    public $ParseLineBreaks = false;
    // When true, processAttributesBySchema() drops attributes equal to their schema default.
    public $RemoveDefaultAttrs = false;
}
?>