|
eZ Publish
[trunk]
|
<?php
/**
 * File containing the eZXMLInputParser class.
 *
 * @copyright Copyright (C) 1999-2012 eZ Systems AS. All rights reserved.
 * @license http://www.gnu.org/licenses/gpl-2.0.txt GNU General Public License v2
 * @version //autogentag//
 * @package kernel
 */

/*
    Base class for the input parser.
    The goal of the parser is XML/HTML analyzing, fixing and transforming.
    The input is processed in 2 passes:
    - 1st pass: Parsing input, check for syntax errors, build DOM tree.
    - 2nd pass: Walking through DOM tree, checking validity by XML schema,
                calling tag handlers to transform the tree.

    Both passes are controlled by the arrays described below and user handler functions.

*/

// if ( !class_exists( 'eZXMLSchema' ) ) // AS 21-09-2007: commented out because of include_once being commented out
class eZXMLInputParser
{
    /// \deprecated (back-compatibility)
    const SHOW_NO_ERRORS = 0;
    const SHOW_SCHEMA_ERRORS = 1;
    const SHOW_ALL_ERRORS = 2;

    /// Use these constants for error types
    const ERROR_NONE = 0;
    const ERROR_SYNTAX = 4;
    const ERROR_SCHEMA = 8;
    const ERROR_DATA = 16;
    const ERROR_ALL = 28; // 4+8+16

    /* $InputTags array contains properties of elements that come from the input.

    Each array element describes a tag that comes from the input. The array index is
    the tag's name. Each element is an array that may contain the following members:

    'name'        - a string representing a new name of the tag,
    'nameHandler' - a name of the function that returns new tag name. Function format:
                    function tagNameHandler( $tagName, &$attributes )

    If none of those elements is defined the original tag's name is used.

    'noChildren'  - boolean value that determines if this tag could have child tags,
                    default value is false.

    Example:

    public $InputTags = array(

        'original-name' => array( 'name' => 'new-name' ),

        'original-name2' => array( 'nameHandler' => 'tagNameHandler',
                                   'noChildren' => true ),

         ...

         );
    */

    public $InputTags = array();

    /*
    $OutputTags array contains properties of elements that are produced in the output.
    Each array element describes a tag presented in the output. The array index is
    the tag's name. Each element is an array that may contain the following members:

    'parsingHandler' - "Parsing handler" called at parse pass 1 before processing tag's children.
    'initHandler'    - "Init handler" called at pass 2 before processing tag's children.
    'structHandler'  - "Structure handler" called at pass 2 after processing tag's children,
                       but before schema validity check. It can be used to implement structure
                       transformations.
    'publishHandler' - "Publish handler" called at pass 2 after schema validity check, so it is called
                       in case the element has its guaranteed place in the DOM tree.

    'attributes'     - an array that describes attributes transformations. Array's index is the
                       original name of an attribute, and the value is the new name.

    'requiredInputAttributes' - attributes that are required in the input tag. If they are not present
                                the invalid input flag is raised.

    Example:

    public $OutputTags = array(

        'custom'    => array( 'parsingHandler' => 'parsingHandlerCustom',
                              'initHandler' => 'initHandlerCustom',
                              'structHandler' => 'structHandlerCustom',
                              'publishHandler' => 'publishHandlerCustom',
                              'attributes' => array( 'title' => 'name' ) ),

        ...
    );

    */

    public $OutputTags = array();

    public $Namespaces = array( 'image' => 'http://ez.no/namespaces/ezpublish3/image/',
                                'xhtml' => 'http://ez.no/namespaces/ezpublish3/xhtml/',
                                'custom' => 'http://ez.no/namespaces/ezpublish3/custom/',
                                'tmp' => 'http://ez.no/namespaces/ezpublish3/temporary/' );

    /*!
     The constructor.

     \param $validateErrorLevel Determines types of errors that break input processing.
                                It's possible to combine any error types, by creating a bitmask of self::ERROR_* constants.
                                \c true value means that all errors defined by $detectErrorLevel parameter will break further processing,
                                \c false is treated as self::ERROR_NONE.
     \param $detectErrorLevel Determines types of errors that will be detected and added to error log ($Messages).
                              The deprecated SHOW_SCHEMA_ERRORS / SHOW_ALL_ERRORS values are mapped to
                              ERROR_SCHEMA / ERROR_ALL.
     \param $parseLineBreaks Stored in $ParseLineBreaks; when true, line breaks in the input are kept and
                             turned into <br/> elements during pass 1 instead of being stripped.
     \param $removeDefaultAttrs Stored in $RemoveDefaultAttrs; when true, attributes whose value equals the
                                schema default are removed during pass 2.
    */
    function __construct( $validateErrorLevel = self::ERROR_NONE, $detectErrorLevel = self::ERROR_NONE, $parseLineBreaks = false,
                          $removeDefaultAttrs = false )
    {
        // Back-compatibility fixes:
        if ( $detectErrorLevel === self::SHOW_SCHEMA_ERRORS )
        {
            $detectErrorLevel = self::ERROR_SCHEMA;
        }
        elseif ( $detectErrorLevel === self::SHOW_ALL_ERRORS )
        {
            $detectErrorLevel = self::ERROR_ALL;
        }

        if ( $validateErrorLevel === false )
        {
            $validateErrorLevel = self::ERROR_NONE;
        }
        elseif ( $validateErrorLevel === true )
        {
            $validateErrorLevel = $detectErrorLevel;
        }

        $this->ValidateErrorLevel = $validateErrorLevel;
        $this->DetectErrorLevel = $detectErrorLevel;

        $this->RemoveDefaultAttrs = $removeDefaultAttrs;
        $this->ParseLineBreaks = $parseLineBreaks;

        $this->XMLSchema = eZXMLSchema::instance();

        $this->eZPublishVersion = eZPublishSDK::majorVersion() + eZPublishSDK::minorVersion() * 0.1;

        // Optional boolean switches from ezxml.ini; a property is only overwritten
        // when the corresponding setting is present.
        $ini = eZINI::instance( 'ezxml.ini' );
        foreach ( array( 'TrimSpaces', 'AllowMultipleSpaces', 'AllowNumericEntities' ) as $settingName )
        {
            if ( $ini->hasVariable( 'InputSettings', $settingName ) )
            {
                $this->$settingName = $ini->variable( 'InputSettings', $settingName ) == 'true';
            }
        }

        $contentIni = eZINI::instance( 'content.ini' );
        $useStrictHeaderRule = $contentIni->variable( 'header', 'UseStrictHeaderRule' );
        $this->StrictHeaders = $useStrictHeaderRule == 'true';
    }

    /*!
     PHP 4 style constructor, kept only so that existing code calling
     $this->eZXMLInputParser( ... ) keeps working. It simply forwards to __construct().

     \deprecated Use __construct() instead.
    */
    function eZXMLInputParser( $validateErrorLevel = self::ERROR_NONE, $detectErrorLevel = self::ERROR_NONE, $parseLineBreaks = false,
                               $removeDefaultAttrs = false )
    {
        $this->__construct( $validateErrorLevel, $detectErrorLevel, $parseLineBreaks, $removeDefaultAttrs );
    }

    /// \public
    /// Sets the name of the class instantiated for the working DOM document.
    function setDOMDocumentClass( $DOMDocumentClass )
    {
        $this->DOMDocumentClass = $DOMDocumentClass;
    }

    /// \public
    /// Enables or disables conversion of line breaks into <br/> elements.
    function setParseLineBreaks( $value )
    {
        $this->ParseLineBreaks = $value;
    }

    /// \public
    /// Enables or disables removal of attributes that carry their schema default value.
    function setRemoveDefaultAttrs( $value )
    {
        $this->RemoveDefaultAttrs = $value;
    }

    /// \public
    /// Creates the root <section> element (with the image, xhtml and custom namespace
    /// declarations), creating the document first if there is none, and returns the document.
    function createRootNode()
    {
        if ( !$this->Document )
        {
            $this->Document = new $this->DOMDocumentClass( '1.0', 'utf-8' );
        }

        // Creating root section with namespaces definitions
        $mainSection = $this->Document->createElement( 'section' );
        $this->Document->appendChild( $mainSection );
        foreach( array( 'image', 'xhtml', 'custom' ) as $prefix )
        {
            $mainSection->setAttributeNS( 'http://www.w3.org/2000/xmlns/', 'xmlns:' . $prefix, $this->Namespaces[$prefix] );
        }
        return $this->Document;
    }

    /*!
\public
     Call this function to process your input.

     \param $text Input markup to parse.
     \param $createRootNode When true, a root <section> element is created before parsing.
     \return The resulting DOM document, or false if processing was aborted in either pass.
    */
    function process( $text, $createRootNode = true )
    {
        // Carriage returns are dropped, tabs become single spaces
        $text = str_replace( "\t", ' ', str_replace( "\r", '', $text ) );

        // replace unicode chars that will break the XML validity
        // see http://www.w3.org/TR/REC-xml/#charsets
        $replacedCount = 0;
        $text = preg_replace( '/[^\x{0009}\x{000a}\x{000d}\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}]+/u', ' ', $text, -1, $replacedCount );
        if ( $replacedCount > 0 )
        {
            $this->Messages[] = ezpI18n::tr(
                'kernel/classes/datatypes/ezxmltext',
                "%count invalid character(s) have been found and replaced by a space",
                false,
                array( '%count' => $replacedCount )
            );
        }

        // Line breaks only survive when they are going to be converted into <br/>
        if ( !$this->ParseLineBreaks )
        {
            $text = str_replace( "\n", '', $text);
        }

        // Every run starts from a fresh document
        $this->Document = new $this->DOMDocumentClass( '1.0', 'utf-8' );
        if ( $createRootNode )
        {
            $this->createRootNode();
        }

        // Pass 1: parse the source string into the DOM tree
        $this->performPass1( $text );

        //$this->Document->formatOutput = true;
        $debugEnabled = eZDebugSetting::isConditionTrue( 'kernel-datatype-ezxmltext', eZDebug::LEVEL_DEBUG );
        if ( $debugEnabled )
        {
            eZDebug::writeDebug( $this->Document->saveXML(), eZDebugSetting::changeLabel( 'kernel-datatype-ezxmltext', 'XML after pass 1' ) );
        }

        if ( $this->QuitProcess )
        {
            return false;
        }

        // Pass 2: walk the tree, run handlers and validate against the schema
        $this->performPass2();

        //$this->Document->formatOutput = true;
        if ( $debugEnabled )
        {
            eZDebug::writeDebug( $this->Document->saveXML(), eZDebugSetting::changeLabel( 'kernel-datatype-ezxmltext', 'XML after pass 2' ) );
        }

        return $this->QuitProcess ? false : $this->Document;
    }

    /*
     \public
     Pass 1: Parsing the source HTML string.
*/

    function performPass1( &$data )
    {
        $ret = true;
        $pos = 0;

        // parseTag() takes its parent by reference but never reassigns it, so a local
        // copy of the handle is passed instead of the read-only DOM property itself.
        $root = $this->Document->documentElement;
        if ( $root )
        {
            // Keep parsing top-level nodes into the root element until the input is consumed
            do
            {
                $this->parseTag( $data, $pos, $root );
                if ( $this->QuitProcess )
                {
                    $ret = false;
                    break;
                }

            }
            while( $pos < strlen( $data ) );
        }
        else
        {
            // No root element: the first parsed node is appended to the document itself
            $tmp = null;
            $this->parseTag( $data, $pos, $tmp );
            if ( $this->QuitProcess )
            {
                $ret = false;
            }
        }
        return $ret;
    }

    /*!
     The main recursive function for pass 1.

     Parses one node (text, closing tag, line break or opening tag) starting at $pos,
     appends the created node to $parent (or to the document when $parent is empty) and
     recursively parses its children.

     \param $data   The complete input string.
     \param $pos    Current parsing offset in $data, advanced as input is consumed.
     \param $parent DOM node that receives the parsed node.
     \return \c true when the caller has to stop parsing children of $parent (end of input,
             matching closing tag found, or parent auto-closed); \c false otherwise.
    */
    function parseTag( &$data, &$pos, &$parent )
    {
        // Find tag, determine its type, name and attributes.
        $initialPos = $pos;

        if ( $pos >= strlen( $data ) )
        {
            return true;
        }
        $tagBeginPos = strpos( $data, '<', $pos );

        if ( $this->ParseLineBreaks )
        {
            // Regard line break as a start tag position
            $lineBreakPos = strpos( $data, "\n", $pos );
            if ( $lineBreakPos !== false )
            {
                $tagBeginPos = $tagBeginPos === false ? $lineBreakPos : min( $tagBeginPos, $lineBreakPos );
            }
        }

        $tagName = '';
        $attributes = null;
        // If it doesn't begin with '<' then it's a text node.
        if ( $tagBeginPos != $pos || $tagBeginPos === false )
        {
            $pos = $initialPos;
            $tagName = $newTagName = '#text';
            $noChildren = true;

            // No further tag: the text runs up to the end of the input
            if ( !$tagBeginPos )
            {
                $tagBeginPos = strlen( $data );
            }

            $textContent = substr( $data, $pos, $tagBeginPos - $pos );

            $textContent = $this->washText( $textContent );

            $pos = $tagBeginPos;
            if ( $textContent === '' )
            {
                return false;
            }
        }
        // Process closing tag.
        elseif ( $data[$tagBeginPos] == '<' && $tagBeginPos + 1 < strlen( $data ) &&
                 $data[$tagBeginPos + 1] == '/' )
        {
            $tagEndPos = strpos( $data, '>', $tagBeginPos + 1 );
            if ( $tagEndPos === false )
            {
                $pos = $tagBeginPos + 1;

                $this->handleError( self::ERROR_SYNTAX, ezpI18n::tr( 'kernel/classes/datatypes/ezxmltext', 'Wrong closing tag' ) );
                return false;
            }

            $pos = $tagEndPos + 1;
            $closedTagName = strtolower( trim( substr( $data, $tagBeginPos + 2, $tagEndPos - $tagBeginPos - 2 ) ) );

            // Find matching tag in ParentStack array
            $firstLoop = true;
            for( $i = count( $this->ParentStack ) - 1; $i >= 0; $i-- )
            {
                $parentNames = $this->ParentStack[$i];
                if ( $parentNames[0] == $closedTagName )
                {
                    array_pop( $this->ParentStack );
                    if ( !$firstLoop )
                    {
                        // The match is deeper in the stack: close the innermost parent only and
                        // rewind, so the same closing tag is parsed again one level up.
                        $pos = $tagBeginPos;
                        return true;
                    }
                    // If newTagName was '' we don't break children loop
                    elseif ( $parentNames[1] !== '' )
                    {
                        return true;
                    }
                    else
                    {
                        return false;
                    }
                }
                $firstLoop = false;
            }

            $this->handleError( self::ERROR_SYNTAX, ezpI18n::tr( 'kernel/classes/datatypes/ezxmltext', 'Wrong closing tag : </%1>.', false, array( $closedTagName ) ) );

            return false;
        }
        // Insert <br/> instead of linebreaks
        elseif ( $this->ParseLineBreaks && $data[$tagBeginPos] == "\n" )
        {
            $newTagName = 'br';
            $noChildren = true;
            $pos = $tagBeginPos + 1;
        }
        // Regular tag: get tag's name and attributes.
        else
        {
            $tagEndPos = strpos( $data, '>', $tagBeginPos );
            if ( $tagEndPos === false )
            {
                $pos = $tagBeginPos + 1;

                $this->handleError( self::ERROR_SYNTAX, ezpI18n::tr( 'kernel/classes/datatypes/ezxmltext', 'Wrong opening tag' ) );
                return false;
            }

            $pos = $tagEndPos + 1;
            $tagString = substr( $data, $tagBeginPos + 1, $tagEndPos - $tagBeginPos - 1 );
            // Check for a trailing slash (self-closing tag)
            $noChildren = substr( $tagString, -1, 1 ) == '/' ? true : false;
            // Remove the trailing slash and the spaces before it
            $tagString = preg_replace( "/\s*\/$/", "", $tagString );

            $firstSpacePos = strpos( $tagString, ' ' );
            if ( $firstSpacePos === false )
            {
                $tagName = strtolower( trim( $tagString ) );
                $attributeString = '';
            }
            else
            {
                $tagName = strtolower( substr( $tagString, 0, $firstSpacePos ) );
                $attributeString = substr( $tagString, $firstSpacePos + 1 );
                $attributeString = trim( $attributeString );
                // Parse attribute string
                if ( $attributeString )
                {
                    $attributes = $this->parseAttributes( $attributeString );
                }
            }

            // Determine tag's name
            if ( isset( $this->InputTags[$tagName] ) )
            {
                $thisInputTag = $this->InputTags[$tagName];

                if ( isset( $thisInputTag['name'] ) )
                {
                    $newTagName = $thisInputTag['name'];
                }
                else
                {
                    $newTagName = $this->callInputHandler( 'nameHandler', $tagName, $attributes );
                }
            }
            else
            {
                if ( $this->XMLSchema->exists( $tagName ) )
                {
                    $newTagName = $tagName;
                }
                else
                {
                    $this->handleError( self::ERROR_SYNTAX, ezpI18n::tr( 'kernel/classes/datatypes/ezxmltext', 'Unknown tag: <%1>.', false, array( $tagName ) ) );
                    return false;
                }
            }

            // Check 'noChildren' property
            if ( isset( $thisInputTag['noChildren'] ) )
            {
                $noChildren = true;
            }

            $thisOutputTag = isset( $this->OutputTags[$newTagName] ) ? $this->OutputTags[$newTagName] : null;

            // Implementation of 'autoCloseOn' rule ( Handling of unclosed tags, ex.: <p>, <li> )
            if ( isset( $thisOutputTag['autoCloseOn'] ) &&
                 $parent &&
                 $parent->parentNode instanceof DOMElement &&
                 in_array( $parent->nodeName, $thisOutputTag['autoCloseOn'] ) )
            {
                // Wrong nesting: auto-close parent and try to re-parse this tag at higher level
                array_pop( $this->ParentStack );
                $pos = $tagBeginPos;
                return true;
            }

            // Append to parent stack
            if ( !$noChildren && $newTagName !== false )
            {
                $this->ParentStack[] = array( $tagName, $newTagName, $attributeString );
            }

            if ( !$newTagName )
            {
                // If $newTagName is an empty string then it's not an error
                if ( $newTagName === false )
                    $this->handleError( self::ERROR_SYNTAX, ezpI18n::tr( 'kernel/classes/datatypes/ezxmltext', "Can't convert tag's name: <%1>.", false, array( $tagName ) ) );

                return false;
            }

            // wordmatch.ini support
            if ( $attributeString )
            {
                $attributes = $this->wordMatchSupport( $newTagName, $attributes, $attributeString );
            }
        }

        // Create text or normal node.
        if ( $newTagName == '#text' )
        {
            $element = $this->Document->createTextNode( $textContent );
        }
        else
        {
            $element = $this->Document->createElement( $newTagName );
        }

        if ( $attributes )
        {
            $this->setAttributes( $element, $attributes );
        }

        // Append element as a child or set it as root if there is no parent.
        if ( $parent )
        {
            $parent->appendChild( $element );
        }
        else
        {
            $this->Document->appendChild( $element );
        }

        // The parsing handler receives the input, the offset and the tag start by reference
        $params = array();
        $params[] =& $data;
        $params[] =& $pos;
        $params[] =& $tagBeginPos;
        $result = $this->callOutputHandler( 'parsingHandler', $element, $params );

        if ( $result === false )
        {
            // This tag is already parsed in handler
            if ( !$noChildren )
            {
                array_pop( $this->ParentStack );
            }
            return false;
        }

        if ( $this->QuitProcess )
        {
            return false;
        }

        // Process children
        if ( !$noChildren )
        {
            do
            {
                $parseResult = $this->parseTag( $data, $pos, $element );
                if ( $this->QuitProcess )
                {
                    return false;
                }
            }
            while( $parseResult !== true );
        }

        return false;
    }

    /*
       Helper functions for pass 1
    */

    /*!
     Parses the attribute part of a tag into an array.

     \param $attributeString Everything between the tag name and the closing '>'.
     \return array of attribute values indexed by lower-cased attribute name;
             attributes with an empty value are skipped.
    */
    function parseAttributes( $attributeString )
    {
        $attributes = array();
        // Valid characters for XML attributes
        // @see http://www.w3.org/TR/xml/#NT-Name
        $nameStartChar = ':A-Z_a-z\\xC0-\\xD6\\xD8-\\xF6\\xF8-\\x{2FF}\\x{370}-\\x{37D}\\x{37F}-\\x{1FFF}\\x{200C}-\\x{200D}\\x{2070}-\\x{218F}\\x{2C00}-\\x{2FEF}\\x{3001}-\\x{D7FF}\\x{F900}-\\x{FDCF}\\x{FDF0}-\\x{FFFD}\\x{10000}-\\x{EFFFF}';
        if (
            preg_match_all(
                "/\s+([$nameStartChar][$nameStartChar\-.0-9\\xB7\\x{0300}-\\x{036F}\\x{203F}-\\x{2040}]*)\s*=\s*(?:(?:\"([^\"]+?)\")|(?:'([^']+?)')|(?: *([^\"'\s]+)\s*))/u",
                " " . $attributeString,
                $attributeArray,
                PREG_SET_ORDER
            )
        ) {
            foreach ( $attributeArray as $attribute )
            {
                // Value will always be at the last position
                $value = trim( array_pop( $attribute ) );
                // Value of '0' is valid ( eg. border='0' )
                if ( $value !== '' && $value !== false && $value !== null )
                {
                    $attributes[strtolower( $attribute[1] )] = $value;
                }
            }
        }

        return $attributes;
    }

    /*!
     Copies the parsed input attributes onto $element.

     Attribute names are translated through the 'attributes' map of the element's output tag,
     'class' values are checked against the classes allowed by the schema, and prefixed names
     are set in the namespace registered for that prefix. Finally the presence of the
     'requiredInputAttributes' of the output tag is checked.

     \param $element    DOM element that receives the attributes.
     \param $attributes array of attribute values indexed by input attribute name.
    */
    function setAttributes( $element, $attributes )
    {
        // Elements without an $OutputTags entry simply have no attribute map
        $thisOutputTag = isset( $this->OutputTags[$element->nodeName] ) ? $this->OutputTags[$element->nodeName] : null;

        foreach( $attributes as $key => $value )
        {
            // Convert attribute names
            if ( isset( $thisOutputTag['attributes'] ) &&
                 isset( $thisOutputTag['attributes'][$key] ) )
            {
                $qualifiedName = $thisOutputTag['attributes'][$key];
            }
            else
            {
                $qualifiedName = $key;
            }

            // Filter classes
            if ( $qualifiedName == 'class' )
            {
                $classesList = $this->XMLSchema->getClassesList( $element->nodeName );
                if ( !in_array( $value, $classesList ) )
                {
                    $this->handleError( self::ERROR_DATA,
                                        ezpI18n::tr( 'kernel/classes/datatypes/ezxmltext', "Class '%1' is not allowed for element <%2> (check content.ini).",
                                        false, array( $value, $element->nodeName ) ) );
                    continue;
                }
            }

            // Create attribute nodes
            if ( $qualifiedName )
            {
                if ( strpos( $qualifiedName, ':' ) )
                {
                    list( $prefix, $name ) = explode( ':', $qualifiedName );
                    if ( isset( $this->Namespaces[$prefix] ) )
                    {
                        $URI = $this->Namespaces[$prefix];
                        $element->setAttributeNS( $URI, $qualifiedName, $value );
                    }
                    else
                    {
                        eZDebug::writeWarning( "No namespace defined for prefix '$prefix'.", 'eZXML input parser' );
                    }
                }
                else
                {
                    $element->setAttribute( $qualifiedName, $value );
                }
            }
        }

        // Check that required attrs are present
        if ( isset( $thisOutputTag['requiredInputAttributes'] ) )
        {
            foreach( $thisOutputTag['requiredInputAttributes'] as $reqAttrName )
            {
                $presented = false;
                foreach( $attributes as $key => $value )
                {
                    if ( $key == $reqAttrName )
                    {
                        $presented = true;
                        break;
                    }
                }
                if ( !$presented )
                {
                    $this->handleError( self::ERROR_SCHEMA,
                                        ezpI18n::tr( 'kernel/classes/datatypes/ezxmltext', "Required attribute '%1' is not presented in tag <%2>.",
                                        false, array( $reqAttrName, $element->nodeName ) ) );
                }
            }
        }
    }

    /*!
     Prepares the content of a text node: decodes the basic entities, converts numeric
     entities unless $AllowNumericEntities is set, and collapses runs of spaces unless
     $AllowMultipleSpaces is set.
    */
    function washText( $textContent )
    {
        $textContent = $this->entitiesDecode( $textContent );

        if ( !$this->AllowNumericEntities )
        {
            $textContent = $this->convertNumericEntities( $textContent );
        }

        if ( !$this->AllowMultipleSpaces )
        {
            $textContent = preg_replace( "/ {2,}/", " ", $textContent );
        }

        return $textContent;
    }

    /*!
     Decodes the predefined XML entities and the numeric apostrophe entity.
     '&amp;' is decoded last so that an escaped entity such as '&amp;lt;' is
     decoded one level only.
    */
    function entitiesDecode( $text )
    {
        $text = str_replace( '&#039;', "'", $text );

        $text = str_replace( '&gt;', '>', $text );
        $text = str_replace( '&lt;', '<', $text );
        $text = str_replace( '&apos;', "'", $text );
        $text = str_replace( '&quot;', '"', $text );
        $text = str_replace( '&amp;', '&', $text );
        return $text;
    }

    /*!
     Replaces numeric entities ( "&#" code ";" ) by the character returned by the
     unicode text codec for that code. Everything else is copied unchanged, including
     a "&#" that has no terminating ';'.

     \param $text Text to convert; strings shorter than 4 bytes are returned as is.
     \return the converted string.
    */
    function convertNumericEntities( $text )
    {
        if ( strlen( $text ) < 4 )
        {
            return $text;
        }
        // Convert other HTML entities to the current charset characters.
        $codec = eZTextCodec::instance( 'unicode', false );
        $length = strlen( $text );
        $pos = 0;
        $domString = "";
        while ( $pos < $length )
        {
            $entityPos = strpos( $text, '&#', $pos );
            if ( $entityPos === false )
            {
                // No more entities: keep the whole remainder, including the last character
                $domString .= substr( $text, $pos );
                break;
            }

            // Plain text in front of the entity
            $domString .= substr( $text, $pos, $entityPos - $pos );

            $endPos = strpos( $text, ';', $entityPos + 2 );
            if ( $endPos === false )
            {
                // Unterminated entity: keep the "&#" literally and go on behind it
                $domString .= '&#';
                $pos = $entityPos + 2;
                continue;
            }

            $code = substr( $text, $entityPos + 2, $endPos - ( $entityPos + 2 ) );
            $domString .= $codec->convertString( array( $code ) );

            $pos = $endPos + 1;
        }
        return $domString;
    }

    /*!
     Applies the 'MatchString' rules of wordmatch.ini for tag $newTagName: for every rule
     whose pattern matches $attributeString (case-insensitively) the 'class' attribute is
     set to the rule's key and the 'style' attribute is removed.

     Returns modified attributes parameter
    */
    protected function wordMatchSupport( $newTagName, $attributes, $attributeString )
    {
        $ini = eZINI::instance( 'wordmatch.ini' );
        if ( $ini->hasVariable( $newTagName, 'MatchString' ) )
        {
            $matchArray = $ini->variable( $newTagName, 'MatchString' );
            if ( $matchArray )
            {
                foreach ( array_keys( $matchArray ) as $key )
                {
                    $matchString = $matchArray[$key];
                    if ( preg_match( "/$matchString/i", $attributeString ) )
                    {
                        $attributes['class'] = $key;
                        unset( $attributes['style'] );
                    }
                }
            }
        }
        return $attributes;
    }


    /*!
     \public
     Pass 2: Process the tree, run handlers, rebuild and validate.
*/

    function performPass2()
    {
        // The root element has no preceding sibling, hence no previous handler result
        $noPreviousResult = null;

        $this->processSubtree( $this->Document->documentElement, $noPreviousResult );
    }

    /*!
     Main recursive function for pass 2.

     Runs the init handler of $element, processes its children (depth first), then runs the
     structure handler, the schema checks, the publish handler and the attribute checks.

     \param $element           DOM node to process.
     \param $lastHandlerResult Passed by reference to the structure handler; for child nodes it
                               holds the 'result' entry returned for the previous sibling, if any.
     \return The structure handler's return value, or null when processing was aborted
             while handling the children.
    */
    function processSubtree( $element, &$lastHandlerResult )
    {
        $ret = null;
        $tmp = null;
        $debugCondition = 'kernel-datatype-ezxmltext';

        // Call "Init handler"
        $this->callOutputHandler( 'initHandler', $element, $tmp );

        $debug = eZDebugSetting::isConditionTrue( $debugCondition, eZDebug::LEVEL_DEBUG );

        // Process children
        if ( $element->hasChildNodes() )
        {
            // Work on a snapshot of the children: the live childNodes list changes
            // while handlers restructure the children of this element.
            $children = array();
            foreach ( $element->childNodes as $childNode )
            {
                $children[] = $childNode;
            }

            $lastResult = null;
            $newElements = array();
            foreach ( $children as $child )
            {
                if ( $debug )
                {
                    eZDebug::writeDebug( 'processing children, current child: ' . $child->nodeName, eZDebugSetting::changeLabel( $debugCondition, __METHOD__ ) );
                }

                $childReturn = $this->processSubtree( $child, $lastResult );

                // The next sibling only sees a result if this child produced one
                unset( $lastResult );
                if ( isset( $childReturn['result'] ) )
                {
                    if ( $debug )
                    {
                        eZDebug::writeDebug( 'return result is set for child ' . $child->nodeName, eZDebugSetting::changeLabel( $debugCondition, __METHOD__ ) );
                    }

                    $lastResult = $childReturn['result'];
                }

                if ( isset( $childReturn['new_elements'] ) )
                {
                    $newElements = array_merge( $newElements, $childReturn['new_elements'] );
                }

                if ( $this->QuitProcess )
                {
                    return $ret;
                }
            }

            if ( $debug )
            {
                eZDebug::writeDebug( $this->Document->saveXML(),
                                     eZDebugSetting::changeLabel( $debugCondition,
                                                                  'XML before processNewElements for element ' . $element->nodeName ) );
            }

            // process elements created in children handlers
            $this->processNewElements( $newElements );

            if ( $debug )
            {
                eZDebug::writeDebug( $this->Document->saveXML(),
                                     eZDebugSetting::changeLabel( $debugCondition,
                                                                  'XML after processNewElements for element ' . $element->nodeName ) );
            }
        }

        // Call "Structure handler"
        if ( $debug )
        {
            eZDebug::writeDebug( $this->Document->saveXML(),
                                 eZDebugSetting::changeLabel( $debugCondition,
                                                              'XML before callOutputHandler structHandler for element ' . $element->nodeName ) );
        }

        $ret = $this->callOutputHandler( 'structHandler', $element, $lastHandlerResult );

        if ( $debug )
        {
            eZDebug::writeDebug( $this->Document->saveXML(),
                                 eZDebugSetting::changeLabel( $debugCondition,
                                                              'XML after callOutputHandler structHandler for element ' . $element->nodeName ) );
            eZDebug::writeDebug( $ret,
                                 eZDebugSetting::changeLabel( $debugCondition,
                                                              'return value of callOutputHandler structHandler for element ' . $element->nodeName ) );
        }

        // Process by schema (check if element is allowed to exist)
        $presenceOk = $this->processBySchemaPresence( $element );
        if ( $debug )
        {
            $label = $presenceOk ? 'XML after processBySchemaPresence for element '
                                 : 'XML after failed processBySchemaPresence for element ';
            eZDebug::writeDebug( $this->Document->saveXML(),
                                 eZDebugSetting::changeLabel( $debugCondition, $label . $element->nodeName ) );
        }
        if ( !$presenceOk )
        {
            return $ret;
        }

        // Process by schema (check place in the tree)
        $treeOk = $this->processBySchemaTree( $element );
        if ( $debug )
        {
            $label = $treeOk ? 'XML after processBySchemaTree for element '
                             : 'XML after failed processBySchemaTree for element ';
            eZDebug::writeDebug( $this->Document->saveXML(),
                                 eZDebugSetting::changeLabel( $debugCondition, $label . $element->nodeName ) );
        }
        if ( !$treeOk )
        {
            return $ret;
        }

        $tmp = null;
        // Call "Publish handler"
        $this->callOutputHandler( 'publishHandler', $element, $tmp );

        // Process attributes according to the schema
        if ( $element->hasAttributes() )
        {
            if ( $this->XMLSchema->hasAttributes( $element ) )
            {
                $this->processAttributesBySchema( $element );
            }
            else
            {
                // The schema defines no attributes for this element at all
                self::removeAllAttributes( $element );
            }
        }
        return $ret;
    }
    /*
       Helper functions for pass 2
    */

    /*!
Removes all attribute nodes from element node $element
    */
    function removeAllAttributes( DOMElement $element )
    {
        $attributeMap = $element->attributes;
        // Walk backwards: the map is live and shrinks with every removal
        $index = $attributeMap->length - 1;
        while ( $index >= 0 )
        {
            $element->removeAttributeNode( $attributeMap->item( $index ) );
            $index--;
        }
    }

    /*!
     Check if the element is allowed to exist in this document and remove it if not.

     \return \c false when the element was rejected (foreign element, empty element that
             requires children, or a parentless node other than 'section'), \c true otherwise.
    */
    function processBySchemaPresence( $element )
    {
        $parent = $element->parentNode;

        // TODO: break processing of any node that doesn't have parent
        //       and is not a root node.
        if ( !( $parent instanceof DOMElement ) )
        {
            return $element->nodeName == 'section';
        }

        // If this is a foreign element, remove it
        if ( !$this->XMLSchema->exists( $element ) )
        {
            if ( $element->nodeName == 'custom' )
            {
                $this->handleError( self::ERROR_SCHEMA,
                                    ezpI18n::tr( 'kernel/classes/datatypes/ezxmltext', "Custom tag '%1' is not allowed.",
                                    false, array( $element->getAttribute( 'name' ) ) ) );
            }
            $element = $parent->removeChild( $element );
            return false;
        }

        // Delete if children required and no children
        // If this is an auto-added element, then do not throw error
        $isEmptyButNeedsChildren =
            $element->nodeType == XML_ELEMENT_NODE &&
            ( $this->XMLSchema->childrenRequired( $element ) || $element->getAttribute( 'children_required' ) ) &&
            !$element->hasChildNodes();
        if ( $isEmptyButNeedsChildren )
        {
            $element = $parent->removeChild( $element );
            $autoAdded = $element->getAttributeNS( 'http://ez.no/namespaces/ezpublish3/temporary/', 'new-element' );
            if ( !$autoAdded )
            {
                $this->handleError( self::ERROR_SCHEMA, ezpI18n::tr( 'kernel/classes/datatypes/ezxmltext', "<%1> tag can't be empty.",
                                    false, array( $element->nodeName ) ) );
                return false;
            }
        }

        return true;
    }

    /*!
     Check that element has a correct position in the tree and fix it if not.

     \return \c true when the element may stay where it is, \c false when it was removed,
             dissolved by fixSubtree() or is a parentless node other than 'section'.
    */
    function processBySchemaTree( $element )
    {
        $parent = $element->parentNode;

        // TODO: break processing of any node that doesn't have parent
        //       and is not a root node.
        if ( !( $parent instanceof DOMElement ) )
        {
            return $element->nodeName == 'section';
        }

        $schemaCheckResult = $this->XMLSchema->check( $parent, $element );
        if ( $schemaCheckResult )
        {
            return true;
        }

        // Only a strict false is reported; other falsy results are fixed silently
        if ( $schemaCheckResult === false )
        {
            // Remove indenting spaces
            if ( $element->nodeType == XML_TEXT_NODE && !trim( $element->textContent ) )
            {
                $element = $parent->removeChild( $element );
                return false;
            }

            if ( $element->nodeType == XML_ELEMENT_NODE )
            {
                $elementName = '<' . $element->nodeName . '>';
            }
            else
            {
                $elementName = $element->nodeName;
            }
            $this->handleError( self::ERROR_SCHEMA, ezpI18n::tr( 'kernel/classes/datatypes/ezxmltext', "%1 is not allowed to be a child of <%2>.",
                                false, array( $elementName, $parent->nodeName ) ) );
        }

        $this->fixSubtree( $element, $element );
        return false;
    }

    /*!
     Remove only nodes that don't match schema (recursively).

     Every child of $element is moved in front of $mainChild; children that are not allowed
     there either are dissolved the same way. Finally $element itself is removed.
    */
    function fixSubtree( $element, $mainChild )
    {
        $parent = $element->parentNode;
        $mainParent = $mainChild->parentNode;

        for ( $child = $element->firstChild; $element->hasChildNodes(); $child = $element->firstChild )
        {
            $moved = $mainParent->insertBefore( $element->removeChild( $child ), $mainChild );

            if ( !$this->XMLSchema->check( $mainParent, $moved ) )
            {
                $this->fixSubtree( $moved, $mainChild );
            }
        }
        $parent->removeChild( $element );
    }

    /*!
     Removes the attributes of $element that the schema does not allow, all attributes with
     the 'tmp' prefix and, when $RemoveDefaultAttrs is set, attributes carrying their default
     value. Un-prefixed attributes listed as custom attributes are re-added with the
     'custom' prefix.
    */
    function processAttributesBySchema( $element )
    {
        // Remove attributes that don't match schema
        $schemaAttributes = $this->XMLSchema->attributes( $element );
        $schemaCustomAttributes = $this->XMLSchema->customAttributes( $element );

        $attributes = $element->attributes;

        for ( $i = $attributes->length - 1; $i >= 0; $i-- )
        {
            $attr = $attributes->item( $i );
            if ( $attr->prefix == 'tmp' )
            {
                $element->removeAttributeNode( $attr );
                continue;
            }

            $allowed = true;
            $removeAttr = false;

            $fullName = $attr->prefix ? $attr->prefix . ':' . $attr->localName : $attr->nodeName;

            // check for allowed custom attributes (3.9)
            if ( $attr->prefix == 'custom' && in_array( $attr->localName, $schemaCustomAttributes ) )
            {
                // allowed as is
            }
            elseif ( in_array( $fullName, $schemaAttributes ) )
            {
                // allowed as is
            }
            elseif ( in_array( $fullName, $schemaCustomAttributes ) )
            {
                // add 'custom' prefix if it is not given
                $removeAttr = true;
                $element->setAttributeNS( $this->Namespaces['custom'], 'custom:' . $fullName, $attr->value );
            }
            else
            {
                $allowed = false;
            }

            if ( !$allowed )
            {
                $removeAttr = true;
                $this->handleError( self::ERROR_SCHEMA,
                                    ezpI18n::tr( 'kernel/classes/datatypes/ezxmltext', "Attribute '%1' is not allowed in <%2> element.",
                                    false, array( $fullName, $element->nodeName ) ) );
            }
            elseif ( $this->RemoveDefaultAttrs )
            {
                // Remove attributes having default values
                $default = $this->XMLSchema->attrDefaultValue( $element->nodeName, $fullName );
                if ( $attr->value == $default )
                {
                    $removeAttr = true;
                }
            }

            if ( $removeAttr )
            {
                $element->removeAttributeNode( $attr );
            }
        }
    }

    /*!
     Calls the handler registered under $handlerName in $InputTags[$tagName], if there is one.

     \param $handlerName Key of the handler in the input tag definition (e.g. 'nameHandler').
     \param $tagName     Name of the input tag.
     \param $attributes  Parsed attributes, passed to the handler by reference.
     \return The handler's return value, or null when no handler is registered or it is not callable.
    */
    function callInputHandler( $handlerName, $tagName, &$attributes )
    {
        $thisInputTag = $this->InputTags[$tagName];
        if ( !isset( $thisInputTag[$handlerName] ) )
        {
            return null;
        }

        $callback = array( $this, $thisInputTag[$handlerName] );
        if ( !is_callable( $callback ) )
        {
            eZDebug::writeWarning( "'$handlerName' input handler for tag <$tagName> doesn't exist: '" . $thisInputTag[$handlerName] . "'.", 'eZXML input parser' );
            return null;
        }

        return call_user_func_array( $callback, array( $tagName, &$attributes ) );
    }

    function callOutputHandler( $handlerName, $element, &$params )
    {
        $result = null;
        $thisOutputTag = $this->OutputTags[$element->nodeName];
        if ( isset( $thisOutputTag[$handlerName] ) )
        {
            if ( is_callable( array( $this, $thisOutputTag[$handlerName] ) ) )
            {
                $result = call_user_func_array( array( $this, $thisOutputTag[$handlerName] ),
                                                array( $element, &$params ) );
            }
            else
            {
                eZDebug::writeWarning( "'$handlerName' output handler for tag <$element->nodeName> doesn't exist: '" . $thisOutputTag[$handlerName] .
"'.", 'eZXML input parser' ); 01171 } 01172 } 01173 01174 return $result; 01175 } 01176 01177 // Creates new element and adds it to array for further post-processing. 01178 // Use this function if you need to process newly created element (check it by schema 01179 // and call 'structure' and 'publish' handlers) 01180 function createAndPublishElement( $elementName, &$ret ) 01181 { 01182 $element = $this->Document->createElement( $elementName ); 01183 $element->setAttributeNS( 'http://ez.no/namespaces/ezpublish3/temporary/', 'tmp:new-element', 'true' ); 01184 01185 if ( !isset( $ret['new_elements'] ) ) 01186 { 01187 $ret['new_elements'] = array(); 01188 } 01189 01190 $ret['new_elements'][] = $element; 01191 return $element; 01192 } 01193 01194 function processNewElements( $createdElements ) 01195 { 01196 $debug = eZDebugSetting::isConditionTrue( 'kernel-datatype-ezxmltext', eZDebug::LEVEL_DEBUG ); 01197 // Call handlers for newly created elements 01198 foreach ( $createdElements as $element ) 01199 { 01200 if ( $debug ) 01201 { 01202 eZDebug::writeDebug( 'processing new element ' . $element->nodeName, eZDebugSetting::changeLabel( 'kernel-datatype-ezxmltext' ) ); 01203 } 01204 01205 $tmp = null; 01206 if ( !$this->processBySchemaPresence( $element ) ) 01207 { 01208 if ( $debug ) 01209 { 01210 eZDebug::writeDebug( $this->Document->saveXML(), 01211 eZDebugSetting::changeLabel( 'kernel-datatype-ezxmltext', 01212 'xml string after failed processBySchemaPresence for new element ' . $element->nodeName ) ); 01213 } 01214 continue; 01215 } 01216 01217 if ( $debug ) 01218 { 01219 eZDebug::writeDebug( $this->Document->saveXML(), 01220 eZDebugSetting::changeLabel( 'kernel-datatype-ezxmltext', 01221 'xml string after processBySchemaPresence for new element ' . 
$element->nodeName ) ); 01222 } 01223 01224 01225 // Call "Structure handler" 01226 $this->callOutputHandler( 'structHandler', $element, $tmp ); 01227 01228 if ( !$this->processBySchemaTree( $element ) ) 01229 { 01230 if ( $debug ) 01231 { 01232 eZDebug::writeDebug( $this->Document->saveXML(), 01233 eZDebugSetting::changeLabel( 'kernel-datatype-ezxmltext', 01234 'xml string after failed processBySchemaTree for new element ' . $element->nodeName ) ); 01235 } 01236 continue; 01237 } 01238 01239 if ( $debug ) 01240 { 01241 eZDebug::writeDebug( $this->Document->saveXML(), 01242 eZDebugSetting::changeLabel( 'kernel-datatype-ezxmltext', 01243 'xml string after processBySchemaTree for new element ' . $element->nodeName ) ); 01244 } 01245 01246 01247 $tmp2 = null; 01248 // Call "Publish handler" 01249 $this->callOutputHandler( 'publishHandler', $element, $tmp2 ); 01250 01251 if ( $debug ) 01252 { 01253 eZDebug::writeDebug( $this->Document->saveXML(), 01254 eZDebugSetting::changeLabel( 'kernel-datatype-ezxmltext', 01255 'xml string after callOutputHandler publishHandler for new element ' . 
$element->nodeName ) ); 01256 } 01257 01258 // Process attributes according to the schema 01259 if( $element->hasAttributes() ) 01260 { 01261 if ( !$this->XMLSchema->hasAttributes( $element ) ) 01262 { 01263 eZXMLInputParser::removeAllAttributes( $element ); 01264 } 01265 else 01266 { 01267 $this->processAttributesBySchema( $element ); 01268 } 01269 } 01270 } 01271 } 01272 01273 /// \public 01274 function getMessages() 01275 { 01276 return $this->Messages; 01277 } 01278 01279 /// \public 01280 function isValid() 01281 { 01282 return $this->IsInputValid; 01283 } 01284 01285 function handleError( $type, $message ) 01286 { 01287 if ( $type & $this->DetectErrorLevel ) 01288 { 01289 $this->IsInputValid = false; 01290 if ( $message ) 01291 { 01292 $this->Messages[] = $message; 01293 } 01294 } 01295 01296 if ( $type & $this->ValidateErrorLevel ) 01297 { 01298 $this->IsInputValid = false; 01299 $this->QuitProcess = true; 01300 } 01301 } 01302 01303 public $DOMDocumentClass = 'DOMDocument'; 01304 01305 public $XMLSchema; 01306 public $Document = null; 01307 public $Messages = array(); 01308 public $eZPublishVersion; 01309 01310 public $ParentStack = array(); 01311 01312 public $ValidateErrorLevel; 01313 public $DetectErrorLevel; 01314 01315 public $IsInputValid = true; 01316 public $QuitProcess = false; 01317 01318 // options that depend on settings 01319 public $TrimSpaces = true; 01320 public $AllowMultipleSpaces = false; 01321 public $AllowNumericEntities = false; 01322 public $StrictHeaders = false; 01323 01324 // options that depend on parameters passed 01325 public $ParseLineBreaks = false; 01326 public $RemoveDefaultAttrs = false; 01327 } 01328 ?>