00001 <?php
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049 include_once( "lib/ezutils/classes/ezdebug.php" );
00050 include_once( "lib/ezxml/classes/ezdomnode.php" );
00051 include_once( "lib/ezxml/classes/ezdomdocument.php" );
00052
// Node type identifiers stored in the Type member of DOM nodes.

// An element node, e.g. <tag>.
define( 'EZ_NODE_TYPE_ELEMENT', 1 );
// An attribute node, as produced by eZXML::parseAttributes().
define( 'EZ_NODE_TYPE_ATTRIBUTE', 2 );
// A text node.
define( 'EZ_NODE_TYPE_TEXT', 3 );
// A <![CDATA[ ... ]]> section node.
define( 'EZ_NODE_TYPE_CDATASECTION', 4 );
00057
/*!
  \class eZXML
  \brief Small non-validating XML parser which builds an eZDOMDocument tree.

  The parser works directly on the document string: it strips the XML
  declaration, processing instructions, the DOCTYPE and comments, and then
  walks the remaining text tag by tag, creating element, text and CDATA nodes.

  NOTE(review): NamespaceStack and NamespaceArray are never cleared, so
  namespaces seen in one domTree() call are still present during the next call
  on the same instance - confirm whether callers rely on that.
*/
class eZXML
{
    /*!
      Constructor. The parser needs no initialisation.
    */
    function eZXML( )
    {
    }

    /*!
      Parses the XML text \a $xmlDoc and returns the resulting DOM document.

      \param $xmlDoc The XML document as a string.
      \param $params Optional associative array of settings:
             - TrimWhiteSpace (default true): when true, text which consists
               only of whitespace does not produce a text node.
             - SetParentNode (default false): passed as second argument to the
               eZDOMDocument constructor.
             - CharsetConversion: when set and false no charset conversion is
               requested. NOTE(review): an 'encoding' or 'charset' attribute in
               the XML declaration still re-enables conversion - confirm this
               is intended.
             - ConvertSpecialChars (default true): when true the five
               predefined XML entities are decoded in text content.
             A 'Schema' entry may be passed but is not used by the parser.
      \param $native When true and the domxml extension is available the
             document is parsed with domxml_open_mem() instead and that result
             is returned.

      \return The DOM document, \c null if \a $xmlDoc is empty or no root node
              was found, or \c false if the document is malformed (unmatched
              or unterminated tags).
    */
    function &domTree( $xmlDoc, $params = array(), $native = false )
    {
        if ( !$xmlDoc )
        {
            $tmp = null;
            return $tmp;
        }

        // Remove ASCII control characters; tab, line feed and carriage return are kept.
        $xmlDoc = preg_replace('/[\x00-\x08\x0b-\x0c\x0e-\x1f]/', '', $xmlDoc);

        if ( $native and function_exists( 'domxml_open_mem' ) )
        {
            $domDocument = domxml_open_mem( $xmlDoc );
            return $domDocument;
        }

        if ( !isset( $params["TrimWhiteSpace"] ) )
            $params["TrimWhiteSpace"] = true;

        if ( !isset( $params["SetParentNode"] ) )
            $params["SetParentNode"] = false;

        if ( !isset( $params['ConvertSpecialChars'] ) )
            $params['ConvertSpecialChars'] = true;

        $charset = 'UTF-8';
        if ( isset( $params['CharsetConversion'] ) and
             !$params['CharsetConversion'] )
            $charset = false;

        // Pick up the charset from the XML declaration, if there is one.
        if ( preg_match( "#<\?xml(.*?)\?>#", $xmlDoc, $matches ) )
        {
            $xmlAttributes = $this->parseAttributes( $matches[1] );
            // parseAttributes() returns false when nothing was found, which must not be counted.
            if ( is_array( $xmlAttributes ) )
            {
                for ( $i = 0; $i < count( $xmlAttributes ); ++$i )
                {
                    $xmlAttribute =& $xmlAttributes[$i];
                    $xmlAttributeName = $xmlAttribute->name();
                    if ( $xmlAttributeName == 'encoding' or $xmlAttributeName == 'charset' )
                        $charset = $xmlAttribute->content();
                }
            }
        }

        if ( $charset !== false )
        {
            include_once( 'lib/ezi18n/classes/eztextcodec.php' );
            $codec =& eZTextCodec::instance( $charset, false, false );
            if ( $codec )
            {
                $xmlDoc = $codec->convertString( $xmlDoc );
            }
        }

        // Strip the XML declaration and other processing instructions.
        $xmlDoc = preg_replace( "#<\?.*?\?>#", "", $xmlDoc );

        // Strip the DOCTYPE declaration.
        $xmlDoc = preg_replace( "%<\!DOCTYPE.*?>%is", "", $xmlDoc );

        // Normalise all line endings to a single line feed.
        $xmlDoc = preg_replace( "#\n|\r\n|\r#", "\n", $xmlDoc );

        $xmlDoc = $this->stripComments( $xmlDoc );

        $domDocument = new eZDOMDocument( '', $params["SetParentNode"] );

        // parseAttributes() registers namespace aliases on this document.
        $this->DOMDocument =& $domDocument;
        $currentNode =& $domDocument;

        // Stack of open elements; each entry holds the tag name and the node to return to.
        $TagStack = array();

        $docLength = strlen( $xmlDoc );
        $pos = 0;
        $endTagPos = 0;
        while ( $pos < $docLength )
        {
            if ( $xmlDoc[$pos] == "<" )
            {
                if ( $pos > 0 and substr( $xmlDoc, $pos, 9 ) == "<![CDATA[" )
                {
                    // CDATA section: the content is taken verbatim and is never
                    // inspected for tag names or attributes.
                    $endTagPos = strpos( $xmlDoc, "]]>", $pos );
                    if ( $endTagPos === false )
                    {
                        eZDebug::writeError( "XML parser error: Closing tag \']]>\' for <![CDATA[ not found" , "eZ xml" );
                        $endTagPos = $docLength;
                    }

                    // unset() first: $subNode may still be bound by reference to $currentNode.
                    unset( $subNode );
                    $subNode = $domDocument->createElementNode( "#cdata-section" );

                    if ( isset( $this->NamespaceStack[0] ) )
                    {
                        $subNode->setNamespaceURI( $this->NamespaceStack[0] );
                    }

                    $subNode->Name = $subNode->LocalName = "#cdata-section";
                    $subNode->Content = substr( $xmlDoc, $pos + 9, $endTagPos - ( $pos + 9 ) );
                    $subNode->Type = EZ_NODE_TYPE_CDATASECTION;

                    $currentNode->appendChild( $subNode );

                    // Continue after "]]>"; $endTagPos ends up on its closing '>'.
                    $pos = $endTagPos;
                    $endTagPos += 2;
                }
                else
                {
                    $endTagPos = strpos( $xmlDoc, ">", $pos );
                    if ( $endTagPos === false )
                    {
                        eZDebug::writeError( "Error parsing XML, tag starting at offset $pos is not terminated", "eZ xml" );
                        $retVal = false;
                        return $retVal;
                    }

                    // Everything between '<' and '>'.
                    $tagName = substr( $xmlDoc, $pos + 1, $endTagPos - ( $pos + 1 ) );

                    if ( substr( $tagName, 0, 1 ) == "/" )
                    {
                        // End tag; whitespace before '>' is allowed.
                        $tagName = rtrim( substr( $tagName, 1 ) );

                        // Only the local name is compared, a namespace prefix is dropped.
                        $colonPos = strpos( $tagName, ":" );
                        if ( $colonPos > 0 )
                            $tagName = substr( $tagName, $colonPos + 1 );

                        if ( count( $TagStack ) == 0 )
                        {
                            eZDebug::writeError( "Error parsing XML, unmatched tags $tagName" );
                            $retVal = false;
                            return $retVal;
                        }

                        $lastNodeArray = array_pop( $TagStack );
                        $lastTag = $lastNodeArray["TagName"];

                        $lastNode =& $lastNodeArray["ParentNodeObject"];

                        // Step back up to the parent of the element being closed.
                        unset( $currentNode );
                        $currentNode =& $lastNode;

                        if ( $lastTag != $tagName )
                        {
                            eZDebug::writeError( "Error parsing XML, unmatched tags $tagName" );
                            $retVal = false;
                            return $retVal;
                        }
                    }
                    else
                    {
                        // Start tag or empty-element tag. The name ends at the
                        // first whitespace character; 0 means "no attribute part".
                        $tagNameEnd = strcspn( $tagName, " \n\t" );
                        if ( $tagNameEnd >= strlen( $tagName ) )
                            $tagNameEnd = 0;

                        if ( $tagNameEnd > 0 )
                            $justName = substr( $tagName, 0, $tagNameEnd );
                        else
                            $justName = $tagName;

                        // Split off a namespace prefix, if any.
                        $colonPos = strpos( $justName, "![CDATA[" ) === false ? strpos( $justName, ":" ) : false;

                        $prefix = "";
                        if ( $colonPos > 0 )
                        {
                            $prefix = substr( $justName, 0, $colonPos );
                            $justName = substr( $justName, $colonPos + 1 );
                        }

                        // <name/> : the slash is not part of the name.
                        if ( substr( $justName, -1 ) == "/" )
                        {
                            $justName = substr( $justName, 0, strlen( $justName ) - 1 );
                        }

                        // unset() first: $subNode may still be bound by reference to $currentNode.
                        unset( $subNode );
                        $subNode = $domDocument->createElementNode( $justName );

                        // Attributes are parsed before the prefix is resolved, since
                        // the element may declare its own namespace.
                        if ( $tagNameEnd > 0 )
                        {
                            unset( $attr );
                            $attr = $this->parseAttributes( substr( $tagName, $tagNameEnd ) );

                            if ( $attr !== false )
                                $subNode->Attributes =& $attr;
                        }

                        if ( $prefix !== "" )
                        {
                            $subNode->Prefix = $prefix;

                            if ( isset( $this->NamespaceArray[$prefix] ) )
                            {
                                $subNode->setNamespaceURI( $this->NamespaceArray[$prefix] );
                            }
                            else
                            {
                                eZDebug::writeError( "Namespace: $prefix not defined", "eZ xml" );
                            }
                        }
                        else
                        {
                            // No prefix: use the first default namespace that was declared.
                            if ( isset( $this->NamespaceStack[0] ) )
                            {
                                $subNode->setNamespaceURI( $this->NamespaceStack[0] );
                            }
                        }

                        $domDocument->registerElement( $subNode );
                        $currentNode->appendChild( $subNode );

                        // Unless the tag is self-closing, descend into the new element.
                        if ( substr( $tagName, -1 ) != "/" )
                        {
                            $TagStack[] = array( "TagName" => $justName, "ParentNodeObject" => &$currentNode );

                            unset( $currentNode );
                            $currentNode =& $subNode;
                        }
                    }
                }
            }

            // Find the next tag. The offset is checked first since it lies past
            // the end of the document after an unterminated CDATA section.
            $searchFrom = $pos + 1;
            if ( $searchFrom <= $docLength )
                $pos = strpos( $xmlDoc, "<", $searchFrom );
            else
                $pos = false;

            if ( $pos === false )
            {
                // No more tags, end of document.
                $pos = $docLength;
            }
            else
            {
                // Text between the previous tag and the next one.
                $tagContent = substr( $xmlDoc, $endTagPos + 1, $pos - ( $endTagPos + 1 ) );

                // Drop the trailing line break(s) and the whitespace following them.
                $tagContent = preg_replace( "#[\n]+[\s]*$#", "", $tagContent, 1 );

                if ( ( $params["TrimWhiteSpace"] == true and trim( $tagContent ) != "" ) or ( $params["TrimWhiteSpace"] == false and $tagContent != "" ) )
                {
                    if ( $params["ConvertSpecialChars"] == true )
                    {
                        $tagContent = $this->decodeEntities( $tagContent );
                    }

                    unset( $subNode );
                    $subNode = $domDocument->createTextNode( $tagContent );

                    $domDocument->registerElement( $subNode );
                    $currentNode->appendChild( $subNode );
                }
            }
        }

        if ( !$domDocument->Root )
        {
            $tmp = null;
            return $tmp;
        }

        return $domDocument;
    }

    /*!
      \return a copy of \a $str with all XML comments (<!-- ... -->) removed.
      The parameter is taken by reference but is not modified.
    */
    function stripComments( &$str )
    {
        return preg_replace( "#<\!--.*?-->#s", "", $str );
    }

    /*!
      \private
      \return \a $text with the five predefined XML entities replaced by the
      characters they stand for. \c &amp; is handled last so that text such as
      \c &amp;lt; becomes \c &lt; and is not decoded a second time.
    */
    function decodeEntities( $text )
    {
        return str_replace( array( "&gt;", "&lt;", "&apos;", "&quot;", "&amp;" ),
                            array( ">", "<", "'", '"', "&" ),
                            $text );
    }

    /*!
      Parses the attribute part of a tag, e.g. ' id="1" xml:lang="en"'.

      Each name="value" or name='value' pair becomes an eZDOMNode of type
      EZ_NODE_TYPE_ATTRIBUTE with the entity-decoded value as Content.
      Namespace declarations are recorded as a side effect: 'xmlns:prefix'
      is stored in NamespaceArray and registered as an alias on the current
      DOM document, a plain 'xmlns' is appended to NamespaceStack.

      \return an array of attribute nodes, or \c false if \a $attributeString
              contains no attributes.
    */
    function parseAttributes( $attributeString )
    {
        $ret = array();

        // Group 1 is the name, group 2 the quote character, group 3 the raw value.
        preg_match_all( "/([a-zA-Z0-9:_-]+)\s*=\s*(\"|')(.*?)\\2/", $attributeString, $attributeMatches, PREG_SET_ORDER );

        foreach ( $attributeMatches as $attributeMatch )
        {
            $attributeNamespaceURI = false;
            $attributePrefix = false;
            $attributeName = $attributeMatch[1];

            // Split off a namespace prefix, if any.
            $colonPos = strpos( $attributeName, ":" );
            if ( $colonPos > 0 )
            {
                $attributePrefix = substr( $attributeName, 0, $colonPos );
                $attributeName = substr( $attributeName, $colonPos + 1 );
            }

            $attributeValue = $this->decodeEntities( $attributeMatch[3] );

            // xmlns:prefix="uri" declares a namespace alias.
            if ( $attributePrefix == "xmlns" )
            {
                $attributeNamespaceURI = $attributeValue;
                $this->NamespaceArray[$attributeName] = $attributeValue;

                // No document exists yet while the XML declaration is being parsed.
                if ( is_object( $this->DOMDocument ) )
                    $this->DOMDocument->registerNamespaceAlias( $attributeName, $attributeValue );
            }

            // xmlns="uri" declares a default namespace.
            if ( $attributeName == "xmlns" )
            {
                $attributeNamespaceURI = $attributeValue;
                $this->NamespaceStack[] = $attributeNamespaceURI;
            }

            unset( $attrNode );
            $attrNode = new eZDOMNode();
            $attrNode->Name = $attributeName;

            if ( $attributePrefix !== false && $attributePrefix != "xmlns" )
            {
                // Prefixed attribute: resolve the prefix against the known aliases.
                $attrNode->Prefix = $attributePrefix;
                $attrNode->LocalName = $attributeName;

                if ( isset( $this->NamespaceArray["$attributePrefix"] ) )
                {
                    $attrNode->NamespaceURI = $this->NamespaceArray["$attributePrefix"];
                }
                else
                {
                    eZDebug::writeError( "Namespace: $attributePrefix not found", "eZ xml" );
                }
            }
            else if ( $attributePrefix == "xmlns" )
            {
                $attrNode->LocalName = $attributeName;
                $attrNode->NamespaceURI = $attributeNamespaceURI;
                $attrNode->Prefix = $attributePrefix;
            }
            else
            {
                if ( $attributeName == "xmlns" )
                {
                    $attrNode->LocalName = $attributeName;
                    $attrNode->NamespaceURI = $attributeNamespaceURI;
                }
                else
                {
                    $attrNode->NamespaceURI = false;
                    $attrNode->LocalName = false;
                }
                $attrNode->Prefix = false;
            }

            $attrNode->Type = EZ_NODE_TYPE_ATTRIBUTE;
            $attrNode->Content = $attributeValue;

            $ret[] = $attrNode;
        }

        if ( count( $ret ) == 0 )
            return false;
        return $ret;
    }

    /// Default namespace URIs in the order they were declared; only the first entry is read.
    var $NamespaceStack = array();

    /// Namespace URIs indexed by their alias (the part after 'xmlns:').
    var $NamespaceArray = array();

    /// Not used by the code in this class.
    var $CurrentNameSpace;

    /// The DOM document being built by the current domTree() call.
    var $DOMDocument;
}
00534
00535 ?>