OpenGL GUI: D:/Programming/GUI Editor (Source)/tinyxmlparser.cpp Source File

00001 /* 00002 www.sourceforge.net/projects/tinyxml 00003 Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com) 00004 00005 This software is provided 'as-is', without any express or implied 00006 warranty. In no event will the authors be held liable for any 00007 damages arising from the use of this software. 00008 00009 Permission is granted to anyone to use this software for any 00010 purpose, including commercial applications, and to alter it and 00011 redistribute it freely, subject to the following restrictions: 00012 00013 1. The origin of this software must not be misrepresented; you must 00014 not claim that you wrote the original software. If you use this 00015 software in a product, an acknowledgment in the product documentation 00016 would be appreciated but is not required. 00017 00018 2. Altered source versions must be plainly marked as such, and 00019 must not be misrepresented as being the original software. 00020 00021 3. This notice may not be removed or altered from any source 00022 distribution. 00023 */ 00024 00025 #include "stdafx.h" 00026 #include "tinyxml.h" 00027 #include <ctype.h> 00028 00029 //#define DEBUG_PARSER 00030 00031 // Note tha "PutString" hardcodes the same list. This 00032 // is less flexible than it appears. Changing the entries 00033 // or order will break putstring. 00034 TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] = 00035 { 00036 { "&", 5, '&' }, 00037 { "<", 4, '<' }, 00038 { ">", 4, '>' }, 00039 { """, 6, '\"' }, 00040 { "'", 6, '\'' } 00041 }; 00042 00043 // Bunch of unicode info at: 00044 // http://www.unicode.org/faq/utf_bom.html 00045 // Including the basic of this table, which determines the #bytes in the 00046 // sequence from the lead byte. 1 placed for invalid sequences -- 00047 // although the result will be junk, pass it through as much as possible. 00048 // Beware of the non-characters in UTF-8: 00049 // ef bb bf (Microsoft "lead bytes") 00050 // ef bf be 00051 // ef bf bf 00052 00053 00054 00055 const int TiXmlBase::utf8ByteTable[256] = 00056 { 00057 // 0 1 2 3 4 5 6 7 8 9 a b c d e f 00058 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00 00059 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10 00060 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20 00061 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30 00062 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 00063 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50 00064 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 00065 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range 00066 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid 00067 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90 00068 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0 00069 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0 00070 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte 00071 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0 00072 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte 00073 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid 00074 }; 00075 00076 00077 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length ) 00078 { 00079 const unsigned long BYTE_MASK = 0xBF; 00080 const unsigned long BYTE_MARK = 0x80; 00081 const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; 00082 00083 if (input < 0x80) 00084 *length = 1; 00085 else if ( input < 0x800 ) 00086 *length = 2; 00087 else if ( input < 0x10000 ) 00088 *length = 3; 00089 else if ( input < 0x200000 ) 00090 *length = 4; 00091 else 00092 { *length = 0; return; } // This code won't covert this correctly anyway. 00093 00094 output += *length; 00095 00096 // Scary scary fall throughs. 00097 switch (*length) 00098 { 00099 case 4: 00100 --output; 00101 *output = (char)((input | BYTE_MARK) & BYTE_MASK); 00102 input >>= 6; 00103 case 3: 00104 --output; 00105 *output = (char)((input | BYTE_MARK) & BYTE_MASK); 00106 input >>= 6; 00107 case 2: 00108 --output; 00109 *output = (char)((input | BYTE_MARK) & BYTE_MASK); 00110 input >>= 6; 00111 case 1: 00112 --output; 00113 *output = (char)(input | FIRST_BYTE_MARK[*length]); 00114 } 00115 } 00116 00117 00118 /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding encoding ) 00119 { 00120 // This will only work for low-ascii, everything else is assumed to be a valid 00121 // letter. I'm not sure this is the best approach, but it is quite tricky trying 00122 // to figure out alhabetical vs. not across encoding. So take a very 00123 // conservative approach. 00124 00125 // if ( encoding == TIXML_ENCODING_UTF8 ) 00126 // { 00127 if ( anyByte < 127 ) 00128 return isalpha( anyByte ); 00129 else 00130 return 1; // What else to do? The unicode set is huge...get the english ones right. 00131 // } 00132 // else 00133 // { 00134 // return isalpha( anyByte ); 00135 // } 00136 } 00137 00138 00139 /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding encoding ) 00140 { 00141 // This will only work for low-ascii, everything else is assumed to be a valid 00142 // letter. I'm not sure this is the best approach, but it is quite tricky trying 00143 // to figure out alhabetical vs. not across encoding. So take a very 00144 // conservative approach. 00145 00146 // if ( encoding == TIXML_ENCODING_UTF8 ) 00147 // { 00148 if ( anyByte < 127 ) 00149 return isalnum( anyByte ); 00150 else 00151 return 1; // What else to do? The unicode set is huge...get the english ones right. 00152 // } 00153 // else 00154 // { 00155 // return isalnum( anyByte ); 00156 // } 00157 } 00158 00159 00160 class TiXmlParsingData 00161 { 00162 friend class TiXmlDocument; 00163 public: 00164 void Stamp( const char* now, TiXmlEncoding encoding ); 00165 00166 const TiXmlCursor& Cursor() { return cursor; } 00167 00168 private: 00169 // Only used by the document! 00170 TiXmlParsingData( const char* start, int _tabsize, int row, int col ) 00171 { 00172 assert( start ); 00173 stamp = start; 00174 tabsize = _tabsize; 00175 cursor.row = row; 00176 cursor.col = col; 00177 } 00178 00179 TiXmlCursor cursor; 00180 const char* stamp; 00181 int tabsize; 00182 }; 00183 00184 00185 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding ) 00186 { 00187 assert( now ); 00188 00189 // Do nothing if the tabsize is 0. 00190 if ( tabsize < 1 ) 00191 { 00192 return; 00193 } 00194 00195 // Get the current row, column. 00196 int row = cursor.row; 00197 int col = cursor.col; 00198 const char* p = stamp; 00199 assert( p ); 00200 00201 while ( p < now ) 00202 { 00203 // Code contributed by Fletcher Dunn: (modified by lee) 00204 switch (*p) { 00205 case 0: 00206 // We *should* never get here, but in case we do, don't 00207 // advance past the terminating null character, ever 00208 return; 00209 00210 case '\r': 00211 // bump down to the next line 00212 ++row; 00213 col = 0; 00214 // Eat the character 00215 ++p; 00216 00217 // Check for \r\n sequence, and treat this as a single character 00218 if (*p == '\n') { 00219 ++p; 00220 } 00221 break; 00222 00223 case '\n': 00224 // bump down to the next line 00225 ++row; 00226 col = 0; 00227 00228 // Eat the character 00229 ++p; 00230 00231 // Check for \n\r sequence, and treat this as a single 00232 // character. (Yes, this bizarre thing does occur still 00233 // on some arcane platforms...) 00234 if (*p == '\r') { 00235 ++p; 00236 } 00237 break; 00238 00239 case '\t': 00240 // Eat the character 00241 ++p; 00242 00243 // Skip to next tab stop 00244 col = (col / tabsize + 1) * tabsize; 00245 break; 00246 00247 case (char)(0xef): 00248 if ( encoding == TIXML_ENCODING_UTF8 ) 00249 { 00250 if ( *(p+1) && *(p+2) ) 00251 { 00252 // In these cases, don't advance the column. These are 00253 // 0-width spaces. 00254 if ( *(p+1)==(char)(0xbb) && *(p+2)==(char)(0xbf) ) 00255 p += 3; 00256 else if ( *(p+1)==(char)(0xbf) && *(p+2)==(char)(0xbe) ) 00257 p += 3; 00258 else if ( *(p+1)==(char)(0xbf) && *(p+2)==(char)(0xbf) ) 00259 p += 3; 00260 else 00261 { p +=3; ++col; } // A normal character. 00262 } 00263 } 00264 else 00265 { 00266 ++p; 00267 ++col; 00268 } 00269 break; 00270 00271 default: 00272 if ( encoding == TIXML_ENCODING_UTF8 ) 00273 { 00274 // Eat the 1 to 4 byte utf8 character. 00275 int step = TiXmlBase::utf8ByteTable[*((unsigned char*)p)]; 00276 if ( step == 0 ) 00277 step = 1; // Error case from bad encoding, but handle gracefully. 00278 p += step; 00279 00280 // Just advance one column, of course. 00281 ++col; 00282 } 00283 else 00284 { 00285 ++p; 00286 ++col; 00287 } 00288 break; 00289 } 00290 } 00291 cursor.row = row; 00292 cursor.col = col; 00293 assert( cursor.row >= -1 ); 00294 assert( cursor.col >= -1 ); 00295 stamp = p; 00296 assert( stamp ); 00297 } 00298 00299 00300 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding ) 00301 { 00302 if ( !p || !*p ) 00303 { 00304 return 0; 00305 } 00306 if ( encoding == TIXML_ENCODING_UTF8 ) 00307 { 00308 while ( *p ) 00309 { 00310 // Skip the stupid Microsoft UTF-8 Byte order marks 00311 if ( *(p+0)==(char) 0xef 00312 && *(p+1)==(char) 0xbb 00313 && *(p+2)==(char) 0xbf ) 00314 { 00315 p += 3; 00316 continue; 00317 } 00318 else if(*(p+0)==(char) 0xef 00319 && *(p+1)==(char) 0xbf 00320 && *(p+2)==(char) 0xbe ) 00321 { 00322 p += 3; 00323 continue; 00324 } 00325 else if(*(p+0)==(char) 0xef 00326 && *(p+1)==(char) 0xbf 00327 && *(p+2)==(char) 0xbf ) 00328 { 00329 p += 3; 00330 continue; 00331 } 00332 00333 if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' ) // Still using old rules for white space. 00334 ++p; 00335 else 00336 break; 00337 } 00338 } 00339 else 00340 { 00341 while ( *p && IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' ) 00342 ++p; 00343 } 00344 00345 return p; 00346 } 00347 00348 #ifdef TIXML_USE_STL 00349 /*static*/ bool TiXmlBase::StreamWhiteSpace( TIXML_ISTREAM * in, TIXML_STRING * tag ) 00350 { 00351 for( ;; ) 00352 { 00353 if ( !in->good() ) return false; 00354 00355 int c = in->peek(); 00356 // At this scope, we can't get to a document. So fail silently. 00357 if ( !IsWhiteSpace( c ) || c <= 0 ) 00358 return true; 00359 00360 *tag += (char) in->get(); 00361 } 00362 } 00363 00364 /*static*/ bool TiXmlBase::StreamTo( TIXML_ISTREAM * in, int character, TIXML_STRING * tag ) 00365 { 00366 //assert( character > 0 && character < 128 ); // else it won't work in utf-8 00367 while ( in->good() ) 00368 { 00369 int c = in->peek(); 00370 if ( c == character ) 00371 return true; 00372 if ( c <= 0 ) // Silent failure: can't get document at this scope 00373 return false; 00374 00375 in->get(); 00376 *tag += (char) c; 00377 } 00378 return false; 00379 } 00380 #endif 00381 00382 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding ) 00383 { 00384 *name = ""; 00385 assert( p ); 00386 00387 // Names start with letters or underscores. 00388 // Of course, in unicode, tinyxml has no idea what a letter *is*. The 00389 // algorithm is generous. 00390 // 00391 // After that, they can be letters, underscores, numbers, 00392 // hyphens, or colons. (Colons are valid ony for namespaces, 00393 // but tinyxml can't tell namespaces from names.) 00394 if ( p && *p 00395 && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) ) 00396 { 00397 while( p && *p 00398 && ( IsAlphaNum( (unsigned char ) *p, encoding ) 00399 || *p == '_' 00400 || *p == '-' 00401 || *p == '.' 00402 || *p == ':' ) ) 00403 { 00404 (*name) += *p; 00405 ++p; 00406 } 00407 return p; 00408 } 00409 return 0; 00410 } 00411 00412 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding ) 00413 { 00414 // Presume an entity, and pull it out. 00415 TIXML_STRING ent; 00416 int i; 00417 *length = 0; 00418 00419 if ( *(p+1) && *(p+1) == '#' && *(p+2) ) 00420 { 00421 unsigned long ucs = 0; 00422 unsigned delta = 0; 00423 unsigned mult = 1; 00424 00425 if ( *(p+2) == 'x' ) 00426 { 00427 // Hexadecimal. 00428 if ( !*(p+3) ) return 0; 00429 00430 const char* q = p+3; 00431 q = strchr( q, ';' ); 00432 00433 if ( !q || !*q ) return 0; 00434 00435 delta = q-p; 00436 --q; 00437 00438 while ( *q != 'x' ) 00439 { 00440 if ( *q >= '0' && *q <= '9' ) 00441 ucs += mult * (*q - '0'); 00442 else if ( *q >= 'a' && *q <= 'f' ) 00443 ucs += mult * (*q - 'a' + 10); 00444 else if ( *q >= 'A' && *q <= 'F' ) 00445 ucs += mult * (*q - 'A' + 10 ); 00446 else 00447 return 0; 00448 mult *= 16; 00449 --q; 00450 } 00451 } 00452 else 00453 { 00454 // Decimal. 00455 if ( !*(p+2) ) return 0; 00456 00457 const char* q = p+2; 00458 q = strchr( q, ';' ); 00459 00460 if ( !q || !*q ) return 0; 00461 00462 delta = q-p; 00463 --q; 00464 00465 while ( *q != '#' ) 00466 { 00467 if ( *q >= '0' && *q <= '9' ) 00468 ucs += mult * (*q - '0'); 00469 else 00470 return 0; 00471 mult *= 10; 00472 --q; 00473 } 00474 } 00475 if ( encoding == TIXML_ENCODING_UTF8 ) 00476 { 00477 // convert the UCS to UTF-8 00478 ConvertUTF32ToUTF8( ucs, value, length ); 00479 } 00480 else 00481 { 00482 *value = (char)ucs; 00483 *length = 1; 00484 } 00485 return p + delta + 1; 00486 } 00487 00488 // Now try to match it. 00489 for( i=0; i<NUM_ENTITY; ++i ) 00490 { 00491 if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 ) 00492 { 00493 assert( strlen( entity[i].str ) == entity[i].strLength ); 00494 *value = entity[i].chr; 00495 *length = 1; 00496 return ( p + entity[i].strLength ); 00497 } 00498 } 00499 00500 // So it wasn't an entity, its unrecognized, or something like that. 00501 *value = *p; // Don't put back the last one, since we return it! 00502 return p+1; 00503 } 00504 00505 00506 bool TiXmlBase::StringEqual( const char* p, 00507 const char* tag, 00508 bool ignoreCase, 00509 TiXmlEncoding encoding ) 00510 { 00511 assert( p ); 00512 assert( tag ); 00513 if ( !p || !*p ) 00514 { 00515 assert( 0 ); 00516 return false; 00517 } 00518 00519 const char* q = p; 00520 00521 if ( ignoreCase ) 00522 { 00523 while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) ) 00524 { 00525 ++q; 00526 ++tag; 00527 } 00528 00529 if ( *tag == 0 ) 00530 return true; 00531 } 00532 else 00533 { 00534 while ( *q && *tag && *q == *tag ) 00535 { 00536 ++q; 00537 ++tag; 00538 } 00539 00540 if ( *tag == 0 ) // Have we found the end of the tag, and everything equal? 00541 return true; 00542 } 00543 return false; 00544 } 00545 00546 const char* TiXmlBase::ReadText( const char* p, 00547 TIXML_STRING * text, 00548 bool trimWhiteSpace, 00549 const char* endTag, 00550 bool caseInsensitive, 00551 TiXmlEncoding encoding ) 00552 { 00553 *text = ""; 00554 if ( !trimWhiteSpace // certain tags always keep whitespace 00555 || !condenseWhiteSpace ) // if true, whitespace is always kept 00556 { 00557 // Keep all the white space. 00558 while ( p && *p 00559 && !StringEqual( p, endTag, caseInsensitive, encoding ) 00560 ) 00561 { 00562 int len; 00563 char cArr[4] = { 0, 0, 0, 0 }; 00564 p = GetChar( p, cArr, &len, encoding ); 00565 text->Append( cArr, len ); 00566 } 00567 } 00568 else 00569 { 00570 bool whitespace = false; 00571 00572 // Remove leading white space: 00573 p = SkipWhiteSpace( p, encoding ); 00574 while ( p && *p 00575 && !StringEqual( p, endTag, caseInsensitive, encoding ) ) 00576 { 00577 if ( *p == '\r' || *p == '\n' ) 00578 { 00579 whitespace = true; 00580 ++p; 00581 } 00582 else if ( IsWhiteSpace( *p ) ) 00583 { 00584 whitespace = true; 00585 ++p; 00586 } 00587 else 00588 { 00589 // If we've found whitespace, add it before the 00590 // new character. Any whitespace just becomes a space. 00591 if ( whitespace ) 00592 { 00593 (*text) += ' '; 00594 whitespace = false; 00595 } 00596 int len; 00597 char cArr[4] = { 0, 0, 0, 0 }; 00598 p = GetChar( p, cArr, &len, encoding ); 00599 if ( len == 1 ) 00600 (*text) += cArr[0]; // more efficient 00601 else 00602 text->Append( cArr, len ); 00603 } 00604 } 00605 } 00606 return p + strlen( endTag ); 00607 } 00608 00609 #ifdef TIXML_USE_STL 00610 00611 void TiXmlDocument::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag ) 00612 { 00613 // The basic issue with a document is that we don't know what we're 00614 // streaming. Read something presumed to be a tag (and hope), then 00615 // identify it, and call the appropriate stream method on the tag. 00616 // 00617 // This "pre-streaming" will never read the closing ">" so the 00618 // sub-tag can orient itself. 00619 00620 if ( !StreamTo( in, '<', tag ) ) 00621 { 00622 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); 00623 return; 00624 } 00625 00626 while ( in->good() ) 00627 { 00628 int tagIndex = (int) tag->length(); 00629 while ( in->good() && in->peek() != '>' ) 00630 { 00631 int c = in->get(); 00632 if ( c <= 0 ) 00633 { 00634 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 00635 break; 00636 } 00637 (*tag) += (char) c; 00638 } 00639 00640 if ( in->good() ) 00641 { 00642 // We now have something we presume to be a node of 00643 // some sort. Identify it, and call the node to 00644 // continue streaming. 00645 TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING ); 00646 00647 if ( node ) 00648 { 00649 node->StreamIn( in, tag ); 00650 bool isElement = node->ToElement() != 0; 00651 delete node; 00652 node = 0; 00653 00654 // If this is the root element, we're done. Parsing will be 00655 // done by the >> operator. 00656 if ( isElement ) 00657 { 00658 return; 00659 } 00660 } 00661 else 00662 { 00663 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN ); 00664 return; 00665 } 00666 } 00667 } 00668 // We should have returned sooner. 00669 SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN ); 00670 } 00671 00672 #endif 00673 00674 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding ) 00675 { 00676 ClearError(); 00677 00678 // Parse away, at the document level. Since a document 00679 // contains nothing but other tags, most of what happens 00680 // here is skipping white space. 00681 if ( !p || !*p ) 00682 { 00683 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); 00684 return 0; 00685 } 00686 00687 // Note that, for a document, this needs to come 00688 // before the while space skip, so that parsing 00689 // starts from the pointer we are given. 00690 location.Clear(); 00691 if ( prevData ) 00692 { 00693 location.row = prevData->cursor.row; 00694 location.col = prevData->cursor.col; 00695 } 00696 else 00697 { 00698 location.row = 0; 00699 location.col = 0; 00700 } 00701 TiXmlParsingData data( p, TabSize(), location.row, location.col ); 00702 location = data.Cursor(); 00703 00704 if ( encoding == TIXML_ENCODING_UNKNOWN ) 00705 { 00706 // Check for the Microsoft UTF-8 lead bytes. 00707 if ( *(p+0) && *(p+0) == (char)(0xef) 00708 && *(p+1) && *(p+1) == (char)(0xbb) 00709 && *(p+2) && *(p+2) == (char)(0xbf) ) 00710 { 00711 encoding = TIXML_ENCODING_UTF8; 00712 } 00713 } 00714 00715 p = SkipWhiteSpace( p, encoding ); 00716 if ( !p ) 00717 { 00718 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); 00719 return 0; 00720 } 00721 00722 while ( p && *p ) 00723 { 00724 TiXmlNode* node = Identify( p, encoding ); 00725 if ( node ) 00726 { 00727 p = node->Parse( p, &data, encoding ); 00728 LinkEndChild( node ); 00729 } 00730 else 00731 { 00732 break; 00733 } 00734 00735 // Did we get encoding info? 00736 if ( encoding == TIXML_ENCODING_UNKNOWN 00737 && node->ToDeclaration() ) 00738 { 00739 TiXmlDeclaration* dec = node->ToDeclaration(); 00740 const char* enc = dec->Encoding(); 00741 assert( enc ); 00742 00743 if ( *enc == 0 ) 00744 encoding = TIXML_ENCODING_UTF8; 00745 else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) ) 00746 encoding = TIXML_ENCODING_UTF8; 00747 else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) ) 00748 encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice 00749 else 00750 encoding = TIXML_ENCODING_LEGACY; 00751 } 00752 00753 p = SkipWhiteSpace( p, encoding ); 00754 } 00755 00756 // All is well. 00757 return p; 00758 } 00759 00760 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding ) 00761 { 00762 // The first error in a chain is more accurate - don't set again! 00763 if ( error ) 00764 return; 00765 00766 assert( err > 0 && err < TIXML_ERROR_STRING_COUNT ); 00767 error = true; 00768 errorId = err; 00769 errorDesc = errorString[ errorId ]; 00770 00771 errorLocation.Clear(); 00772 if ( pError && data ) 00773 { 00774 //TiXmlParsingData data( pError, prevData ); 00775 data->Stamp( pError, encoding ); 00776 errorLocation = data->Cursor(); 00777 } 00778 } 00779 00780 00781 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding ) 00782 { 00783 TiXmlNode* returnNode = 0; 00784 00785 p = SkipWhiteSpace( p, encoding ); 00786 if( !p || !*p || *p != '<' ) 00787 { 00788 return 0; 00789 } 00790 00791 TiXmlDocument* doc = GetDocument(); 00792 p = SkipWhiteSpace( p, encoding ); 00793 00794 if ( !p || !*p ) 00795 { 00796 return 0; 00797 } 00798 00799 // What is this thing? 00800 // - Elements start with a letter or underscore, but xml is reserved. 00801 // - Comments: "; 01282 01283 if ( !StringEqual( p, startTag, false, encoding ) ) 01284 { 01285 document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding ); 01286 return 0; 01287 } 01288 p += strlen( startTag ); 01289 p = ReadText( p, &value, false, endTag, false, encoding ); 01290 return p; 01291 } 01292 01293 01294 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) 01295 { 01296 p = SkipWhiteSpace( p, encoding ); 01297 if ( !p || !*p ) return 0; 01298 01299 int tabsize = 4; 01300 if ( document ) 01301 tabsize = document->TabSize(); 01302 01303 // TiXmlParsingData data( p, prevData ); 01304 if ( data ) 01305 { 01306 data->Stamp( p, encoding ); 01307 location = data->Cursor(); 01308 } 01309 // Read the name, the '=' and the value. 01310 const char* pErr = p; 01311 p = ReadName( p, &name, encoding ); 01312 if ( !p || !*p ) 01313 { 01314 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding ); 01315 return 0; 01316 } 01317 p = SkipWhiteSpace( p, encoding ); 01318 if ( !p || !*p || *p != '=' ) 01319 { 01320 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding ); 01321 return 0; 01322 } 01323 01324 ++p; // skip '=' 01325 p = SkipWhiteSpace( p, encoding ); 01326 if ( !p || !*p ) 01327 { 01328 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding ); 01329 return 0; 01330 } 01331 01332 const char* end; 01333 01334 if ( *p == '\'' ) 01335 { 01336 ++p; 01337 end = "\'"; 01338 p = ReadText( p, &value, false, end, false, encoding ); 01339 } 01340 else if ( *p == '"' ) 01341 { 01342 ++p; 01343 end = "\""; 01344 p = ReadText( p, &value, false, end, false, encoding ); 01345 } 01346 else 01347 { 01348 // All attribute values should be in single or double quotes. 01349 // But this is such a common error that the parser will try 01350 // its best, even without them. 01351 value = ""; 01352 while ( p && *p // existence 01353 && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r' // whitespace 01354 && *p != '/' && *p != '>' ) // tag end 01355 { 01356 value += *p; 01357 ++p; 01358 } 01359 } 01360 return p; 01361 } 01362 01363 #ifdef TIXML_USE_STL 01364 void TiXmlText::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag ) 01365 { 01366 while ( in->good() ) 01367 { 01368 int c = in->peek(); 01369 if ( c == '<' ) 01370 return; 01371 if ( c <= 0 ) 01372 { 01373 TiXmlDocument* document = GetDocument(); 01374 if ( document ) 01375 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 01376 return; 01377 } 01378 01379 (*tag) += (char) c; 01380 in->get(); 01381 } 01382 } 01383 #endif 01384 01385 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) 01386 { 01387 value = ""; 01388 // TiXmlParsingData data( p, prevData ); 01389 if ( data ) 01390 { 01391 data->Stamp( p, encoding ); 01392 location = data->Cursor(); 01393 } 01394 bool ignoreWhite = true; 01395 01396 const char* end = "<"; 01397 p = ReadText( p, &value, ignoreWhite, end, false, encoding ); 01398 if ( p ) 01399 return p-1; // don't truncate the '<' 01400 return 0; 01401 } 01402 01403 #ifdef TIXML_USE_STL 01404 void TiXmlDeclaration::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag ) 01405 { 01406 while ( in->good() ) 01407 { 01408 int c = in->get(); 01409 if ( c <= 0 ) 01410 { 01411 TiXmlDocument* document = GetDocument(); 01412 if ( document ) 01413 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); 01414 return; 01415 } 01416 (*tag) += (char) c; 01417 01418 if ( c == '>' ) 01419 { 01420 // All is well. 01421 return; 01422 } 01423 } 01424 } 01425 #endif 01426 01427 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding ) 01428 { 01429 p = SkipWhiteSpace( p, _encoding ); 01430 // Find the beginning, find the end, and look for 01431 // the stuff in-between. 01432 TiXmlDocument* document = GetDocument(); 01433 if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) ) 01434 { 01435 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding ); 01436 return 0; 01437 } 01438 // TiXmlParsingData data( p, prevData ); 01439 if ( data ) 01440 { 01441 data->Stamp( p, _encoding ); 01442 location = data->Cursor(); 01443 } 01444 p += 5; 01445 01446 version = ""; 01447 encoding = ""; 01448 standalone = ""; 01449 01450 while ( p && *p ) 01451 { 01452 if ( *p == '>' ) 01453 { 01454 ++p; 01455 return p; 01456 } 01457 01458 p = SkipWhiteSpace( p, _encoding ); 01459 if ( StringEqual( p, "version", true, _encoding ) ) 01460 { 01461 TiXmlAttribute attrib; 01462 p = attrib.Parse( p, data, _encoding ); 01463 version = attrib.pcValue(); 01464 } 01465 else if ( StringEqual( p, "encoding", true, _encoding ) ) 01466 { 01467 TiXmlAttribute attrib; 01468 p = attrib.Parse( p, data, _encoding ); 01469 encoding = attrib.pcValue(); 01470 } 01471 else if ( StringEqual( p, "standalone", true, _encoding ) ) 01472 { 01473 TiXmlAttribute attrib; 01474 p = attrib.Parse( p, data, _encoding ); 01475 standalone = attrib.pcValue(); 01476 } 01477 else 01478 { 01479 // Read over whatever it is. 01480 while( p && *p && *p != '>' && !IsWhiteSpace( *p ) ) 01481 ++p; 01482 } 01483 } 01484 return 0; 01485 } 01486 01487 bool TiXmlText::Blank() const 01488 { 01489 for ( unsigned i=0; i<value.length(); i++ ) 01490 if ( !IsWhiteSpace( value[i] ) ) 01491 return false; 01492 return true; 01493 } 01494