1 /*
2 www.sourceforge.net/projects/tinyxml
3 Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
4 
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any
7 damages arising from the use of this software.
8 
9 Permission is granted to anyone to use this software for any
10 purpose, including commercial applications, and to alter it and
11 redistribute it freely, subject to the following restrictions:
12 
13 1. The origin of this software must not be misrepresented; you must
14 not claim that you wrote the original software. If you use this
15 software in a product, an acknowledgment in the product documentation
16 would be appreciated but is not required.
17 
18 2. Altered source versions must be plainly marked as such, and
19 must not be misrepresented as being the original software.
20 
21 3. This notice may not be removed or altered from any source
22 distribution.
23 */
24 
25 #include "tinyxml.h"
26 #include <ctype.h>
27 #include <stddef.h>
28 
29 //#define DEBUG_PARSER
30 
// Table of the predefined XML entities recognised by the parser. Each row is
// { entity text, length of that text, character it stands for }.
//
// Note that "PutString" hardcodes the same list, so this table is less
// flexible than it appears: changing the entries or their order will
// break PutString.
TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
{
	{ "&amp;",  5, '&' },
	{ "&lt;",   4, '<' },
	{ "&gt;",   4, '>' },
	{ "&quot;", 6, '\"' },
	{ "&apos;", 6, '\'' }
};
42 
// A good collection of Unicode information lives at:
//		http://www.unicode.org/faq/utf_bom.html
// It includes the basis of the utf8ByteTable defined below, which gives the
// number of bytes in a UTF-8 sequence from its lead byte. Invalid lead bytes
// are given a length of 1 -- the result will be junk, but the data is passed
// through as much as possible.
// Beware of these special 3-byte sequences in UTF-8:
//				ef bb bf (Microsoft "lead bytes")
//				ef bf be
//				ef bf bf

// The three bytes of the Microsoft UTF-8 "lead bytes" sequence (ef bb bf).
const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
56 
// Length in bytes of a UTF-8 sequence, indexed by the value of its first
// byte. Bytes that cannot start a sequence (0x80-0xc1 and 0xf5-0xff) map
// to 1 so that a scanner stepping by this table always makes progress.
const int TiXmlBase::utf8ByteTable[256] =
{
	//	0	1	2	3	4	5	6	7	8	9	a	b	c	d	e	f
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x00
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x10
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x20
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x30
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x40
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x50
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x60
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x70	End of ASCII range
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x80 0x80 to 0xc1 invalid
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0x90
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0xa0
		1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	// 0xb0
		1,	1,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	// 0xc0 0xc2 to 0xdf 2 byte
		2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	2,	// 0xd0
		3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	3,	// 0xe0 0xe0 to 0xef 3 byte
		4,	4,	4,	4,	4,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1,	1	// 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
};
77 
78 
ConvertUTF32ToUTF8(unsigned long input,char * output,int * length)79 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
80 {
81 	const unsigned long BYTE_MASK = 0xBF;
82 	const unsigned long BYTE_MARK = 0x80;
83 	const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
84 
85 	if (input < 0x80)
86 		*length = 1;
87 	else if ( input < 0x800 )
88 		*length = 2;
89 	else if ( input < 0x10000 )
90 		*length = 3;
91 	else if ( input < 0x200000 )
92 		*length = 4;
93 	else
94 		{ *length = 0; return; }	// This code won't covert this correctly anyway.
95 
96 	output += *length;
97 
98 	// Scary scary fall throughs.
99 	switch (*length)
100 	{
101 		case 4:
102 			--output;
103 			*output = (char)((input | BYTE_MARK) & BYTE_MASK);
104 			input >>= 6;
105 		case 3:
106 			--output;
107 			*output = (char)((input | BYTE_MARK) & BYTE_MASK);
108 			input >>= 6;
109 		case 2:
110 			--output;
111 			*output = (char)((input | BYTE_MARK) & BYTE_MASK);
112 			input >>= 6;
113 		case 1:
114 			--output;
115 			*output = (char)(input | FIRST_BYTE_MARK[*length]);
116 	}
117 }
118 
119 
IsAlpha(unsigned char anyByte,TiXmlEncoding)120 /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
121 {
122 	// This will only work for low-ascii, everything else is assumed to be a valid
123 	// letter. I'm not sure this is the best approach, but it is quite tricky trying
124 	// to figure out alhabetical vs. not across encoding. So take a very
125 	// conservative approach.
126 
127 //	if ( encoding == TIXML_ENCODING_UTF8 )
128 //	{
129 		if ( anyByte < 127 )
130 			return isalpha( anyByte );
131 		else
132 			return 1;	// What else to do? The unicode set is huge...get the english ones right.
133 //	}
134 //	else
135 //	{
136 //		return isalpha( anyByte );
137 //	}
138 }
139 
140 
IsAlphaNum(unsigned char anyByte,TiXmlEncoding)141 /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
142 {
143 	// This will only work for low-ascii, everything else is assumed to be a valid
144 	// letter. I'm not sure this is the best approach, but it is quite tricky trying
145 	// to figure out alhabetical vs. not across encoding. So take a very
146 	// conservative approach.
147 
148 //	if ( encoding == TIXML_ENCODING_UTF8 )
149 //	{
150 		if ( anyByte < 127 )
151 			return isalnum( anyByte );
152 		else
153 			return 1;	// What else to do? The unicode set is huge...get the english ones right.
154 //	}
155 //	else
156 //	{
157 //		return isalnum( anyByte );
158 //	}
159 }
160 
161 
162 class TiXmlParsingData
163 {
164 	friend class TiXmlDocument;
165   public:
166 	void Stamp( const char* now, TiXmlEncoding encoding );
167 
Cursor()168 	const TiXmlCursor& Cursor()	{ return cursor; }
169 
170   private:
171 	// Only used by the document!
TiXmlParsingData(const char * start,int _tabsize,int row,int col)172 	TiXmlParsingData( const char* start, int _tabsize, int row, int col )
173 	{
174 		assert( start );
175 		stamp = start;
176 		tabsize = _tabsize;
177 		cursor.row = row;
178 		cursor.col = col;
179 	}
180 
181 	TiXmlCursor		cursor;
182 	const char*		stamp;
183 	int				tabsize;
184 };
185 
186 
Stamp(const char * now,TiXmlEncoding encoding)187 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
188 {
189 	assert( now );
190 
191 	// Do nothing if the tabsize is 0.
192 	if ( tabsize < 1 )
193 	{
194 		return;
195 	}
196 
197 	// Get the current row, column.
198 	int row = cursor.row;
199 	int col = cursor.col;
200 	const char* p = stamp;
201 	assert( p );
202 
203 	while ( p < now )
204 	{
205 		// Treat p as unsigned, so we have a happy compiler.
206 		const unsigned char* pU = (const unsigned char*)p;
207 
208 		// Code contributed by Fletcher Dunn: (modified by lee)
209 		switch (*pU) {
210 			case 0:
211 				// We *should* never get here, but in case we do, don't
212 				// advance past the terminating null character, ever
213 				return;
214 
215 			case '\r':
216 				// bump down to the next line
217 				++row;
218 				col = 0;
219 				// Eat the character
220 				++p;
221 
222 				// Check for \r\n sequence, and treat this as a single character
223 				if (*p == '\n') {
224 					++p;
225 				}
226 				break;
227 
228 			case '\n':
229 				// bump down to the next line
230 				++row;
231 				col = 0;
232 
233 				// Eat the character
234 				++p;
235 
236 				// Check for \n\r sequence, and treat this as a single
237 				// character.  (Yes, this bizarre thing does occur still
238 				// on some arcane platforms...)
239 				if (*p == '\r') {
240 					++p;
241 				}
242 				break;
243 
244 			case '\t':
245 				// Eat the character
246 				++p;
247 
248 				// Skip to next tab stop
249 				col = (col / tabsize + 1) * tabsize;
250 				break;
251 
252 			case TIXML_UTF_LEAD_0:
253 				if ( encoding == TIXML_ENCODING_UTF8 )
254 				{
255 					if ( *(p+1) && *(p+2) )
256 					{
257 						// In these cases, don't advance the column. These are
258 						// 0-width spaces.
259 						if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
260 							p += 3;
261 						else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
262 							p += 3;
263 						else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
264 							p += 3;
265 						else
266 							{ p +=3; ++col; }	// A normal character.
267 					}
268 				}
269 				else
270 				{
271 					++p;
272 					++col;
273 				}
274 				break;
275 
276 			default:
277 				if ( encoding == TIXML_ENCODING_UTF8 )
278 				{
279 					// Eat the 1 to 4 byte utf8 character.
280 					int step = TiXmlBase::utf8ByteTable[*((unsigned char*)p)];
281 					if ( step == 0 )
282 						step = 1;		// Error case from bad encoding, but handle gracefully.
283 					p += step;
284 
285 					// Just advance one column, of course.
286 					++col;
287 				}
288 				else
289 				{
290 					++p;
291 					++col;
292 				}
293 				break;
294 		}
295 	}
296 	cursor.row = row;
297 	cursor.col = col;
298 	assert( cursor.row >= -1 );
299 	assert( cursor.col >= -1 );
300 	stamp = p;
301 	assert( stamp );
302 }
303 
304 
SkipWhiteSpace(const char * p,TiXmlEncoding encoding)305 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
306 {
307 	if ( !p || !*p )
308 	{
309 		return 0;
310 	}
311 	if ( encoding == TIXML_ENCODING_UTF8 )
312 	{
313 		while ( *p )
314 		{
315 			const unsigned char* pU = (const unsigned char*)p;
316 
317 			// Skip the stupid Microsoft UTF-8 Byte order marks
318 			if (	*(pU+0)==TIXML_UTF_LEAD_0
319 				 && *(pU+1)==TIXML_UTF_LEAD_1
320 				 && *(pU+2)==TIXML_UTF_LEAD_2 )
321 			{
322 				p += 3;
323 				continue;
324 			}
325 			else if(*(pU+0)==TIXML_UTF_LEAD_0
326 				 && *(pU+1)==0xbfU
327 				 && *(pU+2)==0xbeU )
328 			{
329 				p += 3;
330 				continue;
331 			}
332 			else if(*(pU+0)==TIXML_UTF_LEAD_0
333 				 && *(pU+1)==0xbfU
334 				 && *(pU+2)==0xbfU )
335 			{
336 				p += 3;
337 				continue;
338 			}
339 
340 			if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )		// Still using old rules for white space.
341 				++p;
342 			else
343 				break;
344 		}
345 	}
346 	else
347 	{
348 		while ( *p && IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )
349 			++p;
350 	}
351 
352 	return p;
353 }
354 
355 #ifdef TIXML_USE_STL
StreamWhiteSpace(TIXML_ISTREAM * in,TIXML_STRING * tag)356 /*static*/ bool TiXmlBase::StreamWhiteSpace( TIXML_ISTREAM * in, TIXML_STRING * tag )
357 {
358 	for( ;; )
359 	{
360 		if ( !in->good() ) return false;
361 
362 		int c = in->peek();
363 		// At this scope, we can't get to a document. So fail silently.
364 		if ( !IsWhiteSpace( c ) || c <= 0 )
365 			return true;
366 
367 		*tag += (char) in->get();
368 	}
369 }
370 
StreamTo(TIXML_ISTREAM * in,int character,TIXML_STRING * tag)371 /*static*/ bool TiXmlBase::StreamTo( TIXML_ISTREAM * in, int character, TIXML_STRING * tag )
372 {
373 	//assert( character > 0 && character < 128 );	// else it won't work in utf-8
374 	while ( in->good() )
375 	{
376 		int c = in->peek();
377 		if ( c == character )
378 			return true;
379 		if ( c <= 0 )		// Silent failure: can't get document at this scope
380 			return false;
381 
382 		in->get();
383 		*tag += (char) c;
384 	}
385 	return false;
386 }
387 #endif
388 
ReadName(const char * p,TIXML_STRING * name,TiXmlEncoding encoding)389 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
390 {
391 	*name = "";
392 	assert( p );
393 
394 	// Names start with letters or underscores.
395 	// Of course, in unicode, tinyxml has no idea what a letter *is*. The
396 	// algorithm is generous.
397 	//
398 	// After that, they can be letters, underscores, numbers,
399 	// hyphens, or colons. (Colons are valid ony for namespaces,
400 	// but tinyxml can't tell namespaces from names.)
401 	if (    p && *p
402 		 && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
403 	{
404 		while(		p && *p
405 				&&	(		IsAlphaNum( (unsigned char ) *p, encoding )
406 						 || *p == '_'
407 						 || *p == '-'
408 						 || *p == '.'
409 						 || *p == ':' ) )
410 		{
411 			(*name) += *p;
412 			++p;
413 		}
414 		return p;
415 	}
416 	return 0;
417 }
418 
GetEntity(const char * p,char * value,int * length,TiXmlEncoding encoding)419 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
420 {
421 	// Presume an entity, and pull it out.
422     TIXML_STRING ent;
423 	int i;
424 	*length = 0;
425 
426 	if ( *(p+1) && *(p+1) == '#' && *(p+2) )
427 	{
428 		unsigned long ucs = 0;
429 		ptrdiff_t delta = 0;
430 		unsigned mult = 1;
431 
432 		if ( *(p+2) == 'x' )
433 		{
434 			// Hexadecimal.
435 			if ( !*(p+3) ) return 0;
436 
437 			const char* q = p+3;
438 			q = strchr( q, ';' );
439 
440 			if ( !q || !*q ) return 0;
441 
442 			delta = q-p;
443 			--q;
444 
445 			while ( *q != 'x' )
446 			{
447 				if ( *q >= '0' && *q <= '9' )
448 					ucs += mult * (*q - '0');
449 				else if ( *q >= 'a' && *q <= 'f' )
450 					ucs += mult * (*q - 'a' + 10);
451 				else if ( *q >= 'A' && *q <= 'F' )
452 					ucs += mult * (*q - 'A' + 10 );
453 				else
454 					return 0;
455 				mult *= 16;
456 				--q;
457 			}
458 		}
459 		else
460 		{
461 			// Decimal.
462 			if ( !*(p+2) ) return 0;
463 
464 			const char* q = p+2;
465 			q = strchr( q, ';' );
466 
467 			if ( !q || !*q ) return 0;
468 
469 			delta = q-p;
470 			--q;
471 
472 			while ( *q != '#' )
473 			{
474 				if ( *q >= '0' && *q <= '9' )
475 					ucs += mult * (*q - '0');
476 				else
477 					return 0;
478 				mult *= 10;
479 				--q;
480 			}
481 		}
482 		if ( encoding == TIXML_ENCODING_UTF8 )
483 		{
484 			// convert the UCS to UTF-8
485 			ConvertUTF32ToUTF8( ucs, value, length );
486 		}
487 		else
488 		{
489 			*value = (char)ucs;
490 			*length = 1;
491 		}
492 		return p + delta + 1;
493 	}
494 
495 	// Now try to match it.
496 	for( i=0; i<NUM_ENTITY; ++i )
497 	{
498 		if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
499 		{
500 			assert( strlen( entity[i].str ) == entity[i].strLength );
501 			*value = entity[i].chr;
502 			*length = 1;
503 			return ( p + entity[i].strLength );
504 		}
505 	}
506 
507 	// So it wasn't an entity, its unrecognized, or something like that.
508 	*value = *p;	// Don't put back the last one, since we return it!
509 	return p+1;
510 }
511 
512 
StringEqual(const char * p,const char * tag,bool ignoreCase,TiXmlEncoding encoding)513 bool TiXmlBase::StringEqual( const char* p,
514 							 const char* tag,
515 							 bool ignoreCase,
516 							 TiXmlEncoding encoding )
517 {
518 	assert( p );
519 	assert( tag );
520 	if ( !p || !*p )
521 	{
522 		assert( 0 );
523 		return false;
524 	}
525 
526 	const char* q = p;
527 
528 	if ( ignoreCase )
529 	{
530 		while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
531 		{
532 			++q;
533 			++tag;
534 		}
535 
536 		if ( *tag == 0 )
537 			return true;
538 	}
539 	else
540 	{
541 		while ( *q && *tag && *q == *tag )
542 		{
543 			++q;
544 			++tag;
545 		}
546 
547 		if ( *tag == 0 )		// Have we found the end of the tag, and everything equal?
548 			return true;
549 	}
550 	return false;
551 }
552 
ReadText(const char * p,TIXML_STRING * text,bool trimWhiteSpace,const char * endTag,bool caseInsensitive,TiXmlEncoding encoding)553 const char* TiXmlBase::ReadText(	const char* p,
554 									TIXML_STRING * text,
555 									bool trimWhiteSpace,
556 									const char* endTag,
557 									bool caseInsensitive,
558 									TiXmlEncoding encoding )
559 {
560     *text = "";
561 	if (    !trimWhiteSpace			// certain tags always keep whitespace
562 		 || !condenseWhiteSpace )	// if true, whitespace is always kept
563 	{
564 		// Keep all the white space.
565 		while (	   p && *p
566 				&& !StringEqual( p, endTag, caseInsensitive, encoding )
567 			  )
568 		{
569 			int len;
570 			char cArr[4] = { 0, 0, 0, 0 };
571 			p = GetChar( p, cArr, &len, encoding );
572 			text->append( cArr, len );
573 		}
574 	}
575 	else
576 	{
577 		bool whitespace = false;
578 
579 		// Remove leading white space:
580 		p = SkipWhiteSpace( p, encoding );
581 		while (	   p && *p
582 				&& !StringEqual( p, endTag, caseInsensitive, encoding ) )
583 		{
584 			if ( *p == '\r' || *p == '\n' )
585 			{
586 				whitespace = true;
587 				++p;
588 			}
589 			else if ( IsWhiteSpace( *p ) )
590 			{
591 				whitespace = true;
592 				++p;
593 			}
594 			else
595 			{
596 				// If we've found whitespace, add it before the
597 				// new character. Any whitespace just becomes a space.
598 				if ( whitespace )
599 				{
600 					(*text) += ' ';
601 					whitespace = false;
602 				}
603 				int len;
604 				char cArr[4] = { 0, 0, 0, 0 };
605 				p = GetChar( p, cArr, &len, encoding );
606 				if ( len == 1 )
607 					(*text) += cArr[0];	// more efficient
608 				else
609 					text->append( cArr, len );
610 			}
611 		}
612 	}
613 	return p + strlen( endTag );
614 }
615 
616 #ifdef TIXML_USE_STL
617 
StreamIn(TIXML_ISTREAM * in,TIXML_STRING * tag)618 void TiXmlDocument::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
619 {
620 	// The basic issue with a document is that we don't know what we're
621 	// streaming. Read something presumed to be a tag (and hope), then
622 	// identify it, and call the appropriate stream method on the tag.
623 	//
624 	// This "pre-streaming" will never read the closing ">" so the
625 	// sub-tag can orient itself.
626 
627 	if ( !StreamTo( in, '<', tag ) )
628 	{
629 		SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
630 		return;
631 	}
632 
633 	while ( in->good() )
634 	{
635 		int tagIndex = (int) tag->length();
636 		while ( in->good() && in->peek() != '>' )
637 		{
638 			int c = in->get();
639 			if ( c <= 0 )
640 			{
641 				SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
642 				break;
643 			}
644 			(*tag) += (char) c;
645 		}
646 
647 		if ( in->good() )
648 		{
649 			// We now have something we presume to be a node of
650 			// some sort. Identify it, and call the node to
651 			// continue streaming.
652 			TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
653 
654 			if ( node )
655 			{
656 				node->StreamIn( in, tag );
657 				bool isElement = node->ToElement() != 0;
658 				delete node;
659 				node = 0;
660 
661 				// If this is the root element, we're done. Parsing will be
662 				// done by the >> operator.
663 				if ( isElement )
664 				{
665 					return;
666 				}
667 			}
668 			else
669 			{
670 				SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
671 				return;
672 			}
673 		}
674 	}
675 	// We should have returned sooner.
676 	SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
677 }
678 
679 #endif
680 
Parse(const char * p,TiXmlParsingData * prevData,TiXmlEncoding encoding)681 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
682 {
683 	ClearError();
684 
685 	// Parse away, at the document level. Since a document
686 	// contains nothing but other tags, most of what happens
687 	// here is skipping white space.
688 	if ( !p || !*p )
689 	{
690 		SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
691 		return 0;
692 	}
693 
694 	// Note that, for a document, this needs to come
695 	// before the while space skip, so that parsing
696 	// starts from the pointer we are given.
697 	location.Clear();
698 	if ( prevData )
699 	{
700 		location.row = prevData->cursor.row;
701 		location.col = prevData->cursor.col;
702 	}
703 	else
704 	{
705 		location.row = 0;
706 		location.col = 0;
707 	}
708 	TiXmlParsingData data( p, TabSize(), location.row, location.col );
709 	location = data.Cursor();
710 
711 	if ( encoding == TIXML_ENCODING_UNKNOWN )
712 	{
713 		// Check for the Microsoft UTF-8 lead bytes.
714 		const unsigned char* pU = (const unsigned char*)p;
715 		if (	*(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
716 			 && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
717 			 && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
718 		{
719 			encoding = TIXML_ENCODING_UTF8;
720 			useMicrosoftBOM = true;
721 		}
722 	}
723 
724     p = SkipWhiteSpace( p, encoding );
725 	if ( !p )
726 	{
727 		SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
728 		return 0;
729 	}
730 
731 	while ( p && *p )
732 	{
733 		TiXmlNode* node = Identify( p, encoding );
734 		if ( node )
735 		{
736 			p = node->Parse( p, &data, encoding );
737 			LinkEndChild( node );
738 		}
739 		else
740 		{
741 			break;
742 		}
743 
744 		// Did we get encoding info?
745 		if (    encoding == TIXML_ENCODING_UNKNOWN
746 			 && node->ToDeclaration() )
747 		{
748 			TiXmlDeclaration* dec = node->ToDeclaration();
749 			const char* enc = dec->Encoding();
750 			assert( enc );
751 
752 			if ( *enc == 0 )
753 				encoding = TIXML_ENCODING_UTF8;
754 			else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
755 				encoding = TIXML_ENCODING_UTF8;
756 			else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
757 				encoding = TIXML_ENCODING_UTF8;	// incorrect, but be nice
758 			else
759 				encoding = TIXML_ENCODING_LEGACY;
760 		}
761 
762 		p = SkipWhiteSpace( p, encoding );
763 	}
764 
765 	// Was this empty?
766 	if ( !firstChild ) {
767 		SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
768 		return 0;
769 	}
770 
771 	// All is well.
772 	return p;
773 }
774 
SetError(int err,const char * pError,TiXmlParsingData * data,TiXmlEncoding encoding)775 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
776 {
777 	// The first error in a chain is more accurate - don't set again!
778 	if ( error )
779 		return;
780 
781 	assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
782 	error   = true;
783 	errorId = err;
784 	errorDesc = errorString[ errorId ];
785 
786 	errorLocation.Clear();
787 	if ( pError && data )
788 	{
789 		data->Stamp( pError, encoding );
790 		errorLocation = data->Cursor();
791 	}
792 }
793 
794 
Identify(const char * p,TiXmlEncoding encoding)795 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
796 {
797 	TiXmlNode* returnNode = 0;
798 
799 	p = SkipWhiteSpace( p, encoding );
800 	if( !p || !*p || *p != '<' )
801 	{
802 		return 0;
803 	}
804 
805 	TiXmlDocument* doc = GetDocument();
806 	p = SkipWhiteSpace( p, encoding );
807 
808 	if ( !p || !*p )
809 	{
810 		return 0;
811 	}
812 
813 	// What is this thing?
814 	// - Elements start with a letter or underscore, but xml is reserved.
815 	// - Comments: <!--
816 	// - Decleration: <?xml
817 	// - Everthing else is unknown to tinyxml.
818 	//
819 
820 	const char* xmlHeader = { "<?xml" };
821 	const char* commentHeader = { "<!--" };
822 	const char* dtdHeader = { "<!" };
823 	const char* cdataHeader = { "<![CDATA[" };
824 
825 	if ( StringEqual( p, xmlHeader, true, encoding ) )
826 	{
827 		#ifdef DEBUG_PARSER
828 			TIXML_LOG( "XML parsing Declaration\n" );
829 		#endif
830 		returnNode = new TiXmlDeclaration();
831 	}
832 	else if ( StringEqual( p, commentHeader, false, encoding ) )
833 	{
834 		#ifdef DEBUG_PARSER
835 			TIXML_LOG( "XML parsing Comment\n" );
836 		#endif
837 		returnNode = new TiXmlComment();
838 	}
839 	else if ( StringEqual( p, cdataHeader, false, encoding ) )
840 	{
841 		#ifdef DEBUG_PARSER
842 			TIXML_LOG( "XML parsing CDATA\n" );
843 		#endif
844 		TiXmlText* text = new TiXmlText( "" );
845 		text->SetCDATA( true );
846 		returnNode = text;
847 	}
848 	else if ( StringEqual( p, dtdHeader, false, encoding ) )
849 	{
850 		#ifdef DEBUG_PARSER
851 			TIXML_LOG( "XML parsing Unknown(1)\n" );
852 		#endif
853 		returnNode = new TiXmlUnknown();
854 	}
855 	else if (    IsAlpha( *(p+1), encoding )
856 			  || *(p+1) == '_' )
857 	{
858 		#ifdef DEBUG_PARSER
859 			TIXML_LOG( "XML parsing Element\n" );
860 		#endif
861 		returnNode = new TiXmlElement( "" );
862 	}
863 	else
864 	{
865 		#ifdef DEBUG_PARSER
866 			TIXML_LOG( "XML parsing Unknown(2)\n" );
867 		#endif
868 		returnNode = new TiXmlUnknown();
869 	}
870 
871 	if ( returnNode )
872 	{
873 		// Set the parent, so it can report errors
874 		returnNode->parent = this;
875 	}
876 	else
877 	{
878 		if ( doc )
879 			doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
880 	}
881 	return returnNode;
882 }
883 
884 #ifdef TIXML_USE_STL
885 
// Stream the remainder of this element from 'in', appending every character
// read to 'tag'. The caller has already placed the start of the element in
// 'tag'. Streaming ends after the element's closing tag, or straight after
// the opening tag when that tag ends in "/>". A character value <= 0 read
// from the stream records TIXML_ERROR_EMBEDDED_NULL on the document (if
// there is one) and stops.
void TiXmlElement::StreamIn (TIXML_ISTREAM * in, TIXML_STRING * tag)
{
	// We're called with some amount of pre-parsing. That is, some of "this"
	// element is in "tag". Go ahead and stream to the closing ">"
	while( in->good() )
	{
		int c = in->get();
		if ( c <= 0 )
		{
			TiXmlDocument* document = GetDocument();
			if ( document )
				document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
			return;
		}
		(*tag) += (char) c ;

		if ( c == '>' )
			break;
	}

	// Too short to inspect the last two characters safely.
	if ( tag->length() < 3 ) return;

	// Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
	// If not, identify and stream.

	if (    tag->at( tag->length() - 1 ) == '>'
		 && tag->at( tag->length() - 2 ) == '/' )
	{
		// All good!
		return;
	}
	else if ( tag->at( tag->length() - 1 ) == '>' )
	{
		// There is more. Could be:
		//		text
		//		closing tag
		//		another node.
		for ( ;; )
		{
			StreamWhiteSpace( in, tag );

			// Do we have text?
			if ( in->good() && in->peek() != '<' )
			{
				// Yep, text.
				// The temporary text node is only used to drive the
				// streaming; the characters themselves end up in 'tag'.
				TiXmlText text( "" );
				text.StreamIn( in, tag );

				// What follows text is a closing tag or another node.
				// Go around again and figure it out.
				continue;
			}

			// We now have either a closing tag...or another node.
			// We should be at a "<", regardless.
			if ( !in->good() ) return;
			assert( in->peek() == '<' );
			// Offset in 'tag' where this next tag's text begins.
			int tagIndex = (int) tag->length();

			bool closingTag = false;
			bool firstCharFound = false;

			// Copy characters up to, but not including, the next '>'.
			for( ;; )
			{
				if ( !in->good() )
					return;

				int c = in->peek();
				if ( c <= 0 )
				{
					TiXmlDocument* document = GetDocument();
					if ( document )
						document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
					return;
				}

				if ( c == '>' )
					break;

				*tag += (char) c;
				in->get();

				// The first character that is neither '<' nor white space
				// decides the matter: a '/' means this is a closing tag.
				if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
				{
					firstCharFound = true;
					if ( c == '/' )
						closingTag = true;
				}
			}
			// If it was a closing tag, then read in the closing '>' to clean up the input stream.
			// If it was not, the streaming will be done by the tag.
			if ( closingTag )
			{
				if ( !in->good() )
					return;

				int c = in->get();
				if ( c <= 0 )
				{
					TiXmlDocument* document = GetDocument();
					if ( document )
						document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
					return;
				}
				assert( c == '>' );
				*tag += (char) c;

				// We are done, once we've found our closing tag.
				return;
			}
			else
			{
				// If not a closing tag, id it, and stream.
				const char* tagloc = tag->c_str() + tagIndex;
				TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
				if ( !node )
					return;
				// The node exists only to stream its own text into 'tag';
				// it is discarded straight afterwards.
				node->StreamIn( in, tag );
				delete node;
				node = 0;

				// No return: go around from the beginning: text, closing tag, or node.
			}
		}
	}
}
1012 #endif
1013 
Parse(const char * p,TiXmlParsingData * data,TiXmlEncoding encoding)1014 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1015 {
1016 	p = SkipWhiteSpace( p, encoding );
1017 	TiXmlDocument* document = GetDocument();
1018 
1019 	if ( !p || !*p )
1020 	{
1021 		if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
1022 		return 0;
1023 	}
1024 
1025 	if ( data )
1026 	{
1027 		data->Stamp( p, encoding );
1028 		location = data->Cursor();
1029 	}
1030 
1031 	if ( *p != '<' )
1032 	{
1033 		if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
1034 		return 0;
1035 	}
1036 
1037 	p = SkipWhiteSpace( p+1, encoding );
1038 
1039 	// Read the name.
1040 	const char* pErr = p;
1041 
1042     p = ReadName( p, &value, encoding );
1043 	if ( !p || !*p )
1044 	{
1045 		if ( document )	document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
1046 		return 0;
1047 	}
1048 
1049     TIXML_STRING endTag ("</");
1050 	endTag += value;
1051 	endTag += ">";
1052 
1053 	// Check for and read attributes. Also look for an empty
1054 	// tag or an end tag.
1055 	while ( p && *p )
1056 	{
1057 		pErr = p;
1058 		p = SkipWhiteSpace( p, encoding );
1059 		if ( !p || !*p )
1060 		{
1061 			if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1062 			return 0;
1063 		}
1064 		if ( *p == '/' )
1065 		{
1066 			++p;
1067 			// Empty tag.
1068 			if ( *p  != '>' )
1069 			{
1070 				if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
1071 				return 0;
1072 			}
1073 			return (p+1);
1074 		}
1075 		else if ( *p == '>' )
1076 		{
1077 			// Done with attributes (if there were any.)
1078 			// Read the value -- which can include other
1079 			// elements -- read the end tag, and return.
1080 			++p;
1081 			p = ReadValue( p, data, encoding );		// Note this is an Element method, and will set the error if one happens.
1082 			if ( !p || !*p )
1083 				return 0;
1084 
1085 			// We should find the end tag now
1086 			if ( StringEqual( p, endTag.c_str(), false, encoding ) )
1087 			{
1088 				p += endTag.length();
1089 				return p;
1090 			}
1091 			else
1092 			{
1093 				if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1094 				return 0;
1095 			}
1096 		}
1097 		else
1098 		{
1099 			// Try to read an attribute:
1100 			TiXmlAttribute* attrib = new TiXmlAttribute();
1101 			if ( !attrib )
1102 			{
1103 				if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
1104 				return 0;
1105 			}
1106 
1107 			attrib->SetDocument( document );
1108 			const char* pErr = p;
1109 			p = attrib->Parse( p, data, encoding );
1110 
1111 			if ( !p || !*p )
1112 			{
1113 				if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
1114 				delete attrib;
1115 				return 0;
1116 			}
1117 
1118 			// Handle the strange case of double attributes:
1119 			TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
1120 			if ( node )
1121 			{
1122 				node->SetValue( attrib->Value() );
1123 				delete attrib;
1124 				return 0;
1125 			}
1126 
1127 			attributeSet.Add( attrib );
1128 		}
1129 	}
1130 	return p;
1131 }
1132 
1133 
ReadValue(const char * p,TiXmlParsingData * data,TiXmlEncoding encoding)1134 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1135 {
1136 	TiXmlDocument* document = GetDocument();
1137 
1138 	// Read in text and elements in any order.
1139 	const char* pWithWhiteSpace = p;
1140 	p = SkipWhiteSpace( p, encoding );
1141 
1142 	while ( p && *p )
1143 	{
1144 		if ( *p != '<' )
1145 		{
1146 			// Take what we have, make a text element.
1147 			TiXmlText* textNode = new TiXmlText( "" );
1148 
1149 			if ( !textNode )
1150 			{
1151 				if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
1152 				    return 0;
1153 			}
1154 
1155 			if ( TiXmlBase::IsWhiteSpaceCondensed() )
1156 			{
1157 				p = textNode->Parse( p, data, encoding );
1158 			}
1159 			else
1160 			{
1161 				// Special case: we want to keep the white space
1162 				// so that leading spaces aren't removed.
1163 				p = textNode->Parse( pWithWhiteSpace, data, encoding );
1164 			}
1165 
1166 			if ( !textNode->Blank() )
1167 				LinkEndChild( textNode );
1168 			else
1169 				delete textNode;
1170 		}
1171 		else
1172 		{
1173 			// We hit a '<'
1174 			// Have we hit a new element or an end tag? This could also be
1175 			// a TiXmlText in the "CDATA" style.
1176 			if ( StringEqual( p, "</", false, encoding ) )
1177 			{
1178 				return p;
1179 			}
1180 			else
1181 			{
1182 				TiXmlNode* node = Identify( p, encoding );
1183 				if ( node )
1184 				{
1185 					p = node->Parse( p, data, encoding );
1186 					LinkEndChild( node );
1187 				}
1188 				else
1189 				{
1190 					return 0;
1191 				}
1192 			}
1193 		}
1194 		pWithWhiteSpace = p;
1195 		p = SkipWhiteSpace( p, encoding );
1196 	}
1197 
1198 	if ( !p )
1199 	{
1200 		if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
1201 	}
1202 	return p;
1203 }
1204 
1205 
1206 #ifdef TIXML_USE_STL
StreamIn(TIXML_ISTREAM * in,TIXML_STRING * tag)1207 void TiXmlUnknown::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
1208 {
1209 	while ( in->good() )
1210 	{
1211 		int c = in->get();
1212 		if ( c <= 0 )
1213 		{
1214 			TiXmlDocument* document = GetDocument();
1215 			if ( document )
1216 				document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1217 			return;
1218 		}
1219 		(*tag) += (char) c;
1220 
1221 		if ( c == '>' )
1222 		{
1223 			// All is well.
1224 			return;
1225 		}
1226 	}
1227 }
1228 #endif
1229 
1230 
Parse(const char * p,TiXmlParsingData * data,TiXmlEncoding encoding)1231 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1232 {
1233 	TiXmlDocument* document = GetDocument();
1234 	p = SkipWhiteSpace( p, encoding );
1235 
1236 	if ( data )
1237 	{
1238 		data->Stamp( p, encoding );
1239 		location = data->Cursor();
1240 	}
1241 	if ( !p || !*p || *p != '<' )
1242 	{
1243 		if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
1244 		return 0;
1245 	}
1246 	++p;
1247     value = "";
1248 
1249 	while ( p && *p && *p != '>' )
1250 	{
1251 		value += *p;
1252 		++p;
1253 	}
1254 
1255 	if ( !p )
1256 	{
1257 		if ( document )	document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
1258 	}
1259 	if ( *p == '>' )
1260 		return p+1;
1261 	return p;
1262 }
1263 
1264 #ifdef TIXML_USE_STL
StreamIn(TIXML_ISTREAM * in,TIXML_STRING * tag)1265 void TiXmlComment::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
1266 {
1267 	while ( in->good() )
1268 	{
1269 		int c = in->get();
1270 		if ( c <= 0 )
1271 		{
1272 			TiXmlDocument* document = GetDocument();
1273 			if ( document )
1274 				document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1275 			return;
1276 		}
1277 
1278 		(*tag) += (char) c;
1279 
1280 		if ( c == '>'
1281 			 && tag->at( tag->length() - 2 ) == '-'
1282 			 && tag->at( tag->length() - 3 ) == '-' )
1283 		{
1284 			// All is well.
1285 			return;
1286 		}
1287 	}
1288 }
1289 #endif
1290 
1291 
Parse(const char * p,TiXmlParsingData * data,TiXmlEncoding encoding)1292 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1293 {
1294 	TiXmlDocument* document = GetDocument();
1295 	value = "";
1296 
1297 	p = SkipWhiteSpace( p, encoding );
1298 
1299 	if ( data )
1300 	{
1301 		data->Stamp( p, encoding );
1302 		location = data->Cursor();
1303 	}
1304 	const char* startTag = "<!--";
1305 	const char* endTag   = "-->";
1306 
1307 	if ( !StringEqual( p, startTag, false, encoding ) )
1308 	{
1309 		document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
1310 		return 0;
1311 	}
1312 	p += strlen( startTag );
1313 	p = ReadText( p, &value, false, endTag, false, encoding );
1314 	return p;
1315 }
1316 
1317 
Parse(const char * p,TiXmlParsingData * data,TiXmlEncoding encoding)1318 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1319 {
1320 	p = SkipWhiteSpace( p, encoding );
1321 	if ( !p || !*p ) return 0;
1322 
1323 	int tabsize = 4;
1324 	if ( document )
1325 		tabsize = document->TabSize();
1326 
1327 	if ( data )
1328 	{
1329 		data->Stamp( p, encoding );
1330 		location = data->Cursor();
1331 	}
1332 	// Read the name, the '=' and the value.
1333 	const char* pErr = p;
1334 	p = ReadName( p, &name, encoding );
1335 	if ( !p || !*p )
1336 	{
1337 		if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1338 		return 0;
1339 	}
1340 	p = SkipWhiteSpace( p, encoding );
1341 	if ( !p || !*p || *p != '=' )
1342 	{
1343 		if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1344 		return 0;
1345 	}
1346 
1347 	++p;	// skip '='
1348 	p = SkipWhiteSpace( p, encoding );
1349 	if ( !p || !*p )
1350 	{
1351 		if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1352 		return 0;
1353 	}
1354 
1355 	const char* end;
1356 
1357 	if ( *p == '\'' )
1358 	{
1359 		++p;
1360 		end = "\'";
1361 		p = ReadText( p, &value, false, end, false, encoding );
1362 	}
1363 	else if ( *p == '"' )
1364 	{
1365 		++p;
1366 		end = "\"";
1367 		p = ReadText( p, &value, false, end, false, encoding );
1368 	}
1369 	else
1370 	{
1371 		// All attribute values should be in single or double quotes.
1372 		// But this is such a common error that the parser will try
1373 		// its best, even without them.
1374 		value = "";
1375 		while (    p && *p										// existence
1376 				&& !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r'	// whitespace
1377 				&& *p != '/' && *p != '>' )						// tag end
1378 		{
1379 			value += *p;
1380 			++p;
1381 		}
1382 	}
1383 	return p;
1384 }
1385 
1386 #ifdef TIXML_USE_STL
StreamIn(TIXML_ISTREAM * in,TIXML_STRING * tag)1387 void TiXmlText::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
1388 {
1389 	if ( cdata )
1390 	{
1391 		int c = in->get();
1392 		if ( c <= 0 )
1393 		{
1394 			TiXmlDocument* document = GetDocument();
1395 			if ( document )
1396 				document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1397 			return;
1398 		}
1399 
1400 		(*tag) += (char) c;
1401 
1402 		if ( c == '>'
1403 			 && tag->at( tag->length() - 2 ) == ']'
1404 			 && tag->at( tag->length() - 3 ) == ']' )
1405 		{
1406 			// All is well.
1407 			return;
1408 		}
1409 	}
1410 	else
1411 	{
1412 		while ( in->good() )
1413 		{
1414 			int c = in->peek();
1415 			if ( c == '<' )
1416 				return;
1417 			if ( c <= 0 )
1418 			{
1419 				TiXmlDocument* document = GetDocument();
1420 				if ( document )
1421 					document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1422 				return;
1423 			}
1424 
1425 			(*tag) += (char) c;
1426 			in->get();
1427 		}
1428 	}
1429 }
1430 #endif
1431 
Parse(const char * p,TiXmlParsingData * data,TiXmlEncoding encoding)1432 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1433 {
1434 	value = "";
1435 	TiXmlDocument* document = GetDocument();
1436 
1437 	if ( data )
1438 	{
1439 		data->Stamp( p, encoding );
1440 		location = data->Cursor();
1441 	}
1442 
1443 	const char* const startTag = "<![CDATA[";
1444 	const char* const endTag   = "]]>";
1445 
1446 	if ( cdata || StringEqual( p, startTag, false, encoding ) )
1447 	{
1448 		cdata = true;
1449 
1450 		if ( !StringEqual( p, startTag, false, encoding ) )
1451 		{
1452 			document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
1453 			return 0;
1454 		}
1455 		p += strlen( startTag );
1456 
1457 		// Keep all the white space, ignore the encoding, etc.
1458 		while (	   p && *p
1459 				&& !StringEqual( p, endTag, false, encoding )
1460 			  )
1461 		{
1462 			value += *p;
1463 			++p;
1464 		}
1465 
1466 		TIXML_STRING dummy;
1467 		p = ReadText( p, &dummy, false, endTag, false, encoding );
1468 		return p;
1469 	}
1470 	else
1471 	{
1472 		bool ignoreWhite = true;
1473 
1474 		const char* end = "<";
1475 		p = ReadText( p, &value, ignoreWhite, end, false, encoding );
1476 		if ( p )
1477 			return p-1;	// don't truncate the '<'
1478 		return 0;
1479 	}
1480 }
1481 
1482 #ifdef TIXML_USE_STL
StreamIn(TIXML_ISTREAM * in,TIXML_STRING * tag)1483 void TiXmlDeclaration::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
1484 {
1485 	while ( in->good() )
1486 	{
1487 		int c = in->get();
1488 		if ( c <= 0 )
1489 		{
1490 			TiXmlDocument* document = GetDocument();
1491 			if ( document )
1492 				document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1493 			return;
1494 		}
1495 		(*tag) += (char) c;
1496 
1497 		if ( c == '>' )
1498 		{
1499 			// All is well.
1500 			return;
1501 		}
1502 	}
1503 }
1504 #endif
1505 
Parse(const char * p,TiXmlParsingData * data,TiXmlEncoding _encoding)1506 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
1507 {
1508 	p = SkipWhiteSpace( p, _encoding );
1509 	// Find the beginning, find the end, and look for
1510 	// the stuff in-between.
1511 	TiXmlDocument* document = GetDocument();
1512 	if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
1513 	{
1514 		if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
1515 		return 0;
1516 	}
1517 	if ( data )
1518 	{
1519 		data->Stamp( p, _encoding );
1520 		location = data->Cursor();
1521 	}
1522 	p += 5;
1523 
1524 	version = "";
1525 	encoding = "";
1526 	standalone = "";
1527 
1528 	while ( p && *p )
1529 	{
1530 		if ( *p == '>' )
1531 		{
1532 			++p;
1533 			return p;
1534 		}
1535 
1536 		p = SkipWhiteSpace( p, _encoding );
1537 		if ( StringEqual( p, "version", true, _encoding ) )
1538 		{
1539 			TiXmlAttribute attrib;
1540 			p = attrib.Parse( p, data, _encoding );
1541 			version = attrib.Value();
1542 		}
1543 		else if ( StringEqual( p, "encoding", true, _encoding ) )
1544 		{
1545 			TiXmlAttribute attrib;
1546 			p = attrib.Parse( p, data, _encoding );
1547 			encoding = attrib.Value();
1548 		}
1549 		else if ( StringEqual( p, "standalone", true, _encoding ) )
1550 		{
1551 			TiXmlAttribute attrib;
1552 			p = attrib.Parse( p, data, _encoding );
1553 			standalone = attrib.Value();
1554 		}
1555 		else
1556 		{
1557 			// Read over whatever it is.
1558 			while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
1559 				++p;
1560 		}
1561 	}
1562 	return 0;
1563 }
1564 
Blank() const1565 bool TiXmlText::Blank() const
1566 {
1567 	for ( unsigned i=0; i<value.length(); i++ )
1568 		if ( !IsWhiteSpace( value[i] ) )
1569 			return false;
1570 	return true;
1571 }
1572 
1573