1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "xml_int.h"
8 
9 #include "core/include/fxcrt/fx_ext.h"
10 #include "core/include/fxcrt/fx_xml.h"
11 
~CXML_Parser()12 CXML_Parser::~CXML_Parser() {
13   if (m_bOwnedStream) {
14     m_pDataAcc->Release();
15   }
16 }
Init(uint8_t * pBuffer,size_t size)17 FX_BOOL CXML_Parser::Init(uint8_t* pBuffer, size_t size) {
18   m_pDataAcc = new CXML_DataBufAcc(pBuffer, size);
19   return Init(TRUE);
20 }
Init(IFX_FileRead * pFileRead)21 FX_BOOL CXML_Parser::Init(IFX_FileRead* pFileRead) {
22   m_pDataAcc = new CXML_DataStmAcc(pFileRead);
23   return Init(TRUE);
24 }
Init(IFX_BufferRead * pBuffer)25 FX_BOOL CXML_Parser::Init(IFX_BufferRead* pBuffer) {
26   if (!pBuffer) {
27     return FALSE;
28   }
29   m_pDataAcc = pBuffer;
30   return Init(FALSE);
31 }
Init(FX_BOOL bOwndedStream)32 FX_BOOL CXML_Parser::Init(FX_BOOL bOwndedStream) {
33   m_bOwnedStream = bOwndedStream;
34   m_nOffset = 0;
35   return ReadNextBlock();
36 }
ReadNextBlock()37 FX_BOOL CXML_Parser::ReadNextBlock() {
38   if (!m_pDataAcc->ReadNextBlock()) {
39     return FALSE;
40   }
41   m_pBuffer = m_pDataAcc->GetBlockBuffer();
42   m_dwBufferSize = m_pDataAcc->GetBlockSize();
43   m_nBufferOffset = m_pDataAcc->GetBlockOffset();
44   m_dwIndex = 0;
45   return m_dwBufferSize > 0;
46 }
IsEOF()47 FX_BOOL CXML_Parser::IsEOF() {
48   if (!m_pDataAcc->IsEOF()) {
49     return FALSE;
50   }
51   return m_dwIndex >= m_dwBufferSize;
52 }
// Bit flags stored per byte value in g_FXCRT_XML_ByteTypes.
#define FXCRTM_XML_CHARTYPE_Normal 0x00
#define FXCRTM_XML_CHARTYPE_SpaceChar 0x01
#define FXCRTM_XML_CHARTYPE_Letter 0x02
#define FXCRTM_XML_CHARTYPE_Digital 0x04
#define FXCRTM_XML_CHARTYPE_NameIntro 0x08
#define FXCRTM_XML_CHARTYPE_NameChar 0x10
// The two bits covered by FXCRTM_XML_CHARTYPE_HexChar form a small field:
// 0x20 = decimal digit, 0x40 = lower-case a-f, 0x60 = upper-case A-F.
#define FXCRTM_XML_CHARTYPE_HexDigital 0x20
#define FXCRTM_XML_CHARTYPE_HexLowerLetter 0x40
#define FXCRTM_XML_CHARTYPE_HexUpperLetter 0x60
#define FXCRTM_XML_CHARTYPE_HexChar 0x60

// Classification flags for every possible byte value, indexed by the byte.
// Laid out eight entries per row; the comment on each row gives the index of
// its first entry.
uint8_t g_FXCRT_XML_ByteTypes[256] = {
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0x00
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0x08
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0x10
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,  // 0x18
    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x20  ' ' ...
    0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x10, 0x00,  // 0x28  ... '-' '.' '/'
    0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34,  // 0x30  '0'-'7'
    0x34, 0x34, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x38  '8' '9' ':' ...
    0x00, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x1A,  // 0x40  '@' 'A'-'F' 'G'
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0x48  'H'-'O'
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0x50  'P'-'W'
    0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x18,  // 0x58  'X'-'Z' ... '_'
    0x00, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x1A,  // 0x60  '`' 'a'-'f' 'g'
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0x68  'h'-'o'
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0x70  'p'-'w'
    0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x00,  // 0x78  'x'-'z' ...
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0x80
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0x88
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0x90
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0x98
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0xA0
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0xA8
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0xB0
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0xB8
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0xC0
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0xC8
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0xD0
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0xD8
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0xE0
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0xE8
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A,  // 0xF0
    0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x01, 0x01,  // 0xF8
};
g_FXCRT_XML_IsWhiteSpace(uint8_t ch)87 FX_BOOL g_FXCRT_XML_IsWhiteSpace(uint8_t ch) {
88   return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_SpaceChar) != 0;
89 }
g_FXCRT_XML_IsLetter(uint8_t ch)90 FX_BOOL g_FXCRT_XML_IsLetter(uint8_t ch) {
91   return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_Letter) != 0;
92 }
g_FXCRT_XML_IsDigital(uint8_t ch)93 FX_BOOL g_FXCRT_XML_IsDigital(uint8_t ch) {
94   return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_Digital) != 0;
95 }
g_FXCRT_XML_IsNameIntro(uint8_t ch)96 FX_BOOL g_FXCRT_XML_IsNameIntro(uint8_t ch) {
97   return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameIntro) != 0;
98 }
g_FXCRT_XML_IsNameChar(uint8_t ch)99 FX_BOOL g_FXCRT_XML_IsNameChar(uint8_t ch) {
100   return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameChar) != 0;
101 }
g_FXCRT_XML_IsHexChar(uint8_t ch)102 FX_BOOL g_FXCRT_XML_IsHexChar(uint8_t ch) {
103   return (g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_HexChar) != 0;
104 }
SkipWhiteSpaces()105 void CXML_Parser::SkipWhiteSpaces() {
106   m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
107   if (IsEOF()) {
108     return;
109   }
110   do {
111     while (m_dwIndex < m_dwBufferSize &&
112            g_FXCRT_XML_IsWhiteSpace(m_pBuffer[m_dwIndex])) {
113       m_dwIndex++;
114     }
115     m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
116     if (m_dwIndex < m_dwBufferSize || IsEOF()) {
117       break;
118     }
119   } while (ReadNextBlock());
120 }
GetName(CFX_ByteString & space,CFX_ByteString & name)121 void CXML_Parser::GetName(CFX_ByteString& space, CFX_ByteString& name) {
122   m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
123   if (IsEOF()) {
124     return;
125   }
126   CFX_ByteTextBuf buf;
127   uint8_t ch;
128   do {
129     while (m_dwIndex < m_dwBufferSize) {
130       ch = m_pBuffer[m_dwIndex];
131       if (ch == ':') {
132         space = buf.GetByteString();
133         buf.Clear();
134       } else if (g_FXCRT_XML_IsNameChar(ch)) {
135         buf.AppendChar(ch);
136       } else {
137         break;
138       }
139       m_dwIndex++;
140     }
141     m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
142     if (m_dwIndex < m_dwBufferSize || IsEOF()) {
143       break;
144     }
145   } while (ReadNextBlock());
146   name = buf.GetByteString();
147 }
SkipLiterals(const CFX_ByteStringC & str)148 void CXML_Parser::SkipLiterals(const CFX_ByteStringC& str) {
149   m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
150   if (IsEOF()) {
151     return;
152   }
153   int32_t i = 0, iLen = str.GetLength();
154   do {
155     while (m_dwIndex < m_dwBufferSize) {
156       if (str.GetAt(i) != m_pBuffer[m_dwIndex++]) {
157         i = 0;
158       } else {
159         i++;
160         if (i == iLen) {
161           break;
162         }
163       }
164     }
165     m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
166     if (i == iLen) {
167       return;
168     }
169     if (m_dwIndex < m_dwBufferSize || IsEOF()) {
170       break;
171     }
172   } while (ReadNextBlock());
173   while (!m_pDataAcc->IsEOF()) {
174     ReadNextBlock();
175     m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwBufferSize;
176   }
177   m_dwIndex = m_dwBufferSize;
178 }
GetCharRef()179 FX_DWORD CXML_Parser::GetCharRef() {
180   m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
181   if (IsEOF()) {
182     return 0;
183   }
184   uint8_t ch;
185   int32_t iState = 0;
186   CFX_ByteTextBuf buf;
187   FX_DWORD code = 0;
188   do {
189     while (m_dwIndex < m_dwBufferSize) {
190       ch = m_pBuffer[m_dwIndex];
191       switch (iState) {
192         case 0:
193           if (ch == '#') {
194             m_dwIndex++;
195             iState = 2;
196             break;
197           }
198           iState = 1;
199         case 1:
200           m_dwIndex++;
201           if (ch == ';') {
202             CFX_ByteStringC ref = buf.GetByteString();
203             if (ref == "gt") {
204               code = '>';
205             } else if (ref == "lt") {
206               code = '<';
207             } else if (ref == "amp") {
208               code = '&';
209             } else if (ref == "apos") {
210               code = '\'';
211             } else if (ref == "quot") {
212               code = '"';
213             }
214             iState = 10;
215             break;
216           }
217           buf.AppendByte(ch);
218           break;
219         case 2:
220           if (ch == 'x') {
221             m_dwIndex++;
222             iState = 4;
223             break;
224           }
225           iState = 3;
226         case 3:
227           m_dwIndex++;
228           if (ch == ';') {
229             iState = 10;
230             break;
231           }
232           if (g_FXCRT_XML_IsDigital(ch))
233             code = code * 10 + FXSYS_toDecimalDigit(ch);
234           break;
235         case 4:
236           m_dwIndex++;
237           if (ch == ';') {
238             iState = 10;
239             break;
240           }
241           uint8_t nHex =
242               g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_HexChar;
243           if (nHex) {
244             if (nHex == FXCRTM_XML_CHARTYPE_HexDigital) {
245               code = (code << 4) + FXSYS_toDecimalDigit(ch);
246             } else if (nHex == FXCRTM_XML_CHARTYPE_HexLowerLetter) {
247               code = (code << 4) + ch - 87;
248             } else {
249               code = (code << 4) + ch - 55;
250             }
251           }
252           break;
253       }
254       if (iState == 10) {
255         break;
256       }
257     }
258     m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
259     if (iState == 10 || m_dwIndex < m_dwBufferSize || IsEOF()) {
260       break;
261     }
262   } while (ReadNextBlock());
263   return code;
264 }
GetAttrValue(CFX_WideString & value)265 void CXML_Parser::GetAttrValue(CFX_WideString& value) {
266   m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
267   if (IsEOF()) {
268     return;
269   }
270   CFX_UTF8Decoder decoder;
271   uint8_t mark = 0, ch = 0;
272   do {
273     while (m_dwIndex < m_dwBufferSize) {
274       ch = m_pBuffer[m_dwIndex];
275       if (mark == 0) {
276         if (ch != '\'' && ch != '"') {
277           return;
278         }
279         mark = ch;
280         m_dwIndex++;
281         ch = 0;
282         continue;
283       }
284       m_dwIndex++;
285       if (ch == mark) {
286         break;
287       }
288       if (ch == '&') {
289         decoder.AppendChar(GetCharRef());
290         if (IsEOF()) {
291           value = decoder.GetResult();
292           return;
293         }
294       } else {
295         decoder.Input(ch);
296       }
297     }
298     m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
299     if (ch == mark || m_dwIndex < m_dwBufferSize || IsEOF()) {
300       break;
301     }
302   } while (ReadNextBlock());
303   value = decoder.GetResult();
304 }
GetTagName(CFX_ByteString & space,CFX_ByteString & name,FX_BOOL & bEndTag,FX_BOOL bStartTag)305 void CXML_Parser::GetTagName(CFX_ByteString& space,
306                              CFX_ByteString& name,
307                              FX_BOOL& bEndTag,
308                              FX_BOOL bStartTag) {
309   m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
310   if (IsEOF()) {
311     return;
312   }
313   bEndTag = FALSE;
314   uint8_t ch;
315   int32_t iState = bStartTag ? 1 : 0;
316   do {
317     while (m_dwIndex < m_dwBufferSize) {
318       ch = m_pBuffer[m_dwIndex];
319       switch (iState) {
320         case 0:
321           m_dwIndex++;
322           if (ch != '<') {
323             break;
324           }
325           iState = 1;
326           break;
327         case 1:
328           if (ch == '?') {
329             m_dwIndex++;
330             SkipLiterals("?>");
331             iState = 0;
332             break;
333           } else if (ch == '!') {
334             m_dwIndex++;
335             SkipLiterals("-->");
336             iState = 0;
337             break;
338           }
339           if (ch == '/') {
340             m_dwIndex++;
341             GetName(space, name);
342             bEndTag = TRUE;
343           } else {
344             GetName(space, name);
345             bEndTag = FALSE;
346           }
347           return;
348       }
349     }
350     m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
351     if (m_dwIndex < m_dwBufferSize || IsEOF()) {
352       break;
353     }
354   } while (ReadNextBlock());
355 }
// Parses one element at the current read position and returns it as a newly
// allocated CXML_Element whose m_pParent is |pParent|. Child elements are
// parsed by recursion.
//
// |bStartTag| is forwarded to GetTagName(); the recursive call below passes
// TRUE because the '<' of the child tag has already been consumed.
//
// Returns NULL when: the input is at end of data on entry; GetTagName()
// yields no name or reports an end tag; or the byte following the attribute
// list is neither '/' nor '>' (the partly built element is deleted).
// If the data ends right after the attributes or right after '>', the element
// built so far is returned.
ParseElement(CXML_Element * pParent,FX_BOOL bStartTag)356 CXML_Element* CXML_Parser::ParseElement(CXML_Element* pParent,
357                                         FX_BOOL bStartTag) {
358   m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
359   if (IsEOF()) {
360     return NULL;
361   }
362   CFX_ByteString tag_name, tag_space;
363   FX_BOOL bEndTag;
364   GetTagName(tag_space, tag_name, bEndTag, bStartTag);
      // tag_name is tested first, so bEndTag is only read when GetTagName()
      // actually produced a name.
365   if (tag_name.IsEmpty() || bEndTag) {
366     return NULL;
367   }
368   CXML_Element* pElement = new CXML_Element;
369   pElement->m_pParent = pParent;
370   pElement->SetTag(tag_space, tag_name);
      // ---- Attribute list: repeated  name [ws] '=' [ws] quoted-value ----
371   do {
372     CFX_ByteString attr_space, attr_name;
373     while (m_dwIndex < m_dwBufferSize) {
374       SkipWhiteSpaces();
375       if (IsEOF()) {
376         break;
377       }
          // Any byte that cannot start a name ends the attribute list.
378       if (!g_FXCRT_XML_IsNameIntro(m_pBuffer[m_dwIndex])) {
379         break;
380       }
381       GetName(attr_space, attr_name);
382       SkipWhiteSpaces();
383       if (IsEOF()) {
384         break;
385       }
386       if (m_pBuffer[m_dwIndex] != '=') {
387         break;
388       }
389       m_dwIndex++;
390       SkipWhiteSpaces();
391       if (IsEOF()) {
392         break;
393       }
394       CFX_WideString attr_value;
395       GetAttrValue(attr_value);
396       pElement->m_AttrMap.SetAt(attr_space, attr_name, attr_value);
397     }
398     m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
399     if (m_dwIndex < m_dwBufferSize || IsEOF()) {
400       break;
401     }
402   } while (ReadNextBlock());
403   SkipWhiteSpaces();
404   if (IsEOF()) {
405     return pElement;
406   }
      // ---- End of the start tag: "/>" (no content) or ">" ----
407   uint8_t ch = m_pBuffer[m_dwIndex++];
408   if (ch == '/') {
        // Steps over the byte after '/' (presumably '>') without inspecting
        // it.
409     m_dwIndex++;
410     m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
411     return pElement;
412   }
413   if (ch != '>') {
414     m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
415     delete pElement;
416     return NULL;
417   }
418   SkipWhiteSpaces();
419   if (IsEOF()) {
420     return pElement;
421   }
      // ---- Element content ----
      // iState:  0 = reading text
      //          1 = just consumed '<'
      //          2 = just consumed "<!"
      //         10 = this element's end tag has been consumed
      // bCDATA is only ever assigned FALSE in this function.
422   CFX_UTF8Decoder decoder;
423   CFX_WideTextBuf content;
424   FX_BOOL bCDATA = FALSE;
425   int32_t iState = 0;
426   do {
427     while (m_dwIndex < m_dwBufferSize) {
428       ch = m_pBuffer[m_dwIndex++];
429       switch (iState) {
430         case 0:
431           if (ch == '<') {
432             iState = 1;
433           } else if (ch == '&') {
434             decoder.ClearStatus();
435             decoder.AppendChar(GetCharRef());
436           } else {
437             decoder.Input(ch);
438           }
439           break;
440         case 1:
441           if (ch == '!') {
442             iState = 2;
443           } else if (ch == '?') {
                // "<? ... ?>" inside content is skipped.
444             SkipLiterals("?>");
445             SkipWhiteSpaces();
446             iState = 0;
447           } else if (ch == '/') {
                // End tag: its name is read but not compared with this
                // element's tag. One more byte (presumably '>') is stepped
                // over without being inspected.
448             CFX_ByteString space, name;
449             GetName(space, name);
450             SkipWhiteSpaces();
451             m_dwIndex++;
452             iState = 10;
453           } else {
                // A child element starts here. Store the text gathered so far
                // as a content segment (right-trimmed unless white space is
                // being preserved), then parse the child recursively.
454             content << decoder.GetResult();
455             CFX_WideString dataStr = content.GetWideString();
456             if (!bCDATA && !m_bSaveSpaceChars) {
457               dataStr.TrimRight(L" \t\r\n");
458             }
459             InsertContentSegment(bCDATA, dataStr, pElement);
460             content.Clear();
461             decoder.Clear();
462             bCDATA = FALSE;
463             iState = 0;
                // Un-read the first byte of the child's tag name so that the
                // recursive call (bStartTag == TRUE) sees it.
464             m_dwIndex--;
465             CXML_Element* pSubElement = ParseElement(pElement, TRUE);
466             if (!pSubElement) {
                  // Leaves only the switch; scanning continues in state 0.
467               break;
468             }
469             pSubElement->m_pParent = pElement;
                // Children are stored as (type tag, pointer) pairs.
470             pElement->m_Children.Add((void*)CXML_Element::Element);
471             pElement->m_Children.Add(pSubElement);
472             SkipWhiteSpaces();
473           }
474           break;
475         case 2:
              // After "<!": '[' skips through "]]>", '-' skips through "-->",
              // anything else skips through the next '>'. The skipped bytes
              // are not stored.
476           if (ch == '[') {
477             SkipLiterals("]]>");
478           } else if (ch == '-') {
479             m_dwIndex++;
480             SkipLiterals("-->");
481           } else {
482             SkipLiterals(">");
483           }
              // NOTE(review): decoder.Clear() here looks like it also drops
              // text decoded before the skipped construct -- confirm.
484           decoder.Clear();
485           SkipWhiteSpaces();
486           iState = 0;
487           break;
488       }
489       if (iState == 10) {
490         break;
491       }
492     }
493     m_nOffset = m_nBufferOffset + (FX_FILESIZE)m_dwIndex;
494     if (iState == 10 || m_dwIndex < m_dwBufferSize || IsEOF()) {
495       break;
496     }
497   } while (ReadNextBlock());
      // Flush the text that followed the last child (or the whole text if
      // there were no children) as a final content segment.
498   content << decoder.GetResult();
499   CFX_WideString dataStr = content.GetWideString();
500   if (!m_bSaveSpaceChars) {
501     dataStr.TrimRight(L" \t\r\n");
502   }
503   InsertContentSegment(bCDATA, dataStr, pElement);
504   content.Clear();
505   decoder.Clear();
506   bCDATA = FALSE;
507   return pElement;
508 }
InsertContentSegment(FX_BOOL bCDATA,const CFX_WideStringC & content,CXML_Element * pElement)509 void CXML_Parser::InsertContentSegment(FX_BOOL bCDATA,
510                                        const CFX_WideStringC& content,
511                                        CXML_Element* pElement) {
512   if (content.IsEmpty()) {
513     return;
514   }
515   CXML_Content* pContent = new CXML_Content;
516   pContent->Set(bCDATA, content);
517   pElement->m_Children.Add((void*)CXML_Element::Content);
518   pElement->m_Children.Add(pContent);
519 }
XML_ContinueParse(CXML_Parser & parser,FX_BOOL bSaveSpaceChars,FX_FILESIZE * pParsedSize)520 static CXML_Element* XML_ContinueParse(CXML_Parser& parser,
521                                        FX_BOOL bSaveSpaceChars,
522                                        FX_FILESIZE* pParsedSize) {
523   parser.m_bSaveSpaceChars = bSaveSpaceChars;
524   CXML_Element* pElement = parser.ParseElement(NULL, FALSE);
525   if (pParsedSize) {
526     *pParsedSize = parser.m_nOffset;
527   }
528   return pElement;
529 }
Parse(const void * pBuffer,size_t size,FX_BOOL bSaveSpaceChars,FX_FILESIZE * pParsedSize)530 CXML_Element* CXML_Element::Parse(const void* pBuffer,
531                                   size_t size,
532                                   FX_BOOL bSaveSpaceChars,
533                                   FX_FILESIZE* pParsedSize) {
534   CXML_Parser parser;
535   if (!parser.Init((uint8_t*)pBuffer, size)) {
536     return NULL;
537   }
538   return XML_ContinueParse(parser, bSaveSpaceChars, pParsedSize);
539 }
Parse(IFX_FileRead * pFile,FX_BOOL bSaveSpaceChars,FX_FILESIZE * pParsedSize)540 CXML_Element* CXML_Element::Parse(IFX_FileRead* pFile,
541                                   FX_BOOL bSaveSpaceChars,
542                                   FX_FILESIZE* pParsedSize) {
543   CXML_Parser parser;
544   if (!parser.Init(pFile)) {
545     return NULL;
546   }
547   return XML_ContinueParse(parser, bSaveSpaceChars, pParsedSize);
548 }
Parse(IFX_BufferRead * pBuffer,FX_BOOL bSaveSpaceChars,FX_FILESIZE * pParsedSize)549 CXML_Element* CXML_Element::Parse(IFX_BufferRead* pBuffer,
550                                   FX_BOOL bSaveSpaceChars,
551                                   FX_FILESIZE* pParsedSize) {
552   CXML_Parser parser;
553   if (!parser.Init(pBuffer)) {
554     return NULL;
555   }
556   return XML_ContinueParse(parser, bSaveSpaceChars, pParsedSize);
557 }
CXML_Element()558 CXML_Element::CXML_Element() : m_QSpaceName(), m_TagName(), m_AttrMap() {}
CXML_Element(const CFX_ByteStringC & qSpace,const CFX_ByteStringC & tagName)559 CXML_Element::CXML_Element(const CFX_ByteStringC& qSpace,
560                            const CFX_ByteStringC& tagName)
561     : m_QSpaceName(), m_TagName(), m_AttrMap() {
562   m_QSpaceName = qSpace;
563   m_TagName = tagName;
564 }
CXML_Element(const CFX_ByteStringC & qTagName)565 CXML_Element::CXML_Element(const CFX_ByteStringC& qTagName)
566     : m_pParent(NULL), m_QSpaceName(), m_TagName(), m_AttrMap() {
567   SetTag(qTagName);
568 }
~CXML_Element()569 CXML_Element::~CXML_Element() {
570   Empty();
571 }
Empty()572 void CXML_Element::Empty() {
573   RemoveChildren();
574 }
RemoveChildren()575 void CXML_Element::RemoveChildren() {
576   for (int i = 0; i < m_Children.GetSize(); i += 2) {
577     ChildType type = (ChildType)(uintptr_t)m_Children.GetAt(i);
578     if (type == Content) {
579       CXML_Content* content = (CXML_Content*)m_Children.GetAt(i + 1);
580       delete content;
581     } else if (type == Element) {
582       CXML_Element* child = (CXML_Element*)m_Children.GetAt(i + 1);
583       child->RemoveChildren();
584       delete child;
585     }
586   }
587   m_Children.RemoveAll();
588 }
GetTagName(FX_BOOL bQualified) const589 CFX_ByteString CXML_Element::GetTagName(FX_BOOL bQualified) const {
590   if (!bQualified || m_QSpaceName.IsEmpty()) {
591     return m_TagName;
592   }
593   CFX_ByteString bsTag = m_QSpaceName;
594   bsTag += ":";
595   bsTag += m_TagName;
596   return bsTag;
597 }
GetNamespace(FX_BOOL bQualified) const598 CFX_ByteString CXML_Element::GetNamespace(FX_BOOL bQualified) const {
599   if (bQualified) {
600     return m_QSpaceName;
601   }
602   return GetNamespaceURI(m_QSpaceName);
603 }
GetNamespaceURI(const CFX_ByteStringC & qName) const604 CFX_ByteString CXML_Element::GetNamespaceURI(
605     const CFX_ByteStringC& qName) const {
606   const CFX_WideString* pwsSpace;
607   const CXML_Element* pElement = this;
608   do {
609     if (qName.IsEmpty()) {
610       pwsSpace = pElement->m_AttrMap.Lookup("", "xmlns");
611     } else {
612       pwsSpace = pElement->m_AttrMap.Lookup("xmlns", qName);
613     }
614     if (pwsSpace) {
615       break;
616     }
617     pElement = pElement->GetParent();
618   } while (pElement);
619   return pwsSpace ? FX_UTF8Encode(*pwsSpace) : CFX_ByteString();
620 }
GetAttrByIndex(int index,CFX_ByteString & space,CFX_ByteString & name,CFX_WideString & value) const621 void CXML_Element::GetAttrByIndex(int index,
622                                   CFX_ByteString& space,
623                                   CFX_ByteString& name,
624                                   CFX_WideString& value) const {
625   if (index < 0 || index >= m_AttrMap.GetSize()) {
626     return;
627   }
628   CXML_AttrItem& item = m_AttrMap.GetAt(index);
629   space = item.m_QSpaceName;
630   name = item.m_AttrName;
631   value = item.m_Value;
632 }
HasAttr(const CFX_ByteStringC & name) const633 FX_BOOL CXML_Element::HasAttr(const CFX_ByteStringC& name) const {
634   CFX_ByteStringC bsSpace, bsName;
635   FX_XML_SplitQualifiedName(name, bsSpace, bsName);
636   return m_AttrMap.Lookup(bsSpace, bsName) != NULL;
637 }
GetAttrValue(const CFX_ByteStringC & name,CFX_WideString & attribute) const638 FX_BOOL CXML_Element::GetAttrValue(const CFX_ByteStringC& name,
639                                    CFX_WideString& attribute) const {
640   CFX_ByteStringC bsSpace, bsName;
641   FX_XML_SplitQualifiedName(name, bsSpace, bsName);
642   return GetAttrValue(bsSpace, bsName, attribute);
643 }
GetAttrValue(const CFX_ByteStringC & space,const CFX_ByteStringC & name,CFX_WideString & attribute) const644 FX_BOOL CXML_Element::GetAttrValue(const CFX_ByteStringC& space,
645                                    const CFX_ByteStringC& name,
646                                    CFX_WideString& attribute) const {
647   const CFX_WideString* pValue = m_AttrMap.Lookup(space, name);
648   if (pValue) {
649     attribute = *pValue;
650     return TRUE;
651   }
652   return FALSE;
653 }
GetAttrInteger(const CFX_ByteStringC & name,int & attribute) const654 FX_BOOL CXML_Element::GetAttrInteger(const CFX_ByteStringC& name,
655                                      int& attribute) const {
656   CFX_ByteStringC bsSpace, bsName;
657   FX_XML_SplitQualifiedName(name, bsSpace, bsName);
658   const CFX_WideString* pwsValue = m_AttrMap.Lookup(bsSpace, bsName);
659   if (pwsValue) {
660     attribute = pwsValue->GetInteger();
661     return TRUE;
662   }
663   return FALSE;
664 }
GetAttrInteger(const CFX_ByteStringC & space,const CFX_ByteStringC & name,int & attribute) const665 FX_BOOL CXML_Element::GetAttrInteger(const CFX_ByteStringC& space,
666                                      const CFX_ByteStringC& name,
667                                      int& attribute) const {
668   const CFX_WideString* pwsValue = m_AttrMap.Lookup(space, name);
669   if (pwsValue) {
670     attribute = pwsValue->GetInteger();
671     return TRUE;
672   }
673   return FALSE;
674 }
GetAttrFloat(const CFX_ByteStringC & name,FX_FLOAT & attribute) const675 FX_BOOL CXML_Element::GetAttrFloat(const CFX_ByteStringC& name,
676                                    FX_FLOAT& attribute) const {
677   CFX_ByteStringC bsSpace, bsName;
678   FX_XML_SplitQualifiedName(name, bsSpace, bsName);
679   return GetAttrFloat(bsSpace, bsName, attribute);
680 }
GetAttrFloat(const CFX_ByteStringC & space,const CFX_ByteStringC & name,FX_FLOAT & attribute) const681 FX_BOOL CXML_Element::GetAttrFloat(const CFX_ByteStringC& space,
682                                    const CFX_ByteStringC& name,
683                                    FX_FLOAT& attribute) const {
684   const CFX_WideString* pValue = m_AttrMap.Lookup(space, name);
685   if (pValue) {
686     attribute = pValue->GetFloat();
687     return TRUE;
688   }
689   return FALSE;
690 }
CountChildren() const691 FX_DWORD CXML_Element::CountChildren() const {
692   return m_Children.GetSize() / 2;
693 }
GetChildType(FX_DWORD index) const694 CXML_Element::ChildType CXML_Element::GetChildType(FX_DWORD index) const {
695   index <<= 1;
696   if (index >= (FX_DWORD)m_Children.GetSize()) {
697     return Invalid;
698   }
699   return (ChildType)(uintptr_t)m_Children.GetAt(index);
700 }
GetContent(FX_DWORD index) const701 CFX_WideString CXML_Element::GetContent(FX_DWORD index) const {
702   index <<= 1;
703   if (index >= (FX_DWORD)m_Children.GetSize() ||
704       (ChildType)(uintptr_t)m_Children.GetAt(index) != Content) {
705     return CFX_WideString();
706   }
707   CXML_Content* pContent = (CXML_Content*)m_Children.GetAt(index + 1);
708   if (pContent) {
709     return pContent->m_Content;
710   }
711   return CFX_WideString();
712 }
GetElement(FX_DWORD index) const713 CXML_Element* CXML_Element::GetElement(FX_DWORD index) const {
714   index <<= 1;
715   if (index >= (FX_DWORD)m_Children.GetSize() ||
716       (ChildType)(uintptr_t)m_Children.GetAt(index) != Element) {
717     return NULL;
718   }
719   return (CXML_Element*)m_Children.GetAt(index + 1);
720 }
CountElements(const CFX_ByteStringC & space,const CFX_ByteStringC & tag) const721 FX_DWORD CXML_Element::CountElements(const CFX_ByteStringC& space,
722                                      const CFX_ByteStringC& tag) const {
723   int count = 0;
724   for (int i = 0; i < m_Children.GetSize(); i += 2) {
725     ChildType type = (ChildType)(uintptr_t)m_Children.GetAt(i);
726     if (type != Element) {
727       continue;
728     }
729     CXML_Element* pKid = (CXML_Element*)m_Children.GetAt(i + 1);
730     if ((space.IsEmpty() || pKid->m_QSpaceName == space) &&
731         pKid->m_TagName == tag) {
732       count++;
733     }
734   }
735   return count;
736 }
GetElement(const CFX_ByteStringC & space,const CFX_ByteStringC & tag,int index) const737 CXML_Element* CXML_Element::GetElement(const CFX_ByteStringC& space,
738                                        const CFX_ByteStringC& tag,
739                                        int index) const {
740   if (index < 0) {
741     return NULL;
742   }
743   for (int i = 0; i < m_Children.GetSize(); i += 2) {
744     ChildType type = (ChildType)(uintptr_t)m_Children.GetAt(i);
745     if (type != Element) {
746       continue;
747     }
748     CXML_Element* pKid = (CXML_Element*)m_Children.GetAt(i + 1);
749     if ((!space.IsEmpty() && pKid->m_QSpaceName != space) ||
750         pKid->m_TagName != tag) {
751       continue;
752     }
753     if (index-- == 0) {
754       return pKid;
755     }
756   }
757   return NULL;
758 }
FindElement(CXML_Element * pChild) const759 FX_DWORD CXML_Element::FindElement(CXML_Element* pChild) const {
760   for (int i = 0; i < m_Children.GetSize(); i += 2) {
761     if ((ChildType)(uintptr_t)m_Children.GetAt(i) == Element &&
762         (CXML_Element*)m_Children.GetAt(i + 1) == pChild) {
763       return (FX_DWORD)(i >> 1);
764     }
765   }
766   return (FX_DWORD)-1;
767 }
Lookup(const CFX_ByteStringC & space,const CFX_ByteStringC & name) const768 const CFX_WideString* CXML_AttrMap::Lookup(const CFX_ByteStringC& space,
769                                            const CFX_ByteStringC& name) const {
770   if (!m_pMap) {
771     return NULL;
772   }
773   for (int i = 0; i < m_pMap->GetSize(); i++) {
774     CXML_AttrItem& item = GetAt(i);
775     if ((space.IsEmpty() || item.m_QSpaceName == space) &&
776         item.m_AttrName == name) {
777       return &item.m_Value;
778     }
779   }
780   return NULL;
781 }
SetAt(const CFX_ByteStringC & space,const CFX_ByteStringC & name,const CFX_WideStringC & value)782 void CXML_AttrMap::SetAt(const CFX_ByteStringC& space,
783                          const CFX_ByteStringC& name,
784                          const CFX_WideStringC& value) {
785   for (int i = 0; i < GetSize(); i++) {
786     CXML_AttrItem& item = GetAt(i);
787     if ((space.IsEmpty() || item.m_QSpaceName == space) &&
788         item.m_AttrName == name) {
789       item.m_Value = value;
790       return;
791     }
792   }
793   if (!m_pMap) {
794     m_pMap = new CFX_ObjectArray<CXML_AttrItem>;
795   }
796   CXML_AttrItem* pItem = (CXML_AttrItem*)m_pMap->AddSpace();
797   if (!pItem) {
798     return;
799   }
800   pItem->m_QSpaceName = space;
801   pItem->m_AttrName = name;
802   pItem->m_Value = value;
803 }
RemoveAt(const CFX_ByteStringC & space,const CFX_ByteStringC & name)804 void CXML_AttrMap::RemoveAt(const CFX_ByteStringC& space,
805                             const CFX_ByteStringC& name) {
806   if (!m_pMap) {
807     return;
808   }
809   for (int i = 0; i < m_pMap->GetSize(); i++) {
810     CXML_AttrItem& item = GetAt(i);
811     if ((space.IsEmpty() || item.m_QSpaceName == space) &&
812         item.m_AttrName == name) {
813       m_pMap->RemoveAt(i);
814       return;
815     }
816   }
817 }
GetSize() const818 int CXML_AttrMap::GetSize() const {
819   return m_pMap ? m_pMap->GetSize() : 0;
820 }
GetAt(int index) const821 CXML_AttrItem& CXML_AttrMap::GetAt(int index) const {
822   return (*m_pMap)[index];
823 }
RemoveAll()824 void CXML_AttrMap::RemoveAll() {
825   if (!m_pMap) {
826     return;
827   }
828   m_pMap->RemoveAll();
829   delete m_pMap;
830   m_pMap = NULL;
831 }
832