1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "xfa/fde/xml/fde_xml_imp.h"
8 
9 #include <algorithm>
10 #include <utility>
11 
12 #include "core/fxcrt/fx_ext.h"
13 #include "core/fxcrt/fx_safe_types.h"
14 #include "third_party/base/stl_util.h"
15 #include "xfa/fgas/crt/fgas_codepage.h"
16 
17 namespace {
18 
19 const uint32_t kMaxCharRange = 0x10ffff;
20 
21 const uint16_t g_XMLValidCharRange[][2] = {{0x09, 0x09},
22                                            {0x0A, 0x0A},
23                                            {0x0D, 0x0D},
24                                            {0x20, 0xD7FF},
25                                            {0xE000, 0xFFFD}};
26 
FDE_IsXMLWhiteSpace(FX_WCHAR ch)27 bool FDE_IsXMLWhiteSpace(FX_WCHAR ch) {
28   return ch == L' ' || ch == 0x0A || ch == 0x0D || ch == 0x09;
29 }
30 
31 struct FDE_XMLNAMECHAR {
32   uint16_t wStart;
33   uint16_t wEnd;
34   bool bStartChar;
35 };
36 
37 const FDE_XMLNAMECHAR g_XMLNameChars[] = {
38     {L'-', L'.', false},    {L'0', L'9', false},     {L':', L':', false},
39     {L'A', L'Z', true},     {L'_', L'_', true},      {L'a', L'z', true},
40     {0xB7, 0xB7, false},    {0xC0, 0xD6, true},      {0xD8, 0xF6, true},
41     {0xF8, 0x02FF, true},   {0x0300, 0x036F, false}, {0x0370, 0x037D, true},
42     {0x037F, 0x1FFF, true}, {0x200C, 0x200D, true},  {0x203F, 0x2040, false},
43     {0x2070, 0x218F, true}, {0x2C00, 0x2FEF, true},  {0x3001, 0xD7FF, true},
44     {0xF900, 0xFDCF, true}, {0xFDF0, 0xFFFD, true},
45 };
46 
FDE_IsXMLNameChar(FX_WCHAR ch,bool bFirstChar)47 bool FDE_IsXMLNameChar(FX_WCHAR ch, bool bFirstChar) {
48   int32_t iStart = 0;
49   int32_t iEnd = FX_ArraySize(g_XMLNameChars) - 1;
50   while (iStart <= iEnd) {
51     int32_t iMid = (iStart + iEnd) / 2;
52     if (ch < g_XMLNameChars[iMid].wStart) {
53       iEnd = iMid - 1;
54     } else if (ch > g_XMLNameChars[iMid].wEnd) {
55       iStart = iMid + 1;
56     } else {
57       return bFirstChar ? g_XMLNameChars[iMid].bStartChar : true;
58     }
59   }
60   return false;
61 }
62 
63 }  // namespace
64 
FDE_IsXMLValidChar(FX_WCHAR ch)65 bool FDE_IsXMLValidChar(FX_WCHAR ch) {
66   int32_t iStart = 0;
67   int32_t iEnd = FX_ArraySize(g_XMLValidCharRange) - 1;
68   while (iStart <= iEnd) {
69     int32_t iMid = (iStart + iEnd) / 2;
70     if (ch < g_XMLValidCharRange[iMid][0]) {
71       iEnd = iMid - 1;
72     } else if (ch > g_XMLValidCharRange[iMid][1]) {
73       iStart = iMid + 1;
74     } else {
75       return true;
76     }
77   }
78   return false;
79 }
80 
CFDE_XMLNode()81 CFDE_XMLNode::CFDE_XMLNode()
82     : m_pParent(nullptr),
83       m_pChild(nullptr),
84       m_pPrior(nullptr),
85       m_pNext(nullptr) {}
86 
GetType() const87 FDE_XMLNODETYPE CFDE_XMLNode::GetType() const {
88   return FDE_XMLNODE_Unknown;
89 }
90 
~CFDE_XMLNode()91 CFDE_XMLNode::~CFDE_XMLNode() {
92   DeleteChildren();
93 }
94 
DeleteChildren()95 void CFDE_XMLNode::DeleteChildren() {
96   CFDE_XMLNode* pChild = m_pChild;
97   while (pChild) {
98     CFDE_XMLNode* pNext = pChild->m_pNext;
99     delete pChild;
100     pChild = pNext;
101   }
102   m_pChild = nullptr;
103 }
104 
CountChildNodes() const105 int32_t CFDE_XMLNode::CountChildNodes() const {
106   int32_t iCount = 0;
107   CFDE_XMLNode* pChild = m_pChild;
108   while (pChild) {
109     iCount++;
110     pChild = pChild->m_pNext;
111   }
112   return iCount;
113 }
114 
GetChildNode(int32_t index) const115 CFDE_XMLNode* CFDE_XMLNode::GetChildNode(int32_t index) const {
116   CFDE_XMLNode* pChild = m_pChild;
117   while (pChild) {
118     if (index == 0) {
119       return pChild;
120     }
121     index--;
122     pChild = pChild->m_pNext;
123   }
124   return nullptr;
125 }
126 
GetChildNodeIndex(CFDE_XMLNode * pNode) const127 int32_t CFDE_XMLNode::GetChildNodeIndex(CFDE_XMLNode* pNode) const {
128   int32_t index = 0;
129   CFDE_XMLNode* pChild = m_pChild;
130   while (pChild) {
131     if (pChild == pNode) {
132       return index;
133     }
134     index++;
135     pChild = pChild->m_pNext;
136   }
137   return -1;
138 }
139 
GetPath(const FX_WCHAR * pPath,int32_t iLength,bool bQualifiedName) const140 CFDE_XMLNode* CFDE_XMLNode::GetPath(const FX_WCHAR* pPath,
141                                     int32_t iLength,
142                                     bool bQualifiedName) const {
143   ASSERT(pPath);
144   if (iLength < 0) {
145     iLength = FXSYS_wcslen(pPath);
146   }
147   if (iLength == 0) {
148     return nullptr;
149   }
150   CFX_WideString csPath;
151   const FX_WCHAR* pStart = pPath;
152   const FX_WCHAR* pEnd = pPath + iLength;
153   FX_WCHAR ch;
154   while (pStart < pEnd) {
155     ch = *pStart++;
156     if (ch == L'/') {
157       break;
158     } else {
159       csPath += ch;
160     }
161   }
162   iLength -= pStart - pPath;
163   CFDE_XMLNode* pFind = nullptr;
164   if (csPath.GetLength() < 1) {
165     pFind = GetNodeItem(CFDE_XMLNode::Root);
166   } else if (csPath.Compare(L"..") == 0) {
167     pFind = m_pParent;
168   } else if (csPath.Compare(L".") == 0) {
169     pFind = (CFDE_XMLNode*)this;
170   } else {
171     CFX_WideString wsTag;
172     CFDE_XMLNode* pNode = m_pChild;
173     while (pNode) {
174       if (pNode->GetType() == FDE_XMLNODE_Element) {
175         if (bQualifiedName) {
176           ((CFDE_XMLElement*)pNode)->GetTagName(wsTag);
177         } else {
178           ((CFDE_XMLElement*)pNode)->GetLocalTagName(wsTag);
179         }
180         if (wsTag.Compare(csPath) == 0) {
181           if (iLength < 1) {
182             pFind = pNode;
183           } else {
184             pFind = pNode->GetPath(pStart, iLength, bQualifiedName);
185           }
186           if (pFind)
187             return pFind;
188         }
189       }
190       pNode = pNode->m_pNext;
191     }
192   }
193   if (!pFind || iLength < 1)
194     return pFind;
195   return pFind->GetPath(pStart, iLength, bQualifiedName);
196 }
197 
InsertChildNode(CFDE_XMLNode * pNode,int32_t index)198 int32_t CFDE_XMLNode::InsertChildNode(CFDE_XMLNode* pNode, int32_t index) {
199   pNode->m_pParent = this;
200   if (!m_pChild) {
201     m_pChild = pNode;
202     pNode->m_pPrior = nullptr;
203     pNode->m_pNext = nullptr;
204     return 0;
205   }
206   if (index == 0) {
207     pNode->m_pNext = m_pChild;
208     pNode->m_pPrior = nullptr;
209     m_pChild->m_pPrior = pNode;
210     m_pChild = pNode;
211     return 0;
212   }
213   int32_t iCount = 0;
214   CFDE_XMLNode* pFind = m_pChild;
215   while (++iCount != index && pFind->m_pNext) {
216     pFind = pFind->m_pNext;
217   }
218   pNode->m_pPrior = pFind;
219   pNode->m_pNext = pFind->m_pNext;
220   if (pFind->m_pNext)
221     pFind->m_pNext->m_pPrior = pNode;
222   pFind->m_pNext = pNode;
223   return iCount;
224 }
225 
RemoveChildNode(CFDE_XMLNode * pNode)226 void CFDE_XMLNode::RemoveChildNode(CFDE_XMLNode* pNode) {
227   ASSERT(m_pChild && pNode);
228   if (m_pChild == pNode) {
229     m_pChild = pNode->m_pNext;
230   } else {
231     pNode->m_pPrior->m_pNext = pNode->m_pNext;
232   }
233   if (pNode->m_pNext)
234     pNode->m_pNext->m_pPrior = pNode->m_pPrior;
235   pNode->m_pParent = nullptr;
236   pNode->m_pNext = nullptr;
237   pNode->m_pPrior = nullptr;
238 }
239 
GetNodeItem(CFDE_XMLNode::NodeItem eItem) const240 CFDE_XMLNode* CFDE_XMLNode::GetNodeItem(CFDE_XMLNode::NodeItem eItem) const {
241   switch (eItem) {
242     case CFDE_XMLNode::Root: {
243       CFDE_XMLNode* pParent = (CFDE_XMLNode*)this;
244       while (pParent->m_pParent) {
245         pParent = pParent->m_pParent;
246       }
247       return pParent;
248     }
249     case CFDE_XMLNode::Parent:
250       return m_pParent;
251     case CFDE_XMLNode::FirstSibling: {
252       CFDE_XMLNode* pItem = (CFDE_XMLNode*)this;
253       while (pItem->m_pPrior) {
254         pItem = pItem->m_pPrior;
255       }
256       return pItem == (CFDE_XMLNode*)this ? nullptr : pItem;
257     }
258     case CFDE_XMLNode::PriorSibling:
259       return m_pPrior;
260     case CFDE_XMLNode::NextSibling:
261       return m_pNext;
262     case CFDE_XMLNode::LastSibling: {
263       CFDE_XMLNode* pItem = (CFDE_XMLNode*)this;
264       while (pItem->m_pNext)
265         pItem = pItem->m_pNext;
266       return pItem == (CFDE_XMLNode*)this ? nullptr : pItem;
267     }
268     case CFDE_XMLNode::FirstNeighbor: {
269       CFDE_XMLNode* pParent = (CFDE_XMLNode*)this;
270       while (pParent->m_pParent)
271         pParent = pParent->m_pParent;
272       return pParent == (CFDE_XMLNode*)this ? nullptr : pParent;
273     }
274     case CFDE_XMLNode::PriorNeighbor: {
275       if (!m_pPrior)
276         return m_pParent;
277 
278       CFDE_XMLNode* pItem = m_pPrior;
279       while (pItem->m_pChild) {
280         pItem = pItem->m_pChild;
281         while (pItem->m_pNext)
282           pItem = pItem->m_pNext;
283       }
284       return pItem;
285     }
286     case CFDE_XMLNode::NextNeighbor: {
287       if (m_pChild)
288         return m_pChild;
289       if (m_pNext)
290         return m_pNext;
291       CFDE_XMLNode* pItem = m_pParent;
292       while (pItem) {
293         if (pItem->m_pNext)
294           return pItem->m_pNext;
295         pItem = pItem->m_pParent;
296       }
297       return nullptr;
298     }
299     case CFDE_XMLNode::LastNeighbor: {
300       CFDE_XMLNode* pItem = (CFDE_XMLNode*)this;
301       while (pItem->m_pParent) {
302         pItem = pItem->m_pParent;
303       }
304       while (true) {
305         while (pItem->m_pNext)
306           pItem = pItem->m_pNext;
307         if (!pItem->m_pChild)
308           break;
309         pItem = pItem->m_pChild;
310       }
311       return pItem == (CFDE_XMLNode*)this ? nullptr : pItem;
312     }
313     case CFDE_XMLNode::FirstChild:
314       return m_pChild;
315     case CFDE_XMLNode::LastChild: {
316       if (!m_pChild)
317         return nullptr;
318 
319       CFDE_XMLNode* pChild = m_pChild;
320       while (pChild->m_pNext)
321         pChild = pChild->m_pNext;
322       return pChild;
323     }
324     default:
325       break;
326   }
327   return nullptr;
328 }
329 
GetNodeLevel() const330 int32_t CFDE_XMLNode::GetNodeLevel() const {
331   int32_t iLevel = 0;
332   const CFDE_XMLNode* pItem = m_pParent;
333   while (pItem) {
334     iLevel++;
335     pItem = pItem->m_pParent;
336   }
337   return iLevel;
338 }
339 
InsertNodeItem(CFDE_XMLNode::NodeItem eItem,CFDE_XMLNode * pNode)340 bool CFDE_XMLNode::InsertNodeItem(CFDE_XMLNode::NodeItem eItem,
341                                   CFDE_XMLNode* pNode) {
342   switch (eItem) {
343     case CFDE_XMLNode::NextSibling: {
344       pNode->m_pParent = m_pParent;
345       pNode->m_pNext = m_pNext;
346       pNode->m_pPrior = this;
347       if (m_pNext) {
348         m_pNext->m_pPrior = pNode;
349       }
350       m_pNext = pNode;
351       return true;
352     }
353     case CFDE_XMLNode::PriorSibling: {
354       pNode->m_pParent = m_pParent;
355       pNode->m_pNext = this;
356       pNode->m_pPrior = m_pPrior;
357       if (m_pPrior) {
358         m_pPrior->m_pNext = pNode;
359       } else if (m_pParent) {
360         m_pParent->m_pChild = pNode;
361       }
362       m_pPrior = pNode;
363       return true;
364     }
365     default:
366       return false;
367   }
368 }
369 
RemoveNodeItem(CFDE_XMLNode::NodeItem eItem)370 CFDE_XMLNode* CFDE_XMLNode::RemoveNodeItem(CFDE_XMLNode::NodeItem eItem) {
371   CFDE_XMLNode* pNode = nullptr;
372   switch (eItem) {
373     case CFDE_XMLNode::NextSibling:
374       if (m_pNext) {
375         pNode = m_pNext;
376         m_pNext = pNode->m_pNext;
377         if (m_pNext) {
378           m_pNext->m_pPrior = this;
379         }
380         pNode->m_pParent = nullptr;
381         pNode->m_pNext = nullptr;
382         pNode->m_pPrior = nullptr;
383       }
384       break;
385     default:
386       break;
387   }
388   return pNode;
389 }
390 
Clone(bool bRecursive)391 CFDE_XMLNode* CFDE_XMLNode::Clone(bool bRecursive) {
392   return nullptr;
393 }
394 
SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream> & pXMLStream)395 void CFDE_XMLNode::SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream>& pXMLStream) {
396   CFDE_XMLNode* pNode = (CFDE_XMLNode*)this;
397   switch (pNode->GetType()) {
398     case FDE_XMLNODE_Instruction: {
399       CFX_WideString ws;
400       CFDE_XMLInstruction* pInstruction = (CFDE_XMLInstruction*)pNode;
401       if (pInstruction->m_wsTarget.CompareNoCase(L"xml") == 0) {
402         ws = L"<?xml version=\"1.0\" encoding=\"";
403         uint16_t wCodePage = pXMLStream->GetCodePage();
404         if (wCodePage == FX_CODEPAGE_UTF16LE) {
405           ws += L"UTF-16";
406         } else if (wCodePage == FX_CODEPAGE_UTF16BE) {
407           ws += L"UTF-16be";
408         } else {
409           ws += L"UTF-8";
410         }
411         ws += L"\"?>";
412         pXMLStream->WriteString(ws.c_str(), ws.GetLength());
413       } else {
414         ws.Format(L"<?%s", pInstruction->m_wsTarget.c_str());
415         pXMLStream->WriteString(ws.c_str(), ws.GetLength());
416         std::vector<CFX_WideString>& attributes = pInstruction->m_Attributes;
417         int32_t i;
418         int32_t iCount = pdfium::CollectionSize<int32_t>(attributes);
419         CFX_WideString wsValue;
420         for (i = 0; i < iCount; i += 2) {
421           ws = L" ";
422           ws += attributes[i];
423           ws += L"=\"";
424           wsValue = attributes[i + 1];
425           wsValue.Replace(L"&", L"&amp;");
426           wsValue.Replace(L"<", L"&lt;");
427           wsValue.Replace(L">", L"&gt;");
428           wsValue.Replace(L"\'", L"&apos;");
429           wsValue.Replace(L"\"", L"&quot;");
430           ws += wsValue;
431           ws += L"\"";
432           pXMLStream->WriteString(ws.c_str(), ws.GetLength());
433         }
434         std::vector<CFX_WideString>& targetdata = pInstruction->m_TargetData;
435         iCount = pdfium::CollectionSize<int32_t>(targetdata);
436         for (i = 0; i < iCount; i++) {
437           ws = L" \"";
438           ws += targetdata[i];
439           ws += L"\"";
440           pXMLStream->WriteString(ws.c_str(), ws.GetLength());
441         }
442         ws = L"?>";
443         pXMLStream->WriteString(ws.c_str(), ws.GetLength());
444       }
445     } break;
446     case FDE_XMLNODE_Element: {
447       CFX_WideString ws;
448       ws = L"<";
449       ws += ((CFDE_XMLElement*)pNode)->m_wsTag;
450       pXMLStream->WriteString(ws.c_str(), ws.GetLength());
451       std::vector<CFX_WideString>& attributes =
452           static_cast<CFDE_XMLElement*>(pNode)->m_Attributes;
453       int32_t iCount = pdfium::CollectionSize<int32_t>(attributes);
454       CFX_WideString wsValue;
455       for (int32_t i = 0; i < iCount; i += 2) {
456         ws = L" ";
457         ws += attributes[i];
458         ws += L"=\"";
459         wsValue = attributes[i + 1];
460         wsValue.Replace(L"&", L"&amp;");
461         wsValue.Replace(L"<", L"&lt;");
462         wsValue.Replace(L">", L"&gt;");
463         wsValue.Replace(L"\'", L"&apos;");
464         wsValue.Replace(L"\"", L"&quot;");
465         ws += wsValue;
466         ws += L"\"";
467         pXMLStream->WriteString(ws.c_str(), ws.GetLength());
468       }
469       if (pNode->m_pChild) {
470         ws = L"\n>";
471         pXMLStream->WriteString(ws.c_str(), ws.GetLength());
472         CFDE_XMLNode* pChild = pNode->m_pChild;
473         while (pChild) {
474           pChild->SaveXMLNode(pXMLStream);
475           pChild = pChild->m_pNext;
476         }
477         ws = L"</";
478         ws += ((CFDE_XMLElement*)pNode)->m_wsTag;
479         ws += L"\n>";
480       } else {
481         ws = L"\n/>";
482       }
483       pXMLStream->WriteString(ws.c_str(), ws.GetLength());
484     } break;
485     case FDE_XMLNODE_Text: {
486       CFX_WideString ws = ((CFDE_XMLText*)pNode)->m_wsText;
487       ws.Replace(L"&", L"&amp;");
488       ws.Replace(L"<", L"&lt;");
489       ws.Replace(L">", L"&gt;");
490       ws.Replace(L"\'", L"&apos;");
491       ws.Replace(L"\"", L"&quot;");
492       pXMLStream->WriteString(ws.c_str(), ws.GetLength());
493     } break;
494     case FDE_XMLNODE_CharData: {
495       CFX_WideString ws = L"<![CDATA[";
496       ws += ((CFDE_XMLCharData*)pNode)->m_wsCharData;
497       ws += L"]]>";
498       pXMLStream->WriteString(ws.c_str(), ws.GetLength());
499     } break;
500     case FDE_XMLNODE_Unknown:
501       break;
502     default:
503       break;
504   }
505 }
506 
CloneChildren(CFDE_XMLNode * pClone)507 void CFDE_XMLNode::CloneChildren(CFDE_XMLNode* pClone) {
508   if (!m_pChild) {
509     return;
510   }
511   CFDE_XMLNode* pNext = m_pChild;
512   CFDE_XMLNode* pCloneNext = pNext->Clone(true);
513   pClone->InsertChildNode(pCloneNext);
514   pNext = pNext->m_pNext;
515   while (pNext) {
516     CFDE_XMLNode* pChild = pNext->Clone(true);
517     pCloneNext->InsertNodeItem(CFDE_XMLNode::NextSibling, pChild);
518     pCloneNext = pChild;
519     pNext = pNext->m_pNext;
520   }
521 }
522 
CFDE_XMLInstruction(const CFX_WideString & wsTarget)523 CFDE_XMLInstruction::CFDE_XMLInstruction(const CFX_WideString& wsTarget)
524     : m_wsTarget(wsTarget) {
525   ASSERT(m_wsTarget.GetLength() > 0);
526 }
527 
GetType() const528 FDE_XMLNODETYPE CFDE_XMLInstruction::GetType() const {
529   return FDE_XMLNODE_Instruction;
530 }
531 
Clone(bool bRecursive)532 CFDE_XMLNode* CFDE_XMLInstruction::Clone(bool bRecursive) {
533   CFDE_XMLInstruction* pClone = new CFDE_XMLInstruction(m_wsTarget);
534   if (!pClone)
535     return nullptr;
536 
537   pClone->m_Attributes = m_Attributes;
538   pClone->m_TargetData = m_TargetData;
539   if (bRecursive)
540     CloneChildren(pClone);
541 
542   return pClone;
543 }
544 
CountAttributes() const545 int32_t CFDE_XMLInstruction::CountAttributes() const {
546   return pdfium::CollectionSize<int32_t>(m_Attributes) / 2;
547 }
548 
GetAttribute(int32_t index,CFX_WideString & wsAttriName,CFX_WideString & wsAttriValue) const549 bool CFDE_XMLInstruction::GetAttribute(int32_t index,
550                                        CFX_WideString& wsAttriName,
551                                        CFX_WideString& wsAttriValue) const {
552   int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
553   ASSERT(index > -1 && index < iCount / 2);
554   for (int32_t i = 0; i < iCount; i += 2) {
555     if (index == 0) {
556       wsAttriName = m_Attributes[i];
557       wsAttriValue = m_Attributes[i + 1];
558       return true;
559     }
560     index--;
561   }
562   return false;
563 }
564 
HasAttribute(const FX_WCHAR * pwsAttriName) const565 bool CFDE_XMLInstruction::HasAttribute(const FX_WCHAR* pwsAttriName) const {
566   int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
567   for (int32_t i = 0; i < iCount; i += 2) {
568     if (m_Attributes[i].Compare(pwsAttriName) == 0) {
569       return true;
570     }
571   }
572   return false;
573 }
574 
GetString(const FX_WCHAR * pwsAttriName,CFX_WideString & wsAttriValue,const FX_WCHAR * pwsDefValue) const575 void CFDE_XMLInstruction::GetString(const FX_WCHAR* pwsAttriName,
576                                     CFX_WideString& wsAttriValue,
577                                     const FX_WCHAR* pwsDefValue) const {
578   int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
579   for (int32_t i = 0; i < iCount; i += 2) {
580     if (m_Attributes[i].Compare(pwsAttriName) == 0) {
581       wsAttriValue = m_Attributes[i + 1];
582       return;
583     }
584   }
585   wsAttriValue = pwsDefValue;
586 }
587 
SetString(const CFX_WideString & wsAttriName,const CFX_WideString & wsAttriValue)588 void CFDE_XMLInstruction::SetString(const CFX_WideString& wsAttriName,
589                                     const CFX_WideString& wsAttriValue) {
590   ASSERT(wsAttriName.GetLength() > 0);
591   int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
592   for (int32_t i = 0; i < iCount; i += 2) {
593     if (m_Attributes[i].Compare(wsAttriName) == 0) {
594       m_Attributes[i] = wsAttriName;
595       m_Attributes[i + 1] = wsAttriValue;
596       return;
597     }
598   }
599   m_Attributes.push_back(wsAttriName);
600   m_Attributes.push_back(wsAttriValue);
601 }
602 
GetInteger(const FX_WCHAR * pwsAttriName,int32_t iDefValue) const603 int32_t CFDE_XMLInstruction::GetInteger(const FX_WCHAR* pwsAttriName,
604                                         int32_t iDefValue) const {
605   int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
606   for (int32_t i = 0; i < iCount; i += 2) {
607     if (m_Attributes[i].Compare(pwsAttriName) == 0) {
608       return FXSYS_wtoi(m_Attributes[i + 1].c_str());
609     }
610   }
611   return iDefValue;
612 }
613 
SetInteger(const FX_WCHAR * pwsAttriName,int32_t iAttriValue)614 void CFDE_XMLInstruction::SetInteger(const FX_WCHAR* pwsAttriName,
615                                      int32_t iAttriValue) {
616   CFX_WideString wsValue;
617   wsValue.Format(L"%d", iAttriValue);
618   SetString(pwsAttriName, wsValue);
619 }
620 
GetFloat(const FX_WCHAR * pwsAttriName,FX_FLOAT fDefValue) const621 FX_FLOAT CFDE_XMLInstruction::GetFloat(const FX_WCHAR* pwsAttriName,
622                                        FX_FLOAT fDefValue) const {
623   int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
624   for (int32_t i = 0; i < iCount; i += 2) {
625     if (m_Attributes[i].Compare(pwsAttriName) == 0) {
626       return FXSYS_wcstof(m_Attributes[i + 1].c_str(), -1, nullptr);
627     }
628   }
629   return fDefValue;
630 }
631 
SetFloat(const FX_WCHAR * pwsAttriName,FX_FLOAT fAttriValue)632 void CFDE_XMLInstruction::SetFloat(const FX_WCHAR* pwsAttriName,
633                                    FX_FLOAT fAttriValue) {
634   CFX_WideString wsValue;
635   wsValue.Format(L"%f", fAttriValue);
636   SetString(pwsAttriName, wsValue);
637 }
638 
RemoveAttribute(const FX_WCHAR * pwsAttriName)639 void CFDE_XMLInstruction::RemoveAttribute(const FX_WCHAR* pwsAttriName) {
640   int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
641   for (int32_t i = 0; i < iCount; i += 2) {
642     if (m_Attributes[i].Compare(pwsAttriName) == 0) {
643       m_Attributes.erase(m_Attributes.begin() + i,
644                          m_Attributes.begin() + i + 2);
645       return;
646     }
647   }
648 }
649 
CountData() const650 int32_t CFDE_XMLInstruction::CountData() const {
651   return pdfium::CollectionSize<int32_t>(m_TargetData);
652 }
653 
GetData(int32_t index,CFX_WideString & wsData) const654 bool CFDE_XMLInstruction::GetData(int32_t index, CFX_WideString& wsData) const {
655   if (index < 0 || index >= pdfium::CollectionSize<int32_t>(m_TargetData))
656     return false;
657 
658   wsData = m_TargetData[index];
659   return true;
660 }
661 
AppendData(const CFX_WideString & wsData)662 void CFDE_XMLInstruction::AppendData(const CFX_WideString& wsData) {
663   m_TargetData.push_back(wsData);
664 }
665 
RemoveData(int32_t index)666 void CFDE_XMLInstruction::RemoveData(int32_t index) {
667   if (index < 0 || index >= pdfium::CollectionSize<int32_t>(m_TargetData))
668     return;
669 
670   m_TargetData.erase(m_TargetData.begin() + index);
671 }
672 
~CFDE_XMLInstruction()673 CFDE_XMLInstruction::~CFDE_XMLInstruction() {}
674 
CFDE_XMLElement(const CFX_WideString & wsTag)675 CFDE_XMLElement::CFDE_XMLElement(const CFX_WideString& wsTag)
676     : CFDE_XMLNode(), m_wsTag(wsTag), m_Attributes() {
677   ASSERT(m_wsTag.GetLength() > 0);
678 }
679 
~CFDE_XMLElement()680 CFDE_XMLElement::~CFDE_XMLElement() {}
681 
GetType() const682 FDE_XMLNODETYPE CFDE_XMLElement::GetType() const {
683   return FDE_XMLNODE_Element;
684 }
685 
Clone(bool bRecursive)686 CFDE_XMLNode* CFDE_XMLElement::Clone(bool bRecursive) {
687   CFDE_XMLElement* pClone = new CFDE_XMLElement(m_wsTag);
688   if (!pClone)
689     return nullptr;
690 
691   pClone->m_Attributes = m_Attributes;
692   if (bRecursive) {
693     CloneChildren(pClone);
694   } else {
695     CFX_WideString wsText;
696     CFDE_XMLNode* pChild = m_pChild;
697     while (pChild) {
698       switch (pChild->GetType()) {
699         case FDE_XMLNODE_Text:
700           wsText += ((CFDE_XMLText*)pChild)->m_wsText;
701           break;
702         default:
703           break;
704       }
705       pChild = pChild->m_pNext;
706     }
707     pClone->SetTextData(wsText);
708   }
709   return pClone;
710 }
711 
GetTagName(CFX_WideString & wsTag) const712 void CFDE_XMLElement::GetTagName(CFX_WideString& wsTag) const {
713   wsTag = m_wsTag;
714 }
715 
GetLocalTagName(CFX_WideString & wsTag) const716 void CFDE_XMLElement::GetLocalTagName(CFX_WideString& wsTag) const {
717   FX_STRSIZE iFind = m_wsTag.Find(L':', 0);
718   if (iFind < 0) {
719     wsTag = m_wsTag;
720   } else {
721     wsTag = m_wsTag.Right(m_wsTag.GetLength() - iFind - 1);
722   }
723 }
724 
GetNamespacePrefix(CFX_WideString & wsPrefix) const725 void CFDE_XMLElement::GetNamespacePrefix(CFX_WideString& wsPrefix) const {
726   FX_STRSIZE iFind = m_wsTag.Find(L':', 0);
727   if (iFind < 0) {
728     wsPrefix.clear();
729   } else {
730     wsPrefix = m_wsTag.Left(iFind);
731   }
732 }
733 
GetNamespaceURI(CFX_WideString & wsNamespace) const734 void CFDE_XMLElement::GetNamespaceURI(CFX_WideString& wsNamespace) const {
735   CFX_WideString wsAttri(L"xmlns"), wsPrefix;
736   GetNamespacePrefix(wsPrefix);
737   if (wsPrefix.GetLength() > 0) {
738     wsAttri += L":";
739     wsAttri += wsPrefix;
740   }
741   wsNamespace.clear();
742   CFDE_XMLNode* pNode = (CFDE_XMLNode*)this;
743   while (pNode) {
744     if (pNode->GetType() != FDE_XMLNODE_Element) {
745       break;
746     }
747     CFDE_XMLElement* pElement = (CFDE_XMLElement*)pNode;
748     if (!pElement->HasAttribute(wsAttri.c_str())) {
749       pNode = pNode->GetNodeItem(CFDE_XMLNode::Parent);
750       continue;
751     }
752     pElement->GetString(wsAttri.c_str(), wsNamespace);
753     break;
754   }
755 }
756 
CountAttributes() const757 int32_t CFDE_XMLElement::CountAttributes() const {
758   return pdfium::CollectionSize<int32_t>(m_Attributes) / 2;
759 }
760 
GetAttribute(int32_t index,CFX_WideString & wsAttriName,CFX_WideString & wsAttriValue) const761 bool CFDE_XMLElement::GetAttribute(int32_t index,
762                                    CFX_WideString& wsAttriName,
763                                    CFX_WideString& wsAttriValue) const {
764   int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
765   ASSERT(index > -1 && index < iCount / 2);
766   for (int32_t i = 0; i < iCount; i += 2) {
767     if (index == 0) {
768       wsAttriName = m_Attributes[i];
769       wsAttriValue = m_Attributes[i + 1];
770       return true;
771     }
772     index--;
773   }
774   return false;
775 }
776 
HasAttribute(const FX_WCHAR * pwsAttriName) const777 bool CFDE_XMLElement::HasAttribute(const FX_WCHAR* pwsAttriName) const {
778   int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
779   for (int32_t i = 0; i < iCount; i += 2) {
780     if (m_Attributes[i].Compare(pwsAttriName) == 0)
781       return true;
782   }
783   return false;
784 }
785 
GetString(const FX_WCHAR * pwsAttriName,CFX_WideString & wsAttriValue,const FX_WCHAR * pwsDefValue) const786 void CFDE_XMLElement::GetString(const FX_WCHAR* pwsAttriName,
787                                 CFX_WideString& wsAttriValue,
788                                 const FX_WCHAR* pwsDefValue) const {
789   int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
790   for (int32_t i = 0; i < iCount; i += 2) {
791     if (m_Attributes[i].Compare(pwsAttriName) == 0) {
792       wsAttriValue = m_Attributes[i + 1];
793       return;
794     }
795   }
796   wsAttriValue = pwsDefValue;
797 }
798 
SetString(const CFX_WideString & wsAttriName,const CFX_WideString & wsAttriValue)799 void CFDE_XMLElement::SetString(const CFX_WideString& wsAttriName,
800                                 const CFX_WideString& wsAttriValue) {
801   ASSERT(wsAttriName.GetLength() > 0);
802   int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
803   for (int32_t i = 0; i < iCount; i += 2) {
804     if (m_Attributes[i].Compare(wsAttriName) == 0) {
805       m_Attributes[i] = wsAttriName;
806       m_Attributes[i + 1] = wsAttriValue;
807       return;
808     }
809   }
810   m_Attributes.push_back(wsAttriName);
811   m_Attributes.push_back(wsAttriValue);
812 }
813 
GetInteger(const FX_WCHAR * pwsAttriName,int32_t iDefValue) const814 int32_t CFDE_XMLElement::GetInteger(const FX_WCHAR* pwsAttriName,
815                                     int32_t iDefValue) const {
816   int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
817   for (int32_t i = 0; i < iCount; i += 2) {
818     if (m_Attributes[i].Compare(pwsAttriName) == 0) {
819       return FXSYS_wtoi(m_Attributes[i + 1].c_str());
820     }
821   }
822   return iDefValue;
823 }
824 
SetInteger(const FX_WCHAR * pwsAttriName,int32_t iAttriValue)825 void CFDE_XMLElement::SetInteger(const FX_WCHAR* pwsAttriName,
826                                  int32_t iAttriValue) {
827   CFX_WideString wsValue;
828   wsValue.Format(L"%d", iAttriValue);
829   SetString(pwsAttriName, wsValue);
830 }
831 
GetFloat(const FX_WCHAR * pwsAttriName,FX_FLOAT fDefValue) const832 FX_FLOAT CFDE_XMLElement::GetFloat(const FX_WCHAR* pwsAttriName,
833                                    FX_FLOAT fDefValue) const {
834   int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
835   for (int32_t i = 0; i < iCount; i += 2) {
836     if (m_Attributes[i].Compare(pwsAttriName) == 0) {
837       return FXSYS_wcstof(m_Attributes[i + 1].c_str(), -1, nullptr);
838     }
839   }
840   return fDefValue;
841 }
842 
SetFloat(const FX_WCHAR * pwsAttriName,FX_FLOAT fAttriValue)843 void CFDE_XMLElement::SetFloat(const FX_WCHAR* pwsAttriName,
844                                FX_FLOAT fAttriValue) {
845   CFX_WideString wsValue;
846   wsValue.Format(L"%f", fAttriValue);
847   SetString(pwsAttriName, wsValue);
848 }
849 
RemoveAttribute(const FX_WCHAR * pwsAttriName)850 void CFDE_XMLElement::RemoveAttribute(const FX_WCHAR* pwsAttriName) {
851   int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes);
852   for (int32_t i = 0; i < iCount; i += 2) {
853     if (m_Attributes[i].Compare(pwsAttriName) == 0) {
854       m_Attributes.erase(m_Attributes.begin() + i,
855                          m_Attributes.begin() + i + 2);
856       return;
857     }
858   }
859 }
860 
GetTextData(CFX_WideString & wsText) const861 void CFDE_XMLElement::GetTextData(CFX_WideString& wsText) const {
862   CFX_WideTextBuf buffer;
863   CFDE_XMLNode* pChild = m_pChild;
864   while (pChild) {
865     switch (pChild->GetType()) {
866       case FDE_XMLNODE_Text:
867         buffer << ((CFDE_XMLText*)pChild)->m_wsText;
868         break;
869       case FDE_XMLNODE_CharData:
870         buffer << ((CFDE_XMLCharData*)pChild)->m_wsCharData;
871         break;
872       default:
873         break;
874     }
875     pChild = pChild->m_pNext;
876   }
877   wsText = buffer.AsStringC();
878 }
879 
SetTextData(const CFX_WideString & wsText)880 void CFDE_XMLElement::SetTextData(const CFX_WideString& wsText) {
881   if (wsText.GetLength() < 1) {
882     return;
883   }
884   InsertChildNode(new CFDE_XMLText(wsText));
885 }
886 
CFDE_XMLText(const CFX_WideString & wsText)887 CFDE_XMLText::CFDE_XMLText(const CFX_WideString& wsText)
888     : CFDE_XMLNode(), m_wsText(wsText) {}
889 
GetType() const890 FDE_XMLNODETYPE CFDE_XMLText::GetType() const {
891   return FDE_XMLNODE_Text;
892 }
893 
Clone(bool bRecursive)894 CFDE_XMLNode* CFDE_XMLText::Clone(bool bRecursive) {
895   CFDE_XMLText* pClone = new CFDE_XMLText(m_wsText);
896   return pClone;
897 }
898 
~CFDE_XMLText()899 CFDE_XMLText::~CFDE_XMLText() {}
900 
CFDE_XMLCharData(const CFX_WideString & wsCData)901 CFDE_XMLCharData::CFDE_XMLCharData(const CFX_WideString& wsCData)
902     : CFDE_XMLDeclaration(), m_wsCharData(wsCData) {}
903 
GetType() const904 FDE_XMLNODETYPE CFDE_XMLCharData::GetType() const {
905   return FDE_XMLNODE_CharData;
906 }
907 
Clone(bool bRecursive)908 CFDE_XMLNode* CFDE_XMLCharData::Clone(bool bRecursive) {
909   CFDE_XMLCharData* pClone = new CFDE_XMLCharData(m_wsCharData);
910   return pClone;
911 }
912 
~CFDE_XMLCharData()913 CFDE_XMLCharData::~CFDE_XMLCharData() {}
914 
CFDE_XMLDoc()915 CFDE_XMLDoc::CFDE_XMLDoc() : m_pRoot(nullptr) {
916   Reset(true);
917   CFDE_XMLInstruction* pXML = new CFDE_XMLInstruction(L"xml");
918   m_pRoot->InsertChildNode(pXML);
919 }
920 
~CFDE_XMLDoc()921 CFDE_XMLDoc::~CFDE_XMLDoc() {
922   Reset(false);
923 }
924 
Reset(bool bInitRoot)925 void CFDE_XMLDoc::Reset(bool bInitRoot) {
926   m_iStatus = 0;
927   m_pStream = nullptr;
928   if (bInitRoot) {
929     if (m_pRoot)
930       m_pRoot->DeleteChildren();
931     else
932       m_pRoot = new CFDE_XMLNode;
933   } else {
934     delete m_pRoot;
935     m_pRoot = nullptr;
936   }
937   ReleaseParser();
938 }
939 
ReleaseParser()940 void CFDE_XMLDoc::ReleaseParser() {
941   m_pXMLParser.reset();
942 }
943 
LoadXML(std::unique_ptr<IFDE_XMLParser> pXMLParser)944 bool CFDE_XMLDoc::LoadXML(std::unique_ptr<IFDE_XMLParser> pXMLParser) {
945   if (!pXMLParser)
946     return false;
947 
948   Reset(true);
949   m_pXMLParser = std::move(pXMLParser);
950   return true;
951 }
952 
DoLoad(IFX_Pause * pPause)953 int32_t CFDE_XMLDoc::DoLoad(IFX_Pause* pPause) {
954   if (m_iStatus < 100)
955     m_iStatus = m_pXMLParser->DoParser(pPause);
956 
957   return m_iStatus;
958 }
959 
CloseXML()960 void CFDE_XMLDoc::CloseXML() {
961   ReleaseParser();
962 }
963 
SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream> & pXMLStream,CFDE_XMLNode * pINode)964 void CFDE_XMLDoc::SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream>& pXMLStream,
965                               CFDE_XMLNode* pINode) {
966   CFDE_XMLNode* pNode = (CFDE_XMLNode*)pINode;
967   switch (pNode->GetType()) {
968     case FDE_XMLNODE_Instruction: {
969       CFX_WideString ws;
970       CFDE_XMLInstruction* pInstruction = (CFDE_XMLInstruction*)pNode;
971       if (pInstruction->m_wsTarget.CompareNoCase(L"xml") == 0) {
972         ws = L"<?xml version=\"1.0\" encoding=\"";
973         uint16_t wCodePage = pXMLStream->GetCodePage();
974         if (wCodePage == FX_CODEPAGE_UTF16LE) {
975           ws += L"UTF-16";
976         } else if (wCodePage == FX_CODEPAGE_UTF16BE) {
977           ws += L"UTF-16be";
978         } else {
979           ws += L"UTF-8";
980         }
981         ws += L"\"?>";
982         pXMLStream->WriteString(ws.c_str(), ws.GetLength());
983       } else {
984         ws.Format(L"<?%s", pInstruction->m_wsTarget.c_str());
985         pXMLStream->WriteString(ws.c_str(), ws.GetLength());
986         std::vector<CFX_WideString>& attributes = pInstruction->m_Attributes;
987         int32_t i;
988         int32_t iCount = pdfium::CollectionSize<int32_t>(attributes);
989         CFX_WideString wsValue;
990         for (i = 0; i < iCount; i += 2) {
991           ws = L" ";
992           ws += attributes[i];
993           ws += L"=\"";
994           wsValue = attributes[i + 1];
995           wsValue.Replace(L"&", L"&amp;");
996           wsValue.Replace(L"<", L"&lt;");
997           wsValue.Replace(L">", L"&gt;");
998           wsValue.Replace(L"\'", L"&apos;");
999           wsValue.Replace(L"\"", L"&quot;");
1000           ws += wsValue;
1001           ws += L"\"";
1002           pXMLStream->WriteString(ws.c_str(), ws.GetLength());
1003         }
1004         std::vector<CFX_WideString>& targetdata = pInstruction->m_TargetData;
1005         iCount = pdfium::CollectionSize<int32_t>(targetdata);
1006         for (i = 0; i < iCount; i++) {
1007           ws = L" \"";
1008           ws += targetdata[i];
1009           ws += L"\"";
1010           pXMLStream->WriteString(ws.c_str(), ws.GetLength());
1011         }
1012         ws = L"?>";
1013         pXMLStream->WriteString(ws.c_str(), ws.GetLength());
1014       }
1015     } break;
1016     case FDE_XMLNODE_Element: {
1017       CFX_WideString ws;
1018       ws = L"<";
1019       ws += ((CFDE_XMLElement*)pNode)->m_wsTag;
1020       pXMLStream->WriteString(ws.c_str(), ws.GetLength());
1021       std::vector<CFX_WideString>& attributes =
1022           static_cast<CFDE_XMLElement*>(pNode)->m_Attributes;
1023       int32_t iCount = pdfium::CollectionSize<int32_t>(attributes);
1024       CFX_WideString wsValue;
1025       for (int32_t i = 0; i < iCount; i += 2) {
1026         ws = L" ";
1027         ws += attributes[i];
1028         ws += L"=\"";
1029         wsValue = attributes[i + 1];
1030         wsValue.Replace(L"&", L"&amp;");
1031         wsValue.Replace(L"<", L"&lt;");
1032         wsValue.Replace(L">", L"&gt;");
1033         wsValue.Replace(L"\'", L"&apos;");
1034         wsValue.Replace(L"\"", L"&quot;");
1035         ws += wsValue;
1036         ws += L"\"";
1037         pXMLStream->WriteString(ws.c_str(), ws.GetLength());
1038       }
1039       if (pNode->m_pChild) {
1040         ws = L"\n>";
1041         pXMLStream->WriteString(ws.c_str(), ws.GetLength());
1042         CFDE_XMLNode* pChild = pNode->m_pChild;
1043         while (pChild) {
1044           SaveXMLNode(pXMLStream, static_cast<CFDE_XMLNode*>(pChild));
1045           pChild = pChild->m_pNext;
1046         }
1047         ws = L"</";
1048         ws += ((CFDE_XMLElement*)pNode)->m_wsTag;
1049         ws += L"\n>";
1050       } else {
1051         ws = L"\n/>";
1052       }
1053       pXMLStream->WriteString(ws.c_str(), ws.GetLength());
1054     } break;
1055     case FDE_XMLNODE_Text: {
1056       CFX_WideString ws = ((CFDE_XMLText*)pNode)->m_wsText;
1057       ws.Replace(L"&", L"&amp;");
1058       ws.Replace(L"<", L"&lt;");
1059       ws.Replace(L">", L"&gt;");
1060       ws.Replace(L"\'", L"&apos;");
1061       ws.Replace(L"\"", L"&quot;");
1062       pXMLStream->WriteString(ws.c_str(), ws.GetLength());
1063     } break;
1064     case FDE_XMLNODE_CharData: {
1065       CFX_WideString ws = L"<![CDATA[";
1066       ws += ((CFDE_XMLCharData*)pNode)->m_wsCharData;
1067       ws += L"]]>";
1068       pXMLStream->WriteString(ws.c_str(), ws.GetLength());
1069     } break;
1070     case FDE_XMLNODE_Unknown:
1071       break;
1072     default:
1073       break;
1074   }
1075 }
1076 
SaveXML(CFX_RetainPtr<IFGAS_Stream> & pXMLStream,bool bSaveBOM)1077 void CFDE_XMLDoc::SaveXML(CFX_RetainPtr<IFGAS_Stream>& pXMLStream,
1078                           bool bSaveBOM) {
1079   if (!pXMLStream || pXMLStream == m_pStream) {
1080     m_pStream->Seek(FX_STREAMSEEK_Begin, 0);
1081     pXMLStream = m_pStream;
1082   }
1083   ASSERT((pXMLStream->GetAccessModes() & FX_STREAMACCESS_Text) != 0);
1084   ASSERT((pXMLStream->GetAccessModes() & FX_STREAMACCESS_Write) != 0);
1085   uint16_t wCodePage = pXMLStream->GetCodePage();
1086   if (wCodePage != FX_CODEPAGE_UTF16LE && wCodePage != FX_CODEPAGE_UTF16BE &&
1087       wCodePage != FX_CODEPAGE_UTF8) {
1088     wCodePage = FX_CODEPAGE_UTF8;
1089     pXMLStream->SetCodePage(wCodePage);
1090   }
1091   if (bSaveBOM) {
1092     pXMLStream->WriteString(L"\xFEFF", 1);
1093   }
1094   CFDE_XMLNode* pNode = m_pRoot->m_pChild;
1095   while (pNode) {
1096     SaveXMLNode(pXMLStream, static_cast<CFDE_XMLNode*>(pNode));
1097     pNode = pNode->m_pNext;
1098   }
1099   if (pXMLStream == m_pStream) {
1100     int32_t iPos = pXMLStream->GetPosition();
1101     pXMLStream->SetLength(iPos);
1102   }
1103 }
1104 
CFDE_BlockBuffer(int32_t iAllocStep)1105 CFDE_BlockBuffer::CFDE_BlockBuffer(int32_t iAllocStep)
1106     : m_iDataLength(0),
1107       m_iBufferSize(0),
1108       m_iAllocStep(iAllocStep),
1109       m_iStartPosition(0) {}
1110 
~CFDE_BlockBuffer()1111 CFDE_BlockBuffer::~CFDE_BlockBuffer() {
1112   ClearBuffer();
1113 }
1114 
GetAvailableBlock(int32_t & iIndexInBlock)1115 FX_WCHAR* CFDE_BlockBuffer::GetAvailableBlock(int32_t& iIndexInBlock) {
1116   iIndexInBlock = 0;
1117   if (!m_BlockArray.GetSize()) {
1118     return nullptr;
1119   }
1120   int32_t iRealIndex = m_iStartPosition + m_iDataLength;
1121   if (iRealIndex == m_iBufferSize) {
1122     FX_WCHAR* pBlock = FX_Alloc(FX_WCHAR, m_iAllocStep);
1123     m_BlockArray.Add(pBlock);
1124     m_iBufferSize += m_iAllocStep;
1125     return pBlock;
1126   }
1127   iIndexInBlock = iRealIndex % m_iAllocStep;
1128   return m_BlockArray[iRealIndex / m_iAllocStep];
1129 }
1130 
InitBuffer(int32_t iBufferSize)1131 bool CFDE_BlockBuffer::InitBuffer(int32_t iBufferSize) {
1132   ClearBuffer();
1133   int32_t iNumOfBlock = (iBufferSize - 1) / m_iAllocStep + 1;
1134   for (int32_t i = 0; i < iNumOfBlock; i++) {
1135     m_BlockArray.Add(FX_Alloc(FX_WCHAR, m_iAllocStep));
1136   }
1137   m_iBufferSize = iNumOfBlock * m_iAllocStep;
1138   return true;
1139 }
1140 
SetTextChar(int32_t iIndex,FX_WCHAR ch)1141 void CFDE_BlockBuffer::SetTextChar(int32_t iIndex, FX_WCHAR ch) {
1142   if (iIndex < 0) {
1143     return;
1144   }
1145   int32_t iRealIndex = m_iStartPosition + iIndex;
1146   int32_t iBlockIndex = iRealIndex / m_iAllocStep;
1147   int32_t iInnerIndex = iRealIndex % m_iAllocStep;
1148   int32_t iBlockSize = m_BlockArray.GetSize();
1149   if (iBlockIndex >= iBlockSize) {
1150     int32_t iNewBlocks = iBlockIndex - iBlockSize + 1;
1151     do {
1152       FX_WCHAR* pBlock = FX_Alloc(FX_WCHAR, m_iAllocStep);
1153       m_BlockArray.Add(pBlock);
1154       m_iBufferSize += m_iAllocStep;
1155     } while (--iNewBlocks);
1156   }
1157   FX_WCHAR* pTextData = m_BlockArray[iBlockIndex];
1158   *(pTextData + iInnerIndex) = ch;
1159   if (m_iDataLength <= iIndex) {
1160     m_iDataLength = iIndex + 1;
1161   }
1162 }
1163 
DeleteTextChars(int32_t iCount,bool bDirection)1164 int32_t CFDE_BlockBuffer::DeleteTextChars(int32_t iCount, bool bDirection) {
1165   if (iCount <= 0) {
1166     return m_iDataLength;
1167   }
1168   if (iCount >= m_iDataLength) {
1169     Reset(false);
1170     return 0;
1171   }
1172   if (bDirection) {
1173     m_iStartPosition += iCount;
1174     m_iDataLength -= iCount;
1175   } else {
1176     m_iDataLength -= iCount;
1177   }
1178   return m_iDataLength;
1179 }
1180 
GetTextData(CFX_WideString & wsTextData,int32_t iStart,int32_t iLength) const1181 void CFDE_BlockBuffer::GetTextData(CFX_WideString& wsTextData,
1182                                    int32_t iStart,
1183                                    int32_t iLength) const {
1184   wsTextData.clear();
1185   int32_t iMaybeDataLength = m_iBufferSize - 1 - m_iStartPosition;
1186   if (iStart < 0 || iStart > iMaybeDataLength) {
1187     return;
1188   }
1189   if (iLength == -1 || iLength > iMaybeDataLength) {
1190     iLength = iMaybeDataLength;
1191   }
1192   if (iLength <= 0) {
1193     return;
1194   }
1195   FX_WCHAR* pBuf = wsTextData.GetBuffer(iLength);
1196   if (!pBuf) {
1197     return;
1198   }
1199   int32_t iStartBlockIndex = 0;
1200   int32_t iStartInnerIndex = 0;
1201   TextDataIndex2BufIndex(iStart, iStartBlockIndex, iStartInnerIndex);
1202   int32_t iEndBlockIndex = 0;
1203   int32_t iEndInnerIndex = 0;
1204   TextDataIndex2BufIndex(iStart + iLength, iEndBlockIndex, iEndInnerIndex);
1205   int32_t iPointer = 0;
1206   for (int32_t i = iStartBlockIndex; i <= iEndBlockIndex; i++) {
1207     int32_t iBufferPointer = 0;
1208     int32_t iCopyLength = m_iAllocStep;
1209     if (i == iStartBlockIndex) {
1210       iCopyLength -= iStartInnerIndex;
1211       iBufferPointer = iStartInnerIndex;
1212     }
1213     if (i == iEndBlockIndex) {
1214       iCopyLength -= ((m_iAllocStep - 1) - iEndInnerIndex);
1215     }
1216     FX_WCHAR* pBlockBuf = m_BlockArray[i];
1217     FXSYS_memcpy(pBuf + iPointer, pBlockBuf + iBufferPointer,
1218                  iCopyLength * sizeof(FX_WCHAR));
1219     iPointer += iCopyLength;
1220   }
1221   wsTextData.ReleaseBuffer(iLength);
1222 }
1223 
TextDataIndex2BufIndex(const int32_t iIndex,int32_t & iBlockIndex,int32_t & iInnerIndex) const1224 void CFDE_BlockBuffer::TextDataIndex2BufIndex(const int32_t iIndex,
1225                                               int32_t& iBlockIndex,
1226                                               int32_t& iInnerIndex) const {
1227   ASSERT(iIndex >= 0);
1228   int32_t iRealIndex = m_iStartPosition + iIndex;
1229   iBlockIndex = iRealIndex / m_iAllocStep;
1230   iInnerIndex = iRealIndex % m_iAllocStep;
1231 }
1232 
ClearBuffer()1233 void CFDE_BlockBuffer::ClearBuffer() {
1234   m_iBufferSize = 0;
1235   int32_t iSize = m_BlockArray.GetSize();
1236   for (int32_t i = 0; i < iSize; i++) {
1237     FX_Free(m_BlockArray[i]);
1238   }
1239   m_BlockArray.RemoveAll();
1240 }
1241 
CFDE_XMLSyntaxParser()1242 CFDE_XMLSyntaxParser::CFDE_XMLSyntaxParser()
1243     : m_pStream(nullptr),
1244       m_iXMLPlaneSize(-1),
1245       m_iCurrentPos(0),
1246       m_iCurrentNodeNum(-1),
1247       m_iLastNodeNum(-1),
1248       m_iParsedChars(0),
1249       m_iParsedBytes(0),
1250       m_pBuffer(nullptr),
1251       m_iBufferChars(0),
1252       m_bEOS(false),
1253       m_pStart(nullptr),
1254       m_pEnd(nullptr),
1255       m_XMLNodeStack(16),
1256       m_iAllocStep(m_BlockBuffer.GetAllocStep()),
1257       m_iDataLength(m_BlockBuffer.GetDataLengthRef()),
1258       m_pCurrentBlock(nullptr),
1259       m_iIndexInBlock(0),
1260       m_iTextDataLength(0),
1261       m_syntaxParserResult(FDE_XmlSyntaxResult::None),
1262       m_syntaxParserState(FDE_XmlSyntaxState::Text),
1263       m_wQuotationMark(0),
1264       m_iEntityStart(-1),
1265       m_SkipStack(16) {
1266   m_CurNode.iNodeNum = -1;
1267   m_CurNode.eNodeType = FDE_XMLNODE_Unknown;
1268 }
1269 
Init(const CFX_RetainPtr<IFGAS_Stream> & pStream,int32_t iXMLPlaneSize,int32_t iTextDataSize)1270 void CFDE_XMLSyntaxParser::Init(const CFX_RetainPtr<IFGAS_Stream>& pStream,
1271                                 int32_t iXMLPlaneSize,
1272                                 int32_t iTextDataSize) {
1273   ASSERT(!m_pStream && !m_pBuffer);
1274   ASSERT(pStream && iXMLPlaneSize > 0);
1275   int32_t iStreamLength = pStream->GetLength();
1276   ASSERT(iStreamLength > 0);
1277   m_pStream = pStream;
1278   m_iXMLPlaneSize = std::min(iXMLPlaneSize, iStreamLength);
1279   uint8_t bom[4];
1280   m_iCurrentPos = m_pStream->GetBOM(bom);
1281   ASSERT(!m_pBuffer);
1282 
1283   FX_SAFE_INT32 alloc_size_safe = m_iXMLPlaneSize;
1284   alloc_size_safe += 1;  // For NUL.
1285   if (!alloc_size_safe.IsValid() || alloc_size_safe.ValueOrDie() <= 0) {
1286     m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
1287     return;
1288   }
1289 
1290   m_pBuffer = FX_Alloc(
1291       FX_WCHAR, pdfium::base::ValueOrDieForType<size_t>(alloc_size_safe));
1292   m_pStart = m_pEnd = m_pBuffer;
1293   ASSERT(!m_BlockBuffer.IsInitialized());
1294   m_BlockBuffer.InitBuffer();
1295   m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1296   m_iParsedBytes = m_iParsedChars = 0;
1297   m_iBufferChars = 0;
1298 }
1299 
DoSyntaxParse()1300 FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() {
1301   if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error ||
1302       m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) {
1303     return m_syntaxParserResult;
1304   }
1305   ASSERT(m_pStream && m_pBuffer && m_BlockBuffer.IsInitialized());
1306   int32_t iStreamLength = m_pStream->GetLength();
1307   int32_t iPos;
1308 
1309   FDE_XmlSyntaxResult syntaxParserResult = FDE_XmlSyntaxResult::None;
1310   while (true) {
1311     if (m_pStart >= m_pEnd) {
1312       if (m_bEOS || m_iCurrentPos >= iStreamLength) {
1313         m_syntaxParserResult = FDE_XmlSyntaxResult::EndOfString;
1314         return m_syntaxParserResult;
1315       }
1316       m_iParsedChars += (m_pEnd - m_pBuffer);
1317       m_iParsedBytes = m_iCurrentPos;
1318       if (m_pStream->GetPosition() != m_iCurrentPos) {
1319         m_pStream->Seek(FX_STREAMSEEK_Begin, m_iCurrentPos);
1320       }
1321       m_iBufferChars =
1322           m_pStream->ReadString(m_pBuffer, m_iXMLPlaneSize, m_bEOS);
1323       iPos = m_pStream->GetPosition();
1324       if (m_iBufferChars < 1) {
1325         m_iCurrentPos = iStreamLength;
1326         m_syntaxParserResult = FDE_XmlSyntaxResult::EndOfString;
1327         return m_syntaxParserResult;
1328       }
1329       m_iCurrentPos = iPos;
1330       m_pStart = m_pBuffer;
1331       m_pEnd = m_pBuffer + m_iBufferChars;
1332     }
1333 
1334     while (m_pStart < m_pEnd) {
1335       FX_WCHAR ch = *m_pStart;
1336       switch (m_syntaxParserState) {
1337         case FDE_XmlSyntaxState::Text:
1338           if (ch == L'<') {
1339             if (m_iDataLength > 0) {
1340               m_iTextDataLength = m_iDataLength;
1341               m_BlockBuffer.Reset();
1342               m_pCurrentBlock =
1343                   m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1344               m_iEntityStart = -1;
1345               syntaxParserResult = FDE_XmlSyntaxResult::Text;
1346             } else {
1347               m_pStart++;
1348               m_syntaxParserState = FDE_XmlSyntaxState::Node;
1349             }
1350           } else {
1351             ParseTextChar(ch);
1352           }
1353           break;
1354         case FDE_XmlSyntaxState::Node:
1355           if (ch == L'!') {
1356             m_pStart++;
1357             m_syntaxParserState = FDE_XmlSyntaxState::SkipCommentOrDecl;
1358           } else if (ch == L'/') {
1359             m_pStart++;
1360             m_syntaxParserState = FDE_XmlSyntaxState::CloseElement;
1361           } else if (ch == L'?') {
1362             m_iLastNodeNum++;
1363             m_iCurrentNodeNum = m_iLastNodeNum;
1364             m_CurNode.iNodeNum = m_iLastNodeNum;
1365             m_CurNode.eNodeType = FDE_XMLNODE_Instruction;
1366             m_XMLNodeStack.Push(m_CurNode);
1367             m_pStart++;
1368             m_syntaxParserState = FDE_XmlSyntaxState::Target;
1369             syntaxParserResult = FDE_XmlSyntaxResult::InstructionOpen;
1370           } else {
1371             m_iLastNodeNum++;
1372             m_iCurrentNodeNum = m_iLastNodeNum;
1373             m_CurNode.iNodeNum = m_iLastNodeNum;
1374             m_CurNode.eNodeType = FDE_XMLNODE_Element;
1375             m_XMLNodeStack.Push(m_CurNode);
1376             m_syntaxParserState = FDE_XmlSyntaxState::Tag;
1377             syntaxParserResult = FDE_XmlSyntaxResult::ElementOpen;
1378           }
1379           break;
1380         case FDE_XmlSyntaxState::Target:
1381         case FDE_XmlSyntaxState::Tag:
1382           if (!FDE_IsXMLNameChar(ch, m_iDataLength < 1)) {
1383             if (m_iDataLength < 1) {
1384               m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
1385               return m_syntaxParserResult;
1386             } else {
1387               m_iTextDataLength = m_iDataLength;
1388               m_BlockBuffer.Reset();
1389               m_pCurrentBlock =
1390                   m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1391               if (m_syntaxParserState != FDE_XmlSyntaxState::Target) {
1392                 syntaxParserResult = FDE_XmlSyntaxResult::TagName;
1393               } else {
1394                 syntaxParserResult = FDE_XmlSyntaxResult::TargetName;
1395               }
1396               m_syntaxParserState = FDE_XmlSyntaxState::AttriName;
1397             }
1398           } else {
1399             if (m_iIndexInBlock == m_iAllocStep) {
1400               m_pCurrentBlock =
1401                   m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1402               if (!m_pCurrentBlock) {
1403                 return FDE_XmlSyntaxResult::Error;
1404               }
1405             }
1406             m_pCurrentBlock[m_iIndexInBlock++] = ch;
1407             m_iDataLength++;
1408             m_pStart++;
1409           }
1410           break;
1411         case FDE_XmlSyntaxState::AttriName:
1412           if (m_iDataLength < 1 && FDE_IsXMLWhiteSpace(ch)) {
1413             m_pStart++;
1414             break;
1415           }
1416           if (!FDE_IsXMLNameChar(ch, m_iDataLength < 1)) {
1417             if (m_iDataLength < 1) {
1418               if (m_CurNode.eNodeType == FDE_XMLNODE_Element) {
1419                 if (ch == L'>' || ch == L'/') {
1420                   m_syntaxParserState = FDE_XmlSyntaxState::BreakElement;
1421                   break;
1422                 }
1423               } else if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) {
1424                 if (ch == L'?') {
1425                   m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction;
1426                   m_pStart++;
1427                 } else {
1428                   m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
1429                 }
1430                 break;
1431               }
1432               m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
1433               return m_syntaxParserResult;
1434             } else {
1435               if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) {
1436                 if (ch != '=' && !FDE_IsXMLWhiteSpace(ch)) {
1437                   m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
1438                   break;
1439                 }
1440               }
1441               m_iTextDataLength = m_iDataLength;
1442               m_BlockBuffer.Reset();
1443               m_pCurrentBlock =
1444                   m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1445               m_syntaxParserState = FDE_XmlSyntaxState::AttriEqualSign;
1446               syntaxParserResult = FDE_XmlSyntaxResult::AttriName;
1447             }
1448           } else {
1449             if (m_iIndexInBlock == m_iAllocStep) {
1450               m_pCurrentBlock =
1451                   m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1452               if (!m_pCurrentBlock) {
1453                 return FDE_XmlSyntaxResult::Error;
1454               }
1455             }
1456             m_pCurrentBlock[m_iIndexInBlock++] = ch;
1457             m_iDataLength++;
1458             m_pStart++;
1459           }
1460           break;
1461         case FDE_XmlSyntaxState::AttriEqualSign:
1462           if (FDE_IsXMLWhiteSpace(ch)) {
1463             m_pStart++;
1464             break;
1465           }
1466           if (ch != L'=') {
1467             if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) {
1468               m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
1469               break;
1470             }
1471             m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
1472             return m_syntaxParserResult;
1473           } else {
1474             m_syntaxParserState = FDE_XmlSyntaxState::AttriQuotation;
1475             m_pStart++;
1476           }
1477           break;
1478         case FDE_XmlSyntaxState::AttriQuotation:
1479           if (FDE_IsXMLWhiteSpace(ch)) {
1480             m_pStart++;
1481             break;
1482           }
1483           if (ch != L'\"' && ch != L'\'') {
1484             m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
1485             return m_syntaxParserResult;
1486           } else {
1487             m_wQuotationMark = ch;
1488             m_syntaxParserState = FDE_XmlSyntaxState::AttriValue;
1489             m_pStart++;
1490           }
1491           break;
1492         case FDE_XmlSyntaxState::AttriValue:
1493           if (ch == m_wQuotationMark) {
1494             if (m_iEntityStart > -1) {
1495               m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
1496               return m_syntaxParserResult;
1497             }
1498             m_iTextDataLength = m_iDataLength;
1499             m_wQuotationMark = 0;
1500             m_BlockBuffer.Reset();
1501             m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1502             m_pStart++;
1503             m_syntaxParserState = FDE_XmlSyntaxState::AttriName;
1504             syntaxParserResult = FDE_XmlSyntaxResult::AttriValue;
1505           } else {
1506             ParseTextChar(ch);
1507           }
1508           break;
1509         case FDE_XmlSyntaxState::CloseInstruction:
1510           if (ch != L'>') {
1511             if (m_iIndexInBlock == m_iAllocStep) {
1512               m_pCurrentBlock =
1513                   m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1514               if (!m_pCurrentBlock) {
1515                 return FDE_XmlSyntaxResult::Error;
1516               }
1517             }
1518             m_pCurrentBlock[m_iIndexInBlock++] = ch;
1519             m_iDataLength++;
1520             m_syntaxParserState = FDE_XmlSyntaxState::TargetData;
1521           } else if (m_iDataLength > 0) {
1522             m_iTextDataLength = m_iDataLength;
1523             m_BlockBuffer.Reset();
1524             m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1525             syntaxParserResult = FDE_XmlSyntaxResult::TargetData;
1526           } else {
1527             m_pStart++;
1528             FDE_XMLNODE* pXMLNode = m_XMLNodeStack.GetTopElement();
1529             if (!pXMLNode) {
1530               m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
1531               return m_syntaxParserResult;
1532             }
1533             m_XMLNodeStack.Pop();
1534             pXMLNode = m_XMLNodeStack.GetTopElement();
1535             if (pXMLNode) {
1536               m_CurNode = *pXMLNode;
1537             } else {
1538               m_CurNode.iNodeNum = -1;
1539               m_CurNode.eNodeType = FDE_XMLNODE_Unknown;
1540             }
1541             m_iCurrentNodeNum = m_CurNode.iNodeNum;
1542             m_BlockBuffer.Reset();
1543             m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1544             m_syntaxParserState = FDE_XmlSyntaxState::Text;
1545             syntaxParserResult = FDE_XmlSyntaxResult::InstructionClose;
1546           }
1547           break;
1548         case FDE_XmlSyntaxState::BreakElement:
1549           if (ch == L'>') {
1550             m_syntaxParserState = FDE_XmlSyntaxState::Text;
1551             syntaxParserResult = FDE_XmlSyntaxResult::ElementBreak;
1552           } else if (ch == L'/') {
1553             m_syntaxParserState = FDE_XmlSyntaxState::CloseElement;
1554           } else {
1555             m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
1556             return m_syntaxParserResult;
1557           }
1558           m_pStart++;
1559           break;
1560         case FDE_XmlSyntaxState::CloseElement:
1561           if (!FDE_IsXMLNameChar(ch, m_iDataLength < 1)) {
1562             if (ch == L'>') {
1563               FDE_XMLNODE* pXMLNode = m_XMLNodeStack.GetTopElement();
1564               if (!pXMLNode) {
1565                 m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
1566                 return m_syntaxParserResult;
1567               }
1568               m_XMLNodeStack.Pop();
1569               pXMLNode = m_XMLNodeStack.GetTopElement();
1570               if (pXMLNode) {
1571                 m_CurNode = *pXMLNode;
1572               } else {
1573                 m_CurNode.iNodeNum = -1;
1574                 m_CurNode.eNodeType = FDE_XMLNODE_Unknown;
1575               }
1576               m_iCurrentNodeNum = m_CurNode.iNodeNum;
1577               m_iTextDataLength = m_iDataLength;
1578               m_BlockBuffer.Reset();
1579               m_pCurrentBlock =
1580                   m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1581               m_syntaxParserState = FDE_XmlSyntaxState::Text;
1582               syntaxParserResult = FDE_XmlSyntaxResult::ElementClose;
1583             } else if (!FDE_IsXMLWhiteSpace(ch)) {
1584               m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
1585               return m_syntaxParserResult;
1586             }
1587           } else {
1588             if (m_iIndexInBlock == m_iAllocStep) {
1589               m_pCurrentBlock =
1590                   m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1591               if (!m_pCurrentBlock) {
1592                 return FDE_XmlSyntaxResult::Error;
1593               }
1594             }
1595             m_pCurrentBlock[m_iIndexInBlock++] = ch;
1596             m_iDataLength++;
1597           }
1598           m_pStart++;
1599           break;
1600         case FDE_XmlSyntaxState::SkipCommentOrDecl:
1601           if (FXSYS_wcsnicmp(m_pStart, L"--", 2) == 0) {
1602             m_pStart += 2;
1603             m_syntaxParserState = FDE_XmlSyntaxState::SkipComment;
1604           } else if (FXSYS_wcsnicmp(m_pStart, L"[CDATA[", 7) == 0) {
1605             m_pStart += 7;
1606             m_syntaxParserState = FDE_XmlSyntaxState::SkipCData;
1607           } else {
1608             m_syntaxParserState = FDE_XmlSyntaxState::SkipDeclNode;
1609             m_SkipChar = L'>';
1610             m_SkipStack.Push(L'>');
1611           }
1612           break;
1613         case FDE_XmlSyntaxState::SkipCData: {
1614           if (FXSYS_wcsnicmp(m_pStart, L"]]>", 3) == 0) {
1615             m_pStart += 3;
1616             syntaxParserResult = FDE_XmlSyntaxResult::CData;
1617             m_iTextDataLength = m_iDataLength;
1618             m_BlockBuffer.Reset();
1619             m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1620             m_syntaxParserState = FDE_XmlSyntaxState::Text;
1621           } else {
1622             if (m_iIndexInBlock == m_iAllocStep) {
1623               m_pCurrentBlock =
1624                   m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1625               if (!m_pCurrentBlock)
1626                 return FDE_XmlSyntaxResult::Error;
1627             }
1628             m_pCurrentBlock[m_iIndexInBlock++] = ch;
1629             m_iDataLength++;
1630             m_pStart++;
1631           }
1632           break;
1633         }
1634         case FDE_XmlSyntaxState::SkipDeclNode:
1635           if (m_SkipChar == L'\'' || m_SkipChar == L'\"') {
1636             m_pStart++;
1637             if (ch != m_SkipChar)
1638               break;
1639 
1640             m_SkipStack.Pop();
1641             uint32_t* pDWord = m_SkipStack.GetTopElement();
1642             if (!pDWord)
1643               m_syntaxParserState = FDE_XmlSyntaxState::Text;
1644             else
1645               m_SkipChar = (FX_WCHAR)*pDWord;
1646           } else {
1647             switch (ch) {
1648               case L'<':
1649                 m_SkipChar = L'>';
1650                 m_SkipStack.Push(L'>');
1651                 break;
1652               case L'[':
1653                 m_SkipChar = L']';
1654                 m_SkipStack.Push(L']');
1655                 break;
1656               case L'(':
1657                 m_SkipChar = L')';
1658                 m_SkipStack.Push(L')');
1659                 break;
1660               case L'\'':
1661                 m_SkipChar = L'\'';
1662                 m_SkipStack.Push(L'\'');
1663                 break;
1664               case L'\"':
1665                 m_SkipChar = L'\"';
1666                 m_SkipStack.Push(L'\"');
1667                 break;
1668               default:
1669                 if (ch == m_SkipChar) {
1670                   m_SkipStack.Pop();
1671                   uint32_t* pDWord = m_SkipStack.GetTopElement();
1672                   if (!pDWord) {
1673                     if (m_iDataLength >= 9) {
1674                       CFX_WideString wsHeader;
1675                       m_BlockBuffer.GetTextData(wsHeader, 0, 7);
1676                     }
1677                     m_iTextDataLength = m_iDataLength;
1678                     m_BlockBuffer.Reset();
1679                     m_pCurrentBlock =
1680                         m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1681                     m_syntaxParserState = FDE_XmlSyntaxState::Text;
1682                   } else {
1683                     m_SkipChar = static_cast<FX_WCHAR>(*pDWord);
1684                   }
1685                 }
1686                 break;
1687             }
1688             if (m_SkipStack.GetSize() > 0) {
1689               if (m_iIndexInBlock == m_iAllocStep) {
1690                 m_pCurrentBlock =
1691                     m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1692                 if (!m_pCurrentBlock) {
1693                   return FDE_XmlSyntaxResult::Error;
1694                 }
1695               }
1696               m_pCurrentBlock[m_iIndexInBlock++] = ch;
1697               m_iDataLength++;
1698             }
1699             m_pStart++;
1700           }
1701           break;
1702         case FDE_XmlSyntaxState::SkipComment:
1703           if (FXSYS_wcsnicmp(m_pStart, L"-->", 3) == 0) {
1704             m_pStart += 2;
1705             m_syntaxParserState = FDE_XmlSyntaxState::Text;
1706           }
1707 
1708           m_pStart++;
1709           break;
1710         case FDE_XmlSyntaxState::TargetData:
1711           if (FDE_IsXMLWhiteSpace(ch)) {
1712             if (m_iDataLength < 1) {
1713               m_pStart++;
1714               break;
1715             } else if (m_wQuotationMark == 0) {
1716               m_iTextDataLength = m_iDataLength;
1717               m_wQuotationMark = 0;
1718               m_BlockBuffer.Reset();
1719               m_pCurrentBlock =
1720                   m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1721               m_pStart++;
1722               syntaxParserResult = FDE_XmlSyntaxResult::TargetData;
1723               break;
1724             }
1725           }
1726           if (ch == '?') {
1727             m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction;
1728             m_pStart++;
1729           } else if (ch == '\"') {
1730             if (m_wQuotationMark == 0) {
1731               m_wQuotationMark = ch;
1732               m_pStart++;
1733             } else if (ch == m_wQuotationMark) {
1734               m_iTextDataLength = m_iDataLength;
1735               m_wQuotationMark = 0;
1736               m_BlockBuffer.Reset();
1737               m_pCurrentBlock =
1738                   m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1739               m_pStart++;
1740               syntaxParserResult = FDE_XmlSyntaxResult::TargetData;
1741             } else {
1742               m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
1743               return m_syntaxParserResult;
1744             }
1745           } else {
1746             if (m_iIndexInBlock == m_iAllocStep) {
1747               m_pCurrentBlock =
1748                   m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1749               if (!m_pCurrentBlock) {
1750                 return FDE_XmlSyntaxResult::Error;
1751               }
1752             }
1753             m_pCurrentBlock[m_iIndexInBlock++] = ch;
1754             m_iDataLength++;
1755             m_pStart++;
1756           }
1757           break;
1758         default:
1759           break;
1760       }
1761       if (syntaxParserResult != FDE_XmlSyntaxResult::None)
1762         return syntaxParserResult;
1763     }
1764   }
1765   return FDE_XmlSyntaxResult::Text;
1766 }
1767 
~CFDE_XMLSyntaxParser()1768 CFDE_XMLSyntaxParser::~CFDE_XMLSyntaxParser() {
1769   m_pCurrentBlock = nullptr;
1770   FX_Free(m_pBuffer);
1771 }
1772 
GetStatus() const1773 int32_t CFDE_XMLSyntaxParser::GetStatus() const {
1774   if (!m_pStream)
1775     return -1;
1776 
1777   int32_t iStreamLength = m_pStream->GetLength();
1778   if (iStreamLength < 1)
1779     return 100;
1780 
1781   if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error)
1782     return -1;
1783 
1784   if (m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString)
1785     return 100;
1786   return m_iParsedBytes * 100 / iStreamLength;
1787 }
1788 
// Returns the total number of bytes needed to encode the first |iSrcLen|
// elements of |pSrc|, counting each element independently with these
// thresholds: < 0x80 -> 1 byte, < 0x800 -> 2, < 0x10000 -> 3,
// < 0x200000 -> 4, < 0x4000000 -> 5, anything larger -> 6.
// A non-positive |iSrcLen| yields 0 and |pSrc| is not read.
static int32_t FX_GetUTF8EncodeLength(const FX_WCHAR* pSrc, int32_t iSrcLen) {
  int32_t iTotalBytes = 0;
  for (int32_t i = 0; i < iSrcLen; ++i) {
    // Each element is treated as an unsigned 32-bit value.
    const uint32_t dwValue = static_cast<uint32_t>(pSrc[i]);
    if (dwValue < 0x80)
      iTotalBytes += 1;
    else if (dwValue < 0x800)
      iTotalBytes += 2;
    else if (dwValue < 0x10000)
      iTotalBytes += 3;
    else if (dwValue < 0x200000)
      iTotalBytes += 4;
    else if (dwValue < 0x4000000)
      iTotalBytes += 5;
    else
      iTotalBytes += 6;
  }
  return iTotalBytes;
}
1812 
GetCurrentBinaryPos() const1813 FX_FILESIZE CFDE_XMLSyntaxParser::GetCurrentBinaryPos() const {
1814   if (!m_pStream)
1815     return 0;
1816 
1817   int32_t nSrcLen = m_pStart - m_pBuffer;
1818   int32_t nDstLen = FX_GetUTF8EncodeLength(m_pBuffer, nSrcLen);
1819   return m_iParsedBytes + nDstLen;
1820 }
1821 
ParseTextChar(FX_WCHAR character)1822 void CFDE_XMLSyntaxParser::ParseTextChar(FX_WCHAR character) {
1823   if (m_iIndexInBlock == m_iAllocStep) {
1824     m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1825     if (!m_pCurrentBlock) {
1826       return;
1827     }
1828   }
1829   m_pCurrentBlock[m_iIndexInBlock++] = character;
1830   m_iDataLength++;
1831   if (m_iEntityStart > -1 && character == L';') {
1832     CFX_WideString csEntity;
1833     m_BlockBuffer.GetTextData(csEntity, m_iEntityStart + 1,
1834                               (m_iDataLength - 1) - m_iEntityStart - 1);
1835     int32_t iLen = csEntity.GetLength();
1836     if (iLen > 0) {
1837       if (csEntity[0] == L'#') {
1838         uint32_t ch = 0;
1839         FX_WCHAR w;
1840         if (iLen > 1 && csEntity[1] == L'x') {
1841           for (int32_t i = 2; i < iLen; i++) {
1842             w = csEntity[i];
1843             if (w >= L'0' && w <= L'9') {
1844               ch = (ch << 4) + w - L'0';
1845             } else if (w >= L'A' && w <= L'F') {
1846               ch = (ch << 4) + w - 55;
1847             } else if (w >= L'a' && w <= L'f') {
1848               ch = (ch << 4) + w - 87;
1849             } else {
1850               break;
1851             }
1852           }
1853         } else {
1854           for (int32_t i = 1; i < iLen; i++) {
1855             w = csEntity[i];
1856             if (w < L'0' || w > L'9')
1857               break;
1858             ch = ch * 10 + w - L'0';
1859           }
1860         }
1861         if (ch > kMaxCharRange)
1862           ch = ' ';
1863 
1864         character = static_cast<FX_WCHAR>(ch);
1865         if (character != 0) {
1866           m_BlockBuffer.SetTextChar(m_iEntityStart, character);
1867           m_iEntityStart++;
1868         }
1869       } else {
1870         if (csEntity.Compare(L"amp") == 0) {
1871           m_BlockBuffer.SetTextChar(m_iEntityStart, L'&');
1872           m_iEntityStart++;
1873         } else if (csEntity.Compare(L"lt") == 0) {
1874           m_BlockBuffer.SetTextChar(m_iEntityStart, L'<');
1875           m_iEntityStart++;
1876         } else if (csEntity.Compare(L"gt") == 0) {
1877           m_BlockBuffer.SetTextChar(m_iEntityStart, L'>');
1878           m_iEntityStart++;
1879         } else if (csEntity.Compare(L"apos") == 0) {
1880           m_BlockBuffer.SetTextChar(m_iEntityStart, L'\'');
1881           m_iEntityStart++;
1882         } else if (csEntity.Compare(L"quot") == 0) {
1883           m_BlockBuffer.SetTextChar(m_iEntityStart, L'\"');
1884           m_iEntityStart++;
1885         }
1886       }
1887     }
1888     m_BlockBuffer.DeleteTextChars(m_iDataLength - m_iEntityStart, false);
1889     m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock);
1890     m_iEntityStart = -1;
1891   } else {
1892     if (m_iEntityStart < 0 && character == L'&') {
1893       m_iEntityStart = m_iDataLength - 1;
1894     }
1895   }
1896   m_pStart++;
1897 }
1898