1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "xfa/fxfa/parser/cxfa_simple_parser.h"
8 
9 #include <utility>
10 #include <vector>
11 
12 #include "core/fxcrt/cfx_checksumcontext.h"
13 #include "core/fxcrt/cfx_seekablestreamproxy.h"
14 #include "core/fxcrt/cfx_widetextbuf.h"
15 #include "core/fxcrt/fx_codepage.h"
16 #include "core/fxcrt/fx_extension.h"
17 #include "core/fxcrt/xml/cfx_xmlchardata.h"
18 #include "core/fxcrt/xml/cfx_xmldoc.h"
19 #include "core/fxcrt/xml/cfx_xmlelement.h"
20 #include "core/fxcrt/xml/cfx_xmlinstruction.h"
21 #include "core/fxcrt/xml/cfx_xmlnode.h"
22 #include "core/fxcrt/xml/cfx_xmlparser.h"
23 #include "core/fxcrt/xml/cfx_xmltext.h"
24 #include "fxjs/xfa/cjx_object.h"
25 #include "third_party/base/logging.h"
26 #include "third_party/base/ptr_util.h"
27 #include "xfa/fxfa/fxfa.h"
28 #include "xfa/fxfa/parser/cxfa_document.h"
29 #include "xfa/fxfa/parser/cxfa_node.h"
30 #include "xfa/fxfa/parser/cxfa_subform.h"
31 #include "xfa/fxfa/parser/cxfa_template.h"
32 #include "xfa/fxfa/parser/xfa_basic_data.h"
33 #include "xfa/fxfa/parser/xfa_utils.h"
34 
35 namespace {
36 
37 struct PacketInfo {
38   uint32_t hash;
39   const wchar_t* name;
40   XFA_PacketType packet_type;
41   const wchar_t* uri;
42   uint32_t flags;
43 };
44 const PacketInfo PacketData[] = {
45     {0x0, nullptr, XFA_PacketType::User, nullptr,
46      XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTMANY},
47     {0x811929d, L"sourceSet", XFA_PacketType::SourceSet,
48      L"http://www.xfa.org/schema/xfa-source-set/",
49      XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
50     {0xb843dba, L"pdf", XFA_PacketType::Pdf, L"http://ns.adobe.com/xdp/pdf/",
51      XFA_XDPPACKET_FLAGS_COMPLETEMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
52     {0xc56afbf, L"xdc", XFA_PacketType::Xdc, L"http://www.xfa.org/schema/xdc/",
53      XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
54     {0xc56afcc, L"xdp", XFA_PacketType::Xdp, L"http://ns.adobe.com/xdp/",
55      XFA_XDPPACKET_FLAGS_COMPLETEMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
56     {0x132a8fbc, L"xmpmeta", XFA_PacketType::Xmpmeta,
57      L"http://ns.adobe.com/xmpmeta/",
58      XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTMANY},
59     {0x48d004a8, L"xfdf", XFA_PacketType::Xfdf, L"http://ns.adobe.com/xfdf/",
60      XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
61     {0x4e1e39b6, L"config", XFA_PacketType::Config,
62      L"http://www.xfa.org/schema/xci/",
63      XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
64     {0x5473b6dc, L"localeSet", XFA_PacketType::LocaleSet,
65      L"http://www.xfa.org/schema/xfa-locale-set/",
66      XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
67     {0x6038580a, L"stylesheet", XFA_PacketType::Stylesheet,
68      L"http://www.w3.org/1999/XSL/Transform",
69      XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTMANY},
70     {0x803550fc, L"template", XFA_PacketType::Template,
71      L"http://www.xfa.org/schema/xfa-template/",
72      XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
73     {0x8b036f32, L"signature", XFA_PacketType::Signature,
74      L"http://www.w3.org/2000/09/xmldsig#",
75      XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
76     {0x99b95079, L"datasets", XFA_PacketType::Datasets,
77      L"http://www.xfa.org/schema/xfa-data/",
78      XFA_XDPPACKET_FLAGS_PREFIXMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
79     {0xcd309ff4, L"form", XFA_PacketType::Form,
80      L"http://www.xfa.org/schema/xfa-form/",
81      XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
82     {0xe14c801c, L"connectionSet", XFA_PacketType::ConnectionSet,
83      L"http://www.xfa.org/schema/xfa-connection-set/",
84      XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
85 };
86 
GetPacketByIndex(XFA_PacketType ePacket)87 const PacketInfo* GetPacketByIndex(XFA_PacketType ePacket) {
88   return PacketData + static_cast<uint8_t>(ePacket);
89 }
90 
GetPacketByName(const WideStringView & wsName)91 const PacketInfo* GetPacketByName(const WideStringView& wsName) {
92   if (wsName.IsEmpty())
93     return nullptr;
94 
95   uint32_t hash = FX_HashCode_GetW(wsName, false);
96   auto* elem = std::lower_bound(
97       std::begin(PacketData), std::end(PacketData), hash,
98       [](const PacketInfo& a, uint32_t hash) { return a.hash < hash; });
99   if (elem != std::end(PacketData) && elem->hash == hash)
100     return elem;
101   return nullptr;
102 }
103 
GetDocumentNode(CFX_XMLDoc * pXMLDoc,bool bVerifyWellFormness=false)104 CFX_XMLNode* GetDocumentNode(CFX_XMLDoc* pXMLDoc,
105                              bool bVerifyWellFormness = false) {
106   if (!pXMLDoc)
107     return nullptr;
108 
109   for (CFX_XMLNode* pXMLNode =
110            pXMLDoc->GetRoot()->GetNodeItem(CFX_XMLNode::FirstChild);
111        pXMLNode; pXMLNode = pXMLNode->GetNodeItem(CFX_XMLNode::NextSibling)) {
112     if (pXMLNode->GetType() != FX_XMLNODE_Element)
113       continue;
114 
115     if (!bVerifyWellFormness)
116       return pXMLNode;
117 
118     for (CFX_XMLNode* pNextNode =
119              pXMLNode->GetNodeItem(CFX_XMLNode::NextSibling);
120          pNextNode;
121          pNextNode = pNextNode->GetNodeItem(CFX_XMLNode::NextSibling)) {
122       if (pNextNode->GetType() == FX_XMLNODE_Element)
123         return nullptr;
124     }
125     return pXMLNode;
126   }
127   return nullptr;
128 }
129 
GetElementTagNamespaceURI(CFX_XMLElement * pElement)130 WideString GetElementTagNamespaceURI(CFX_XMLElement* pElement) {
131   WideString wsNodeStr = pElement->GetNamespacePrefix();
132   WideString wsNamespaceURI;
133   if (!XFA_FDEExtension_ResolveNamespaceQualifier(pElement, wsNodeStr,
134                                                   &wsNamespaceURI)) {
135     return WideString();
136   }
137   return wsNamespaceURI;
138 }
139 
MatchNodeName(CFX_XMLNode * pNode,const WideStringView & wsLocalTagName,const WideStringView & wsNamespaceURIPrefix,uint32_t eMatchFlags=XFA_XDPPACKET_FLAGS_NOMATCH)140 bool MatchNodeName(CFX_XMLNode* pNode,
141                    const WideStringView& wsLocalTagName,
142                    const WideStringView& wsNamespaceURIPrefix,
143                    uint32_t eMatchFlags = XFA_XDPPACKET_FLAGS_NOMATCH) {
144   if (!pNode || pNode->GetType() != FX_XMLNODE_Element)
145     return false;
146 
147   CFX_XMLElement* pElement = reinterpret_cast<CFX_XMLElement*>(pNode);
148   WideString wsNodeStr = pElement->GetLocalTagName();
149   if (wsNodeStr != wsLocalTagName)
150     return false;
151 
152   wsNodeStr = GetElementTagNamespaceURI(pElement);
153   if (eMatchFlags & XFA_XDPPACKET_FLAGS_NOMATCH)
154     return true;
155   if (eMatchFlags & XFA_XDPPACKET_FLAGS_PREFIXMATCH) {
156     return wsNodeStr.Left(wsNamespaceURIPrefix.GetLength()) ==
157            wsNamespaceURIPrefix;
158   }
159 
160   return wsNodeStr == wsNamespaceURIPrefix;
161 }
162 
GetAttributeLocalName(const WideStringView & wsAttributeName,WideString & wsLocalAttrName)163 bool GetAttributeLocalName(const WideStringView& wsAttributeName,
164                            WideString& wsLocalAttrName) {
165   WideString wsAttrName(wsAttributeName);
166   auto pos = wsAttrName.Find(L':', 0);
167   if (!pos.has_value()) {
168     wsLocalAttrName = wsAttrName;
169     return false;
170   }
171   wsLocalAttrName = wsAttrName.Right(wsAttrName.GetLength() - pos.value() - 1);
172   return true;
173 }
174 
ResolveAttribute(CFX_XMLElement * pElement,const WideString & wsAttrName,WideString & wsLocalAttrName,WideString & wsNamespaceURI)175 bool ResolveAttribute(CFX_XMLElement* pElement,
176                       const WideString& wsAttrName,
177                       WideString& wsLocalAttrName,
178                       WideString& wsNamespaceURI) {
179   WideString wsNSPrefix;
180   if (GetAttributeLocalName(wsAttrName.AsStringView(), wsLocalAttrName)) {
181     wsNSPrefix = wsAttrName.Left(wsAttrName.GetLength() -
182                                  wsLocalAttrName.GetLength() - 1);
183   }
184   if (wsLocalAttrName == L"xmlns" || wsNSPrefix == L"xmlns" ||
185       wsNSPrefix == L"xml") {
186     return false;
187   }
188   if (!XFA_FDEExtension_ResolveNamespaceQualifier(pElement, wsNSPrefix,
189                                                   &wsNamespaceURI)) {
190     wsNamespaceURI.clear();
191     return false;
192   }
193   return true;
194 }
195 
FindAttributeWithNS(CFX_XMLElement * pElement,const WideStringView & wsLocalAttributeName,const WideStringView & wsNamespaceURIPrefix,WideString & wsValue,bool bMatchNSAsPrefix=false)196 bool FindAttributeWithNS(CFX_XMLElement* pElement,
197                          const WideStringView& wsLocalAttributeName,
198                          const WideStringView& wsNamespaceURIPrefix,
199                          WideString& wsValue,
200                          bool bMatchNSAsPrefix = false) {
201   if (!pElement)
202     return false;
203 
204   WideString wsAttrNS;
205   for (auto it : pElement->GetAttributes()) {
206     auto pos = it.first.Find(L':', 0);
207     WideString wsNSPrefix;
208     if (!pos.has_value()) {
209       if (wsLocalAttributeName != it.first)
210         continue;
211     } else {
212       if (wsLocalAttributeName !=
213           it.first.Right(it.first.GetLength() - pos.value() - 1)) {
214         continue;
215       }
216       wsNSPrefix = it.first.Left(pos.value());
217     }
218 
219     if (!XFA_FDEExtension_ResolveNamespaceQualifier(pElement, wsNSPrefix,
220                                                     &wsAttrNS)) {
221       continue;
222     }
223     if (bMatchNSAsPrefix) {
224       if (wsAttrNS.Left(wsNamespaceURIPrefix.GetLength()) !=
225           wsNamespaceURIPrefix) {
226         continue;
227       }
228     } else {
229       if (wsAttrNS != wsNamespaceURIPrefix)
230         continue;
231     }
232     wsValue = it.second;
233     return true;
234   }
235   return false;
236 }
237 
GetDataSetsFromXDP(CFX_XMLNode * pXMLDocumentNode)238 CFX_XMLNode* GetDataSetsFromXDP(CFX_XMLNode* pXMLDocumentNode) {
239   const PacketInfo* datasets_packet =
240       GetPacketByIndex(XFA_PacketType::Datasets);
241   if (MatchNodeName(pXMLDocumentNode, datasets_packet->name,
242                     datasets_packet->uri, datasets_packet->flags)) {
243     return pXMLDocumentNode;
244   }
245 
246   const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Xdp);
247   if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri,
248                      packet->flags)) {
249     return nullptr;
250   }
251 
252   for (CFX_XMLNode* pDatasetsNode =
253            pXMLDocumentNode->GetNodeItem(CFX_XMLNode::FirstChild);
254        pDatasetsNode;
255        pDatasetsNode = pDatasetsNode->GetNodeItem(CFX_XMLNode::NextSibling)) {
256     if (MatchNodeName(pDatasetsNode, datasets_packet->name,
257                       datasets_packet->uri, datasets_packet->flags)) {
258       return pDatasetsNode;
259     }
260   }
261   return nullptr;
262 }
263 
IsStringAllWhitespace(WideString wsText)264 bool IsStringAllWhitespace(WideString wsText) {
265   wsText.TrimRight(L"\x20\x9\xD\xA");
266   return wsText.IsEmpty();
267 }
268 
ConvertXMLToPlainText(CFX_XMLElement * pRootXMLNode,WideString & wsOutput)269 void ConvertXMLToPlainText(CFX_XMLElement* pRootXMLNode, WideString& wsOutput) {
270   for (CFX_XMLNode* pXMLChild =
271            pRootXMLNode->GetNodeItem(CFX_XMLNode::FirstChild);
272        pXMLChild;
273        pXMLChild = pXMLChild->GetNodeItem(CFX_XMLNode::NextSibling)) {
274     switch (pXMLChild->GetType()) {
275       case FX_XMLNODE_Element: {
276         WideString wsTextData =
277             static_cast<CFX_XMLElement*>(pXMLChild)->GetTextData();
278         wsTextData += L"\n";
279         wsOutput += wsTextData;
280         break;
281       }
282       case FX_XMLNODE_Text:
283       case FX_XMLNODE_CharData: {
284         WideString wsText = static_cast<CFX_XMLText*>(pXMLChild)->GetText();
285         if (IsStringAllWhitespace(wsText))
286           continue;
287 
288         wsOutput = wsText;
289         break;
290       }
291       default:
292         NOTREACHED();
293         break;
294     }
295   }
296 }
297 
GetPlainTextFromRichText(CFX_XMLNode * pXMLNode)298 WideString GetPlainTextFromRichText(CFX_XMLNode* pXMLNode) {
299   if (!pXMLNode)
300     return L"";
301 
302   WideString wsPlainText;
303   switch (pXMLNode->GetType()) {
304     case FX_XMLNODE_Element: {
305       CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLNode);
306       WideString wsTag = pXMLElement->GetLocalTagName();
307       uint32_t uTag = FX_HashCode_GetW(wsTag.AsStringView(), true);
308       if (uTag == 0x0001f714) {
309         wsPlainText += L"\n";
310       } else if (uTag == 0x00000070) {
311         if (!wsPlainText.IsEmpty()) {
312           wsPlainText += L"\n";
313         }
314       } else if (uTag == 0xa48ac63) {
315         if (!wsPlainText.IsEmpty() &&
316             wsPlainText[wsPlainText.GetLength() - 1] != '\n') {
317           wsPlainText += L"\n";
318         }
319       }
320       break;
321     }
322     case FX_XMLNODE_Text:
323     case FX_XMLNODE_CharData: {
324       WideString wsContent = static_cast<CFX_XMLText*>(pXMLNode)->GetText();
325       wsPlainText += wsContent;
326       break;
327     }
328     default:
329       break;
330   }
331   for (CFX_XMLNode* pChildXML = pXMLNode->GetNodeItem(CFX_XMLNode::FirstChild);
332        pChildXML;
333        pChildXML = pChildXML->GetNodeItem(CFX_XMLNode::NextSibling)) {
334     wsPlainText += GetPlainTextFromRichText(pChildXML);
335   }
336 
337   return wsPlainText;
338 }
339 
340 }  // namespace
341 
XFA_RecognizeRichText(CFX_XMLElement * pRichTextXMLNode)342 bool XFA_RecognizeRichText(CFX_XMLElement* pRichTextXMLNode) {
343   return pRichTextXMLNode && GetElementTagNamespaceURI(pRichTextXMLNode) ==
344                                  L"http://www.w3.org/1999/xhtml";
345 }
346 
CXFA_SimpleParser()347 CXFA_SimpleParser::CXFA_SimpleParser() : m_bDocumentParser(true) {}
348 
CXFA_SimpleParser(CXFA_Document * pFactory)349 CXFA_SimpleParser::CXFA_SimpleParser(CXFA_Document* pFactory)
350     : m_pFactory(pFactory), m_bDocumentParser(false) {}
351 
~CXFA_SimpleParser()352 CXFA_SimpleParser::~CXFA_SimpleParser() {}
353 
SetFactory(CXFA_Document * pFactory)354 void CXFA_SimpleParser::SetFactory(CXFA_Document* pFactory) {
355   ASSERT(m_bDocumentParser);
356   m_pFactory = pFactory;
357 }
358 
StartParse(const RetainPtr<IFX_SeekableStream> & pStream,XFA_PacketType ePacketID)359 int32_t CXFA_SimpleParser::StartParse(
360     const RetainPtr<IFX_SeekableStream>& pStream,
361     XFA_PacketType ePacketID) {
362   CloseParser();
363   m_pFileRead = pStream;
364   m_pStream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(pStream, false);
365   uint16_t wCodePage = m_pStream->GetCodePage();
366   if (wCodePage != FX_CODEPAGE_UTF16LE && wCodePage != FX_CODEPAGE_UTF16BE &&
367       wCodePage != FX_CODEPAGE_UTF8) {
368     m_pStream->SetCodePage(FX_CODEPAGE_UTF8);
369   }
370   m_pXMLDoc = pdfium::MakeUnique<CFX_XMLDoc>();
371   auto pNewParser =
372       pdfium::MakeUnique<CFX_XMLParser>(m_pXMLDoc->GetRoot(), m_pStream);
373   m_pXMLParser = pNewParser.get();
374   if (!m_pXMLDoc->LoadXML(std::move(pNewParser)))
375     return XFA_PARSESTATUS_StatusErr;
376 
377   m_bParseStarted = true;
378   m_ePacketID = ePacketID;
379   return XFA_PARSESTATUS_Ready;
380 }
381 
DoParse()382 int32_t CXFA_SimpleParser::DoParse() {
383   if (!m_pXMLDoc || !m_bParseStarted)
384     return XFA_PARSESTATUS_StatusErr;
385 
386   int32_t iRet = m_pXMLDoc->DoLoad();
387   if (iRet < 0)
388     return XFA_PARSESTATUS_SyntaxErr;
389   if (iRet < 100)
390     return iRet / 2;
391 
392   m_pRootNode = ParseAsXDPPacket(GetDocumentNode(m_pXMLDoc.get()), m_ePacketID);
393   m_pXMLParser.Release();
394   m_pXMLDoc->CloseXML();
395   m_pStream.Reset();
396 
397   if (!m_pRootNode)
398     return XFA_PARSESTATUS_StatusErr;
399 
400   return XFA_PARSESTATUS_Done;
401 }
402 
ParseXMLData(const ByteString & wsXML)403 CFX_XMLNode* CXFA_SimpleParser::ParseXMLData(const ByteString& wsXML) {
404   CloseParser();
405   m_pXMLDoc = pdfium::MakeUnique<CFX_XMLDoc>();
406 
407   auto pStream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(
408       const_cast<uint8_t*>(wsXML.raw_str()), wsXML.GetLength());
409   auto pParser =
410       pdfium::MakeUnique<CFX_XMLParser>(m_pXMLDoc->GetRoot(), pStream);
411   pParser->m_dwCheckStatus = 0x03;
412   if (!m_pXMLDoc->LoadXML(std::move(pParser)))
413     return nullptr;
414 
415   int32_t iRet = m_pXMLDoc->DoLoad();
416   if (iRet < 0 || iRet >= 100)
417     m_pXMLDoc->CloseXML();
418   return iRet < 100 ? nullptr : GetDocumentNode(m_pXMLDoc.get());
419 }
420 
ConstructXFANode(CXFA_Node * pXFANode,CFX_XMLNode * pXMLNode)421 void CXFA_SimpleParser::ConstructXFANode(CXFA_Node* pXFANode,
422                                          CFX_XMLNode* pXMLNode) {
423   XFA_PacketType ePacketID = pXFANode->GetPacketType();
424   if (ePacketID == XFA_PacketType::Datasets) {
425     if (pXFANode->GetElementType() == XFA_Element::DataValue) {
426       for (CFX_XMLNode* pXMLChild =
427                pXMLNode->GetNodeItem(CFX_XMLNode::FirstChild);
428            pXMLChild;
429            pXMLChild = pXMLChild->GetNodeItem(CFX_XMLNode::NextSibling)) {
430         FX_XMLNODETYPE eNodeType = pXMLChild->GetType();
431         if (eNodeType == FX_XMLNODE_Instruction)
432           continue;
433 
434         if (eNodeType == FX_XMLNODE_Element) {
435           CXFA_Node* pXFAChild = m_pFactory->CreateNode(
436               XFA_PacketType::Datasets, XFA_Element::DataValue);
437           if (!pXFAChild)
438             return;
439 
440           CFX_XMLElement* child = static_cast<CFX_XMLElement*>(pXMLChild);
441           WideString wsNodeStr = child->GetLocalTagName();
442           pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, wsNodeStr, false,
443                                           false);
444           WideString wsChildValue = GetPlainTextFromRichText(child);
445           if (!wsChildValue.IsEmpty())
446             pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsChildValue,
447                                             false, false);
448 
449           pXFANode->InsertChild(pXFAChild, nullptr);
450           pXFAChild->SetXMLMappingNode(pXMLChild);
451           pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
452           break;
453         }
454       }
455       m_pRootNode = pXFANode;
456     } else {
457       m_pRootNode = DataLoader(pXFANode, pXMLNode, true);
458     }
459   } else if (pXFANode->IsContentNode()) {
460     ParseContentNode(pXFANode, pXMLNode, ePacketID);
461     m_pRootNode = pXFANode;
462   } else {
463     m_pRootNode = NormalLoader(pXFANode, pXMLNode, ePacketID, true);
464   }
465 }
466 
GetRootNode() const467 CXFA_Node* CXFA_SimpleParser::GetRootNode() const {
468   return m_pRootNode;
469 }
470 
GetXMLDoc() const471 CFX_XMLDoc* CXFA_SimpleParser::GetXMLDoc() const {
472   return m_pXMLDoc.get();
473 }
474 
ParseAsXDPPacket(CFX_XMLNode * pXMLDocumentNode,XFA_PacketType ePacketID)475 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket(CFX_XMLNode* pXMLDocumentNode,
476                                                XFA_PacketType ePacketID) {
477   switch (ePacketID) {
478     case XFA_PacketType::Xdp:
479       return ParseAsXDPPacket_XDP(pXMLDocumentNode);
480     case XFA_PacketType::Config:
481       return ParseAsXDPPacket_Config(pXMLDocumentNode);
482     case XFA_PacketType::Template:
483       return ParseAsXDPPacket_Template(pXMLDocumentNode);
484     case XFA_PacketType::Form:
485       return ParseAsXDPPacket_Form(pXMLDocumentNode);
486     case XFA_PacketType::Datasets:
487       return ParseAsXDPPacket_Data(pXMLDocumentNode);
488     case XFA_PacketType::Xdc:
489       return ParseAsXDPPacket_Xdc(pXMLDocumentNode);
490     case XFA_PacketType::LocaleSet:
491       return ParseAsXDPPacket_LocaleConnectionSourceSet(
492           pXMLDocumentNode, XFA_PacketType::LocaleSet, XFA_Element::LocaleSet);
493     case XFA_PacketType::ConnectionSet:
494       return ParseAsXDPPacket_LocaleConnectionSourceSet(
495           pXMLDocumentNode, XFA_PacketType::ConnectionSet,
496           XFA_Element::ConnectionSet);
497     case XFA_PacketType::SourceSet:
498       return ParseAsXDPPacket_LocaleConnectionSourceSet(
499           pXMLDocumentNode, XFA_PacketType::SourceSet, XFA_Element::SourceSet);
500     default:
501       return ParseAsXDPPacket_User(pXMLDocumentNode);
502   }
503 }
504 
ParseAsXDPPacket_XDP(CFX_XMLNode * pXMLDocumentNode)505 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_XDP(
506     CFX_XMLNode* pXMLDocumentNode) {
507   const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Xdp);
508   if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri,
509                      packet->flags)) {
510     return nullptr;
511   }
512 
513   CXFA_Node* pXFARootNode =
514       m_pFactory->CreateNode(XFA_PacketType::Xdp, XFA_Element::Xfa);
515   if (!pXFARootNode)
516     return nullptr;
517 
518   m_pRootNode = pXFARootNode;
519   pXFARootNode->JSObject()->SetCData(XFA_Attribute::Name, L"xfa", false, false);
520 
521   CFX_XMLElement* pElement = static_cast<CFX_XMLElement*>(pXMLDocumentNode);
522   for (auto it : pElement->GetAttributes()) {
523     if (it.first == L"uuid")
524       pXFARootNode->JSObject()->SetCData(XFA_Attribute::Uuid, it.second, false,
525                                          false);
526     else if (it.first == L"timeStamp")
527       pXFARootNode->JSObject()->SetCData(XFA_Attribute::TimeStamp, it.second,
528                                          false, false);
529   }
530 
531   CFX_XMLNode* pXMLConfigDOMRoot = nullptr;
532   CXFA_Node* pXFAConfigDOMRoot = nullptr;
533   for (CFX_XMLNode* pChildItem =
534            pXMLDocumentNode->GetNodeItem(CFX_XMLNode::FirstChild);
535        pChildItem;
536        pChildItem = pChildItem->GetNodeItem(CFX_XMLNode::NextSibling)) {
537     const PacketInfo* pPacketInfo = GetPacketByIndex(XFA_PacketType::Config);
538     if (!MatchNodeName(pChildItem, pPacketInfo->name, pPacketInfo->uri,
539                        pPacketInfo->flags)) {
540       continue;
541     }
542     if (pXFARootNode->GetFirstChildByName(pPacketInfo->hash))
543       return nullptr;
544 
545     pXMLConfigDOMRoot = pChildItem;
546     pXFAConfigDOMRoot = ParseAsXDPPacket_Config(pXMLConfigDOMRoot);
547     if (pXFAConfigDOMRoot)
548       pXFARootNode->InsertChild(pXFAConfigDOMRoot, nullptr);
549   }
550 
551   CFX_XMLNode* pXMLDatasetsDOMRoot = nullptr;
552   CFX_XMLNode* pXMLFormDOMRoot = nullptr;
553   CFX_XMLNode* pXMLTemplateDOMRoot = nullptr;
554   for (CFX_XMLNode* pChildItem =
555            pXMLDocumentNode->GetNodeItem(CFX_XMLNode::FirstChild);
556        pChildItem;
557        pChildItem = pChildItem->GetNodeItem(CFX_XMLNode::NextSibling)) {
558     if (!pChildItem || pChildItem->GetType() != FX_XMLNODE_Element)
559       continue;
560     if (pChildItem == pXMLConfigDOMRoot)
561       continue;
562 
563     CFX_XMLElement* pElement = reinterpret_cast<CFX_XMLElement*>(pChildItem);
564     WideString wsPacketName = pElement->GetLocalTagName();
565     const PacketInfo* pPacketInfo =
566         GetPacketByName(wsPacketName.AsStringView());
567     if (pPacketInfo && pPacketInfo->uri) {
568       if (!MatchNodeName(pElement, pPacketInfo->name, pPacketInfo->uri,
569                          pPacketInfo->flags)) {
570         pPacketInfo = nullptr;
571       }
572     }
573     XFA_PacketType ePacket =
574         pPacketInfo ? pPacketInfo->packet_type : XFA_PacketType::User;
575     if (ePacket == XFA_PacketType::Xdp)
576       continue;
577     if (ePacket == XFA_PacketType::Datasets) {
578       if (pXMLDatasetsDOMRoot)
579         return nullptr;
580 
581       pXMLDatasetsDOMRoot = pElement;
582     } else if (ePacket == XFA_PacketType::Form) {
583       if (pXMLFormDOMRoot)
584         return nullptr;
585 
586       pXMLFormDOMRoot = pElement;
587     } else if (ePacket == XFA_PacketType::Template) {
588       // Found a duplicate template packet.
589       if (pXMLTemplateDOMRoot)
590         return nullptr;
591 
592       CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket);
593       if (pPacketNode) {
594         pXMLTemplateDOMRoot = pElement;
595         pXFARootNode->InsertChild(pPacketNode, nullptr);
596       }
597     } else {
598       CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket);
599       if (pPacketNode) {
600         if (pPacketInfo &&
601             (pPacketInfo->flags & XFA_XDPPACKET_FLAGS_SUPPORTONE) &&
602             pXFARootNode->GetFirstChildByName(pPacketInfo->hash)) {
603           return nullptr;
604         }
605         pXFARootNode->InsertChild(pPacketNode, nullptr);
606       }
607     }
608   }
609 
610   // No template is found.
611   if (!pXMLTemplateDOMRoot)
612     return nullptr;
613 
614   if (pXMLDatasetsDOMRoot) {
615     CXFA_Node* pPacketNode =
616         ParseAsXDPPacket(pXMLDatasetsDOMRoot, XFA_PacketType::Datasets);
617     if (pPacketNode)
618       pXFARootNode->InsertChild(pPacketNode, nullptr);
619   }
620   if (pXMLFormDOMRoot) {
621     CXFA_Node* pPacketNode =
622         ParseAsXDPPacket(pXMLFormDOMRoot, XFA_PacketType::Form);
623     if (pPacketNode)
624       pXFARootNode->InsertChild(pPacketNode, nullptr);
625   }
626 
627   pXFARootNode->SetXMLMappingNode(pXMLDocumentNode);
628   return pXFARootNode;
629 }
630 
ParseAsXDPPacket_Config(CFX_XMLNode * pXMLDocumentNode)631 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Config(
632     CFX_XMLNode* pXMLDocumentNode) {
633   const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Config);
634   if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri,
635                      packet->flags)) {
636     return nullptr;
637   }
638   CXFA_Node* pNode =
639       m_pFactory->CreateNode(XFA_PacketType::Config, XFA_Element::Config);
640   if (!pNode)
641     return nullptr;
642 
643   pNode->JSObject()->SetCData(XFA_Attribute::Name, packet->name, false, false);
644   if (!NormalLoader(pNode, pXMLDocumentNode, XFA_PacketType::Config, true))
645     return nullptr;
646 
647   pNode->SetXMLMappingNode(pXMLDocumentNode);
648   return pNode;
649 }
650 
ParseAsXDPPacket_Template(CFX_XMLNode * pXMLDocumentNode)651 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Template(
652     CFX_XMLNode* pXMLDocumentNode) {
653   const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Template);
654   if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri,
655                      packet->flags)) {
656     return nullptr;
657   }
658 
659   CXFA_Node* pNode =
660       m_pFactory->CreateNode(XFA_PacketType::Template, XFA_Element::Template);
661   if (!pNode)
662     return nullptr;
663 
664   pNode->JSObject()->SetCData(XFA_Attribute::Name, packet->name, false, false);
665   if (m_bDocumentParser) {
666     CFX_XMLElement* pXMLDocumentElement =
667         static_cast<CFX_XMLElement*>(pXMLDocumentNode);
668     WideString wsNamespaceURI = pXMLDocumentElement->GetNamespaceURI();
669     if (wsNamespaceURI.IsEmpty())
670       wsNamespaceURI = pXMLDocumentElement->GetString(L"xmlns:xfa");
671 
672     pNode->GetDocument()->RecognizeXFAVersionNumber(wsNamespaceURI);
673   }
674   if (!NormalLoader(pNode, pXMLDocumentNode, XFA_PacketType::Template, true))
675     return nullptr;
676 
677   pNode->SetXMLMappingNode(pXMLDocumentNode);
678   return pNode;
679 }
680 
ParseAsXDPPacket_Form(CFX_XMLNode * pXMLDocumentNode)681 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Form(
682     CFX_XMLNode* pXMLDocumentNode) {
683   const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Form);
684   if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri,
685                      packet->flags)) {
686     return nullptr;
687   }
688 
689   CFX_XMLElement* pXMLDocumentElement =
690       static_cast<CFX_XMLElement*>(pXMLDocumentNode);
691   WideString wsChecksum = pXMLDocumentElement->GetString(L"checksum");
692   if (wsChecksum.GetLength() != 28 || m_pXMLParser->m_dwCheckStatus != 0x03) {
693     return nullptr;
694   }
695 
696   auto pChecksum = pdfium::MakeUnique<CFX_ChecksumContext>();
697   pChecksum->StartChecksum();
698   pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[0],
699                             m_pXMLParser->m_nSize[0]);
700   pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[1],
701                             m_pXMLParser->m_nSize[1]);
702   pChecksum->FinishChecksum();
703   ByteString bsCheck = pChecksum->GetChecksum();
704   if (bsCheck != wsChecksum.UTF8Encode())
705     return nullptr;
706 
707   CXFA_Node* pNode =
708       m_pFactory->CreateNode(XFA_PacketType::Form, XFA_Element::Form);
709   if (!pNode)
710     return nullptr;
711 
712   pNode->JSObject()->SetCData(XFA_Attribute::Name, packet->name, false, false);
713   pNode->JSObject()->SetAttribute(XFA_Attribute::Checksum,
714                                   wsChecksum.AsStringView(), false);
715   CXFA_Template* pTemplateRoot =
716       m_pRootNode->GetFirstChildByClass<CXFA_Template>(XFA_Element::Template);
717   CXFA_Subform* pTemplateChosen =
718       pTemplateRoot ? pTemplateRoot->GetFirstChildByClass<CXFA_Subform>(
719                           XFA_Element::Subform)
720                     : nullptr;
721   bool bUseAttribute = true;
722   if (pTemplateChosen &&
723       pTemplateChosen->JSObject()->GetEnum(XFA_Attribute::RestoreState) !=
724           XFA_AttributeEnum::Auto) {
725     bUseAttribute = false;
726   }
727   if (!NormalLoader(pNode, pXMLDocumentNode, XFA_PacketType::Form,
728                     bUseAttribute))
729     return nullptr;
730 
731   pNode->SetXMLMappingNode(pXMLDocumentNode);
732   return pNode;
733 }
734 
ParseAsXDPPacket_Data(CFX_XMLNode * pXMLDocumentNode)735 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Data(
736     CFX_XMLNode* pXMLDocumentNode) {
737   CFX_XMLNode* pDatasetsXMLNode = GetDataSetsFromXDP(pXMLDocumentNode);
738   const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Datasets);
739   if (pDatasetsXMLNode) {
740     CXFA_Node* pNode = m_pFactory->CreateNode(XFA_PacketType::Datasets,
741                                               XFA_Element::DataModel);
742     if (!pNode)
743       return nullptr;
744 
745     pNode->JSObject()->SetCData(XFA_Attribute::Name, packet->name, false,
746                                 false);
747     if (!DataLoader(pNode, pDatasetsXMLNode, false))
748       return nullptr;
749 
750     pNode->SetXMLMappingNode(pDatasetsXMLNode);
751     return pNode;
752   }
753 
754   CFX_XMLNode* pDataXMLNode = nullptr;
755   if (MatchNodeName(pXMLDocumentNode, L"data", packet->uri, packet->flags)) {
756     static_cast<CFX_XMLElement*>(pXMLDocumentNode)
757         ->RemoveAttribute(L"xmlns:xfa");
758     pDataXMLNode = pXMLDocumentNode;
759   } else {
760     CFX_XMLElement* pDataElement = new CFX_XMLElement(L"xfa:data");
761     CFX_XMLNode* pParentXMLNode =
762         pXMLDocumentNode->GetNodeItem(CFX_XMLNode::Parent);
763     if (pParentXMLNode)
764       pParentXMLNode->RemoveChildNode(pXMLDocumentNode);
765 
766     ASSERT(pXMLDocumentNode->GetType() == FX_XMLNODE_Element);
767     if (pXMLDocumentNode->GetType() == FX_XMLNODE_Element) {
768       static_cast<CFX_XMLElement*>(pXMLDocumentNode)
769           ->RemoveAttribute(L"xmlns:xfa");
770     }
771     pDataElement->InsertChildNode(pXMLDocumentNode);
772     pDataXMLNode = pDataElement;
773   }
774 
775   if (pDataXMLNode) {
776     CXFA_Node* pNode = m_pFactory->CreateNode(XFA_PacketType::Datasets,
777                                               XFA_Element::DataGroup);
778     if (!pNode) {
779       if (pDataXMLNode != pXMLDocumentNode)
780         delete pDataXMLNode;
781       return nullptr;
782     }
783     WideString wsLocalName =
784         static_cast<CFX_XMLElement*>(pDataXMLNode)->GetLocalTagName();
785     pNode->JSObject()->SetCData(XFA_Attribute::Name, wsLocalName, false, false);
786     if (!DataLoader(pNode, pDataXMLNode, true))
787       return nullptr;
788 
789     pNode->SetXMLMappingNode(pDataXMLNode);
790     if (pDataXMLNode != pXMLDocumentNode)
791       pNode->SetFlag(XFA_NodeFlag_OwnXMLNode, false);
792     return pNode;
793   }
794   return nullptr;
795 }
796 
ParseAsXDPPacket_LocaleConnectionSourceSet(CFX_XMLNode * pXMLDocumentNode,XFA_PacketType packet_type,XFA_Element element)797 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_LocaleConnectionSourceSet(
798     CFX_XMLNode* pXMLDocumentNode,
799     XFA_PacketType packet_type,
800     XFA_Element element) {
801   const PacketInfo* packet = GetPacketByIndex(packet_type);
802   if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri,
803                      packet->flags)) {
804     return nullptr;
805   }
806 
807   CXFA_Node* pNode = m_pFactory->CreateNode(packet_type, element);
808   if (!pNode)
809     return nullptr;
810 
811   pNode->JSObject()->SetCData(XFA_Attribute::Name, packet->name, false, false);
812   if (!NormalLoader(pNode, pXMLDocumentNode, packet_type, true))
813     return nullptr;
814 
815   pNode->SetXMLMappingNode(pXMLDocumentNode);
816   return pNode;
817 }
818 
ParseAsXDPPacket_Xdc(CFX_XMLNode * pXMLDocumentNode)819 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Xdc(
820     CFX_XMLNode* pXMLDocumentNode) {
821   const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Xdc);
822   if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri,
823                      packet->flags))
824     return nullptr;
825 
826   CXFA_Node* pNode =
827       m_pFactory->CreateNode(XFA_PacketType::Xdc, XFA_Element::Xdc);
828   if (!pNode)
829     return nullptr;
830 
831   pNode->JSObject()->SetCData(XFA_Attribute::Name, packet->name, false, false);
832   pNode->SetXMLMappingNode(pXMLDocumentNode);
833   return pNode;
834 }
835 
ParseAsXDPPacket_User(CFX_XMLNode * pXMLDocumentNode)836 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_User(
837     CFX_XMLNode* pXMLDocumentNode) {
838   CXFA_Node* pNode =
839       m_pFactory->CreateNode(XFA_PacketType::Xdp, XFA_Element::Packet);
840   if (!pNode)
841     return nullptr;
842 
843   WideString wsName =
844       static_cast<CFX_XMLElement*>(pXMLDocumentNode)->GetLocalTagName();
845   pNode->JSObject()->SetCData(XFA_Attribute::Name, wsName, false, false);
846   if (!UserPacketLoader(pNode, pXMLDocumentNode))
847     return nullptr;
848 
849   pNode->SetXMLMappingNode(pXMLDocumentNode);
850   return pNode;
851 }
852 
UserPacketLoader(CXFA_Node * pXFANode,CFX_XMLNode * pXMLDoc)853 CXFA_Node* CXFA_SimpleParser::UserPacketLoader(CXFA_Node* pXFANode,
854                                                CFX_XMLNode* pXMLDoc) {
855   return pXFANode;
856 }
857 
DataLoader(CXFA_Node * pXFANode,CFX_XMLNode * pXMLDoc,bool bDoTransform)858 CXFA_Node* CXFA_SimpleParser::DataLoader(CXFA_Node* pXFANode,
859                                          CFX_XMLNode* pXMLDoc,
860                                          bool bDoTransform) {
861   ParseDataGroup(pXFANode, pXMLDoc, XFA_PacketType::Datasets);
862   return pXFANode;
863 }
864 
NormalLoader(CXFA_Node * pXFANode,CFX_XMLNode * pXMLDoc,XFA_PacketType ePacketID,bool bUseAttribute)865 CXFA_Node* CXFA_SimpleParser::NormalLoader(CXFA_Node* pXFANode,
866                                            CFX_XMLNode* pXMLDoc,
867                                            XFA_PacketType ePacketID,
868                                            bool bUseAttribute) {
869   bool bOneOfPropertyFound = false;
870   for (CFX_XMLNode* pXMLChild = pXMLDoc->GetNodeItem(CFX_XMLNode::FirstChild);
871        pXMLChild;
872        pXMLChild = pXMLChild->GetNodeItem(CFX_XMLNode::NextSibling)) {
873     switch (pXMLChild->GetType()) {
874       case FX_XMLNODE_Element: {
875         CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLChild);
876         WideString wsTagName = pXMLElement->GetLocalTagName();
877         XFA_Element eType = CXFA_Node::NameToElement(wsTagName);
878         if (eType == XFA_Element::Unknown)
879           continue;
880 
881         if (pXFANode->HasPropertyFlags(
882                 eType,
883                 XFA_PROPERTYFLAG_OneOf | XFA_PROPERTYFLAG_DefaultOneOf)) {
884           if (bOneOfPropertyFound)
885             break;
886           bOneOfPropertyFound = true;
887         }
888 
889         CXFA_Node* pXFAChild = m_pFactory->CreateNode(ePacketID, eType);
890         if (!pXFAChild)
891           return nullptr;
892         if (ePacketID == XFA_PacketType::Config) {
893           pXFAChild->JSObject()->SetAttribute(XFA_Attribute::Name,
894                                               wsTagName.AsStringView(), false);
895         }
896 
897         bool IsNeedValue = true;
898         for (auto it : pXMLElement->GetAttributes()) {
899           WideString wsAttrName;
900           GetAttributeLocalName(it.first.AsStringView(), wsAttrName);
901           if (wsAttrName == L"nil" && it.second == L"true")
902             IsNeedValue = false;
903 
904           XFA_Attribute attr =
905               CXFA_Node::NameToAttribute(wsAttrName.AsStringView());
906           if (attr == XFA_Attribute::Unknown)
907             continue;
908 
909           if (!bUseAttribute && attr != XFA_Attribute::Name &&
910               attr != XFA_Attribute::Save) {
911             continue;
912           }
913           pXFAChild->JSObject()->SetAttribute(attr, it.second.AsStringView(),
914                                               false);
915         }
916         pXFANode->InsertChild(pXFAChild, nullptr);
917         if (eType == XFA_Element::Validate || eType == XFA_Element::Locale) {
918           if (ePacketID == XFA_PacketType::Config)
919             ParseContentNode(pXFAChild, pXMLElement, ePacketID);
920           else
921             NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute);
922 
923           break;
924         }
925         switch (pXFAChild->GetObjectType()) {
926           case XFA_ObjectType::ContentNode:
927           case XFA_ObjectType::TextNode:
928           case XFA_ObjectType::NodeC:
929           case XFA_ObjectType::NodeV:
930             if (IsNeedValue)
931               ParseContentNode(pXFAChild, pXMLElement, ePacketID);
932             break;
933           default:
934             NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute);
935             break;
936         }
937       } break;
938       case FX_XMLNODE_Instruction:
939         ParseInstruction(pXFANode, static_cast<CFX_XMLInstruction*>(pXMLChild),
940                          ePacketID);
941         break;
942       default:
943         break;
944     }
945   }
946   return pXFANode;
947 }
948 
ParseContentNode(CXFA_Node * pXFANode,CFX_XMLNode * pXMLNode,XFA_PacketType ePacketID)949 void CXFA_SimpleParser::ParseContentNode(CXFA_Node* pXFANode,
950                                          CFX_XMLNode* pXMLNode,
951                                          XFA_PacketType ePacketID) {
952   XFA_Element element = XFA_Element::Sharptext;
953   if (pXFANode->GetElementType() == XFA_Element::ExData) {
954     WideString wsContentType =
955         pXFANode->JSObject()->GetCData(XFA_Attribute::ContentType);
956     if (wsContentType == L"text/html")
957       element = XFA_Element::SharpxHTML;
958     else if (wsContentType == L"text/xml")
959       element = XFA_Element::Sharpxml;
960   }
961   if (element == XFA_Element::SharpxHTML)
962     pXFANode->SetXMLMappingNode(pXMLNode);
963 
964   WideString wsValue;
965   for (CFX_XMLNode* pXMLChild = pXMLNode->GetNodeItem(CFX_XMLNode::FirstChild);
966        pXMLChild;
967        pXMLChild = pXMLChild->GetNodeItem(CFX_XMLNode::NextSibling)) {
968     FX_XMLNODETYPE eNodeType = pXMLChild->GetType();
969     if (eNodeType == FX_XMLNODE_Instruction)
970       continue;
971 
972     if (element == XFA_Element::SharpxHTML) {
973       if (eNodeType != FX_XMLNODE_Element)
974         break;
975 
976       if (XFA_RecognizeRichText(static_cast<CFX_XMLElement*>(pXMLChild)))
977         wsValue +=
978             GetPlainTextFromRichText(static_cast<CFX_XMLElement*>(pXMLChild));
979     } else if (element == XFA_Element::Sharpxml) {
980       if (eNodeType != FX_XMLNODE_Element)
981         break;
982 
983       ConvertXMLToPlainText(static_cast<CFX_XMLElement*>(pXMLChild), wsValue);
984     } else {
985       if (eNodeType == FX_XMLNODE_Element)
986         break;
987       if (eNodeType == FX_XMLNODE_Text || eNodeType == FX_XMLNODE_CharData)
988         wsValue = static_cast<CFX_XMLText*>(pXMLChild)->GetText();
989     }
990     break;
991   }
992   if (!wsValue.IsEmpty()) {
993     if (pXFANode->IsContentNode()) {
994       CXFA_Node* pContentRawDataNode =
995           m_pFactory->CreateNode(ePacketID, element);
996       ASSERT(pContentRawDataNode);
997       pContentRawDataNode->JSObject()->SetCData(XFA_Attribute::Value, wsValue,
998                                                 false, false);
999       pXFANode->InsertChild(pContentRawDataNode, nullptr);
1000     } else {
1001       pXFANode->JSObject()->SetCData(XFA_Attribute::Value, wsValue, false,
1002                                      false);
1003     }
1004   }
1005 }
1006 
ParseDataGroup(CXFA_Node * pXFANode,CFX_XMLNode * pXMLNode,XFA_PacketType ePacketID)1007 void CXFA_SimpleParser::ParseDataGroup(CXFA_Node* pXFANode,
1008                                        CFX_XMLNode* pXMLNode,
1009                                        XFA_PacketType ePacketID) {
1010   for (CFX_XMLNode* pXMLChild = pXMLNode->GetNodeItem(CFX_XMLNode::FirstChild);
1011        pXMLChild;
1012        pXMLChild = pXMLChild->GetNodeItem(CFX_XMLNode::NextSibling)) {
1013     switch (pXMLChild->GetType()) {
1014       case FX_XMLNODE_Element: {
1015         CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLChild);
1016         {
1017           WideString wsNamespaceURI = GetElementTagNamespaceURI(pXMLElement);
1018           if (wsNamespaceURI == L"http://www.xfa.com/schema/xfa-package/" ||
1019               wsNamespaceURI == L"http://www.xfa.org/schema/xfa-package/" ||
1020               wsNamespaceURI == L"http://www.w3.org/2001/XMLSchema-instance") {
1021             continue;
1022           }
1023         }
1024 
1025         XFA_Element eNodeType = XFA_Element::DataModel;
1026         if (eNodeType == XFA_Element::DataModel) {
1027           WideString wsDataNodeAttr;
1028           if (FindAttributeWithNS(pXMLElement, L"dataNode",
1029                                   L"http://www.xfa.org/schema/xfa-data/1.0/",
1030                                   wsDataNodeAttr)) {
1031             if (wsDataNodeAttr == L"dataGroup")
1032               eNodeType = XFA_Element::DataGroup;
1033             else if (wsDataNodeAttr == L"dataValue")
1034               eNodeType = XFA_Element::DataValue;
1035           }
1036         }
1037         WideString wsContentType;
1038         if (eNodeType == XFA_Element::DataModel) {
1039           if (FindAttributeWithNS(pXMLElement, L"contentType",
1040                                   L"http://www.xfa.org/schema/xfa-data/1.0/",
1041                                   wsContentType)) {
1042             if (!wsContentType.IsEmpty())
1043               eNodeType = XFA_Element::DataValue;
1044           }
1045         }
1046         if (eNodeType == XFA_Element::DataModel) {
1047           for (CFX_XMLNode* pXMLDataChild =
1048                    pXMLElement->GetNodeItem(CFX_XMLNode::FirstChild);
1049                pXMLDataChild; pXMLDataChild = pXMLDataChild->GetNodeItem(
1050                                   CFX_XMLNode::NextSibling)) {
1051             if (pXMLDataChild->GetType() == FX_XMLNODE_Element) {
1052               if (!XFA_RecognizeRichText(
1053                       static_cast<CFX_XMLElement*>(pXMLDataChild))) {
1054                 eNodeType = XFA_Element::DataGroup;
1055                 break;
1056               }
1057             }
1058           }
1059         }
1060         if (eNodeType == XFA_Element::DataModel)
1061           eNodeType = XFA_Element::DataValue;
1062 
1063         CXFA_Node* pXFAChild =
1064             m_pFactory->CreateNode(XFA_PacketType::Datasets, eNodeType);
1065         if (!pXFAChild)
1066           return;
1067 
1068         pXFAChild->JSObject()->SetCData(
1069             XFA_Attribute::Name, pXMLElement->GetLocalTagName(), false, false);
1070         bool bNeedValue = true;
1071 
1072         for (auto it : pXMLElement->GetAttributes()) {
1073           WideString wsName;
1074           WideString wsNS;
1075           if (!ResolveAttribute(pXMLElement, it.first, wsName, wsNS)) {
1076             continue;
1077           }
1078           if (wsName == L"nil" && it.second == L"true") {
1079             bNeedValue = false;
1080             continue;
1081           }
1082           if (wsNS == L"http://www.xfa.com/schema/xfa-package/" ||
1083               wsNS == L"http://www.xfa.org/schema/xfa-package/" ||
1084               wsNS == L"http://www.w3.org/2001/XMLSchema-instance" ||
1085               wsNS == L"http://www.xfa.org/schema/xfa-data/1.0/") {
1086             continue;
1087           }
1088           CXFA_Node* pXFAMetaData = m_pFactory->CreateNode(
1089               XFA_PacketType::Datasets, XFA_Element::DataValue);
1090           if (!pXFAMetaData)
1091             return;
1092 
1093           pXFAMetaData->JSObject()->SetCData(XFA_Attribute::Name, wsName, false,
1094                                              false);
1095           pXFAMetaData->JSObject()->SetCData(XFA_Attribute::QualifiedName,
1096                                              it.first, false, false);
1097           pXFAMetaData->JSObject()->SetCData(XFA_Attribute::Value, it.second,
1098                                              false, false);
1099           pXFAMetaData->JSObject()->SetEnum(XFA_Attribute::Contains,
1100                                             XFA_AttributeEnum::MetaData, false);
1101           pXFAChild->InsertChild(pXFAMetaData, nullptr);
1102           pXFAMetaData->SetXMLMappingNode(pXMLElement);
1103           pXFAMetaData->SetFlag(XFA_NodeFlag_Initialized, false);
1104         }
1105 
1106         if (!bNeedValue) {
1107           WideString wsNilName(L"xsi:nil");
1108           pXMLElement->RemoveAttribute(wsNilName.c_str());
1109         }
1110         pXFANode->InsertChild(pXFAChild, nullptr);
1111         if (eNodeType == XFA_Element::DataGroup)
1112           ParseDataGroup(pXFAChild, pXMLElement, ePacketID);
1113         else if (bNeedValue)
1114           ParseDataValue(pXFAChild, pXMLChild, XFA_PacketType::Datasets);
1115 
1116         pXFAChild->SetXMLMappingNode(pXMLElement);
1117         pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1118         continue;
1119       }
1120       case FX_XMLNODE_CharData:
1121       case FX_XMLNODE_Text: {
1122         CFX_XMLText* pXMLText = static_cast<CFX_XMLText*>(pXMLChild);
1123         WideString wsText = pXMLText->GetText();
1124         if (IsStringAllWhitespace(wsText))
1125           continue;
1126 
1127         CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_PacketType::Datasets,
1128                                                       XFA_Element::DataValue);
1129         if (!pXFAChild)
1130           return;
1131 
1132         pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsText, false,
1133                                         false);
1134         pXFANode->InsertChild(pXFAChild, nullptr);
1135         pXFAChild->SetXMLMappingNode(pXMLText);
1136         pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1137         continue;
1138       }
1139       default:
1140         continue;
1141     }
1142   }
1143 }
1144 
ParseDataValue(CXFA_Node * pXFANode,CFX_XMLNode * pXMLNode,XFA_PacketType ePacketID)1145 void CXFA_SimpleParser::ParseDataValue(CXFA_Node* pXFANode,
1146                                        CFX_XMLNode* pXMLNode,
1147                                        XFA_PacketType ePacketID) {
1148   CFX_WideTextBuf wsValueTextBuf;
1149   CFX_WideTextBuf wsCurValueTextBuf;
1150   bool bMarkAsCompound = false;
1151   CFX_XMLNode* pXMLCurValueNode = nullptr;
1152   for (CFX_XMLNode* pXMLChild = pXMLNode->GetNodeItem(CFX_XMLNode::FirstChild);
1153        pXMLChild;
1154        pXMLChild = pXMLChild->GetNodeItem(CFX_XMLNode::NextSibling)) {
1155     FX_XMLNODETYPE eNodeType = pXMLChild->GetType();
1156     if (eNodeType == FX_XMLNODE_Instruction)
1157       continue;
1158 
1159     if (eNodeType == FX_XMLNODE_Text || eNodeType == FX_XMLNODE_CharData) {
1160       WideString wsText = static_cast<CFX_XMLText*>(pXMLChild)->GetText();
1161       if (!pXMLCurValueNode)
1162         pXMLCurValueNode = pXMLChild;
1163 
1164       wsCurValueTextBuf << wsText;
1165     } else if (XFA_RecognizeRichText(static_cast<CFX_XMLElement*>(pXMLChild))) {
1166       WideString wsText =
1167           GetPlainTextFromRichText(static_cast<CFX_XMLElement*>(pXMLChild));
1168       if (!pXMLCurValueNode)
1169         pXMLCurValueNode = pXMLChild;
1170 
1171       wsCurValueTextBuf << wsText;
1172     } else {
1173       bMarkAsCompound = true;
1174       if (pXMLCurValueNode) {
1175         WideString wsCurValue = wsCurValueTextBuf.MakeString();
1176         if (!wsCurValue.IsEmpty()) {
1177           CXFA_Node* pXFAChild =
1178               m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
1179           if (!pXFAChild)
1180             return;
1181 
1182           pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, L"", false,
1183                                           false);
1184           pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsCurValue,
1185                                           false, false);
1186           pXFANode->InsertChild(pXFAChild, nullptr);
1187           pXFAChild->SetXMLMappingNode(pXMLCurValueNode);
1188           pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1189           wsValueTextBuf << wsCurValue;
1190           wsCurValueTextBuf.Clear();
1191         }
1192         pXMLCurValueNode = nullptr;
1193       }
1194       CXFA_Node* pXFAChild =
1195           m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
1196       if (!pXFAChild)
1197         return;
1198 
1199       WideString wsNodeStr =
1200           static_cast<CFX_XMLElement*>(pXMLChild)->GetLocalTagName();
1201       pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, wsNodeStr, false,
1202                                       false);
1203       ParseDataValue(pXFAChild, pXMLChild, ePacketID);
1204       pXFANode->InsertChild(pXFAChild, nullptr);
1205       pXFAChild->SetXMLMappingNode(pXMLChild);
1206       pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1207       WideString wsCurValue =
1208           pXFAChild->JSObject()->GetCData(XFA_Attribute::Value);
1209       wsValueTextBuf << wsCurValue;
1210     }
1211   }
1212   if (pXMLCurValueNode) {
1213     WideString wsCurValue = wsCurValueTextBuf.MakeString();
1214     if (!wsCurValue.IsEmpty()) {
1215       if (bMarkAsCompound) {
1216         CXFA_Node* pXFAChild =
1217             m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
1218         if (!pXFAChild)
1219           return;
1220 
1221         pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, L"", false, false);
1222         pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsCurValue, false,
1223                                         false);
1224         pXFANode->InsertChild(pXFAChild, nullptr);
1225         pXFAChild->SetXMLMappingNode(pXMLCurValueNode);
1226         pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1227       }
1228       wsValueTextBuf << wsCurValue;
1229       wsCurValueTextBuf.Clear();
1230     }
1231     pXMLCurValueNode = nullptr;
1232   }
1233   WideString wsNodeValue = wsValueTextBuf.MakeString();
1234   pXFANode->JSObject()->SetCData(XFA_Attribute::Value, wsNodeValue, false,
1235                                  false);
1236 }
1237 
ParseInstruction(CXFA_Node * pXFANode,CFX_XMLInstruction * pXMLInstruction,XFA_PacketType ePacketID)1238 void CXFA_SimpleParser::ParseInstruction(CXFA_Node* pXFANode,
1239                                          CFX_XMLInstruction* pXMLInstruction,
1240                                          XFA_PacketType ePacketID) {
1241   if (!m_bDocumentParser)
1242     return;
1243 
1244   WideString wsTargetName = pXMLInstruction->GetName();
1245   const std::vector<WideString>& target_data = pXMLInstruction->GetTargetData();
1246   if (wsTargetName == L"originalXFAVersion") {
1247     if (target_data.size() > 1 &&
1248         (pXFANode->GetDocument()->RecognizeXFAVersionNumber(target_data[0]) !=
1249          XFA_VERSION_UNKNOWN) &&
1250         target_data[1] == L"v2.7-scripting:1") {
1251       pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_Scripting, true);
1252     }
1253   } else if (wsTargetName == L"acrobat") {
1254     if (target_data.size() > 1 && target_data[0] == L"JavaScript" &&
1255         target_data[1] == L"strictScoping") {
1256       pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_StrictScoping, true);
1257     }
1258   }
1259 }
1260 
CloseParser()1261 void CXFA_SimpleParser::CloseParser() {
1262   m_pXMLDoc.reset();
1263   m_pStream.Reset();
1264 }
1265