1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "xfa/fxfa/parser/cxfa_simple_parser.h"
8 
9 #include <utility>
10 
11 #include "core/fxcrt/fx_ext.h"
12 #include "third_party/base/ptr_util.h"
13 #include "xfa/fgas/crt/fgas_codepage.h"
14 #include "xfa/fxfa/fxfa.h"
15 #include "xfa/fxfa/parser/cxfa_document.h"
16 #include "xfa/fxfa/parser/cxfa_widetextread.h"
17 #include "xfa/fxfa/parser/cxfa_xml_parser.h"
18 #include "xfa/fxfa/parser/xfa_basic_data.h"
19 #include "xfa/fxfa/parser/xfa_utils.h"
20 #include "xfa/fxfa/xfa_checksum.h"
21 
22 namespace {
23 
GetDocumentNode(CFDE_XMLDoc * pXMLDoc,bool bVerifyWellFormness=false)24 CFDE_XMLNode* GetDocumentNode(CFDE_XMLDoc* pXMLDoc,
25                               bool bVerifyWellFormness = false) {
26   if (!pXMLDoc)
27     return nullptr;
28 
29   for (CFDE_XMLNode* pXMLNode =
30            pXMLDoc->GetRoot()->GetNodeItem(CFDE_XMLNode::FirstChild);
31        pXMLNode; pXMLNode = pXMLNode->GetNodeItem(CFDE_XMLNode::NextSibling)) {
32     if (pXMLNode->GetType() != FDE_XMLNODE_Element)
33       continue;
34 
35     if (!bVerifyWellFormness)
36       return pXMLNode;
37 
38     for (CFDE_XMLNode* pNextNode =
39              pXMLNode->GetNodeItem(CFDE_XMLNode::NextSibling);
40          pNextNode;
41          pNextNode = pNextNode->GetNodeItem(CFDE_XMLNode::NextSibling)) {
42       if (pNextNode->GetType() == FDE_XMLNODE_Element)
43         return nullptr;
44     }
45     return pXMLNode;
46   }
47   return nullptr;
48 }
49 
GetElementTagNamespaceURI(CFDE_XMLElement * pElement,CFX_WideString & wsNamespaceURI)50 void GetElementTagNamespaceURI(CFDE_XMLElement* pElement,
51                                CFX_WideString& wsNamespaceURI) {
52   CFX_WideString wsNodeStr;
53   pElement->GetNamespacePrefix(wsNodeStr);
54   if (!XFA_FDEExtension_ResolveNamespaceQualifier(
55           pElement, wsNodeStr.AsStringC(), wsNamespaceURI)) {
56     wsNamespaceURI.clear();
57   }
58 }
59 
MatchNodeName(CFDE_XMLNode * pNode,const CFX_WideStringC & wsLocalTagName,const CFX_WideStringC & wsNamespaceURIPrefix,uint32_t eMatchFlags=XFA_XDPPACKET_FLAGS_NOMATCH)60 bool MatchNodeName(CFDE_XMLNode* pNode,
61                    const CFX_WideStringC& wsLocalTagName,
62                    const CFX_WideStringC& wsNamespaceURIPrefix,
63                    uint32_t eMatchFlags = XFA_XDPPACKET_FLAGS_NOMATCH) {
64   if (!pNode || pNode->GetType() != FDE_XMLNODE_Element)
65     return false;
66 
67   CFDE_XMLElement* pElement = reinterpret_cast<CFDE_XMLElement*>(pNode);
68   CFX_WideString wsNodeStr;
69   pElement->GetLocalTagName(wsNodeStr);
70   if (wsNodeStr != wsLocalTagName)
71     return false;
72 
73   GetElementTagNamespaceURI(pElement, wsNodeStr);
74   if (eMatchFlags & XFA_XDPPACKET_FLAGS_NOMATCH)
75     return true;
76   if (eMatchFlags & XFA_XDPPACKET_FLAGS_PREFIXMATCH) {
77     return wsNodeStr.Left(wsNamespaceURIPrefix.GetLength()) ==
78            wsNamespaceURIPrefix;
79   }
80   return wsNodeStr == wsNamespaceURIPrefix;
81 }
82 
GetAttributeLocalName(const CFX_WideStringC & wsAttributeName,CFX_WideString & wsLocalAttrName)83 bool GetAttributeLocalName(const CFX_WideStringC& wsAttributeName,
84                            CFX_WideString& wsLocalAttrName) {
85   CFX_WideString wsAttrName(wsAttributeName);
86   FX_STRSIZE iFind = wsAttrName.Find(L':', 0);
87   if (iFind < 0) {
88     wsLocalAttrName = wsAttrName;
89     return false;
90   }
91   wsLocalAttrName = wsAttrName.Right(wsAttrName.GetLength() - iFind - 1);
92   return true;
93 }
94 
ResolveAttribute(CFDE_XMLElement * pElement,const CFX_WideStringC & wsAttributeName,CFX_WideString & wsLocalAttrName,CFX_WideString & wsNamespaceURI)95 bool ResolveAttribute(CFDE_XMLElement* pElement,
96                       const CFX_WideStringC& wsAttributeName,
97                       CFX_WideString& wsLocalAttrName,
98                       CFX_WideString& wsNamespaceURI) {
99   CFX_WideString wsAttrName(wsAttributeName);
100   CFX_WideString wsNSPrefix;
101   if (GetAttributeLocalName(wsAttributeName, wsLocalAttrName)) {
102     wsNSPrefix = wsAttrName.Left(wsAttributeName.GetLength() -
103                                  wsLocalAttrName.GetLength() - 1);
104   }
105   if (wsLocalAttrName == L"xmlns" || wsNSPrefix == L"xmlns" ||
106       wsNSPrefix == L"xml") {
107     return false;
108   }
109   if (!XFA_FDEExtension_ResolveNamespaceQualifier(
110           pElement, wsNSPrefix.AsStringC(), wsNamespaceURI)) {
111     wsNamespaceURI.clear();
112     return false;
113   }
114   return true;
115 }
116 
FindAttributeWithNS(CFDE_XMLElement * pElement,const CFX_WideStringC & wsLocalAttributeName,const CFX_WideStringC & wsNamespaceURIPrefix,CFX_WideString & wsValue,bool bMatchNSAsPrefix=false)117 bool FindAttributeWithNS(CFDE_XMLElement* pElement,
118                          const CFX_WideStringC& wsLocalAttributeName,
119                          const CFX_WideStringC& wsNamespaceURIPrefix,
120                          CFX_WideString& wsValue,
121                          bool bMatchNSAsPrefix = false) {
122   if (!pElement)
123     return false;
124 
125   CFX_WideString wsAttrName;
126   CFX_WideString wsAttrValue;
127   CFX_WideString wsAttrNS;
128   for (int32_t iAttrCount = pElement->CountAttributes(), i = 0; i < iAttrCount;
129        i++) {
130     pElement->GetAttribute(i, wsAttrName, wsAttrValue);
131     FX_STRSIZE iFind = wsAttrName.Find(L':', 0);
132     CFX_WideString wsNSPrefix;
133     if (iFind < 0) {
134       if (wsLocalAttributeName != wsAttrName)
135         continue;
136     } else {
137       if (wsLocalAttributeName !=
138           wsAttrName.Right(wsAttrName.GetLength() - iFind - 1)) {
139         continue;
140       }
141       wsNSPrefix = wsAttrName.Left(iFind);
142     }
143     if (!XFA_FDEExtension_ResolveNamespaceQualifier(
144             pElement, wsNSPrefix.AsStringC(), wsAttrNS)) {
145       continue;
146     }
147     if (bMatchNSAsPrefix) {
148       if (wsAttrNS.Left(wsNamespaceURIPrefix.GetLength()) !=
149           wsNamespaceURIPrefix) {
150         continue;
151       }
152     } else {
153       if (wsAttrNS != wsNamespaceURIPrefix)
154         continue;
155     }
156     wsValue = wsAttrValue;
157     return true;
158   }
159   return false;
160 }
161 
GetDataSetsFromXDP(CFDE_XMLNode * pXMLDocumentNode)162 CFDE_XMLNode* GetDataSetsFromXDP(CFDE_XMLNode* pXMLDocumentNode) {
163   if (MatchNodeName(pXMLDocumentNode,
164                     XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName,
165                     XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI,
166                     XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) {
167     return pXMLDocumentNode;
168   }
169   if (!MatchNodeName(pXMLDocumentNode,
170                      XFA_GetPacketByIndex(XFA_PACKET_XDP)->pName,
171                      XFA_GetPacketByIndex(XFA_PACKET_XDP)->pURI,
172                      XFA_GetPacketByIndex(XFA_PACKET_XDP)->eFlags)) {
173     return nullptr;
174   }
175   for (CFDE_XMLNode* pDatasetsNode =
176            pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild);
177        pDatasetsNode;
178        pDatasetsNode = pDatasetsNode->GetNodeItem(CFDE_XMLNode::NextSibling)) {
179     if (!MatchNodeName(pDatasetsNode,
180                        XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName,
181                        XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI,
182                        XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) {
183       continue;
184     }
185     return pDatasetsNode;
186   }
187   return nullptr;
188 }
189 
IsStringAllWhitespace(CFX_WideString wsText)190 bool IsStringAllWhitespace(CFX_WideString wsText) {
191   wsText.TrimRight(L"\x20\x9\xD\xA");
192   return wsText.IsEmpty();
193 }
194 
ConvertXMLToPlainText(CFDE_XMLElement * pRootXMLNode,CFX_WideString & wsOutput)195 void ConvertXMLToPlainText(CFDE_XMLElement* pRootXMLNode,
196                            CFX_WideString& wsOutput) {
197   for (CFDE_XMLNode* pXMLChild =
198            pRootXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
199        pXMLChild;
200        pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
201     switch (pXMLChild->GetType()) {
202       case FDE_XMLNODE_Element: {
203         CFX_WideString wsTextData;
204         static_cast<CFDE_XMLElement*>(pXMLChild)->GetTextData(wsTextData);
205         wsTextData += L"\n";
206         wsOutput += wsTextData;
207         break;
208       }
209       case FDE_XMLNODE_Text: {
210         CFX_WideString wsText;
211         static_cast<CFDE_XMLText*>(pXMLChild)->GetText(wsText);
212         if (IsStringAllWhitespace(wsText))
213           continue;
214 
215         wsOutput = wsText;
216         break;
217       }
218       case FDE_XMLNODE_CharData: {
219         CFX_WideString wsCharData;
220         static_cast<CFDE_XMLCharData*>(pXMLChild)->GetCharData(wsCharData);
221         if (IsStringAllWhitespace(wsCharData))
222           continue;
223 
224         wsOutput = wsCharData;
225         break;
226       }
227       default:
228         ASSERT(false);
229         break;
230     }
231   }
232 }
233 
GetPacketByName(const CFX_WideStringC & wsName)234 const XFA_PACKETINFO* GetPacketByName(const CFX_WideStringC& wsName) {
235   if (wsName.IsEmpty())
236     return nullptr;
237 
238   uint32_t uHash = FX_HashCode_GetW(wsName, false);
239   int32_t iStart = 0;
240   int32_t iEnd = g_iXFAPacketCount - 1;
241   do {
242     int32_t iMid = (iStart + iEnd) / 2;
243     const XFA_PACKETINFO* pInfo = g_XFAPacketData + iMid;
244     if (uHash == pInfo->uHash)
245       return pInfo;
246     if (uHash < pInfo->uHash)
247       iEnd = iMid - 1;
248     else
249       iStart = iMid + 1;
250   } while (iStart <= iEnd);
251   return nullptr;
252 }
253 
254 }  // namespace
255 
XFA_RecognizeRichText(CFDE_XMLElement * pRichTextXMLNode)256 bool XFA_RecognizeRichText(CFDE_XMLElement* pRichTextXMLNode) {
257   if (pRichTextXMLNode) {
258     CFX_WideString wsNamespaceURI;
259     GetElementTagNamespaceURI(pRichTextXMLNode, wsNamespaceURI);
260     if (wsNamespaceURI == L"http://www.w3.org/1999/xhtml")
261       return true;
262   }
263   return false;
264 }
265 
CXFA_SimpleParser(CXFA_Document * pFactory,bool bDocumentParser)266 CXFA_SimpleParser::CXFA_SimpleParser(CXFA_Document* pFactory,
267                                      bool bDocumentParser)
268     : m_pXMLParser(nullptr),
269       m_pXMLDoc(nullptr),
270       m_pStream(nullptr),
271       m_pFileRead(nullptr),
272       m_pFactory(pFactory),
273       m_pRootNode(nullptr),
274       m_ePacketID(XFA_XDPPACKET_UNKNOWN),
275       m_bDocumentParser(bDocumentParser) {}
276 
~CXFA_SimpleParser()277 CXFA_SimpleParser::~CXFA_SimpleParser() {}
278 
SetFactory(CXFA_Document * pFactory)279 void CXFA_SimpleParser::SetFactory(CXFA_Document* pFactory) {
280   m_pFactory = pFactory;
281 }
282 
StartParse(const CFX_RetainPtr<IFX_SeekableReadStream> & pStream,XFA_XDPPACKET ePacketID)283 int32_t CXFA_SimpleParser::StartParse(
284     const CFX_RetainPtr<IFX_SeekableReadStream>& pStream,
285     XFA_XDPPACKET ePacketID) {
286   CloseParser();
287   m_pFileRead = pStream;
288   m_pStream = IFGAS_Stream::CreateStream(
289       pStream, FX_STREAMACCESS_Read | FX_STREAMACCESS_Text);
290   if (!m_pStream)
291     return XFA_PARSESTATUS_StreamErr;
292 
293   uint16_t wCodePage = m_pStream->GetCodePage();
294   if (wCodePage != FX_CODEPAGE_UTF16LE && wCodePage != FX_CODEPAGE_UTF16BE &&
295       wCodePage != FX_CODEPAGE_UTF8) {
296     m_pStream->SetCodePage(FX_CODEPAGE_UTF8);
297   }
298   m_pXMLDoc = pdfium::MakeUnique<CFDE_XMLDoc>();
299   auto pNewParser =
300       pdfium::MakeUnique<CXFA_XMLParser>(m_pXMLDoc->GetRoot(), m_pStream);
301   m_pXMLParser = pNewParser.get();
302   if (!m_pXMLDoc->LoadXML(std::move(pNewParser)))
303     return XFA_PARSESTATUS_StatusErr;
304 
305   m_ePacketID = ePacketID;
306   return XFA_PARSESTATUS_Ready;
307 }
308 
DoParse(IFX_Pause * pPause)309 int32_t CXFA_SimpleParser::DoParse(IFX_Pause* pPause) {
310   if (!m_pXMLDoc || m_ePacketID == XFA_XDPPACKET_UNKNOWN)
311     return XFA_PARSESTATUS_StatusErr;
312 
313   int32_t iRet = m_pXMLDoc->DoLoad(pPause);
314   if (iRet < 0)
315     return XFA_PARSESTATUS_SyntaxErr;
316   if (iRet < 100)
317     return iRet / 2;
318 
319   m_pRootNode = ParseAsXDPPacket(GetDocumentNode(m_pXMLDoc.get()), m_ePacketID);
320   m_pXMLDoc->CloseXML();
321   m_pStream.Reset();
322   if (!m_pRootNode)
323     return XFA_PARSESTATUS_StatusErr;
324 
325   return XFA_PARSESTATUS_Done;
326 }
327 
ParseXMLData(const CFX_WideString & wsXML,CFDE_XMLNode * & pXMLNode,IFX_Pause * pPause)328 int32_t CXFA_SimpleParser::ParseXMLData(const CFX_WideString& wsXML,
329                                         CFDE_XMLNode*& pXMLNode,
330                                         IFX_Pause* pPause) {
331   CloseParser();
332   pXMLNode = nullptr;
333   m_pXMLDoc = pdfium::MakeUnique<CFDE_XMLDoc>();
334   auto pStream = pdfium::MakeRetain<CXFA_WideTextRead>(wsXML);
335   auto pParser =
336       pdfium::MakeUnique<CXFA_XMLParser>(m_pXMLDoc->GetRoot(), pStream);
337   pParser->m_dwCheckStatus = 0x03;
338   if (!m_pXMLDoc->LoadXML(std::move(pParser)))
339     return XFA_PARSESTATUS_StatusErr;
340 
341   int32_t iRet = m_pXMLDoc->DoLoad(pPause);
342   if (iRet < 0 || iRet >= 100)
343     m_pXMLDoc->CloseXML();
344   if (iRet < 0)
345     return XFA_PARSESTATUS_SyntaxErr;
346   if (iRet < 100)
347     return iRet / 2;
348 
349   pXMLNode = GetDocumentNode(m_pXMLDoc.get());
350   return XFA_PARSESTATUS_Done;
351 }
352 
ConstructXFANode(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLNode)353 void CXFA_SimpleParser::ConstructXFANode(CXFA_Node* pXFANode,
354                                          CFDE_XMLNode* pXMLNode) {
355   XFA_XDPPACKET ePacketID = (XFA_XDPPACKET)pXFANode->GetPacketID();
356   if (ePacketID == XFA_XDPPACKET_Datasets) {
357     if (pXFANode->GetElementType() == XFA_Element::DataValue) {
358       for (CFDE_XMLNode* pXMLChild =
359                pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
360            pXMLChild;
361            pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
362         FDE_XMLNODETYPE eNodeType = pXMLChild->GetType();
363         if (eNodeType == FDE_XMLNODE_Instruction)
364           continue;
365 
366         if (eNodeType == FDE_XMLNODE_Element) {
367           CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets,
368                                                         XFA_Element::DataValue);
369           if (!pXFAChild)
370             return;
371 
372           CFX_WideString wsNodeStr;
373           CFDE_XMLElement* child = static_cast<CFDE_XMLElement*>(pXMLChild);
374           child->GetLocalTagName(wsNodeStr);
375           pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeStr);
376           CFX_WideString wsChildValue;
377           XFA_GetPlainTextFromRichText(child, wsChildValue);
378           if (!wsChildValue.IsEmpty())
379             pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsChildValue);
380 
381           pXFANode->InsertChild(pXFAChild);
382           pXFAChild->SetXMLMappingNode(pXMLChild);
383           pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
384           break;
385         }
386       }
387       m_pRootNode = pXFANode;
388     } else {
389       m_pRootNode = DataLoader(pXFANode, pXMLNode, true);
390     }
391   } else if (pXFANode->IsContentNode()) {
392     ParseContentNode(pXFANode, pXMLNode, ePacketID);
393     m_pRootNode = pXFANode;
394   } else {
395     m_pRootNode = NormalLoader(pXFANode, pXMLNode, ePacketID, true);
396   }
397 }
398 
GetRootNode() const399 CXFA_Node* CXFA_SimpleParser::GetRootNode() const {
400   return m_pRootNode;
401 }
402 
GetXMLDoc() const403 CFDE_XMLDoc* CXFA_SimpleParser::GetXMLDoc() const {
404   return m_pXMLDoc.get();
405 }
406 
XFA_FDEExtension_ResolveNamespaceQualifier(CFDE_XMLElement * pNode,const CFX_WideStringC & wsQualifier,CFX_WideString & wsNamespaceURI)407 bool XFA_FDEExtension_ResolveNamespaceQualifier(
408     CFDE_XMLElement* pNode,
409     const CFX_WideStringC& wsQualifier,
410     CFX_WideString& wsNamespaceURI) {
411   if (!pNode)
412     return false;
413 
414   CFDE_XMLNode* pFakeRoot = pNode->GetNodeItem(CFDE_XMLNode::Root);
415   CFX_WideString wsNSAttribute;
416   bool bRet = false;
417   if (wsQualifier.IsEmpty()) {
418     wsNSAttribute = L"xmlns";
419     bRet = true;
420   } else {
421     wsNSAttribute = L"xmlns:" + wsQualifier;
422   }
423   for (; pNode != pFakeRoot; pNode = static_cast<CFDE_XMLElement*>(
424                                  pNode->GetNodeItem(CFDE_XMLNode::Parent))) {
425     if (pNode->GetType() != FDE_XMLNODE_Element)
426       continue;
427 
428     if (pNode->HasAttribute(wsNSAttribute.c_str())) {
429       pNode->GetString(wsNSAttribute.c_str(), wsNamespaceURI);
430       return true;
431     }
432   }
433   wsNamespaceURI.clear();
434   return bRet;
435 }
436 
ParseAsXDPPacket(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)437 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket(CFDE_XMLNode* pXMLDocumentNode,
438                                                XFA_XDPPACKET ePacketID) {
439   switch (ePacketID) {
440     case XFA_XDPPACKET_UNKNOWN:
441       return nullptr;
442     case XFA_XDPPACKET_XDP:
443       return ParseAsXDPPacket_XDP(pXMLDocumentNode, ePacketID);
444     case XFA_XDPPACKET_Config:
445       return ParseAsXDPPacket_Config(pXMLDocumentNode, ePacketID);
446     case XFA_XDPPACKET_Template:
447     case XFA_XDPPACKET_Form:
448       return ParseAsXDPPacket_TemplateForm(pXMLDocumentNode, ePacketID);
449     case XFA_XDPPACKET_Datasets:
450       return ParseAsXDPPacket_Data(pXMLDocumentNode, ePacketID);
451     case XFA_XDPPACKET_Xdc:
452       return ParseAsXDPPacket_Xdc(pXMLDocumentNode, ePacketID);
453     case XFA_XDPPACKET_LocaleSet:
454     case XFA_XDPPACKET_ConnectionSet:
455     case XFA_XDPPACKET_SourceSet:
456       return ParseAsXDPPacket_LocaleConnectionSourceSet(pXMLDocumentNode,
457                                                         ePacketID);
458     default:
459       return ParseAsXDPPacket_User(pXMLDocumentNode, ePacketID);
460   }
461 }
462 
ParseAsXDPPacket_XDP(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)463 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_XDP(
464     CFDE_XMLNode* pXMLDocumentNode,
465     XFA_XDPPACKET ePacketID) {
466   if (!MatchNodeName(pXMLDocumentNode,
467                      XFA_GetPacketByIndex(XFA_PACKET_XDP)->pName,
468                      XFA_GetPacketByIndex(XFA_PACKET_XDP)->pURI,
469                      XFA_GetPacketByIndex(XFA_PACKET_XDP)->eFlags)) {
470     return nullptr;
471   }
472   CXFA_Node* pXFARootNode =
473       m_pFactory->CreateNode(XFA_XDPPACKET_XDP, XFA_Element::Xfa);
474   if (!pXFARootNode)
475     return nullptr;
476 
477   m_pRootNode = pXFARootNode;
478   pXFARootNode->SetCData(XFA_ATTRIBUTE_Name, L"xfa");
479   {
480     CFDE_XMLElement* pElement = static_cast<CFDE_XMLElement*>(pXMLDocumentNode);
481     int32_t iAttributeCount = pElement->CountAttributes();
482     for (int32_t i = 0; i < iAttributeCount; i++) {
483       CFX_WideString wsAttriName, wsAttriValue;
484       pElement->GetAttribute(i, wsAttriName, wsAttriValue);
485       if (wsAttriName == L"uuid")
486         pXFARootNode->SetCData(XFA_ATTRIBUTE_Uuid, wsAttriValue);
487       else if (wsAttriName == L"timeStamp")
488         pXFARootNode->SetCData(XFA_ATTRIBUTE_TimeStamp, wsAttriValue);
489     }
490   }
491 
492   CFDE_XMLNode* pXMLConfigDOMRoot = nullptr;
493   CXFA_Node* pXFAConfigDOMRoot = nullptr;
494   {
495     for (CFDE_XMLNode* pChildItem =
496              pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild);
497          pChildItem;
498          pChildItem = pChildItem->GetNodeItem(CFDE_XMLNode::NextSibling)) {
499       const XFA_PACKETINFO* pPacketInfo =
500           XFA_GetPacketByIndex(XFA_PACKET_Config);
501       if (!MatchNodeName(pChildItem, pPacketInfo->pName, pPacketInfo->pURI,
502                          pPacketInfo->eFlags)) {
503         continue;
504       }
505       if (pXFARootNode->GetFirstChildByName(pPacketInfo->uHash)) {
506         return nullptr;
507       }
508       pXMLConfigDOMRoot = pChildItem;
509       pXFAConfigDOMRoot =
510           ParseAsXDPPacket_Config(pXMLConfigDOMRoot, XFA_XDPPACKET_Config);
511       pXFARootNode->InsertChild(pXFAConfigDOMRoot, nullptr);
512     }
513   }
514 
515   CFDE_XMLNode* pXMLDatasetsDOMRoot = nullptr;
516   CFDE_XMLNode* pXMLFormDOMRoot = nullptr;
517   CFDE_XMLNode* pXMLTemplateDOMRoot = nullptr;
518   {
519     for (CFDE_XMLNode* pChildItem =
520              pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild);
521          pChildItem;
522          pChildItem = pChildItem->GetNodeItem(CFDE_XMLNode::NextSibling)) {
523       if (!pChildItem || pChildItem->GetType() != FDE_XMLNODE_Element)
524         continue;
525       if (pChildItem == pXMLConfigDOMRoot)
526         continue;
527 
528       CFDE_XMLElement* pElement =
529           reinterpret_cast<CFDE_XMLElement*>(pChildItem);
530       CFX_WideString wsPacketName;
531       pElement->GetLocalTagName(wsPacketName);
532       const XFA_PACKETINFO* pPacketInfo =
533           GetPacketByName(wsPacketName.AsStringC());
534       if (pPacketInfo && pPacketInfo->pURI) {
535         if (!MatchNodeName(pElement, pPacketInfo->pName, pPacketInfo->pURI,
536                            pPacketInfo->eFlags)) {
537           pPacketInfo = nullptr;
538         }
539       }
540       XFA_XDPPACKET ePacket =
541           pPacketInfo ? pPacketInfo->eName : XFA_XDPPACKET_USER;
542       if (ePacket == XFA_XDPPACKET_XDP)
543         continue;
544       if (ePacket == XFA_XDPPACKET_Datasets) {
545         if (pXMLDatasetsDOMRoot)
546           return nullptr;
547 
548         pXMLDatasetsDOMRoot = pElement;
549       } else if (ePacket == XFA_XDPPACKET_Form) {
550         if (pXMLFormDOMRoot)
551           return nullptr;
552 
553         pXMLFormDOMRoot = pElement;
554       } else if (ePacket == XFA_XDPPACKET_Template) {
555         if (pXMLTemplateDOMRoot) {
556           // Found a duplicate template packet.
557           return nullptr;
558         }
559         CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket);
560         if (pPacketNode) {
561           pXMLTemplateDOMRoot = pElement;
562           pXFARootNode->InsertChild(pPacketNode);
563         }
564       } else {
565         CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket);
566         if (pPacketNode) {
567           if (pPacketInfo &&
568               (pPacketInfo->eFlags & XFA_XDPPACKET_FLAGS_SUPPORTONE) &&
569               pXFARootNode->GetFirstChildByName(pPacketInfo->uHash)) {
570             return nullptr;
571           }
572           pXFARootNode->InsertChild(pPacketNode);
573         }
574       }
575     }
576   }
577 
578   if (!pXMLTemplateDOMRoot) {
579     // No template is found.
580     return nullptr;
581   }
582   if (pXMLDatasetsDOMRoot) {
583     CXFA_Node* pPacketNode =
584         ParseAsXDPPacket(pXMLDatasetsDOMRoot, XFA_XDPPACKET_Datasets);
585     if (pPacketNode)
586       pXFARootNode->InsertChild(pPacketNode);
587   }
588   if (pXMLFormDOMRoot) {
589     CXFA_Node* pPacketNode =
590         ParseAsXDPPacket(pXMLFormDOMRoot, XFA_XDPPACKET_Form);
591     if (pPacketNode)
592       pXFARootNode->InsertChild(pPacketNode);
593   }
594   pXFARootNode->SetXMLMappingNode(pXMLDocumentNode);
595   return pXFARootNode;
596 }
597 
ParseAsXDPPacket_Config(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)598 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Config(
599     CFDE_XMLNode* pXMLDocumentNode,
600     XFA_XDPPACKET ePacketID) {
601   if (!MatchNodeName(pXMLDocumentNode,
602                      XFA_GetPacketByIndex(XFA_PACKET_Config)->pName,
603                      XFA_GetPacketByIndex(XFA_PACKET_Config)->pURI,
604                      XFA_GetPacketByIndex(XFA_PACKET_Config)->eFlags)) {
605     return nullptr;
606   }
607   CXFA_Node* pNode =
608       m_pFactory->CreateNode(XFA_XDPPACKET_Config, XFA_Element::Config);
609   if (!pNode)
610     return nullptr;
611 
612   pNode->SetCData(XFA_ATTRIBUTE_Name,
613                   XFA_GetPacketByIndex(XFA_PACKET_Config)->pName);
614   if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, true))
615     return nullptr;
616 
617   pNode->SetXMLMappingNode(pXMLDocumentNode);
618   return pNode;
619 }
620 
ParseAsXDPPacket_TemplateForm(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)621 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_TemplateForm(
622     CFDE_XMLNode* pXMLDocumentNode,
623     XFA_XDPPACKET ePacketID) {
624   CXFA_Node* pNode = nullptr;
625   if (ePacketID == XFA_XDPPACKET_Template) {
626     if (MatchNodeName(pXMLDocumentNode,
627                       XFA_GetPacketByIndex(XFA_PACKET_Template)->pName,
628                       XFA_GetPacketByIndex(XFA_PACKET_Template)->pURI,
629                       XFA_GetPacketByIndex(XFA_PACKET_Template)->eFlags)) {
630       pNode =
631           m_pFactory->CreateNode(XFA_XDPPACKET_Template, XFA_Element::Template);
632       if (!pNode)
633         return nullptr;
634 
635       pNode->SetCData(XFA_ATTRIBUTE_Name,
636                       XFA_GetPacketByIndex(XFA_PACKET_Template)->pName);
637       if (m_bDocumentParser) {
638         CFX_WideString wsNamespaceURI;
639         CFDE_XMLElement* pXMLDocumentElement =
640             static_cast<CFDE_XMLElement*>(pXMLDocumentNode);
641         pXMLDocumentElement->GetNamespaceURI(wsNamespaceURI);
642         if (wsNamespaceURI.IsEmpty())
643           pXMLDocumentElement->GetString(L"xmlns:xfa", wsNamespaceURI);
644 
645         pNode->GetDocument()->RecognizeXFAVersionNumber(wsNamespaceURI);
646       }
647       if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, true))
648         return nullptr;
649     }
650   } else if (ePacketID == XFA_XDPPACKET_Form) {
651     if (MatchNodeName(pXMLDocumentNode,
652                       XFA_GetPacketByIndex(XFA_PACKET_Form)->pName,
653                       XFA_GetPacketByIndex(XFA_PACKET_Form)->pURI,
654                       XFA_GetPacketByIndex(XFA_PACKET_Form)->eFlags)) {
655       CFDE_XMLElement* pXMLDocumentElement =
656           static_cast<CFDE_XMLElement*>(pXMLDocumentNode);
657       CFX_WideString wsChecksum;
658       pXMLDocumentElement->GetString(L"checksum", wsChecksum);
659       if (wsChecksum.GetLength() != 28 ||
660           m_pXMLParser->m_dwCheckStatus != 0x03) {
661         return nullptr;
662       }
663       std::unique_ptr<CXFA_ChecksumContext> pChecksum(new CXFA_ChecksumContext);
664       pChecksum->StartChecksum();
665       pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[0],
666                                 m_pXMLParser->m_nSize[0]);
667       pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[1],
668                                 m_pXMLParser->m_nSize[1]);
669       pChecksum->FinishChecksum();
670       CFX_ByteString bsCheck = pChecksum->GetChecksum();
671       if (bsCheck != wsChecksum.UTF8Encode())
672         return nullptr;
673 
674       pNode = m_pFactory->CreateNode(XFA_XDPPACKET_Form, XFA_Element::Form);
675       if (!pNode)
676         return nullptr;
677 
678       pNode->SetCData(XFA_ATTRIBUTE_Name,
679                       XFA_GetPacketByIndex(XFA_PACKET_Form)->pName);
680       pNode->SetAttribute(XFA_ATTRIBUTE_Checksum, wsChecksum.AsStringC());
681       CXFA_Node* pTemplateRoot =
682           m_pRootNode->GetFirstChildByClass(XFA_Element::Template);
683       CXFA_Node* pTemplateChosen =
684           pTemplateRoot
685               ? pTemplateRoot->GetFirstChildByClass(XFA_Element::Subform)
686               : nullptr;
687       bool bUseAttribute = true;
688       if (pTemplateChosen &&
689           pTemplateChosen->GetEnum(XFA_ATTRIBUTE_RestoreState) !=
690               XFA_ATTRIBUTEENUM_Auto) {
691         bUseAttribute = false;
692       }
693       if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, bUseAttribute))
694         return nullptr;
695     }
696   }
697   if (pNode)
698     pNode->SetXMLMappingNode(pXMLDocumentNode);
699 
700   return pNode;
701 }
702 
ParseAsXDPPacket_Data(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)703 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Data(
704     CFDE_XMLNode* pXMLDocumentNode,
705     XFA_XDPPACKET ePacketID) {
706   CFDE_XMLNode* pDatasetsXMLNode = GetDataSetsFromXDP(pXMLDocumentNode);
707   if (pDatasetsXMLNode) {
708     CXFA_Node* pNode =
709         m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, XFA_Element::DataModel);
710     if (!pNode)
711       return nullptr;
712 
713     pNode->SetCData(XFA_ATTRIBUTE_Name,
714                     XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName);
715     if (!DataLoader(pNode, pDatasetsXMLNode, false))
716       return nullptr;
717 
718     pNode->SetXMLMappingNode(pDatasetsXMLNode);
719     return pNode;
720   }
721 
722   CFDE_XMLNode* pDataXMLNode = nullptr;
723   if (MatchNodeName(pXMLDocumentNode, L"data",
724                     XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI,
725                     XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) {
726     static_cast<CFDE_XMLElement*>(pXMLDocumentNode)
727         ->RemoveAttribute(L"xmlns:xfa");
728     pDataXMLNode = pXMLDocumentNode;
729   } else {
730     CFDE_XMLElement* pDataElement = new CFDE_XMLElement(L"xfa:data");
731     CFDE_XMLNode* pParentXMLNode =
732         pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::Parent);
733     if (pParentXMLNode)
734       pParentXMLNode->RemoveChildNode(pXMLDocumentNode);
735 
736     ASSERT(pXMLDocumentNode->GetType() == FDE_XMLNODE_Element);
737     if (pXMLDocumentNode->GetType() == FDE_XMLNODE_Element) {
738       static_cast<CFDE_XMLElement*>(pXMLDocumentNode)
739           ->RemoveAttribute(L"xmlns:xfa");
740     }
741     pDataElement->InsertChildNode(pXMLDocumentNode);
742     pDataXMLNode = pDataElement;
743   }
744 
745   if (pDataXMLNode) {
746     CXFA_Node* pNode =
747         m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, XFA_Element::DataGroup);
748     if (!pNode) {
749       if (pDataXMLNode != pXMLDocumentNode)
750         delete pDataXMLNode;
751       return nullptr;
752     }
753     CFX_WideString wsLocalName;
754     static_cast<CFDE_XMLElement*>(pDataXMLNode)->GetLocalTagName(wsLocalName);
755     pNode->SetCData(XFA_ATTRIBUTE_Name, wsLocalName);
756     if (!DataLoader(pNode, pDataXMLNode, true))
757       return nullptr;
758 
759     pNode->SetXMLMappingNode(pDataXMLNode);
760     if (pDataXMLNode != pXMLDocumentNode)
761       pNode->SetFlag(XFA_NodeFlag_OwnXMLNode, false);
762     return pNode;
763   }
764   return nullptr;
765 }
766 
ParseAsXDPPacket_LocaleConnectionSourceSet(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)767 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_LocaleConnectionSourceSet(
768     CFDE_XMLNode* pXMLDocumentNode,
769     XFA_XDPPACKET ePacketID) {
770   CXFA_Node* pNode = nullptr;
771   if (ePacketID == XFA_XDPPACKET_LocaleSet) {
772     if (MatchNodeName(pXMLDocumentNode,
773                       XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pName,
774                       XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pURI,
775                       XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->eFlags)) {
776       pNode = m_pFactory->CreateNode(XFA_XDPPACKET_LocaleSet,
777                                      XFA_Element::LocaleSet);
778       if (!pNode)
779         return nullptr;
780 
781       pNode->SetCData(XFA_ATTRIBUTE_Name,
782                       XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pName);
783       if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, true))
784         return nullptr;
785     }
786   } else if (ePacketID == XFA_XDPPACKET_ConnectionSet) {
787     if (MatchNodeName(pXMLDocumentNode,
788                       XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pName,
789                       XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pURI,
790                       XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->eFlags)) {
791       pNode = m_pFactory->CreateNode(XFA_XDPPACKET_ConnectionSet,
792                                      XFA_Element::ConnectionSet);
793       if (!pNode)
794         return nullptr;
795 
796       pNode->SetCData(XFA_ATTRIBUTE_Name,
797                       XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pName);
798       if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, true))
799         return nullptr;
800     }
801   } else if (ePacketID == XFA_XDPPACKET_SourceSet) {
802     if (MatchNodeName(pXMLDocumentNode,
803                       XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pName,
804                       XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pURI,
805                       XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->eFlags)) {
806       pNode = m_pFactory->CreateNode(XFA_XDPPACKET_SourceSet,
807                                      XFA_Element::SourceSet);
808       if (!pNode)
809         return nullptr;
810 
811       pNode->SetCData(XFA_ATTRIBUTE_Name,
812                       XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pName);
813       if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, true))
814         return nullptr;
815     }
816   }
817   if (pNode)
818     pNode->SetXMLMappingNode(pXMLDocumentNode);
819   return pNode;
820 }
821 
ParseAsXDPPacket_Xdc(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)822 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Xdc(
823     CFDE_XMLNode* pXMLDocumentNode,
824     XFA_XDPPACKET ePacketID) {
825   if (!MatchNodeName(pXMLDocumentNode,
826                      XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pName,
827                      XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pURI,
828                      XFA_GetPacketByIndex(XFA_PACKET_Xdc)->eFlags))
829     return nullptr;
830 
831   CXFA_Node* pNode =
832       m_pFactory->CreateNode(XFA_XDPPACKET_Xdc, XFA_Element::Xdc);
833   if (!pNode)
834     return nullptr;
835 
836   pNode->SetCData(XFA_ATTRIBUTE_Name,
837                   XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pName);
838   pNode->SetXMLMappingNode(pXMLDocumentNode);
839   return pNode;
840 }
841 
ParseAsXDPPacket_User(CFDE_XMLNode * pXMLDocumentNode,XFA_XDPPACKET ePacketID)842 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_User(
843     CFDE_XMLNode* pXMLDocumentNode,
844     XFA_XDPPACKET ePacketID) {
845   CXFA_Node* pNode =
846       m_pFactory->CreateNode(XFA_XDPPACKET_XDP, XFA_Element::Packet);
847   if (!pNode)
848     return nullptr;
849 
850   CFX_WideString wsName;
851   static_cast<CFDE_XMLElement*>(pXMLDocumentNode)->GetLocalTagName(wsName);
852   pNode->SetCData(XFA_ATTRIBUTE_Name, wsName);
853   if (!UserPacketLoader(pNode, pXMLDocumentNode))
854     return nullptr;
855 
856   pNode->SetXMLMappingNode(pXMLDocumentNode);
857   return pNode;
858 }
859 
UserPacketLoader(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLDoc)860 CXFA_Node* CXFA_SimpleParser::UserPacketLoader(CXFA_Node* pXFANode,
861                                                CFDE_XMLNode* pXMLDoc) {
862   return pXFANode;
863 }
864 
DataLoader(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLDoc,bool bDoTransform)865 CXFA_Node* CXFA_SimpleParser::DataLoader(CXFA_Node* pXFANode,
866                                          CFDE_XMLNode* pXMLDoc,
867                                          bool bDoTransform) {
868   ParseDataGroup(pXFANode, pXMLDoc, XFA_XDPPACKET_Datasets);
869   return pXFANode;
870 }
871 
NormalLoader(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLDoc,XFA_XDPPACKET ePacketID,bool bUseAttribute)872 CXFA_Node* CXFA_SimpleParser::NormalLoader(CXFA_Node* pXFANode,
873                                            CFDE_XMLNode* pXMLDoc,
874                                            XFA_XDPPACKET ePacketID,
875                                            bool bUseAttribute) {
876   bool bOneOfPropertyFound = false;
877   for (CFDE_XMLNode* pXMLChild = pXMLDoc->GetNodeItem(CFDE_XMLNode::FirstChild);
878        pXMLChild;
879        pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
880     switch (pXMLChild->GetType()) {
881       case FDE_XMLNODE_Element: {
882         CFDE_XMLElement* pXMLElement = static_cast<CFDE_XMLElement*>(pXMLChild);
883         CFX_WideString wsTagName;
884         pXMLElement->GetLocalTagName(wsTagName);
885         XFA_Element eType = XFA_GetElementTypeForName(wsTagName.AsStringC());
886         if (eType == XFA_Element::Unknown)
887           continue;
888 
889         const XFA_PROPERTY* pPropertyInfo = XFA_GetPropertyOfElement(
890             pXFANode->GetElementType(), eType, ePacketID);
891         if (pPropertyInfo &&
892             ((pPropertyInfo->uFlags &
893               (XFA_PROPERTYFLAG_OneOf | XFA_PROPERTYFLAG_DefaultOneOf)) != 0)) {
894           if (bOneOfPropertyFound)
895             break;
896 
897           bOneOfPropertyFound = true;
898         }
899         CXFA_Node* pXFAChild = m_pFactory->CreateNode(ePacketID, eType);
900         if (!pXFAChild)
901           return nullptr;
902         if (ePacketID == XFA_XDPPACKET_Config)
903           pXFAChild->SetAttribute(XFA_ATTRIBUTE_Name, wsTagName.AsStringC());
904 
905         bool IsNeedValue = true;
906         for (int32_t i = 0, count = pXMLElement->CountAttributes(); i < count;
907              i++) {
908           CFX_WideString wsAttrQualifiedName;
909           CFX_WideString wsAttrName;
910           CFX_WideString wsAttrValue;
911           pXMLElement->GetAttribute(i, wsAttrQualifiedName, wsAttrValue);
912           GetAttributeLocalName(wsAttrQualifiedName.AsStringC(), wsAttrName);
913           if (wsAttrName == L"nil" && wsAttrValue == L"true") {
914             IsNeedValue = false;
915           }
916           const XFA_ATTRIBUTEINFO* lpAttrInfo =
917               XFA_GetAttributeByName(wsAttrName.AsStringC());
918           if (!lpAttrInfo)
919             continue;
920 
921           if (!bUseAttribute && lpAttrInfo->eName != XFA_ATTRIBUTE_Name &&
922               lpAttrInfo->eName != XFA_ATTRIBUTE_Save) {
923             continue;
924           }
925           pXFAChild->SetAttribute(lpAttrInfo->eName, wsAttrValue.AsStringC());
926         }
927         pXFANode->InsertChild(pXFAChild);
928         if (eType == XFA_Element::Validate || eType == XFA_Element::Locale) {
929           if (ePacketID == XFA_XDPPACKET_Config)
930             ParseContentNode(pXFAChild, pXMLElement, ePacketID);
931           else
932             NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute);
933 
934           break;
935         }
936         switch (pXFAChild->GetObjectType()) {
937           case XFA_ObjectType::ContentNode:
938           case XFA_ObjectType::TextNode:
939           case XFA_ObjectType::NodeC:
940           case XFA_ObjectType::NodeV:
941             if (IsNeedValue)
942               ParseContentNode(pXFAChild, pXMLElement, ePacketID);
943             break;
944           default:
945             NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute);
946             break;
947         }
948       } break;
949       case FDE_XMLNODE_Instruction:
950         ParseInstruction(pXFANode, static_cast<CFDE_XMLInstruction*>(pXMLChild),
951                          ePacketID);
952         break;
953       default:
954         break;
955     }
956   }
957   return pXFANode;
958 }
959 
ParseContentNode(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLNode,XFA_XDPPACKET ePacketID)960 void CXFA_SimpleParser::ParseContentNode(CXFA_Node* pXFANode,
961                                          CFDE_XMLNode* pXMLNode,
962                                          XFA_XDPPACKET ePacketID) {
963   XFA_Element element = XFA_Element::Sharptext;
964   if (pXFANode->GetElementType() == XFA_Element::ExData) {
965     CFX_WideStringC wsContentType =
966         pXFANode->GetCData(XFA_ATTRIBUTE_ContentType);
967     if (wsContentType == L"text/html")
968       element = XFA_Element::SharpxHTML;
969     else if (wsContentType == L"text/xml")
970       element = XFA_Element::Sharpxml;
971   }
972   if (element == XFA_Element::SharpxHTML)
973     pXFANode->SetXMLMappingNode(pXMLNode);
974 
975   CFX_WideString wsValue;
976   for (CFDE_XMLNode* pXMLChild =
977            pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
978        pXMLChild;
979        pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
980     FDE_XMLNODETYPE eNodeType = pXMLChild->GetType();
981     if (eNodeType == FDE_XMLNODE_Instruction)
982       continue;
983 
984     if (element == XFA_Element::SharpxHTML) {
985       if (eNodeType != FDE_XMLNODE_Element)
986         break;
987 
988       if (XFA_RecognizeRichText(static_cast<CFDE_XMLElement*>(pXMLChild)))
989         XFA_GetPlainTextFromRichText(static_cast<CFDE_XMLElement*>(pXMLChild),
990                                      wsValue);
991     } else if (element == XFA_Element::Sharpxml) {
992       if (eNodeType != FDE_XMLNODE_Element)
993         break;
994 
995       ConvertXMLToPlainText(static_cast<CFDE_XMLElement*>(pXMLChild), wsValue);
996     } else {
997       if (eNodeType == FDE_XMLNODE_Element)
998         break;
999       if (eNodeType == FDE_XMLNODE_Text)
1000         static_cast<CFDE_XMLText*>(pXMLChild)->GetText(wsValue);
1001       else if (eNodeType == FDE_XMLNODE_CharData)
1002         static_cast<CFDE_XMLCharData*>(pXMLChild)->GetCharData(wsValue);
1003     }
1004     break;
1005   }
1006   if (!wsValue.IsEmpty()) {
1007     if (pXFANode->IsContentNode()) {
1008       CXFA_Node* pContentRawDataNode =
1009           m_pFactory->CreateNode(ePacketID, element);
1010       ASSERT(pContentRawDataNode);
1011       pContentRawDataNode->SetCData(XFA_ATTRIBUTE_Value, wsValue);
1012       pXFANode->InsertChild(pContentRawDataNode);
1013     } else {
1014       pXFANode->SetCData(XFA_ATTRIBUTE_Value, wsValue);
1015     }
1016   }
1017 }
1018 
ParseDataGroup(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLNode,XFA_XDPPACKET ePacketID)1019 void CXFA_SimpleParser::ParseDataGroup(CXFA_Node* pXFANode,
1020                                        CFDE_XMLNode* pXMLNode,
1021                                        XFA_XDPPACKET ePacketID) {
1022   for (CFDE_XMLNode* pXMLChild =
1023            pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
1024        pXMLChild;
1025        pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
1026     switch (pXMLChild->GetType()) {
1027       case FDE_XMLNODE_Element: {
1028         CFDE_XMLElement* pXMLElement = static_cast<CFDE_XMLElement*>(pXMLChild);
1029         {
1030           CFX_WideString wsNamespaceURI;
1031           GetElementTagNamespaceURI(pXMLElement, wsNamespaceURI);
1032           if (wsNamespaceURI == L"http://www.xfa.com/schema/xfa-package/" ||
1033               wsNamespaceURI == L"http://www.xfa.org/schema/xfa-package/" ||
1034               wsNamespaceURI == L"http://www.w3.org/2001/XMLSchema-instance") {
1035             continue;
1036           }
1037         }
1038 
1039         XFA_Element eNodeType = XFA_Element::DataModel;
1040         if (eNodeType == XFA_Element::DataModel) {
1041           CFX_WideString wsDataNodeAttr;
1042           if (FindAttributeWithNS(pXMLElement, L"dataNode",
1043                                   L"http://www.xfa.org/schema/xfa-data/1.0/",
1044                                   wsDataNodeAttr)) {
1045             if (wsDataNodeAttr == L"dataGroup")
1046               eNodeType = XFA_Element::DataGroup;
1047             else if (wsDataNodeAttr == L"dataValue")
1048               eNodeType = XFA_Element::DataValue;
1049           }
1050         }
1051         CFX_WideString wsContentType;
1052         if (eNodeType == XFA_Element::DataModel) {
1053           if (FindAttributeWithNS(pXMLElement, L"contentType",
1054                                   L"http://www.xfa.org/schema/xfa-data/1.0/",
1055                                   wsContentType)) {
1056             if (!wsContentType.IsEmpty())
1057               eNodeType = XFA_Element::DataValue;
1058           }
1059         }
1060         if (eNodeType == XFA_Element::DataModel) {
1061           for (CFDE_XMLNode* pXMLDataChild =
1062                    pXMLElement->GetNodeItem(CFDE_XMLNode::FirstChild);
1063                pXMLDataChild; pXMLDataChild = pXMLDataChild->GetNodeItem(
1064                                   CFDE_XMLNode::NextSibling)) {
1065             if (pXMLDataChild->GetType() == FDE_XMLNODE_Element) {
1066               if (!XFA_RecognizeRichText(
1067                       static_cast<CFDE_XMLElement*>(pXMLDataChild))) {
1068                 eNodeType = XFA_Element::DataGroup;
1069                 break;
1070               }
1071             }
1072           }
1073         }
1074         if (eNodeType == XFA_Element::DataModel)
1075           eNodeType = XFA_Element::DataValue;
1076 
1077         CXFA_Node* pXFAChild =
1078             m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, eNodeType);
1079         if (!pXFAChild)
1080           return;
1081 
1082         CFX_WideString wsNodeName;
1083         pXMLElement->GetLocalTagName(wsNodeName);
1084         pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeName);
1085         bool bNeedValue = true;
1086         for (int32_t i = 0; i < pXMLElement->CountAttributes(); ++i) {
1087           CFX_WideString wsQualifiedName;
1088           CFX_WideString wsValue;
1089           CFX_WideString wsName;
1090           CFX_WideString wsNS;
1091           pXMLElement->GetAttribute(i, wsQualifiedName, wsValue);
1092           if (!ResolveAttribute(pXMLElement, wsQualifiedName.AsStringC(),
1093                                 wsName, wsNS)) {
1094             continue;
1095           }
1096           if (wsName == L"nil" && wsValue == L"true") {
1097             bNeedValue = false;
1098             continue;
1099           }
1100           if (wsNS == L"http://www.xfa.com/schema/xfa-package/" ||
1101               wsNS == L"http://www.xfa.org/schema/xfa-package/" ||
1102               wsNS == L"http://www.w3.org/2001/XMLSchema-instance" ||
1103               wsNS == L"http://www.xfa.org/schema/xfa-data/1.0/") {
1104             continue;
1105           }
1106           CXFA_Node* pXFAMetaData = m_pFactory->CreateNode(
1107               XFA_XDPPACKET_Datasets, XFA_Element::DataValue);
1108           if (!pXFAMetaData)
1109             return;
1110 
1111           pXFAMetaData->SetCData(XFA_ATTRIBUTE_Name, wsName);
1112           pXFAMetaData->SetCData(XFA_ATTRIBUTE_QualifiedName, wsQualifiedName);
1113           pXFAMetaData->SetCData(XFA_ATTRIBUTE_Value, wsValue);
1114           pXFAMetaData->SetEnum(XFA_ATTRIBUTE_Contains,
1115                                 XFA_ATTRIBUTEENUM_MetaData);
1116           pXFAChild->InsertChild(pXFAMetaData);
1117           pXFAMetaData->SetXMLMappingNode(pXMLElement);
1118           pXFAMetaData->SetFlag(XFA_NodeFlag_Initialized, false);
1119         }
1120 
1121         if (!bNeedValue) {
1122           CFX_WideString wsNilName(L"xsi:nil");
1123           pXMLElement->RemoveAttribute(wsNilName.c_str());
1124         }
1125         pXFANode->InsertChild(pXFAChild);
1126         if (eNodeType == XFA_Element::DataGroup)
1127           ParseDataGroup(pXFAChild, pXMLElement, ePacketID);
1128         else if (bNeedValue)
1129           ParseDataValue(pXFAChild, pXMLChild, XFA_XDPPACKET_Datasets);
1130 
1131         pXFAChild->SetXMLMappingNode(pXMLElement);
1132         pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1133         continue;
1134       }
1135       case FDE_XMLNODE_CharData: {
1136         CFDE_XMLCharData* pXMLCharData =
1137             static_cast<CFDE_XMLCharData*>(pXMLChild);
1138         CFX_WideString wsCharData;
1139         pXMLCharData->GetCharData(wsCharData);
1140         if (IsStringAllWhitespace(wsCharData))
1141           continue;
1142 
1143         CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets,
1144                                                       XFA_Element::DataValue);
1145         if (!pXFAChild)
1146           return;
1147 
1148         pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCharData);
1149         pXFANode->InsertChild(pXFAChild);
1150         pXFAChild->SetXMLMappingNode(pXMLCharData);
1151         pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1152         continue;
1153       }
1154       case FDE_XMLNODE_Text: {
1155         CFDE_XMLText* pXMLText = static_cast<CFDE_XMLText*>(pXMLChild);
1156         CFX_WideString wsText;
1157         pXMLText->GetText(wsText);
1158         if (IsStringAllWhitespace(wsText))
1159           continue;
1160 
1161         CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets,
1162                                                       XFA_Element::DataValue);
1163         if (!pXFAChild)
1164           return;
1165 
1166         pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsText);
1167         pXFANode->InsertChild(pXFAChild);
1168         pXFAChild->SetXMLMappingNode(pXMLText);
1169         pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1170         continue;
1171       }
1172       default:
1173         continue;
1174     }
1175   }
1176 }
1177 
ParseDataValue(CXFA_Node * pXFANode,CFDE_XMLNode * pXMLNode,XFA_XDPPACKET ePacketID)1178 void CXFA_SimpleParser::ParseDataValue(CXFA_Node* pXFANode,
1179                                        CFDE_XMLNode* pXMLNode,
1180                                        XFA_XDPPACKET ePacketID) {
1181   CFX_WideTextBuf wsValueTextBuf;
1182   CFX_WideTextBuf wsCurValueTextBuf;
1183   bool bMarkAsCompound = false;
1184   CFDE_XMLNode* pXMLCurValueNode = nullptr;
1185   for (CFDE_XMLNode* pXMLChild =
1186            pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
1187        pXMLChild;
1188        pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
1189     FDE_XMLNODETYPE eNodeType = pXMLChild->GetType();
1190     if (eNodeType == FDE_XMLNODE_Instruction)
1191       continue;
1192 
1193     CFX_WideString wsText;
1194     if (eNodeType == FDE_XMLNODE_Text) {
1195       static_cast<CFDE_XMLText*>(pXMLChild)->GetText(wsText);
1196       if (!pXMLCurValueNode)
1197         pXMLCurValueNode = pXMLChild;
1198 
1199       wsCurValueTextBuf << wsText;
1200     } else if (eNodeType == FDE_XMLNODE_CharData) {
1201       static_cast<CFDE_XMLCharData*>(pXMLChild)->GetCharData(wsText);
1202       if (!pXMLCurValueNode)
1203         pXMLCurValueNode = pXMLChild;
1204 
1205       wsCurValueTextBuf << wsText;
1206     } else if (XFA_RecognizeRichText(
1207                    static_cast<CFDE_XMLElement*>(pXMLChild))) {
1208       XFA_GetPlainTextFromRichText(static_cast<CFDE_XMLElement*>(pXMLChild),
1209                                    wsText);
1210       if (!pXMLCurValueNode)
1211         pXMLCurValueNode = pXMLChild;
1212 
1213       wsCurValueTextBuf << wsText;
1214     } else {
1215       bMarkAsCompound = true;
1216       if (pXMLCurValueNode) {
1217         CFX_WideString wsCurValue = wsCurValueTextBuf.MakeString();
1218         if (!wsCurValue.IsEmpty()) {
1219           CXFA_Node* pXFAChild =
1220               m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
1221           if (!pXFAChild)
1222             return;
1223 
1224           pXFAChild->SetCData(XFA_ATTRIBUTE_Name, L"");
1225           pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCurValue);
1226           pXFANode->InsertChild(pXFAChild);
1227           pXFAChild->SetXMLMappingNode(pXMLCurValueNode);
1228           pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1229           wsValueTextBuf << wsCurValue;
1230           wsCurValueTextBuf.Clear();
1231         }
1232         pXMLCurValueNode = nullptr;
1233       }
1234       CXFA_Node* pXFAChild =
1235           m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
1236       if (!pXFAChild)
1237         return;
1238 
1239       CFX_WideString wsNodeStr;
1240       static_cast<CFDE_XMLElement*>(pXMLChild)->GetLocalTagName(wsNodeStr);
1241       pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeStr);
1242       ParseDataValue(pXFAChild, pXMLChild, ePacketID);
1243       pXFANode->InsertChild(pXFAChild);
1244       pXFAChild->SetXMLMappingNode(pXMLChild);
1245       pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1246       CFX_WideStringC wsCurValue = pXFAChild->GetCData(XFA_ATTRIBUTE_Value);
1247       wsValueTextBuf << wsCurValue;
1248     }
1249   }
1250   if (pXMLCurValueNode) {
1251     CFX_WideString wsCurValue = wsCurValueTextBuf.MakeString();
1252     if (!wsCurValue.IsEmpty()) {
1253       if (bMarkAsCompound) {
1254         CXFA_Node* pXFAChild =
1255             m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
1256         if (!pXFAChild)
1257           return;
1258 
1259         pXFAChild->SetCData(XFA_ATTRIBUTE_Name, L"");
1260         pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCurValue);
1261         pXFANode->InsertChild(pXFAChild);
1262         pXFAChild->SetXMLMappingNode(pXMLCurValueNode);
1263         pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
1264       }
1265       wsValueTextBuf << wsCurValue;
1266       wsCurValueTextBuf.Clear();
1267     }
1268     pXMLCurValueNode = nullptr;
1269   }
1270   CFX_WideString wsNodeValue = wsValueTextBuf.MakeString();
1271   pXFANode->SetCData(XFA_ATTRIBUTE_Value, wsNodeValue);
1272 }
1273 
ParseInstruction(CXFA_Node * pXFANode,CFDE_XMLInstruction * pXMLInstruction,XFA_XDPPACKET ePacketID)1274 void CXFA_SimpleParser::ParseInstruction(CXFA_Node* pXFANode,
1275                                          CFDE_XMLInstruction* pXMLInstruction,
1276                                          XFA_XDPPACKET ePacketID) {
1277   if (!m_bDocumentParser)
1278     return;
1279 
1280   CFX_WideString wsTargetName;
1281   pXMLInstruction->GetTargetName(wsTargetName);
1282   if (wsTargetName == L"originalXFAVersion") {
1283     CFX_WideString wsData;
1284     if (pXMLInstruction->GetData(0, wsData) &&
1285         (pXFANode->GetDocument()->RecognizeXFAVersionNumber(wsData) !=
1286          XFA_VERSION_UNKNOWN)) {
1287       wsData.clear();
1288       if (pXMLInstruction->GetData(1, wsData) &&
1289           wsData == L"v2.7-scripting:1") {
1290         pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_Scripting, true);
1291       }
1292     }
1293   } else if (wsTargetName == L"acrobat") {
1294     CFX_WideString wsData;
1295     if (pXMLInstruction->GetData(0, wsData) && wsData == L"JavaScript") {
1296       if (pXMLInstruction->GetData(1, wsData) && wsData == L"strictScoping") {
1297         pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_StrictScoping, true);
1298       }
1299     }
1300   }
1301 }
1302 
CloseParser()1303 void CXFA_SimpleParser::CloseParser() {
1304   m_pXMLDoc.reset();
1305   m_pStream.Reset();
1306 }
1307