// Copyright 2016 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "xfa/fxfa/parser/cxfa_xml_parser.h"
8 
CXFA_XMLParser(CFDE_XMLNode * pRoot,const CFX_RetainPtr<IFGAS_Stream> & pStream)9 CXFA_XMLParser::CXFA_XMLParser(CFDE_XMLNode* pRoot,
10                                const CFX_RetainPtr<IFGAS_Stream>& pStream)
11     : m_nElementStart(0),
12       m_dwCheckStatus(0),
13       m_dwCurrentCheckStatus(0),
14       m_pRoot(pRoot),
15       m_pStream(pStream),
16       m_pParser(new CFDE_XMLSyntaxParser),
17       m_pParent(pRoot),
18       m_pChild(nullptr),
19       m_NodeStack(16),
20       m_syntaxParserResult(FDE_XmlSyntaxResult::None) {
21   ASSERT(m_pParent && m_pStream);
22   m_NodeStack.Push(m_pParent);
23   m_pParser->Init(m_pStream, 32 * 1024, 1024 * 1024);
24 }
25 
~CXFA_XMLParser()26 CXFA_XMLParser::~CXFA_XMLParser() {
27   m_NodeStack.RemoveAll(false);
28   m_ws1.clear();
29   m_ws2.clear();
30 }
31 
DoParser(IFX_Pause * pPause)32 int32_t CXFA_XMLParser::DoParser(IFX_Pause* pPause) {
33   if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error)
34     return -1;
35   if (m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString)
36     return 100;
37 
38   int32_t iCount = 0;
39   while (true) {
40     m_syntaxParserResult = m_pParser->DoSyntaxParse();
41     switch (m_syntaxParserResult) {
42       case FDE_XmlSyntaxResult::InstructionOpen:
43         break;
44       case FDE_XmlSyntaxResult::InstructionClose:
45         if (m_pChild) {
46           if (m_pChild->GetType() != FDE_XMLNODE_Instruction) {
47             m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
48             break;
49           }
50         }
51         m_pChild = m_pParent;
52         break;
53       case FDE_XmlSyntaxResult::ElementOpen:
54         if (m_dwCheckStatus != 0x03 && m_NodeStack.GetSize() == 2) {
55           m_nElementStart = m_pParser->GetCurrentPos() - 1;
56         }
57         break;
58       case FDE_XmlSyntaxResult::ElementBreak:
59         break;
60       case FDE_XmlSyntaxResult::ElementClose:
61         if (m_pChild->GetType() != FDE_XMLNODE_Element) {
62           m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
63           break;
64         }
65         m_pParser->GetTagName(m_ws1);
66         static_cast<CFDE_XMLElement*>(m_pChild)->GetTagName(m_ws2);
67         if (m_ws1.GetLength() > 0 && m_ws1 != m_ws2) {
68           m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
69           break;
70         }
71         m_NodeStack.Pop();
72         if (m_NodeStack.GetSize() < 1) {
73           m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
74           break;
75         } else if (m_dwCurrentCheckStatus != 0 && m_NodeStack.GetSize() == 2) {
76           m_nSize[m_dwCurrentCheckStatus - 1] =
77               m_pParser->GetCurrentBinaryPos() -
78               m_nStart[m_dwCurrentCheckStatus - 1];
79           m_dwCurrentCheckStatus = 0;
80         }
81 
82         m_pParent = static_cast<CFDE_XMLNode*>(*m_NodeStack.GetTopElement());
83         m_pChild = m_pParent;
84         iCount++;
85         break;
86       case FDE_XmlSyntaxResult::TargetName:
87         m_pParser->GetTargetName(m_ws1);
88         if (m_ws1 == L"originalXFAVersion" || m_ws1 == L"acrobat") {
89           m_pChild = new CFDE_XMLInstruction(m_ws1);
90           m_pParent->InsertChildNode(m_pChild);
91         } else {
92           m_pChild = nullptr;
93         }
94         m_ws1.clear();
95         break;
96       case FDE_XmlSyntaxResult::TagName:
97         m_pParser->GetTagName(m_ws1);
98         m_pChild = new CFDE_XMLElement(m_ws1);
99         m_pParent->InsertChildNode(m_pChild);
100         m_NodeStack.Push(m_pChild);
101         m_pParent = m_pChild;
102 
103         if (m_dwCheckStatus != 0x03 && m_NodeStack.GetSize() == 3) {
104           CFX_WideString wsTag;
105           static_cast<CFDE_XMLElement*>(m_pChild)->GetLocalTagName(wsTag);
106           if (wsTag == L"template") {
107             m_dwCheckStatus |= 0x01;
108             m_dwCurrentCheckStatus = 0x01;
109             m_nStart[0] = m_pParser->GetCurrentBinaryPos() -
110                           (m_pParser->GetCurrentPos() - m_nElementStart);
111           } else if (wsTag == L"datasets") {
112             m_dwCheckStatus |= 0x02;
113             m_dwCurrentCheckStatus = 0x02;
114             m_nStart[1] = m_pParser->GetCurrentBinaryPos() -
115                           (m_pParser->GetCurrentPos() - m_nElementStart);
116           }
117         }
118         break;
119       case FDE_XmlSyntaxResult::AttriName:
120         m_pParser->GetAttributeName(m_ws1);
121         break;
122       case FDE_XmlSyntaxResult::AttriValue:
123         if (m_pChild) {
124           m_pParser->GetAttributeName(m_ws2);
125           if (m_pChild->GetType() == FDE_XMLNODE_Element) {
126             static_cast<CFDE_XMLElement*>(m_pChild)->SetString(m_ws1, m_ws2);
127           }
128         }
129         m_ws1.clear();
130         break;
131       case FDE_XmlSyntaxResult::Text:
132         m_pParser->GetTextData(m_ws1);
133         m_pChild = new CFDE_XMLText(m_ws1);
134         m_pParent->InsertChildNode(m_pChild);
135         m_pChild = m_pParent;
136         break;
137       case FDE_XmlSyntaxResult::CData:
138         m_pParser->GetTextData(m_ws1);
139         m_pChild = new CFDE_XMLCharData(m_ws1);
140         m_pParent->InsertChildNode(m_pChild);
141         m_pChild = m_pParent;
142         break;
143       case FDE_XmlSyntaxResult::TargetData:
144         if (m_pChild) {
145           if (m_pChild->GetType() != FDE_XMLNODE_Instruction) {
146             m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
147             break;
148           }
149           if (!m_ws1.IsEmpty()) {
150             static_cast<CFDE_XMLInstruction*>(m_pChild)->AppendData(m_ws1);
151           }
152           m_pParser->GetTargetData(m_ws1);
153           static_cast<CFDE_XMLInstruction*>(m_pChild)->AppendData(m_ws1);
154         }
155         m_ws1.clear();
156         break;
157       default:
158         break;
159     }
160     if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error ||
161         m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) {
162       break;
163     }
164     if (pPause && iCount > 500 && pPause->NeedToPauseNow()) {
165       break;
166     }
167   }
168   return (m_syntaxParserResult == FDE_XmlSyntaxResult::Error ||
169           m_NodeStack.GetSize() != 1)
170              ? -1
171              : m_pParser->GetStatus();
172 }
173