1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fxcrt/xml/cfx_xmlparser.h"
8 
9 #include "core/fxcrt/xml/cfx_xmlchardata.h"
10 #include "core/fxcrt/xml/cfx_xmlelement.h"
11 #include "core/fxcrt/xml/cfx_xmlinstruction.h"
12 #include "core/fxcrt/xml/cfx_xmlnode.h"
13 #include "core/fxcrt/xml/cfx_xmltext.h"
14 #include "third_party/base/ptr_util.h"
15 
CFX_XMLParser(CFX_XMLNode * pParent,const RetainPtr<CFX_SeekableStreamProxy> & pStream)16 CFX_XMLParser::CFX_XMLParser(CFX_XMLNode* pParent,
17                              const RetainPtr<CFX_SeekableStreamProxy>& pStream)
18     : m_nElementStart(0),
19       m_dwCheckStatus(0),
20       m_dwCurrentCheckStatus(0),
21       m_pStream(pStream),
22       m_pParser(pdfium::MakeUnique<CFX_XMLSyntaxParser>(m_pStream)),
23       m_pParent(pParent),
24       m_pChild(nullptr),
25       m_syntaxParserResult(FX_XmlSyntaxResult::None) {
26   ASSERT(m_pParent && m_pStream);
27   m_NodeStack.push(m_pParent);
28 }
29 
~CFX_XMLParser()30 CFX_XMLParser::~CFX_XMLParser() {}
31 
DoParser()32 int32_t CFX_XMLParser::DoParser() {
33   if (m_syntaxParserResult == FX_XmlSyntaxResult::Error)
34     return -1;
35   if (m_syntaxParserResult == FX_XmlSyntaxResult::EndOfString)
36     return 100;
37 
38   int32_t iCount = 0;
39   while (true) {
40     m_syntaxParserResult = m_pParser->DoSyntaxParse();
41     switch (m_syntaxParserResult) {
42       case FX_XmlSyntaxResult::InstructionOpen:
43         break;
44       case FX_XmlSyntaxResult::InstructionClose:
45         if (m_pChild) {
46           if (m_pChild->GetType() != FX_XMLNODE_Instruction) {
47             m_syntaxParserResult = FX_XmlSyntaxResult::Error;
48             break;
49           }
50         }
51         m_pChild = m_pParent;
52         break;
53       case FX_XmlSyntaxResult::ElementOpen:
54         if (m_dwCheckStatus != 0x03 && m_NodeStack.size() == 2)
55           m_nElementStart = m_pParser->GetCurrentPos() - 1;
56         break;
57       case FX_XmlSyntaxResult::ElementBreak:
58         break;
59       case FX_XmlSyntaxResult::ElementClose:
60         if (m_pChild->GetType() != FX_XMLNODE_Element) {
61           m_syntaxParserResult = FX_XmlSyntaxResult::Error;
62           break;
63         }
64         m_ws1 = m_pParser->GetTagName();
65         m_ws2 = static_cast<CFX_XMLElement*>(m_pChild)->GetName();
66         if (m_ws1.GetLength() > 0 && m_ws1 != m_ws2) {
67           m_syntaxParserResult = FX_XmlSyntaxResult::Error;
68           break;
69         }
70         if (!m_NodeStack.empty())
71           m_NodeStack.pop();
72         if (m_NodeStack.empty()) {
73           m_syntaxParserResult = FX_XmlSyntaxResult::Error;
74           break;
75         }
76         if (m_dwCurrentCheckStatus != 0 && m_NodeStack.size() == 2) {
77           m_nSize[m_dwCurrentCheckStatus - 1] =
78               m_pParser->GetCurrentBinaryPos() -
79               m_nStart[m_dwCurrentCheckStatus - 1];
80           m_dwCurrentCheckStatus = 0;
81         }
82         m_pParent = m_NodeStack.top();
83         m_pChild = m_pParent;
84         iCount++;
85         break;
86       case FX_XmlSyntaxResult::TargetName:
87         m_ws1 = m_pParser->GetTargetName();
88         if (m_ws1 == L"originalXFAVersion" || m_ws1 == L"acrobat") {
89           m_pChild = new CFX_XMLInstruction(m_ws1);
90           m_pParent->InsertChildNode(m_pChild);
91         } else {
92           m_pChild = nullptr;
93         }
94         m_ws1.clear();
95         break;
96       case FX_XmlSyntaxResult::TagName:
97         m_ws1 = m_pParser->GetTagName();
98         m_pChild = new CFX_XMLElement(m_ws1);
99         m_pParent->InsertChildNode(m_pChild);
100         m_NodeStack.push(m_pChild);
101         m_pParent = m_pChild;
102 
103         if (m_dwCheckStatus != 0x03 && m_NodeStack.size() == 3) {
104           WideString wsTag =
105               static_cast<CFX_XMLElement*>(m_pChild)->GetLocalTagName();
106           if (wsTag == L"template") {
107             m_dwCheckStatus |= 0x01;
108             m_dwCurrentCheckStatus = 0x01;
109             m_nStart[0] = m_pParser->GetCurrentBinaryPos() -
110                           (m_pParser->GetCurrentPos() - m_nElementStart);
111           } else if (wsTag == L"datasets") {
112             m_dwCheckStatus |= 0x02;
113             m_dwCurrentCheckStatus = 0x02;
114             m_nStart[1] = m_pParser->GetCurrentBinaryPos() -
115                           (m_pParser->GetCurrentPos() - m_nElementStart);
116           }
117         }
118         break;
119       case FX_XmlSyntaxResult::AttriName:
120         m_ws1 = m_pParser->GetAttributeName();
121         break;
122       case FX_XmlSyntaxResult::AttriValue:
123         if (m_pChild) {
124           m_ws2 = m_pParser->GetAttributeName();
125           if (m_pChild->GetType() == FX_XMLNODE_Element)
126             static_cast<CFX_XMLElement*>(m_pChild)->SetString(m_ws1, m_ws2);
127         }
128         m_ws1.clear();
129         break;
130       case FX_XmlSyntaxResult::Text:
131         m_ws1 = m_pParser->GetTextData();
132         m_pChild = new CFX_XMLText(m_ws1);
133         m_pParent->InsertChildNode(m_pChild);
134         m_pChild = m_pParent;
135         break;
136       case FX_XmlSyntaxResult::CData:
137         m_ws1 = m_pParser->GetTextData();
138         m_pChild = new CFX_XMLCharData(m_ws1);
139         m_pParent->InsertChildNode(m_pChild);
140         m_pChild = m_pParent;
141         break;
142       case FX_XmlSyntaxResult::TargetData:
143         if (m_pChild) {
144           if (m_pChild->GetType() != FX_XMLNODE_Instruction) {
145             m_syntaxParserResult = FX_XmlSyntaxResult::Error;
146             break;
147           }
148           auto* instruction = static_cast<CFX_XMLInstruction*>(m_pChild);
149           if (!m_ws1.IsEmpty())
150             instruction->AppendData(m_ws1);
151           instruction->AppendData(m_pParser->GetTargetData());
152         }
153         m_ws1.clear();
154         break;
155       default:
156         break;
157     }
158     if (m_syntaxParserResult == FX_XmlSyntaxResult::Error ||
159         m_syntaxParserResult == FX_XmlSyntaxResult::EndOfString) {
160       break;
161     }
162   }
163   return (m_syntaxParserResult == FX_XmlSyntaxResult::Error ||
164           m_NodeStack.size() != 1)
165              ? -1
166              : m_pParser->GetStatus();
167 }
168