1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fxcrt/xml/cfx_xmlparser.h"
8
9 #include "core/fxcrt/xml/cfx_xmlchardata.h"
10 #include "core/fxcrt/xml/cfx_xmlelement.h"
11 #include "core/fxcrt/xml/cfx_xmlinstruction.h"
12 #include "core/fxcrt/xml/cfx_xmlnode.h"
13 #include "core/fxcrt/xml/cfx_xmltext.h"
14 #include "third_party/base/ptr_util.h"
15
CFX_XMLParser(CFX_XMLNode * pParent,const RetainPtr<CFX_SeekableStreamProxy> & pStream)16 CFX_XMLParser::CFX_XMLParser(CFX_XMLNode* pParent,
17 const RetainPtr<CFX_SeekableStreamProxy>& pStream)
18 : m_nElementStart(0),
19 m_dwCheckStatus(0),
20 m_dwCurrentCheckStatus(0),
21 m_pStream(pStream),
22 m_pParser(pdfium::MakeUnique<CFX_XMLSyntaxParser>(m_pStream)),
23 m_pParent(pParent),
24 m_pChild(nullptr),
25 m_syntaxParserResult(FX_XmlSyntaxResult::None) {
26 ASSERT(m_pParent && m_pStream);
27 m_NodeStack.push(m_pParent);
28 }
29
~CFX_XMLParser()30 CFX_XMLParser::~CFX_XMLParser() {}
31
DoParser()32 int32_t CFX_XMLParser::DoParser() {
33 if (m_syntaxParserResult == FX_XmlSyntaxResult::Error)
34 return -1;
35 if (m_syntaxParserResult == FX_XmlSyntaxResult::EndOfString)
36 return 100;
37
38 int32_t iCount = 0;
39 while (true) {
40 m_syntaxParserResult = m_pParser->DoSyntaxParse();
41 switch (m_syntaxParserResult) {
42 case FX_XmlSyntaxResult::InstructionOpen:
43 break;
44 case FX_XmlSyntaxResult::InstructionClose:
45 if (m_pChild) {
46 if (m_pChild->GetType() != FX_XMLNODE_Instruction) {
47 m_syntaxParserResult = FX_XmlSyntaxResult::Error;
48 break;
49 }
50 }
51 m_pChild = m_pParent;
52 break;
53 case FX_XmlSyntaxResult::ElementOpen:
54 if (m_dwCheckStatus != 0x03 && m_NodeStack.size() == 2)
55 m_nElementStart = m_pParser->GetCurrentPos() - 1;
56 break;
57 case FX_XmlSyntaxResult::ElementBreak:
58 break;
59 case FX_XmlSyntaxResult::ElementClose:
60 if (m_pChild->GetType() != FX_XMLNODE_Element) {
61 m_syntaxParserResult = FX_XmlSyntaxResult::Error;
62 break;
63 }
64 m_ws1 = m_pParser->GetTagName();
65 m_ws2 = static_cast<CFX_XMLElement*>(m_pChild)->GetName();
66 if (m_ws1.GetLength() > 0 && m_ws1 != m_ws2) {
67 m_syntaxParserResult = FX_XmlSyntaxResult::Error;
68 break;
69 }
70 if (!m_NodeStack.empty())
71 m_NodeStack.pop();
72 if (m_NodeStack.empty()) {
73 m_syntaxParserResult = FX_XmlSyntaxResult::Error;
74 break;
75 }
76 if (m_dwCurrentCheckStatus != 0 && m_NodeStack.size() == 2) {
77 m_nSize[m_dwCurrentCheckStatus - 1] =
78 m_pParser->GetCurrentBinaryPos() -
79 m_nStart[m_dwCurrentCheckStatus - 1];
80 m_dwCurrentCheckStatus = 0;
81 }
82 m_pParent = m_NodeStack.top();
83 m_pChild = m_pParent;
84 iCount++;
85 break;
86 case FX_XmlSyntaxResult::TargetName:
87 m_ws1 = m_pParser->GetTargetName();
88 if (m_ws1 == L"originalXFAVersion" || m_ws1 == L"acrobat") {
89 m_pChild = new CFX_XMLInstruction(m_ws1);
90 m_pParent->InsertChildNode(m_pChild);
91 } else {
92 m_pChild = nullptr;
93 }
94 m_ws1.clear();
95 break;
96 case FX_XmlSyntaxResult::TagName:
97 m_ws1 = m_pParser->GetTagName();
98 m_pChild = new CFX_XMLElement(m_ws1);
99 m_pParent->InsertChildNode(m_pChild);
100 m_NodeStack.push(m_pChild);
101 m_pParent = m_pChild;
102
103 if (m_dwCheckStatus != 0x03 && m_NodeStack.size() == 3) {
104 WideString wsTag =
105 static_cast<CFX_XMLElement*>(m_pChild)->GetLocalTagName();
106 if (wsTag == L"template") {
107 m_dwCheckStatus |= 0x01;
108 m_dwCurrentCheckStatus = 0x01;
109 m_nStart[0] = m_pParser->GetCurrentBinaryPos() -
110 (m_pParser->GetCurrentPos() - m_nElementStart);
111 } else if (wsTag == L"datasets") {
112 m_dwCheckStatus |= 0x02;
113 m_dwCurrentCheckStatus = 0x02;
114 m_nStart[1] = m_pParser->GetCurrentBinaryPos() -
115 (m_pParser->GetCurrentPos() - m_nElementStart);
116 }
117 }
118 break;
119 case FX_XmlSyntaxResult::AttriName:
120 m_ws1 = m_pParser->GetAttributeName();
121 break;
122 case FX_XmlSyntaxResult::AttriValue:
123 if (m_pChild) {
124 m_ws2 = m_pParser->GetAttributeName();
125 if (m_pChild->GetType() == FX_XMLNODE_Element)
126 static_cast<CFX_XMLElement*>(m_pChild)->SetString(m_ws1, m_ws2);
127 }
128 m_ws1.clear();
129 break;
130 case FX_XmlSyntaxResult::Text:
131 m_ws1 = m_pParser->GetTextData();
132 m_pChild = new CFX_XMLText(m_ws1);
133 m_pParent->InsertChildNode(m_pChild);
134 m_pChild = m_pParent;
135 break;
136 case FX_XmlSyntaxResult::CData:
137 m_ws1 = m_pParser->GetTextData();
138 m_pChild = new CFX_XMLCharData(m_ws1);
139 m_pParent->InsertChildNode(m_pChild);
140 m_pChild = m_pParent;
141 break;
142 case FX_XmlSyntaxResult::TargetData:
143 if (m_pChild) {
144 if (m_pChild->GetType() != FX_XMLNODE_Instruction) {
145 m_syntaxParserResult = FX_XmlSyntaxResult::Error;
146 break;
147 }
148 auto* instruction = static_cast<CFX_XMLInstruction*>(m_pChild);
149 if (!m_ws1.IsEmpty())
150 instruction->AppendData(m_ws1);
151 instruction->AppendData(m_pParser->GetTargetData());
152 }
153 m_ws1.clear();
154 break;
155 default:
156 break;
157 }
158 if (m_syntaxParserResult == FX_XmlSyntaxResult::Error ||
159 m_syntaxParserResult == FX_XmlSyntaxResult::EndOfString) {
160 break;
161 }
162 }
163 return (m_syntaxParserResult == FX_XmlSyntaxResult::Error ||
164 m_NodeStack.size() != 1)
165 ? -1
166 : m_pParser->GetStatus();
167 }
168