1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #ifndef XFA_FDE_XML_FDE_XML_IMP_H_
8 #define XFA_FDE_XML_FDE_XML_IMP_H_
9 
10 #include <memory>
11 #include <vector>
12 
13 #include "core/fxcrt/fx_system.h"
14 #include "xfa/fde/xml/fde_xml.h"
15 #include "xfa/fgas/crt/fgas_stream.h"
16 #include "xfa/fgas/crt/fgas_utils.h"
17 
18 class CFDE_BlockBuffer;
19 class CFDE_XMLInstruction;
20 class CFDE_XMLElement;
21 class CFDE_XMLText;
22 class CFDE_XMLDoc;
23 class CFDE_XMLDOMParser;
24 class CFDE_XMLSyntaxParser;
25 class IFDE_XMLParser;
26 
27 class CFDE_XMLNode {
28  public:
29   enum NodeItem {
30     Root = 0,
31     Parent,
32     FirstSibling,
33     PriorSibling,
34     NextSibling,
35     LastSibling,
36     FirstNeighbor,
37     PriorNeighbor,
38     NextNeighbor,
39     LastNeighbor,
40     FirstChild,
41     LastChild
42   };
43 
44   CFDE_XMLNode();
45   virtual ~CFDE_XMLNode();
46 
47   virtual FDE_XMLNODETYPE GetType() const;
48   virtual CFDE_XMLNode* Clone(bool bRecursive);
49 
50   int32_t CountChildNodes() const;
51   CFDE_XMLNode* GetChildNode(int32_t index) const;
52   int32_t GetChildNodeIndex(CFDE_XMLNode* pNode) const;
53   int32_t InsertChildNode(CFDE_XMLNode* pNode, int32_t index = -1);
54   void RemoveChildNode(CFDE_XMLNode* pNode);
55   void DeleteChildren();
56   void CloneChildren(CFDE_XMLNode* pClone);
57 
58   CFDE_XMLNode* GetPath(const FX_WCHAR* pPath,
59                         int32_t iLength = -1,
60                         bool bQualifiedName = true) const;
61 
62   int32_t GetNodeLevel() const;
63   CFDE_XMLNode* GetNodeItem(CFDE_XMLNode::NodeItem eItem) const;
64   bool InsertNodeItem(CFDE_XMLNode::NodeItem eItem, CFDE_XMLNode* pNode);
65   CFDE_XMLNode* RemoveNodeItem(CFDE_XMLNode::NodeItem eItem);
66 
67   void SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream>& pXMLStream);
68 
69   CFDE_XMLNode* m_pParent;
70   CFDE_XMLNode* m_pChild;
71   CFDE_XMLNode* m_pPrior;
72   CFDE_XMLNode* m_pNext;
73 };
74 
75 class CFDE_XMLInstruction : public CFDE_XMLNode {
76  public:
77   explicit CFDE_XMLInstruction(const CFX_WideString& wsTarget);
78   ~CFDE_XMLInstruction() override;
79 
80   // CFDE_XMLNode
81   FDE_XMLNODETYPE GetType() const override;
82   CFDE_XMLNode* Clone(bool bRecursive) override;
83 
GetTargetName(CFX_WideString & wsTarget)84   void GetTargetName(CFX_WideString& wsTarget) const { wsTarget = m_wsTarget; }
85   int32_t CountAttributes() const;
86   bool GetAttribute(int32_t index,
87                     CFX_WideString& wsAttriName,
88                     CFX_WideString& wsAttriValue) const;
89   bool HasAttribute(const FX_WCHAR* pwsAttriName) const;
90   void GetString(const FX_WCHAR* pwsAttriName,
91                  CFX_WideString& wsAttriValue,
92                  const FX_WCHAR* pwsDefValue = nullptr) const;
93   void SetString(const CFX_WideString& wsAttriName,
94                  const CFX_WideString& wsAttriValue);
95   int32_t GetInteger(const FX_WCHAR* pwsAttriName, int32_t iDefValue = 0) const;
96   void SetInteger(const FX_WCHAR* pwsAttriName, int32_t iAttriValue);
97   FX_FLOAT GetFloat(const FX_WCHAR* pwsAttriName, FX_FLOAT fDefValue = 0) const;
98   void SetFloat(const FX_WCHAR* pwsAttriName, FX_FLOAT fAttriValue);
99   void RemoveAttribute(const FX_WCHAR* pwsAttriName);
100   int32_t CountData() const;
101   bool GetData(int32_t index, CFX_WideString& wsData) const;
102   void AppendData(const CFX_WideString& wsData);
103   void RemoveData(int32_t index);
104 
105   CFX_WideString m_wsTarget;
106   std::vector<CFX_WideString> m_Attributes;
107   std::vector<CFX_WideString> m_TargetData;
108 };
109 
110 class CFDE_XMLElement : public CFDE_XMLNode {
111  public:
112   explicit CFDE_XMLElement(const CFX_WideString& wsTag);
113   ~CFDE_XMLElement() override;
114 
115   // CFDE_XMLNode
116   FDE_XMLNODETYPE GetType() const override;
117   CFDE_XMLNode* Clone(bool bRecursive) override;
118 
119   void GetTagName(CFX_WideString& wsTag) const;
120   void GetLocalTagName(CFX_WideString& wsTag) const;
121 
122   void GetNamespacePrefix(CFX_WideString& wsPrefix) const;
123   void GetNamespaceURI(CFX_WideString& wsNamespace) const;
124 
125   int32_t CountAttributes() const;
126   bool GetAttribute(int32_t index,
127                     CFX_WideString& wsAttriName,
128                     CFX_WideString& wsAttriValue) const;
129   bool HasAttribute(const FX_WCHAR* pwsAttriName) const;
130   void RemoveAttribute(const FX_WCHAR* pwsAttriName);
131 
132   void GetString(const FX_WCHAR* pwsAttriName,
133                  CFX_WideString& wsAttriValue,
134                  const FX_WCHAR* pwsDefValue = nullptr) const;
135   void SetString(const CFX_WideString& wsAttriName,
136                  const CFX_WideString& wsAttriValue);
137 
138   int32_t GetInteger(const FX_WCHAR* pwsAttriName, int32_t iDefValue = 0) const;
139   void SetInteger(const FX_WCHAR* pwsAttriName, int32_t iAttriValue);
140 
141   FX_FLOAT GetFloat(const FX_WCHAR* pwsAttriName, FX_FLOAT fDefValue = 0) const;
142   void SetFloat(const FX_WCHAR* pwsAttriName, FX_FLOAT fAttriValue);
143 
144   void GetTextData(CFX_WideString& wsText) const;
145   void SetTextData(const CFX_WideString& wsText);
146 
147   CFX_WideString m_wsTag;
148   std::vector<CFX_WideString> m_Attributes;
149 };
150 
151 class CFDE_XMLText : public CFDE_XMLNode {
152  public:
153   explicit CFDE_XMLText(const CFX_WideString& wsText);
154   ~CFDE_XMLText() override;
155 
156   // CFDE_XMLNode
157   FDE_XMLNODETYPE GetType() const override;
158   CFDE_XMLNode* Clone(bool bRecursive) override;
159 
GetText(CFX_WideString & wsText)160   void GetText(CFX_WideString& wsText) const { wsText = m_wsText; }
SetText(const CFX_WideString & wsText)161   void SetText(const CFX_WideString& wsText) { m_wsText = wsText; }
162 
163   CFX_WideString m_wsText;
164 };
165 
166 class CFDE_XMLDeclaration : public CFDE_XMLNode {
167  public:
CFDE_XMLDeclaration()168   CFDE_XMLDeclaration() {}
~CFDE_XMLDeclaration()169   ~CFDE_XMLDeclaration() override {}
170 };
171 
172 class CFDE_XMLCharData : public CFDE_XMLDeclaration {
173  public:
174   explicit CFDE_XMLCharData(const CFX_WideString& wsCData);
175   ~CFDE_XMLCharData() override;
176 
177   FDE_XMLNODETYPE GetType() const override;
178   CFDE_XMLNode* Clone(bool bRecursive) override;
179 
GetCharData(CFX_WideString & wsCharData)180   void GetCharData(CFX_WideString& wsCharData) const {
181     wsCharData = m_wsCharData;
182   }
SetCharData(const CFX_WideString & wsCData)183   void SetCharData(const CFX_WideString& wsCData) { m_wsCharData = wsCData; }
184 
185   CFX_WideString m_wsCharData;
186 };
187 
188 class CFDE_XMLDoc {
189  public:
190   CFDE_XMLDoc();
191   ~CFDE_XMLDoc();
192 
193   bool LoadXML(std::unique_ptr<IFDE_XMLParser> pXMLParser);
194   int32_t DoLoad(IFX_Pause* pPause = nullptr);
195   void CloseXML();
GetRoot()196   CFDE_XMLNode* GetRoot() const { return m_pRoot; }
197   void SaveXML(CFX_RetainPtr<IFGAS_Stream>& pXMLStream, bool bSaveBOM = true);
198   void SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream>& pXMLStream,
199                    CFDE_XMLNode* pNode);
200 
201  protected:
202   void Reset(bool bInitRoot);
203   void ReleaseParser();
204 
205   CFX_RetainPtr<IFGAS_Stream> m_pStream;
206   int32_t m_iStatus;
207   CFDE_XMLNode* m_pRoot;
208   std::unique_ptr<IFDE_XMLParser> m_pXMLParser;
209 };
210 
211 class IFDE_XMLParser {
212  public:
~IFDE_XMLParser()213   virtual ~IFDE_XMLParser() {}
214   virtual int32_t DoParser(IFX_Pause* pPause) = 0;
215 };
216 
217 class CFDE_BlockBuffer {
218  public:
219   explicit CFDE_BlockBuffer(int32_t iAllocStep = 1024 * 1024);
220   ~CFDE_BlockBuffer();
221 
222   bool InitBuffer(int32_t iBufferSize = 1024 * 1024);
IsInitialized()223   bool IsInitialized() { return m_iBufferSize / m_iAllocStep >= 1; }
224   FX_WCHAR* GetAvailableBlock(int32_t& iIndexInBlock);
GetAllocStep()225   inline int32_t GetAllocStep() const { return m_iAllocStep; }
GetDataLengthRef()226   inline int32_t& GetDataLengthRef() { return m_iDataLength; }
227   inline void Reset(bool bReserveData = true) {
228     if (!bReserveData) {
229       m_iStartPosition = 0;
230     }
231     m_iDataLength = 0;
232   }
233   void SetTextChar(int32_t iIndex, FX_WCHAR ch);
234   int32_t DeleteTextChars(int32_t iCount, bool bDirection = true);
235   void GetTextData(CFX_WideString& wsTextData,
236                    int32_t iStart = 0,
237                    int32_t iLength = -1) const;
238 
239  protected:
240   inline void TextDataIndex2BufIndex(const int32_t iIndex,
241                                      int32_t& iBlockIndex,
242                                      int32_t& iInnerIndex) const;
243   void ClearBuffer();
244 
245   CFX_ArrayTemplate<FX_WCHAR*> m_BlockArray;
246   int32_t m_iDataLength;
247   int32_t m_iBufferSize;
248   int32_t m_iAllocStep;
249   int32_t m_iStartPosition;
250 };
251 
252 class CFDE_XMLSyntaxParser {
253  public:
254   CFDE_XMLSyntaxParser();
255   ~CFDE_XMLSyntaxParser();
256 
257   void Init(const CFX_RetainPtr<IFGAS_Stream>& pStream,
258             int32_t iXMLPlaneSize,
259             int32_t iTextDataSize = 256);
260 
261   FDE_XmlSyntaxResult DoSyntaxParse();
262 
263   int32_t GetStatus() const;
GetCurrentPos()264   int32_t GetCurrentPos() const {
265     return m_iParsedChars + (m_pStart - m_pBuffer);
266   }
267   FX_FILESIZE GetCurrentBinaryPos() const;
GetCurrentNodeNumber()268   int32_t GetCurrentNodeNumber() const { return m_iCurrentNodeNum; }
GetLastNodeNumber()269   int32_t GetLastNodeNumber() const { return m_iLastNodeNum; }
270 
GetTargetName(CFX_WideString & wsTarget)271   void GetTargetName(CFX_WideString& wsTarget) const {
272     m_BlockBuffer.GetTextData(wsTarget, 0, m_iTextDataLength);
273   }
GetTagName(CFX_WideString & wsTag)274   void GetTagName(CFX_WideString& wsTag) const {
275     m_BlockBuffer.GetTextData(wsTag, 0, m_iTextDataLength);
276   }
GetAttributeName(CFX_WideString & wsAttriName)277   void GetAttributeName(CFX_WideString& wsAttriName) const {
278     m_BlockBuffer.GetTextData(wsAttriName, 0, m_iTextDataLength);
279   }
GetAttributeValue(CFX_WideString & wsAttriValue)280   void GetAttributeValue(CFX_WideString& wsAttriValue) const {
281     m_BlockBuffer.GetTextData(wsAttriValue, 0, m_iTextDataLength);
282   }
GetTextData(CFX_WideString & wsText)283   void GetTextData(CFX_WideString& wsText) const {
284     m_BlockBuffer.GetTextData(wsText, 0, m_iTextDataLength);
285   }
GetTargetData(CFX_WideString & wsData)286   void GetTargetData(CFX_WideString& wsData) const {
287     m_BlockBuffer.GetTextData(wsData, 0, m_iTextDataLength);
288   }
289 
290  protected:
291   enum class FDE_XmlSyntaxState {
292     Text,
293     Node,
294     Target,
295     Tag,
296     AttriName,
297     AttriEqualSign,
298     AttriQuotation,
299     AttriValue,
300     Entity,
301     EntityDecimal,
302     EntityHex,
303     CloseInstruction,
304     BreakElement,
305     CloseElement,
306     SkipDeclNode,
307     DeclCharData,
308     SkipComment,
309     SkipCommentOrDecl,
310     SkipCData,
311     TargetData
312   };
313 
314   void ParseTextChar(FX_WCHAR ch);
315 
316   CFX_RetainPtr<IFGAS_Stream> m_pStream;
317   int32_t m_iXMLPlaneSize;
318   int32_t m_iCurrentPos;
319   int32_t m_iCurrentNodeNum;
320   int32_t m_iLastNodeNum;
321   int32_t m_iParsedChars;
322   int32_t m_iParsedBytes;
323   FX_WCHAR* m_pBuffer;
324   int32_t m_iBufferChars;
325   bool m_bEOS;
326   FX_WCHAR* m_pStart;
327   FX_WCHAR* m_pEnd;
328   FDE_XMLNODE m_CurNode;
329   CFX_StackTemplate<FDE_XMLNODE> m_XMLNodeStack;
330   CFDE_BlockBuffer m_BlockBuffer;
331   int32_t m_iAllocStep;
332   int32_t& m_iDataLength;
333   FX_WCHAR* m_pCurrentBlock;
334   int32_t m_iIndexInBlock;
335   int32_t m_iTextDataLength;
336   FDE_XmlSyntaxResult m_syntaxParserResult;
337   FDE_XmlSyntaxState m_syntaxParserState;
338   FX_WCHAR m_wQuotationMark;
339   int32_t m_iEntityStart;
340   CFX_StackTemplate<uint32_t> m_SkipStack;
341   FX_WCHAR m_SkipChar;
342 };
343 
344 #endif  // XFA_FDE_XML_FDE_XML_IMP_H_
345