// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

7 #ifndef _FDE_XML_IMP
8 #define _FDE_XML_IMP
9 #define _FDE_BLOCK_BUFFER
10 #ifdef _FDE_BLOCK_BUFFER
11 class CFDE_BlockBuffer;
12 #endif
13 class CFDE_XMLNode;
14 class CFDE_XMLInstruction;
15 class CFDE_XMLElement;
16 class CFDE_XMLText;
17 class CFDE_XMLDoc;
18 class IFDE_XMLParser;
19 class CFDE_XMLDOMParser;
20 class CFDE_XMLSAXParser;
21 class CFDE_XMLSyntaxParser;
22 class CFDE_XMLNode : public CFX_Target {
23  public:
24   CFDE_XMLNode();
Release()25   virtual void Release() { delete this; }
GetType()26   virtual FDE_XMLNODETYPE GetType() const { return FDE_XMLNODE_Unknown; }
27   virtual int32_t CountChildNodes() const;
28   virtual CFDE_XMLNode* GetChildNode(int32_t index) const;
29   virtual int32_t GetChildNodeIndex(CFDE_XMLNode* pNode) const;
30   virtual CFDE_XMLNode* GetPath(const FX_WCHAR* pPath,
31                                 int32_t iLength = -1,
32                                 FX_BOOL bQualifiedName = TRUE) const;
33   virtual int32_t InsertChildNode(CFDE_XMLNode* pNode, int32_t index = -1);
34   virtual void RemoveChildNode(CFDE_XMLNode* pNode);
35   virtual void DeleteChildren();
36   virtual CFDE_XMLNode* GetNodeItem(IFDE_XMLNode::NodeItem eItem) const;
37   virtual int32_t GetNodeLevel() const;
38   virtual FX_BOOL InsertNodeItem(IFDE_XMLNode::NodeItem eItem,
39                                  CFDE_XMLNode* pNode);
40   virtual CFDE_XMLNode* RemoveNodeItem(IFDE_XMLNode::NodeItem eItem);
41   virtual CFDE_XMLNode* Clone(FX_BOOL bRecursive);
42   virtual void SaveXMLNode(IFX_Stream* pXMLStream);
43 
44  public:
45   ~CFDE_XMLNode();
46   void CloneChildren(CFDE_XMLNode* pClone);
47   CFDE_XMLNode* m_pParent;
48   CFDE_XMLNode* m_pChild;
49   CFDE_XMLNode* m_pPrior;
50   CFDE_XMLNode* m_pNext;
51 };
52 class CFDE_XMLInstruction : public CFDE_XMLNode {
53  public:
54   CFDE_XMLInstruction(const CFX_WideString& wsTarget);
Release()55   virtual void Release() { delete this; }
GetType()56   virtual FDE_XMLNODETYPE GetType() const { return FDE_XMLNODE_Instruction; }
57   virtual CFDE_XMLNode* Clone(FX_BOOL bRecursive);
GetTargetName(CFX_WideString & wsTarget)58   virtual void GetTargetName(CFX_WideString& wsTarget) const {
59     wsTarget = m_wsTarget;
60   }
61   virtual int32_t CountAttributes() const;
62   virtual FX_BOOL GetAttribute(int32_t index,
63                                CFX_WideString& wsAttriName,
64                                CFX_WideString& wsAttriValue) const;
65   virtual FX_BOOL HasAttribute(const FX_WCHAR* pwsAttriName) const;
66   virtual void GetString(const FX_WCHAR* pwsAttriName,
67                          CFX_WideString& wsAttriValue,
68                          const FX_WCHAR* pwsDefValue = NULL) const;
69   virtual void SetString(const CFX_WideString& wsAttriName,
70                          const CFX_WideString& wsAttriValue);
71   virtual int32_t GetInteger(const FX_WCHAR* pwsAttriName,
72                              int32_t iDefValue = 0) const;
73   virtual void SetInteger(const FX_WCHAR* pwsAttriName, int32_t iAttriValue);
74   virtual FX_FLOAT GetFloat(const FX_WCHAR* pwsAttriName,
75                             FX_FLOAT fDefValue = 0) const;
76   virtual void SetFloat(const FX_WCHAR* pwsAttriName, FX_FLOAT fAttriValue);
77   virtual void RemoveAttribute(const FX_WCHAR* pwsAttriName);
78   virtual int32_t CountData() const;
79   virtual FX_BOOL GetData(int32_t index, CFX_WideString& wsData) const;
80   virtual void AppendData(const CFX_WideString& wsData);
81   virtual void RemoveData(int32_t index);
82 
83  public:
~CFDE_XMLInstruction()84   ~CFDE_XMLInstruction() {}
85   CFX_WideString m_wsTarget;
86   CFX_WideStringArray m_Attributes;
87   CFX_WideStringArray m_TargetData;
88 };
89 class CFDE_XMLElement : public CFDE_XMLNode {
90  public:
91   CFDE_XMLElement(const CFX_WideString& wsTag);
Release()92   virtual void Release() { delete this; }
GetType()93   virtual FDE_XMLNODETYPE GetType() const { return FDE_XMLNODE_Element; }
94   virtual CFDE_XMLNode* Clone(FX_BOOL bRecursive);
95   virtual void GetTagName(CFX_WideString& wsTag) const;
96   virtual void GetLocalTagName(CFX_WideString& wsTag) const;
97   virtual void GetNamespacePrefix(CFX_WideString& wsPrefix) const;
98   virtual void GetNamespaceURI(CFX_WideString& wsNamespace) const;
99   virtual int32_t CountAttributes() const;
100   virtual FX_BOOL GetAttribute(int32_t index,
101                                CFX_WideString& wsAttriName,
102                                CFX_WideString& wsAttriValue) const;
103   virtual FX_BOOL HasAttribute(const FX_WCHAR* pwsAttriName) const;
104   virtual void GetString(const FX_WCHAR* pwsAttriName,
105                          CFX_WideString& wsAttriValue,
106                          const FX_WCHAR* pwsDefValue = NULL) const;
107   virtual void SetString(const CFX_WideString& wsAttriName,
108                          const CFX_WideString& wsAttriValue);
109   virtual int32_t GetInteger(const FX_WCHAR* pwsAttriName,
110                              int32_t iDefValue = 0) const;
111   virtual void SetInteger(const FX_WCHAR* pwsAttriName, int32_t iAttriValue);
112   virtual FX_FLOAT GetFloat(const FX_WCHAR* pwsAttriName,
113                             FX_FLOAT fDefValue = 0) const;
114   virtual void SetFloat(const FX_WCHAR* pwsAttriName, FX_FLOAT fAttriValue);
115   virtual void RemoveAttribute(const FX_WCHAR* pwsAttriName);
116   virtual void GetTextData(CFX_WideString& wsText) const;
117   virtual void SetTextData(const CFX_WideString& wsText);
118 
119  public:
120   ~CFDE_XMLElement();
121   CFX_WideString m_wsTag;
122   CFX_WideStringArray m_Attributes;
123 };
124 class CFDE_XMLText : public CFDE_XMLNode {
125  public:
126   CFDE_XMLText(const CFX_WideString& wsText);
Release()127   virtual void Release() { delete this; }
GetType()128   virtual FDE_XMLNODETYPE GetType() const { return FDE_XMLNODE_Text; }
129   virtual CFDE_XMLNode* Clone(FX_BOOL bRecursive);
GetText(CFX_WideString & wsText)130   virtual void GetText(CFX_WideString& wsText) const { wsText = m_wsText; }
SetText(const CFX_WideString & wsText)131   virtual void SetText(const CFX_WideString& wsText) { m_wsText = wsText; }
132 
133  public:
~CFDE_XMLText()134   ~CFDE_XMLText() {}
135   CFX_WideString m_wsText;
136 };
137 class CFDE_XMLDeclaration : public CFDE_XMLNode {
138  public:
CFDE_XMLDeclaration()139   CFDE_XMLDeclaration() : CFDE_XMLNode() {}
140 };
141 class CFDE_XMLCharData : public CFDE_XMLDeclaration {
142  public:
143   CFDE_XMLCharData(const CFX_WideString& wsCData);
144 
Release()145   virtual void Release() { delete this; }
GetType()146   virtual FDE_XMLNODETYPE GetType() const { return FDE_XMLNODE_CharData; }
147   virtual CFDE_XMLNode* Clone(FX_BOOL bRecursive);
GetCharData(CFX_WideString & wsCharData)148   virtual void GetCharData(CFX_WideString& wsCharData) const {
149     wsCharData = m_wsCharData;
150   }
SetCharData(const CFX_WideString & wsCData)151   virtual void SetCharData(const CFX_WideString& wsCData) {
152     m_wsCharData = wsCData;
153   }
154 
155  public:
~CFDE_XMLCharData()156   ~CFDE_XMLCharData() {}
157 
158   CFX_WideString m_wsCharData;
159 };
160 class CFDE_XMLDoc : public CFX_Target {
161  public:
162   CFDE_XMLDoc();
163   ~CFDE_XMLDoc();
Release()164   virtual void Release() { delete this; }
165   virtual FX_BOOL LoadXML(IFX_Stream* pXMLStream,
166                           int32_t iXMLPlaneSize = 8192,
167                           int32_t iTextDataSize = 256,
168                           FDE_LPXMLREADERHANDLER pHandler = NULL);
169   virtual FX_BOOL LoadXML(IFDE_XMLParser* pXMLParser);
170   virtual int32_t DoLoad(IFX_Pause* pPause = NULL);
171   virtual void CloseXML();
GetRoot()172   virtual CFDE_XMLNode* GetRoot() const { return m_pRoot; }
173   virtual void SaveXML(IFX_Stream* pXMLStream = NULL, FX_BOOL bSaveBOM = TRUE);
174   virtual void SaveXMLNode(IFX_Stream* pXMLStream, IFDE_XMLNode* pNode);
175 
176  protected:
177   IFX_Stream* m_pStream;
178   int32_t m_iStatus;
179   CFDE_XMLNode* m_pRoot;
180   IFDE_XMLSyntaxParser* m_pSyntaxParser;
181   IFDE_XMLParser* m_pXMLParser;
182   void Reset(FX_BOOL bInitRoot);
183   void ReleaseParser();
184 };
185 typedef CFX_StackTemplate<CFDE_XMLNode*> CFDE_XMLDOMNodeStack;
186 class CFDE_XMLDOMParser : public IFDE_XMLParser, public CFX_Target {
187  public:
188   CFDE_XMLDOMParser(CFDE_XMLNode* pRoot, IFDE_XMLSyntaxParser* pParser);
189   ~CFDE_XMLDOMParser();
190 
Release()191   virtual void Release() { delete this; }
192   virtual int32_t DoParser(IFX_Pause* pPause);
193 
194  private:
195   IFDE_XMLSyntaxParser* m_pParser;
196   CFDE_XMLNode* m_pParent;
197   CFDE_XMLNode* m_pChild;
198   CFDE_XMLDOMNodeStack m_NodeStack;
199   CFX_WideString m_ws1;
200   CFX_WideString m_ws2;
201 };
202 class CFDE_XMLTAG : public CFX_Target {
203  public:
CFDE_XMLTAG()204   CFDE_XMLTAG() : eType(FDE_XMLNODE_Unknown) {}
CFDE_XMLTAG(const CFDE_XMLTAG & src)205   CFDE_XMLTAG(const CFDE_XMLTAG& src)
206       : wsTagName(src.wsTagName), eType(src.eType) {}
207   CFX_WideString wsTagName;
208   FDE_XMLNODETYPE eType;
209 };
210 typedef CFX_ObjectStackTemplate<CFDE_XMLTAG> CFDE_XMLTagStack;
211 class CFDE_XMLSAXParser : public IFDE_XMLParser, public CFX_Target {
212  public:
213   CFDE_XMLSAXParser(FDE_LPXMLREADERHANDLER pHandler,
214                     IFDE_XMLSyntaxParser* pParser);
215   ~CFDE_XMLSAXParser();
216 
Release()217   virtual void Release() { delete this; }
218   virtual int32_t DoParser(IFX_Pause* pPause);
219 
220  private:
221   void Push(const CFDE_XMLTAG& xmlTag);
222   void Pop();
223   FDE_LPXMLREADERHANDLER m_pHandler;
224   IFDE_XMLSyntaxParser* m_pParser;
225   CFDE_XMLTagStack m_TagStack;
226   CFDE_XMLTAG* m_pTagTop;
227   CFX_WideString m_ws1;
228   CFX_WideString m_ws2;
229 };
#ifdef _FDE_BLOCK_BUFFER
// Wide-character text buffer that tracks a data length, a buffer size, an
// allocation step and a start position, and keeps its storage in a pointer
// array (m_BlockArray).
// NOTE(review): presumably storage grows in blocks of m_iAllocStep
// characters -- confirm in the definitions.
class CFDE_BlockBuffer : public CFX_Target {
 public:
  // iAllocStep defaults to 1024 * 1024.
  CFDE_BlockBuffer(int32_t iAllocStep = 1024 * 1024);
  ~CFDE_BlockBuffer();

  // iBufferSize defaults to 1024 * 1024.
  FX_BOOL InitBuffer(int32_t iBufferSize = 1024 * 1024);
  // Reports whether the buffer size covers at least one allocation step
  // (m_iBufferSize / m_iAllocStep >= 1). A zero allocation step reports
  // "not initialized" instead of dividing by zero.
  FX_BOOL IsInitialized() const {
    return m_iAllocStep != 0 && m_iBufferSize / m_iAllocStep >= 1;
  }
  // Destroys this object with "delete this".
  // NOTE(review): only valid for instances created with new;
  // CFDE_XMLSyntaxParser holds a CFDE_BlockBuffer by value (m_BlockBuffer),
  // and this must not be called on such a member.
  void ReleaseBuffer() { delete this; }
  // iIndexInBlock is an output argument.
  FX_WCHAR* GetAvailableBlock(int32_t& iIndexInBlock);
  inline int32_t GetAllocStep() const { return m_iAllocStep; }
  // Hands out a mutable reference to m_iDataLength, so callers can change the
  // data length directly.
  inline int32_t& GetDataLengthRef() { return m_iDataLength; }
  // Always sets the data length to 0; when bReserveData is FALSE the start
  // position is reset to 0 as well. No storage is touched here.
  inline void Reset(FX_BOOL bReserveData = TRUE) {
    if (!bReserveData) {
      m_iStartPosition = 0;
    }
    m_iDataLength = 0;
  }
  void SetTextChar(int32_t iIndex, FX_WCHAR ch);
  // bDirection defaults to TRUE.
  // NOTE(review): meaning of the direction flag is not visible here.
  int32_t DeleteTextChars(int32_t iCount, FX_BOOL bDirection = TRUE);
  // iStart defaults to 0 and iLength to -1.
  // NOTE(review): presumably -1 means "to the end of the data" -- confirm.
  void GetTextData(CFX_WideString& wsTextData,
                   int32_t iStart = 0,
                   int32_t iLength = -1) const;

 protected:
  // iBlockIndex and iInnerIndex are output arguments.
  inline void TextDataIndex2BufIndex(const int32_t iIndex,
                                     int32_t& iBlockIndex,
                                     int32_t& iInnerIndex) const;
  void ClearBuffer();
  CFX_PtrArray m_BlockArray;
  int32_t m_iDataLength;
  int32_t m_iBufferSize;
  int32_t m_iAllocStep;
  int32_t m_iStartPosition;
};
#endif
// Syntax-parser mode constants, numbered consecutively from 0 to 18.
// NOTE(review): presumably the values stored in
// CFDE_XMLSyntaxParser::m_dwMode -- confirm in the parser implementation.
#define FDE_XMLSYNTAXMODE_Text 0
#define FDE_XMLSYNTAXMODE_Node 1
#define FDE_XMLSYNTAXMODE_Target 2
#define FDE_XMLSYNTAXMODE_Tag 3
#define FDE_XMLSYNTAXMODE_AttriName 4
#define FDE_XMLSYNTAXMODE_AttriEqualSign 5
#define FDE_XMLSYNTAXMODE_AttriQuotation 6
#define FDE_XMLSYNTAXMODE_AttriValue 7
#define FDE_XMLSYNTAXMODE_Entity 8
#define FDE_XMLSYNTAXMODE_EntityDecimal 9
#define FDE_XMLSYNTAXMODE_EntityHex 10
#define FDE_XMLSYNTAXMODE_CloseInstruction 11
#define FDE_XMLSYNTAXMODE_BreakElement 12
#define FDE_XMLSYNTAXMODE_CloseElement 13
#define FDE_XMLSYNTAXMODE_SkipDeclNode 14
#define FDE_XMLSYNTAXMODE_DeclCharData 15
#define FDE_XMLSYNTAXMODE_SkipComment 16
#define FDE_XMLSYNTAXMODE_SkipCommentOrDecl 17
#define FDE_XMLSYNTAXMODE_TargetData 18
285 class CFDE_XMLSyntaxParser : public IFDE_XMLSyntaxParser, public CFX_Target {
286  public:
287   CFDE_XMLSyntaxParser();
288   ~CFDE_XMLSyntaxParser();
Release()289   virtual void Release() { delete this; }
290   virtual void Init(IFX_Stream* pStream,
291                     int32_t iXMLPlaneSize,
292                     int32_t iTextDataSize = 256);
293   virtual FX_DWORD DoSyntaxParse();
294   virtual int32_t GetStatus() const;
GetCurrentPos()295   virtual int32_t GetCurrentPos() const {
296     return m_iParsedChars + (m_pStart - m_pBuffer);
297   }
298   virtual FX_FILESIZE GetCurrentBinaryPos() const;
GetCurrentNodeNumber()299   virtual int32_t GetCurrentNodeNumber() const { return m_iCurrentNodeNum; }
GetLastNodeNumber()300   virtual int32_t GetLastNodeNumber() const { return m_iLastNodeNum; }
301 #ifdef _FDE_BLOCK_BUFFER
GetTargetName(CFX_WideString & wsTarget)302   virtual void GetTargetName(CFX_WideString& wsTarget) const {
303     m_BlockBuffer.GetTextData(wsTarget, 0, m_iTextDataLength);
304   }
GetTagName(CFX_WideString & wsTag)305   virtual void GetTagName(CFX_WideString& wsTag) const {
306     m_BlockBuffer.GetTextData(wsTag, 0, m_iTextDataLength);
307   }
GetAttributeName(CFX_WideString & wsAttriName)308   virtual void GetAttributeName(CFX_WideString& wsAttriName) const {
309     m_BlockBuffer.GetTextData(wsAttriName, 0, m_iTextDataLength);
310   }
GetAttributeValue(CFX_WideString & wsAttriValue)311   virtual void GetAttributeValue(CFX_WideString& wsAttriValue) const {
312     m_BlockBuffer.GetTextData(wsAttriValue, 0, m_iTextDataLength);
313   }
GetTextData(CFX_WideString & wsText)314   virtual void GetTextData(CFX_WideString& wsText) const {
315     m_BlockBuffer.GetTextData(wsText, 0, m_iTextDataLength);
316   }
GetTargetData(CFX_WideString & wsData)317   virtual void GetTargetData(CFX_WideString& wsData) const {
318     m_BlockBuffer.GetTextData(wsData, 0, m_iTextDataLength);
319   }
320 #else
GetTargetName(CFX_WideString & wsTarget)321   virtual void GetTargetName(CFX_WideString& wsTarget) const {
322     GetData(wsTarget);
323   }
GetTagName(CFX_WideString & wsTag)324   virtual void GetTagName(CFX_WideString& wsTag) const { GetData(wsTag); }
GetAttributeName(CFX_WideString & wsAttriName)325   virtual void GetAttributeName(CFX_WideString& wsAttriName) const {
326     GetData(wsAttriName);
327   }
GetAttributeValue(CFX_WideString & wsAttriValue)328   virtual void GetAttributeValue(CFX_WideString& wsAttriValue) const {
329     GetData(wsAttriValue);
330   }
GetTextData(CFX_WideString & wsText)331   virtual void GetTextData(CFX_WideString& wsText) const { GetData(wsText); }
GetTargetData(CFX_WideString & wsData)332   virtual void GetTargetData(CFX_WideString& wsData) const { GetData(wsData); }
333 #endif
334  protected:
335   IFX_Stream* m_pStream;
336   int32_t m_iXMLPlaneSize;
337   int32_t m_iCurrentPos;
338   int32_t m_iCurrentNodeNum;
339   int32_t m_iLastNodeNum;
340   int32_t m_iParsedChars;
341   int32_t m_iParsedBytes;
342   FX_WCHAR* m_pBuffer;
343   int32_t m_iBufferChars;
344   FX_BOOL m_bEOS;
345   FX_WCHAR* m_pStart;
346   FX_WCHAR* m_pEnd;
347   FDE_XMLNODE m_CurNode;
348   CFDE_XMLNodeStack m_XMLNodeStack;
349 #ifdef _FDE_BLOCK_BUFFER
350   CFDE_BlockBuffer m_BlockBuffer;
351   int32_t m_iAllocStep;
352   int32_t& m_iDataLength;
353   FX_WCHAR* m_pCurrentBlock;
354   int32_t m_iIndexInBlock;
355 #else
356   int32_t m_iTextDataSize;
357   FX_WCHAR* m_pwsTextData;
358   int32_t m_iDataPos;
359 #endif
360   int32_t m_iTextDataLength;
361   FX_DWORD m_dwStatus;
362   FX_DWORD m_dwMode;
363   FX_WCHAR m_wQuotationMark;
364   int32_t m_iEntityStart;
365   CFX_DWordStack m_SkipStack;
366   FX_WCHAR m_SkipChar;
367   inline void ParseTextChar(FX_WCHAR ch);
368 #ifndef _FDE_BLOCK_BUFFER
369   void ReallocTextDataBuffer();
370   void GetData(CFX_WideString& wsData) const;
371 #endif
372 };
373 #endif
374