// Copyright 2017 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #ifndef CORE_FXCRT_XML_CFX_XMLSYNTAXPARSER_H_
8 #define CORE_FXCRT_XML_CFX_XMLSYNTAXPARSER_H_
9 
10 #include <stack>
11 #include <vector>
12 
13 #include "core/fxcrt/cfx_blockbuffer.h"
14 #include "core/fxcrt/cfx_seekablestreamproxy.h"
15 #include "core/fxcrt/fx_string.h"
16 #include "core/fxcrt/retain_ptr.h"
17 #include "core/fxcrt/xml/cfx_xmlnode.h"
18 
// Result codes returned by CFX_XMLSyntaxParser::DoSyntaxParse().
//
// Enumerator values are implicit: they start at 0 (None) and increase by one
// in declaration order, so reordering or inserting entries changes the
// numeric value of everything that follows.
//
// NOTE(review): the "Attri" prefix presumably abbreviates "attribute" (the
// parser exposes GetAttributeName()/GetAttributeValue()); the exact event each
// enumerator signals is defined by the parser implementation, not here.
enum class FX_XmlSyntaxResult {
  None,
  InstructionOpen,
  InstructionClose,
  ElementOpen,
  ElementBreak,
  ElementClose,
  TargetName,
  TagName,
  AttriName,
  AttriValue,
  Text,
  CData,
  TargetData,
  Error,
  EndOfString
};
36 
37 class CFX_XMLSyntaxParser {
38  public:
39   static bool IsXMLNameChar(wchar_t ch, bool bFirstChar);
40 
41   explicit CFX_XMLSyntaxParser(
42       const RetainPtr<CFX_SeekableStreamProxy>& pStream);
43   ~CFX_XMLSyntaxParser();
44 
45   FX_XmlSyntaxResult DoSyntaxParse();
46 
47   int32_t GetStatus() const;
GetCurrentPos()48   FX_FILESIZE GetCurrentPos() const { return m_ParsedChars + m_Start; }
49   FX_FILESIZE GetCurrentBinaryPos() const;
GetCurrentNodeNumber()50   int32_t GetCurrentNodeNumber() const { return m_iCurrentNodeNum; }
GetLastNodeNumber()51   int32_t GetLastNodeNumber() const { return m_iLastNodeNum; }
52 
GetTargetName()53   WideString GetTargetName() const {
54     return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
55   }
56 
GetTagName()57   WideString GetTagName() const {
58     return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
59   }
60 
GetAttributeName()61   WideString GetAttributeName() const {
62     return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
63   }
64 
GetAttributeValue()65   WideString GetAttributeValue() const {
66     return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
67   }
68 
GetTextData()69   WideString GetTextData() const {
70     return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
71   }
72 
GetTargetData()73   WideString GetTargetData() const {
74     return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
75   }
76 
77  protected:
78   enum class FDE_XmlSyntaxState {
79     Text,
80     Node,
81     Target,
82     Tag,
83     AttriName,
84     AttriEqualSign,
85     AttriQuotation,
86     AttriValue,
87     Entity,
88     EntityDecimal,
89     EntityHex,
90     CloseInstruction,
91     BreakElement,
92     CloseElement,
93     SkipDeclNode,
94     DeclCharData,
95     SkipComment,
96     SkipCommentOrDecl,
97     SkipCData,
98     TargetData
99   };
100 
101   void ParseTextChar(wchar_t ch);
102 
103   RetainPtr<CFX_SeekableStreamProxy> m_pStream;
104   size_t m_iXMLPlaneSize;
105   FX_FILESIZE m_iCurrentPos;
106   int32_t m_iCurrentNodeNum;
107   int32_t m_iLastNodeNum;
108   int32_t m_iParsedBytes;
109   FX_FILESIZE m_ParsedChars;
110   std::vector<wchar_t> m_Buffer;
111   size_t m_iBufferChars;
112   bool m_bEOS;
113   FX_FILESIZE m_Start;  // Start position in m_Buffer
114   FX_FILESIZE m_End;    // End position in m_Buffer
115   FX_XMLNODE m_CurNode;
116   std::stack<FX_XMLNODE> m_XMLNodeStack;
117   CFX_BlockBuffer m_BlockBuffer;
118   int32_t m_iAllocStep;
119   wchar_t* m_pCurrentBlock;  // Pointer into CFX_BlockBuffer
120   int32_t m_iIndexInBlock;
121   int32_t m_iTextDataLength;
122   FX_XmlSyntaxResult m_syntaxParserResult;
123   FDE_XmlSyntaxState m_syntaxParserState;
124   wchar_t m_wQuotationMark;
125   int32_t m_iEntityStart;
126   std::stack<wchar_t> m_SkipStack;
127   wchar_t m_SkipChar;
128 };
129 
130 #endif  // CORE_FXCRT_XML_CFX_XMLSYNTAXPARSER_H_
131