1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #ifndef CORE_FXCRT_XML_CFX_XMLPARSER_H_
8 #define CORE_FXCRT_XML_CFX_XMLPARSER_H_
9 
10 #include <memory>
11 #include <vector>
12 
13 #include "core/fxcrt/fx_memory_wrappers.h"
14 #include "core/fxcrt/fx_string.h"
15 #include "core/fxcrt/retain_ptr.h"
16 
17 class CFX_SeekableStreamProxy;
18 class CFX_XMLDocument;
19 class CFX_XMLElement;
20 class CFX_XMLNode;
21 class IFX_SeekableReadStream;
22 
23 class CFX_XMLParser final {
24  public:
25   static bool IsXMLNameChar(wchar_t ch, bool bFirstChar);
26 
27   explicit CFX_XMLParser(const RetainPtr<IFX_SeekableReadStream>& pStream);
28   ~CFX_XMLParser();
29 
30   std::unique_ptr<CFX_XMLDocument> Parse();
31 
32  private:
33   enum class FDE_XmlSyntaxState {
34     Text,
35     Node,
36     Target,
37     Tag,
38     AttriName,
39     AttriEqualSign,
40     AttriQuotation,
41     AttriValue,
42     CloseInstruction,
43     BreakElement,
44     CloseElement,
45     SkipDeclNode,
46     SkipComment,
47     SkipCommentOrDecl,
48     SkipCData,
49     TargetData
50   };
51 
52   bool DoSyntaxParse(CFX_XMLDocument* doc);
53   WideString GetTextData();
54   void ProcessTextChar(wchar_t ch);
55   void ProcessTargetData();
56 
57   CFX_XMLNode* current_node_ = nullptr;
58   RetainPtr<CFX_SeekableStreamProxy> stream_;
59   std::vector<wchar_t, FxAllocAllocator<wchar_t>> current_text_;
60   size_t xml_plane_size_ = 1024;
61   int32_t entity_start_ = -1;
62 };
63 
64 #endif  // CORE_FXCRT_XML_CFX_XMLPARSER_H_
65