// Copyright 2016 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

7 #ifndef CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_
8 #define CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_
9 
10 #include <algorithm>
11 #include <memory>
12 #include <vector>
13 
14 #include "core/fxcrt/string_pool_template.h"
15 #include "core/fxcrt/weak_ptr.h"
16 
17 class CPDF_CryptoHandler;
18 class CPDF_Dictionary;
19 class CPDF_IndirectObjectHolder;
20 class CPDF_Object;
21 class CPDF_ReadValidator;
22 class CPDF_Stream;
23 class IFX_SeekableReadStream;
24 
25 class CPDF_SyntaxParser {
26  public:
27   enum class ParseType { kStrict, kLoose };
28 
29   CPDF_SyntaxParser();
30   explicit CPDF_SyntaxParser(const WeakPtr<ByteStringPool>& pPool);
31   ~CPDF_SyntaxParser();
32 
33   void InitParser(const RetainPtr<IFX_SeekableReadStream>& pFileAccess,
34                   uint32_t HeaderOffset);
35 
36   void InitParserWithValidator(const RetainPtr<CPDF_ReadValidator>& pValidator,
37                                uint32_t HeaderOffset);
38 
GetPos()39   FX_FILESIZE GetPos() const { return m_Pos; }
SetPos(FX_FILESIZE pos)40   void SetPos(FX_FILESIZE pos) { m_Pos = std::min(pos, m_FileLen); }
41 
42   std::unique_ptr<CPDF_Object> GetObjectBody(
43       CPDF_IndirectObjectHolder* pObjList);
44 
45   std::unique_ptr<CPDF_Object> GetIndirectObject(
46       CPDF_IndirectObjectHolder* pObjList,
47       ParseType parse_type);
48 
49   ByteString GetKeyword();
50   void ToNextLine();
51   void ToNextWord();
52   bool BackwardsSearchToWord(const ByteStringView& word, FX_FILESIZE limit);
53   FX_FILESIZE FindTag(const ByteStringView& tag, FX_FILESIZE limit);
54   bool ReadBlock(uint8_t* pBuf, uint32_t size);
55   bool GetCharAt(FX_FILESIZE pos, uint8_t& ch);
56   ByteString GetNextWord(bool* bIsNumber);
57   ByteString PeekNextWord(bool* bIsNumber);
58 
59   RetainPtr<IFX_SeekableReadStream> GetFileAccess() const;
60 
GetValidator()61   const RetainPtr<CPDF_ReadValidator>& GetValidator() const {
62     return m_pFileAccess;
63   }
64 
65  private:
66   friend class CPDF_Parser;
67   friend class CPDF_DataAvail;
68   friend class cpdf_syntax_parser_ReadHexString_Test;
69 
70   static const int kParserMaxRecursionDepth = 64;
71   static int s_CurrentRecursionDepth;
72 
73   uint32_t GetDirectNum();
74   bool ReadBlockAt(FX_FILESIZE read_pos);
75   bool GetNextChar(uint8_t& ch);
76   bool GetCharAtBackward(FX_FILESIZE pos, uint8_t* ch);
77   void GetNextWordInternal(bool* bIsNumber);
78   bool IsWholeWord(FX_FILESIZE startpos,
79                    FX_FILESIZE limit,
80                    const ByteStringView& tag,
81                    bool checkKeyword);
82 
83   ByteString ReadString();
84   ByteString ReadHexString();
85   unsigned int ReadEOLMarkers(FX_FILESIZE pos);
86   std::unique_ptr<CPDF_Stream> ReadStream(
87       std::unique_ptr<CPDF_Dictionary> pDict);
88 
89   bool IsPositionRead(FX_FILESIZE pos) const;
90 
91   std::unique_ptr<CPDF_Object> GetObjectBodyInternal(
92       CPDF_IndirectObjectHolder* pObjList,
93       ParseType parse_type);
94 
95   FX_FILESIZE m_Pos;
96   RetainPtr<CPDF_ReadValidator> m_pFileAccess;
97   FX_FILESIZE m_HeaderOffset;
98   FX_FILESIZE m_FileLen;
99   std::vector<uint8_t> m_pFileBuf;
100   FX_FILESIZE m_BufOffset;
101   uint8_t m_WordBuffer[257];
102   uint32_t m_WordSize;
103   WeakPtr<ByteStringPool> m_pPool;
104 };
105 
106 #endif  // CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_
107