1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #ifndef CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_
8 #define CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_
9 
10 #include <memory>
11 #include <vector>
12 
13 #include "core/fpdfapi/parser/cpdf_stream.h"
14 #include "core/fxcrt/string_pool_template.h"
15 #include "core/fxcrt/weak_ptr.h"
16 
// Forward declarations.
class CPDF_CryptoHandler;
class CPDF_Dictionary;
class CPDF_IndirectObjectHolder;
class CPDF_Object;
class CPDF_ReadValidator;
class CPDF_Stream;
class IFX_SeekableReadStream;
24 
25 class CPDF_SyntaxParser {
26  public:
27   enum class ParseType { kStrict, kLoose };
28 
29   static std::unique_ptr<CPDF_SyntaxParser> CreateForTesting(
30       const RetainPtr<IFX_SeekableReadStream>& pFileAccess,
31       FX_FILESIZE HeaderOffset);
32 
33   explicit CPDF_SyntaxParser(
34       const RetainPtr<IFX_SeekableReadStream>& pFileAccess);
35   CPDF_SyntaxParser(const RetainPtr<CPDF_ReadValidator>& pValidator,
36                     FX_FILESIZE HeaderOffset);
37   ~CPDF_SyntaxParser();
38 
SetReadBufferSize(uint32_t read_buffer_size)39   void SetReadBufferSize(uint32_t read_buffer_size) {
40     m_ReadBufferSize = read_buffer_size;
41   }
42 
GetPos()43   FX_FILESIZE GetPos() const { return m_Pos; }
44   void SetPos(FX_FILESIZE pos);
45 
46   RetainPtr<CPDF_Object> GetObjectBody(CPDF_IndirectObjectHolder* pObjList);
47 
48   RetainPtr<CPDF_Object> GetIndirectObject(CPDF_IndirectObjectHolder* pObjList,
49                                            ParseType parse_type);
50 
51   ByteString GetKeyword();
52   void ToNextLine();
53   void ToNextWord();
54   bool BackwardsSearchToWord(ByteStringView word, FX_FILESIZE limit);
55   FX_FILESIZE FindTag(ByteStringView tag);
56   bool ReadBlock(uint8_t* pBuf, uint32_t size);
57   bool GetCharAt(FX_FILESIZE pos, uint8_t& ch);
58   ByteString GetNextWord(bool* bIsNumber);
59   ByteString PeekNextWord(bool* bIsNumber);
60 
GetValidator()61   const RetainPtr<CPDF_ReadValidator>& GetValidator() const {
62     return m_pFileAccess;
63   }
64   uint32_t GetDirectNum();
65   bool GetNextChar(uint8_t& ch);
66 
67   // The document size may be smaller than the file size.
68   // The syntax parser use position relative to document
69   // offset (|m_HeaderOffset|).
70   // The document size will be FileSize - "Header offset".
71   // All offsets was readed from document, should not be great than document
72   // size. Use it for checks instead of real file size.
73   FX_FILESIZE GetDocumentSize() const;
74 
75   ByteString ReadString();
76   ByteString ReadHexString();
77 
78  private:
79   friend class CPDF_DataAvail;
80   friend class cpdf_syntax_parser_ReadHexString_Test;
81 
82   static const int kParserMaxRecursionDepth = 64;
83   static int s_CurrentRecursionDepth;
84 
85   bool ReadBlockAt(FX_FILESIZE read_pos);
86   bool GetCharAtBackward(FX_FILESIZE pos, uint8_t* ch);
87   void GetNextWordInternal(bool* bIsNumber);
88   bool IsWholeWord(FX_FILESIZE startpos,
89                    FX_FILESIZE limit,
90                    ByteStringView tag,
91                    bool checkKeyword);
92 
93   unsigned int ReadEOLMarkers(FX_FILESIZE pos);
94   FX_FILESIZE FindWordPos(ByteStringView word);
95   FX_FILESIZE FindStreamEndPos();
96   RetainPtr<CPDF_Stream> ReadStream(RetainPtr<CPDF_Dictionary> pDict);
97 
98   bool IsPositionRead(FX_FILESIZE pos) const;
99 
100   RetainPtr<CPDF_Object> GetObjectBodyInternal(
101       CPDF_IndirectObjectHolder* pObjList,
102       ParseType parse_type);
103 
104   RetainPtr<CPDF_ReadValidator> m_pFileAccess;
105   // The syntax parser use position relative to header offset.
106   // The header contains at file start, and can follow after some stuff. We
107   // ignore this stuff.
108   const FX_FILESIZE m_HeaderOffset;
109   const FX_FILESIZE m_FileLen;
110   FX_FILESIZE m_Pos = 0;
111   WeakPtr<ByteStringPool> m_pPool;
112   std::vector<uint8_t> m_pFileBuf;
113   FX_FILESIZE m_BufOffset = 0;
114   uint32_t m_WordSize = 0;
115   uint8_t m_WordBuffer[257];
116   uint32_t m_ReadBufferSize = CPDF_Stream::kFileBufSize;
117 };
118 
119 #endif  // CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_
120