1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #ifndef CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_
8 #define CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_
9 
10 #include <map>
11 #include <memory>
12 #include <set>
13 #include <utility>
14 #include <vector>
15 
16 #include "core/fpdfapi/parser/cpdf_parser.h"
17 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
18 #include "core/fxcrt/unowned_ptr.h"
19 
20 class CPDF_CrossRefAvail;
21 class CPDF_Dictionary;
22 class CPDF_HintTables;
23 class CPDF_IndirectObjectHolder;
24 class CPDF_LinearizedHeader;
25 class CPDF_PageObjectAvail;
26 class CPDF_Parser;
27 class CPDF_ReadValidator;
28 
// Status values for the document availability check; CPDF_DataAvail stores
// the current one in |m_docStatus|, which starts at PDF_DATAAVAIL_HEADER.
// The values are consecutive integers starting at 0 and are spelled out
// explicitly so that the numbering is visible at a glance.
enum PDF_DATAAVAIL_STATUS {
  PDF_DATAAVAIL_HEADER = 0,
  PDF_DATAAVAIL_FIRSTPAGE = 1,
  PDF_DATAAVAIL_HINTTABLE = 2,
  PDF_DATAAVAIL_LOADALLCROSSREF = 3,
  PDF_DATAAVAIL_ROOT = 4,
  PDF_DATAAVAIL_INFO = 5,
  PDF_DATAAVAIL_PAGETREE = 6,
  PDF_DATAAVAIL_PAGE = 7,
  PDF_DATAAVAIL_PAGE_LATERLOAD = 8,
  PDF_DATAAVAIL_RESOURCES = 9,
  PDF_DATAAVAIL_DONE = 10,
  PDF_DATAAVAIL_ERROR = 11,
  PDF_DATAAVAIL_LOADALLFILE = 12,
};
44 
// Kind tag for a page node (the type of CPDF_DataAvail::PageNode::m_type).
// The values are consecutive integers starting at 0 and are spelled out
// explicitly so that the numbering is visible at a glance.
enum PDF_PAGENODE_TYPE {
  PDF_PAGENODE_UNKNOWN = 0,
  PDF_PAGENODE_PAGE = 1,
  PDF_PAGENODE_PAGES = 2,
  PDF_PAGENODE_ARRAY = 3,
};
51 
52 class CPDF_DataAvail final {
53  public:
54   // Must match PDF_DATA_* definitions in public/fpdf_dataavail.h, but cannot
55   // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts
56   // to make sure the two sets of values match.
57   enum DocAvailStatus {
58     DataError = -1,        // PDF_DATA_ERROR
59     DataNotAvailable = 0,  // PDF_DATA_NOTAVAIL
60     DataAvailable = 1,     // PDF_DATA_AVAIL
61   };
62 
63   // Must match PDF_*LINEAR* definitions in public/fpdf_dataavail.h, but cannot
64   // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts
65   // to make sure the two sets of values match.
66   enum DocLinearizationStatus {
67     LinearizationUnknown = -1,  // PDF_LINEARIZATION_UNKNOWN
68     NotLinearized = 0,          // PDF_NOT_LINEARIZED
69     Linearized = 1,             // PDF_LINEARIZED
70   };
71 
72   // Must match PDF_FORM_* definitions in public/fpdf_dataavail.h, but cannot
73   // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts
74   // to make sure the two sets of values match.
75   enum DocFormStatus {
76     FormError = -1,        // PDF_FORM_ERROR
77     FormNotAvailable = 0,  // PDF_FORM_NOTAVAIL
78     FormAvailable = 1,     // PDF_FORM_AVAIL
79     FormNotExist = 2,      // PDF_FORM_NOTEXIST
80   };
81 
82   class FileAvail {
83    public:
84     virtual ~FileAvail();
85     virtual bool IsDataAvail(FX_FILESIZE offset, size_t size) = 0;
86   };
87 
88   class DownloadHints {
89    public:
90     virtual ~DownloadHints();
91     virtual void AddSegment(FX_FILESIZE offset, size_t size) = 0;
92   };
93 
94   CPDF_DataAvail(FileAvail* pFileAvail,
95                  const RetainPtr<IFX_SeekableReadStream>& pFileRead,
96                  bool bSupportHintTable);
97   ~CPDF_DataAvail();
98 
99   DocAvailStatus IsDocAvail(DownloadHints* pHints);
100   DocAvailStatus IsPageAvail(uint32_t dwPage, DownloadHints* pHints);
101   DocFormStatus IsFormAvail(DownloadHints* pHints);
102   DocLinearizationStatus IsLinearizedPDF();
103   RetainPtr<IFX_SeekableReadStream> GetFileRead() const;
104   int GetPageCount() const;
105   CPDF_Dictionary* GetPage(int index);
106   RetainPtr<CPDF_ReadValidator> GetValidator() const;
107 
108   std::pair<CPDF_Parser::Error, std::unique_ptr<CPDF_Document>> ParseDocument(
109       const char* password);
110 
GetHintTables()111   const CPDF_HintTables* GetHintTables() const { return m_pHintTables.get(); }
112 
113  protected:
114   class PageNode {
115    public:
116     PageNode();
117     ~PageNode();
118 
119     PDF_PAGENODE_TYPE m_type;
120     uint32_t m_dwPageNo;
121     std::vector<std::unique_ptr<PageNode>> m_ChildNodes;
122   };
123 
124   static const int kMaxPageRecursionDepth = 1024;
125 
126   bool CheckDocStatus();
127   bool CheckHeader();
128   bool CheckFirstPage();
129   bool CheckHintTables();
130   bool CheckRoot();
131   bool CheckInfo();
132   bool CheckPages();
133   bool CheckPage();
134   DocAvailStatus CheckResources(const CPDF_Dictionary* page);
135   DocFormStatus CheckAcroForm();
136   bool CheckPageStatus();
137 
138   DocAvailStatus CheckHeaderAndLinearized();
139   std::unique_ptr<CPDF_Object> ParseIndirectObjectAt(
140       FX_FILESIZE pos,
141       uint32_t objnum,
142       CPDF_IndirectObjectHolder* pObjList = nullptr);
143   std::unique_ptr<CPDF_Object> GetObject(uint32_t objnum,
144                                          bool* pExistInFile);
145   bool GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages);
146   bool PreparePageItem();
147   bool LoadPages();
148   bool CheckAndLoadAllXref();
149   bool LoadAllFile();
150   DocAvailStatus CheckLinearizedData();
151 
152   bool CheckPage(uint32_t dwPage);
153   bool LoadDocPages();
154   bool LoadDocPage(uint32_t dwPage);
155   bool CheckPageNode(const PageNode& pageNode,
156                      int32_t iPage,
157                      int32_t& iCount,
158                      int level);
159   bool CheckUnknownPageNode(uint32_t dwPageNo, PageNode* pPageNode);
160   bool CheckArrayPageNode(uint32_t dwPageNo, PageNode* pPageNode);
161   bool CheckPageCount();
162   bool IsFirstCheck(uint32_t dwPage);
163   void ResetFirstCheck(uint32_t dwPage);
164   bool ValidatePage(uint32_t dwPage);
165   CPDF_SyntaxParser* GetSyntaxParser() const;
166 
167   FileAvail* const m_pFileAvail;
168   RetainPtr<CPDF_ReadValidator> m_pFileRead;
169   CPDF_Parser m_parser;
170   std::unique_ptr<CPDF_Object> m_pRoot;
171   uint32_t m_dwRootObjNum = 0;
172   uint32_t m_dwInfoObjNum = 0;
173   std::unique_ptr<CPDF_LinearizedHeader> m_pLinearized;
174   bool m_bDocAvail = false;
175   std::unique_ptr<CPDF_CrossRefAvail> m_pCrossRefAvail;
176   PDF_DATAAVAIL_STATUS m_docStatus = PDF_DATAAVAIL_HEADER;
177   const FX_FILESIZE m_dwFileLen;
178   CPDF_Document* m_pDocument = nullptr;
179   std::vector<uint32_t> m_PageObjList;
180   uint32_t m_PagesObjNum = 0;
181   bool m_bLinearedDataOK = false;
182   bool m_bMainXRefLoadTried = false;
183   bool m_bMainXRefLoadedOK = false;
184   bool m_bPagesTreeLoad = false;
185   bool m_bPagesLoad = false;
186   CPDF_Parser* m_pCurrentParser = nullptr;
187   std::unique_ptr<CPDF_PageObjectAvail> m_pFormAvail;
188   std::vector<std::unique_ptr<CPDF_Object>> m_PagesArray;
189   uint32_t m_dwEncryptObjNum = 0;
190   bool m_bTotalLoadPageTree = false;
191   bool m_bCurPageDictLoadOK = false;
192   PageNode m_PageNode;
193   std::set<uint32_t> m_pageMapCheckState;
194   std::set<uint32_t> m_pagesLoadState;
195   std::set<uint32_t> m_SeenPrevPositions;
196   std::unique_ptr<CPDF_HintTables> m_pHintTables;
197   const bool m_bSupportHintTable;
198   std::map<uint32_t, std::unique_ptr<CPDF_PageObjectAvail>> m_PagesObjAvail;
199   std::map<const CPDF_Object*, std::unique_ptr<CPDF_PageObjectAvail>>
200       m_PagesResourcesAvail;
201   bool m_bHeaderAvail = false;
202 };
203 
204 #endif  // CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_
205