// Copyright 2016 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #ifndef CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_
8 #define CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_
9 
10 #include <map>
11 #include <memory>
12 #include <set>
13 #include <utility>
14 #include <vector>
15 
16 #include "core/fpdfapi/parser/cpdf_document.h"
17 #include "core/fpdfapi/parser/cpdf_parser.h"
18 #include "core/fxcrt/unowned_ptr.h"
19 
// Forward declarations. In this header these types appear only behind raw
// pointers, std::unique_ptr or RetainPtr, so they are declared here rather
// than pulled in through additional includes.
class CPDF_CrossRefAvail;
class CPDF_Dictionary;
class CPDF_HintTables;
class CPDF_IndirectObjectHolder;
class CPDF_LinearizedHeader;
class CPDF_PageObjectAvail;
class CPDF_ReadValidator;
class CPDF_SyntaxParser;
28 
// Values stored in CPDF_DataAvail::m_docStatus, which is initialized to
// PDF_DATAAVAIL_HEADER. The enumerators are unscoped and take consecutive
// values starting at 0 in declaration order, so PDF_DATAAVAIL_LOADALLFILE
// is 12.
// NOTE(review): the names suggest successive stages of an availability
// check; confirm the actual transitions in the implementation file.
enum PDF_DATAAVAIL_STATUS {
  PDF_DATAAVAIL_HEADER = 0,
  PDF_DATAAVAIL_FIRSTPAGE,
  PDF_DATAAVAIL_HINTTABLE,
  PDF_DATAAVAIL_LOADALLCROSSREF,
  PDF_DATAAVAIL_ROOT,
  PDF_DATAAVAIL_INFO,
  PDF_DATAAVAIL_PAGETREE,
  PDF_DATAAVAIL_PAGE,
  PDF_DATAAVAIL_PAGE_LATERLOAD,
  PDF_DATAAVAIL_RESOURCES,
  PDF_DATAAVAIL_DONE,
  PDF_DATAAVAIL_ERROR,
  PDF_DATAAVAIL_LOADALLFILE,
};
44 
// Type tag stored in CPDF_DataAvail::PageNode::m_type. The enumerators are
// unscoped and take consecutive values starting at 0 in declaration order.
// NOTE(review): presumably distinguishes the kinds of object a page-tree
// entry can turn out to be; confirm against the implementation file.
enum PDF_PAGENODE_TYPE {
  PDF_PAGENODE_UNKNOWN = 0,
  PDF_PAGENODE_PAGE,
  PDF_PAGENODE_PAGES,
  PDF_PAGENODE_ARRAY,
};
51 
52 class CPDF_DataAvail final : public Observable::ObserverIface {
53  public:
54   // Must match PDF_DATA_* definitions in public/fpdf_dataavail.h, but cannot
55   // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts
56   // to make sure the two sets of values match.
57   enum DocAvailStatus {
58     DataError = -1,        // PDF_DATA_ERROR
59     DataNotAvailable = 0,  // PDF_DATA_NOTAVAIL
60     DataAvailable = 1,     // PDF_DATA_AVAIL
61   };
62 
63   // Must match PDF_*LINEAR* definitions in public/fpdf_dataavail.h, but cannot
64   // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts
65   // to make sure the two sets of values match.
66   enum DocLinearizationStatus {
67     LinearizationUnknown = -1,  // PDF_LINEARIZATION_UNKNOWN
68     NotLinearized = 0,          // PDF_NOT_LINEARIZED
69     Linearized = 1,             // PDF_LINEARIZED
70   };
71 
72   // Must match PDF_FORM_* definitions in public/fpdf_dataavail.h, but cannot
73   // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts
74   // to make sure the two sets of values match.
75   enum DocFormStatus {
76     FormError = -1,        // PDF_FORM_ERROR
77     FormNotAvailable = 0,  // PDF_FORM_NOTAVAIL
78     FormAvailable = 1,     // PDF_FORM_AVAIL
79     FormNotExist = 2,      // PDF_FORM_NOTEXIST
80   };
81 
82   class FileAvail {
83    public:
84     virtual ~FileAvail();
85     virtual bool IsDataAvail(FX_FILESIZE offset, size_t size) = 0;
86   };
87 
88   class DownloadHints {
89    public:
90     virtual ~DownloadHints();
91     virtual void AddSegment(FX_FILESIZE offset, size_t size) = 0;
92   };
93 
94   CPDF_DataAvail(FileAvail* pFileAvail,
95                  const RetainPtr<IFX_SeekableReadStream>& pFileRead,
96                  bool bSupportHintTable);
97   ~CPDF_DataAvail() override;
98 
99   // CPDF_Document::Observer:
100   void OnObservableDestroyed() override;
101 
102   DocAvailStatus IsDocAvail(DownloadHints* pHints);
103   DocAvailStatus IsPageAvail(uint32_t dwPage, DownloadHints* pHints);
104   DocFormStatus IsFormAvail(DownloadHints* pHints);
105   DocLinearizationStatus IsLinearizedPDF();
106   int GetPageCount() const;
107   CPDF_Dictionary* GetPageDictionary(int index) const;
108   RetainPtr<CPDF_ReadValidator> GetValidator() const;
109 
110   std::pair<CPDF_Parser::Error, std::unique_ptr<CPDF_Document>> ParseDocument(
111       std::unique_ptr<CPDF_Document::RenderDataIface> pRenderData,
112       std::unique_ptr<CPDF_Document::PageDataIface> pPageData,
113       const char* password);
114 
GetHintTables()115   const CPDF_HintTables* GetHintTables() const { return m_pHintTables.get(); }
116 
117  private:
118   class PageNode {
119    public:
120     PageNode();
121     ~PageNode();
122 
123     PDF_PAGENODE_TYPE m_type;
124     uint32_t m_dwPageNo;
125     std::vector<std::unique_ptr<PageNode>> m_ChildNodes;
126   };
127 
128   static const int kMaxPageRecursionDepth = 1024;
129 
130   bool CheckDocStatus();
131   bool CheckHeader();
132   bool CheckFirstPage();
133   bool CheckHintTables();
134   bool CheckRoot();
135   bool CheckInfo();
136   bool CheckPages();
137   bool CheckPage();
138   DocAvailStatus CheckResources(CPDF_Dictionary* page);
139   DocFormStatus CheckAcroForm();
140   bool CheckPageStatus();
141 
142   DocAvailStatus CheckHeaderAndLinearized();
143   RetainPtr<CPDF_Object> ParseIndirectObjectAt(
144       FX_FILESIZE pos,
145       uint32_t objnum,
146       CPDF_IndirectObjectHolder* pObjList) const;
147   RetainPtr<CPDF_Object> GetObject(uint32_t objnum, bool* pExistInFile);
148   bool GetPageKids(CPDF_Object* pPages);
149   bool PreparePageItem();
150   bool LoadPages();
151   bool CheckAndLoadAllXref();
152   bool LoadAllFile();
153   DocAvailStatus CheckLinearizedData();
154 
155   bool CheckPage(uint32_t dwPage);
156   bool LoadDocPages();
157   bool LoadDocPage(uint32_t dwPage);
158   bool CheckPageNode(const PageNode& pageNode,
159                      int32_t iPage,
160                      int32_t& iCount,
161                      int level);
162   bool CheckUnknownPageNode(uint32_t dwPageNo, PageNode* pPageNode);
163   bool CheckArrayPageNode(uint32_t dwPageNo, PageNode* pPageNode);
164   bool CheckPageCount();
165   bool IsFirstCheck(uint32_t dwPage);
166   void ResetFirstCheck(uint32_t dwPage);
167   bool ValidatePage(uint32_t dwPage) const;
168   CPDF_SyntaxParser* GetSyntaxParser() const;
169 
170   RetainPtr<CPDF_ReadValidator> m_pFileRead;
171   CPDF_Parser m_parser;
172   RetainPtr<CPDF_Dictionary> m_pRoot;
173   std::unique_ptr<CPDF_LinearizedHeader> m_pLinearized;
174   bool m_bDocAvail = false;
175   std::unique_ptr<CPDF_CrossRefAvail> m_pCrossRefAvail;
176   PDF_DATAAVAIL_STATUS m_docStatus = PDF_DATAAVAIL_HEADER;
177   const FX_FILESIZE m_dwFileLen;
178   UnownedPtr<CPDF_Document> m_pDocument;
179   std::vector<uint32_t> m_PageObjList;
180   uint32_t m_PagesObjNum = 0;
181   bool m_bLinearedDataOK = false;
182   bool m_bMainXRefLoadTried = false;
183   bool m_bMainXRefLoadedOK = false;
184   bool m_bPagesTreeLoad = false;
185   bool m_bPagesLoad = false;
186   std::unique_ptr<CPDF_PageObjectAvail> m_pFormAvail;
187   std::vector<RetainPtr<CPDF_Object>> m_PagesArray;
188   bool m_bTotalLoadPageTree = false;
189   bool m_bCurPageDictLoadOK = false;
190   PageNode m_PageNode;
191   std::set<uint32_t> m_pageMapCheckState;
192   std::set<uint32_t> m_pagesLoadState;
193   std::unique_ptr<CPDF_HintTables> m_pHintTables;
194   const bool m_bSupportHintTable;
195   std::map<uint32_t, std::unique_ptr<CPDF_PageObjectAvail>> m_PagesObjAvail;
196   std::map<const CPDF_Object*, std::unique_ptr<CPDF_PageObjectAvail>>
197       m_PagesResourcesAvail;
198   bool m_bHeaderAvail = false;
199 };
200 
201 #endif  // CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_
202