1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/parser/cpdf_data_avail.h"
8 
9 #include <algorithm>
10 #include <memory>
11 #include <utility>
12 
13 #include "core/fpdfapi/parser/cpdf_array.h"
14 #include "core/fpdfapi/parser/cpdf_cross_ref_avail.h"
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_document.h"
17 #include "core/fpdfapi/parser/cpdf_hint_tables.h"
18 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
19 #include "core/fpdfapi/parser/cpdf_name.h"
20 #include "core/fpdfapi/parser/cpdf_number.h"
21 #include "core/fpdfapi/parser/cpdf_page_object_avail.h"
22 #include "core/fpdfapi/parser/cpdf_read_validator.h"
23 #include "core/fpdfapi/parser/cpdf_reference.h"
24 #include "core/fpdfapi/parser/cpdf_stream.h"
25 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
26 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
27 #include "core/fxcrt/fx_extension.h"
28 #include "core/fxcrt/fx_safe_types.h"
29 #include "third_party/base/compiler_specific.h"
30 #include "third_party/base/numerics/safe_conversions.h"
31 #include "third_party/base/ptr_util.h"
32 #include "third_party/base/stl_util.h"
33 
34 namespace {
35 
36 // static
GetResourceObject(CPDF_Dictionary * pDict)37 CPDF_Object* GetResourceObject(CPDF_Dictionary* pDict) {
38   constexpr size_t kMaxHierarchyDepth = 64;
39   size_t depth = 0;
40 
41   CPDF_Dictionary* dictionary_to_check = pDict;
42   while (dictionary_to_check) {
43     CPDF_Object* result = dictionary_to_check->GetObjectFor("Resources");
44     if (result)
45       return result;
46     CPDF_Object* parent = dictionary_to_check->GetObjectFor("Parent");
47     dictionary_to_check = parent ? parent->GetDict() : nullptr;
48 
49     if (++depth > kMaxHierarchyDepth) {
50       // We have cycle in parents hierarchy.
51       return nullptr;
52     }
53   }
54   return nullptr;
55 }
56 
57 class HintsScope {
58  public:
HintsScope(RetainPtr<CPDF_ReadValidator> validator,CPDF_DataAvail::DownloadHints * hints)59   HintsScope(RetainPtr<CPDF_ReadValidator> validator,
60              CPDF_DataAvail::DownloadHints* hints)
61       : validator_(std::move(validator)) {
62     ASSERT(validator_);
63     validator_->SetDownloadHints(hints);
64   }
65 
~HintsScope()66   ~HintsScope() { validator_->SetDownloadHints(nullptr); }
67 
68  private:
69   RetainPtr<CPDF_ReadValidator> validator_;
70 };
71 
72 }  // namespace
73 
~FileAvail()74 CPDF_DataAvail::FileAvail::~FileAvail() {}
75 
~DownloadHints()76 CPDF_DataAvail::DownloadHints::~DownloadHints() {}
77 
CPDF_DataAvail(FileAvail * pFileAvail,const RetainPtr<IFX_SeekableReadStream> & pFileRead,bool bSupportHintTable)78 CPDF_DataAvail::CPDF_DataAvail(
79     FileAvail* pFileAvail,
80     const RetainPtr<IFX_SeekableReadStream>& pFileRead,
81     bool bSupportHintTable)
82     : m_pFileRead(
83           pdfium::MakeRetain<CPDF_ReadValidator>(pFileRead, pFileAvail)),
84       m_dwFileLen(m_pFileRead->GetSize()),
85       m_bSupportHintTable(bSupportHintTable) {}
86 
~CPDF_DataAvail()87 CPDF_DataAvail::~CPDF_DataAvail() {
88   m_pHintTables.reset();
89   if (m_pDocument)
90     m_pDocument->RemoveObserver(this);
91 }
92 
OnObservableDestroyed()93 void CPDF_DataAvail::OnObservableDestroyed() {
94   m_pDocument = nullptr;
95   m_pFormAvail.reset();
96   m_PagesArray.clear();
97   m_PagesObjAvail.clear();
98   m_PagesResourcesAvail.clear();
99 }
100 
IsDocAvail(DownloadHints * pHints)101 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail(
102     DownloadHints* pHints) {
103   if (!m_dwFileLen)
104     return DataError;
105 
106   const HintsScope hints_scope(GetValidator(), pHints);
107   while (!m_bDocAvail) {
108     if (!CheckDocStatus())
109       return DataNotAvailable;
110   }
111 
112   return DataAvailable;
113 }
114 
CheckDocStatus()115 bool CPDF_DataAvail::CheckDocStatus() {
116   switch (m_docStatus) {
117     case PDF_DATAAVAIL_HEADER:
118       return CheckHeader();
119     case PDF_DATAAVAIL_FIRSTPAGE:
120       return CheckFirstPage();
121     case PDF_DATAAVAIL_HINTTABLE:
122       return CheckHintTables();
123     case PDF_DATAAVAIL_LOADALLCROSSREF:
124       return CheckAndLoadAllXref();
125     case PDF_DATAAVAIL_LOADALLFILE:
126       return LoadAllFile();
127     case PDF_DATAAVAIL_ROOT:
128       return CheckRoot();
129     case PDF_DATAAVAIL_INFO:
130       return CheckInfo();
131     case PDF_DATAAVAIL_PAGETREE:
132       if (m_bTotalLoadPageTree)
133         return CheckPages();
134       return LoadDocPages();
135     case PDF_DATAAVAIL_PAGE:
136       if (m_bTotalLoadPageTree)
137         return CheckPage();
138       m_docStatus = PDF_DATAAVAIL_PAGE_LATERLOAD;
139       return true;
140     case PDF_DATAAVAIL_ERROR:
141       return LoadAllFile();
142     case PDF_DATAAVAIL_PAGE_LATERLOAD:
143       m_docStatus = PDF_DATAAVAIL_PAGE;
144       FALLTHROUGH;
145     default:
146       m_bDocAvail = true;
147       return true;
148   }
149 }
150 
CheckPageStatus()151 bool CPDF_DataAvail::CheckPageStatus() {
152   switch (m_docStatus) {
153     case PDF_DATAAVAIL_PAGETREE:
154       return CheckPages();
155     case PDF_DATAAVAIL_PAGE:
156       return CheckPage();
157     case PDF_DATAAVAIL_ERROR:
158       return LoadAllFile();
159     default:
160       m_bPagesTreeLoad = true;
161       m_bPagesLoad = true;
162       return true;
163   }
164 }
165 
LoadAllFile()166 bool CPDF_DataAvail::LoadAllFile() {
167   if (GetValidator()->CheckWholeFileAndRequestIfUnavailable()) {
168     m_docStatus = PDF_DATAAVAIL_DONE;
169     return true;
170   }
171   return false;
172 }
173 
CheckAndLoadAllXref()174 bool CPDF_DataAvail::CheckAndLoadAllXref() {
175   if (!m_pCrossRefAvail) {
176     const CPDF_ReadValidator::Session read_session(GetValidator());
177     const FX_FILESIZE last_xref_offset = m_parser.ParseStartXRef();
178     if (GetValidator()->has_read_problems())
179       return false;
180 
181     if (last_xref_offset <= 0) {
182       m_docStatus = PDF_DATAAVAIL_ERROR;
183       return false;
184     }
185 
186     m_pCrossRefAvail = pdfium::MakeUnique<CPDF_CrossRefAvail>(GetSyntaxParser(),
187                                                               last_xref_offset);
188   }
189 
190   switch (m_pCrossRefAvail->CheckAvail()) {
191     case DocAvailStatus::DataAvailable:
192       break;
193     case DocAvailStatus::DataNotAvailable:
194       return false;
195     case DocAvailStatus::DataError:
196       m_docStatus = PDF_DATAAVAIL_ERROR;
197       return false;
198     default:
199       NOTREACHED();
200       return false;
201   }
202 
203   if (!m_parser.LoadAllCrossRefV4(m_pCrossRefAvail->last_crossref_offset()) &&
204       !m_parser.LoadAllCrossRefV5(m_pCrossRefAvail->last_crossref_offset())) {
205     m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
206     return false;
207   }
208 
209   m_docStatus = PDF_DATAAVAIL_ROOT;
210   return true;
211 }
212 
GetObject(uint32_t objnum,bool * pExistInFile)213 RetainPtr<CPDF_Object> CPDF_DataAvail::GetObject(uint32_t objnum,
214                                                  bool* pExistInFile) {
215   CPDF_Parser* pParser = nullptr;
216 
217   if (pExistInFile)
218     *pExistInFile = true;
219 
220   pParser = m_pDocument ? m_pDocument->GetParser() : &m_parser;
221 
222   RetainPtr<CPDF_Object> pRet;
223   if (pParser) {
224     const CPDF_ReadValidator::Session read_session(GetValidator());
225     pRet = pParser->ParseIndirectObject(objnum);
226     if (GetValidator()->has_read_problems())
227       return nullptr;
228   }
229 
230   if (!pRet && pExistInFile)
231     *pExistInFile = false;
232 
233   return pRet;
234 }
235 
CheckInfo()236 bool CPDF_DataAvail::CheckInfo() {
237   const uint32_t dwInfoObjNum = m_parser.GetInfoObjNum();
238   if (dwInfoObjNum == CPDF_Object::kInvalidObjNum) {
239     m_docStatus = PDF_DATAAVAIL_PAGETREE;
240     return true;
241   }
242 
243   const CPDF_ReadValidator::Session read_session(GetValidator());
244   m_parser.ParseIndirectObject(dwInfoObjNum);
245   if (GetValidator()->has_read_problems())
246     return false;
247 
248   m_docStatus = PDF_DATAAVAIL_PAGETREE;
249   return true;
250 }
251 
CheckRoot()252 bool CPDF_DataAvail::CheckRoot() {
253   const uint32_t dwRootObjNum = m_parser.GetRootObjNum();
254   if (dwRootObjNum == CPDF_Object::kInvalidObjNum) {
255     m_docStatus = PDF_DATAAVAIL_ERROR;
256     return true;
257   }
258 
259   const CPDF_ReadValidator::Session read_session(GetValidator());
260   m_pRoot = ToDictionary(m_parser.ParseIndirectObject(dwRootObjNum));
261   if (GetValidator()->has_read_problems())
262     return false;
263 
264   const CPDF_Reference* pRef =
265       ToReference(m_pRoot ? m_pRoot->GetObjectFor("Pages") : nullptr);
266   if (!pRef) {
267     m_docStatus = PDF_DATAAVAIL_ERROR;
268     return false;
269   }
270 
271   m_PagesObjNum = pRef->GetRefObjNum();
272   m_docStatus = PDF_DATAAVAIL_INFO;
273   return true;
274 }
275 
PreparePageItem()276 bool CPDF_DataAvail::PreparePageItem() {
277   const CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
278   const CPDF_Reference* pRef =
279       ToReference(pRoot ? pRoot->GetObjectFor("Pages") : nullptr);
280   if (!pRef) {
281     m_docStatus = PDF_DATAAVAIL_ERROR;
282     return false;
283   }
284 
285   m_PagesObjNum = pRef->GetRefObjNum();
286   m_docStatus = PDF_DATAAVAIL_PAGETREE;
287   return true;
288 }
289 
IsFirstCheck(uint32_t dwPage)290 bool CPDF_DataAvail::IsFirstCheck(uint32_t dwPage) {
291   return m_pageMapCheckState.insert(dwPage).second;
292 }
293 
ResetFirstCheck(uint32_t dwPage)294 void CPDF_DataAvail::ResetFirstCheck(uint32_t dwPage) {
295   m_pageMapCheckState.erase(dwPage);
296 }
297 
CheckPage()298 bool CPDF_DataAvail::CheckPage() {
299   std::vector<uint32_t> UnavailObjList;
300   for (uint32_t dwPageObjNum : m_PageObjList) {
301     bool bExists = false;
302     RetainPtr<CPDF_Object> pObj = GetObject(dwPageObjNum, &bExists);
303     if (!pObj) {
304       if (bExists)
305         UnavailObjList.push_back(dwPageObjNum);
306       continue;
307     }
308     CPDF_Array* pArray = ToArray(pObj.Get());
309     if (pArray) {
310       CPDF_ArrayLocker locker(pArray);
311       for (const auto& pArrayObj : locker) {
312         if (CPDF_Reference* pRef = ToReference(pArrayObj.Get()))
313           UnavailObjList.push_back(pRef->GetRefObjNum());
314       }
315     }
316     if (!pObj->IsDictionary())
317       continue;
318 
319     ByteString type = pObj->GetDict()->GetStringFor("Type");
320     if (type == "Pages") {
321       m_PagesArray.push_back(std::move(pObj));
322       continue;
323     }
324   }
325   m_PageObjList.clear();
326   if (!UnavailObjList.empty()) {
327     m_PageObjList = std::move(UnavailObjList);
328     return false;
329   }
330   size_t iPages = m_PagesArray.size();
331   for (size_t i = 0; i < iPages; ++i) {
332     RetainPtr<CPDF_Object> pPages = std::move(m_PagesArray[i]);
333     if (pPages && !GetPageKids(pPages.Get())) {
334       m_PagesArray.clear();
335       m_docStatus = PDF_DATAAVAIL_ERROR;
336       return false;
337     }
338   }
339   m_PagesArray.clear();
340   if (m_PageObjList.empty())
341     m_docStatus = PDF_DATAAVAIL_DONE;
342 
343   return true;
344 }
345 
GetPageKids(CPDF_Object * pPages)346 bool CPDF_DataAvail::GetPageKids(CPDF_Object* pPages) {
347   CPDF_Dictionary* pDict = pPages->GetDict();
348   CPDF_Object* pKids = pDict ? pDict->GetObjectFor("Kids") : nullptr;
349   if (!pKids)
350     return true;
351 
352   switch (pKids->GetType()) {
353     case CPDF_Object::kReference:
354       m_PageObjList.push_back(pKids->AsReference()->GetRefObjNum());
355       break;
356     case CPDF_Object::kArray: {
357       CPDF_Array* pKidsArray = pKids->AsArray();
358       for (size_t i = 0; i < pKidsArray->size(); ++i) {
359         if (CPDF_Reference* pRef = ToReference(pKidsArray->GetObjectAt(i)))
360           m_PageObjList.push_back(pRef->GetRefObjNum());
361       }
362       break;
363     }
364     default:
365       m_docStatus = PDF_DATAAVAIL_ERROR;
366       return false;
367   }
368   return true;
369 }
370 
CheckPages()371 bool CPDF_DataAvail::CheckPages() {
372   bool bExists = false;
373   RetainPtr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
374   if (!bExists) {
375     m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
376     return true;
377   }
378 
379   if (!pPages) {
380     if (m_docStatus == PDF_DATAAVAIL_ERROR) {
381       m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
382       return true;
383     }
384     return false;
385   }
386 
387   if (!GetPageKids(pPages.Get())) {
388     m_docStatus = PDF_DATAAVAIL_ERROR;
389     return false;
390   }
391 
392   m_docStatus = PDF_DATAAVAIL_PAGE;
393   return true;
394 }
395 
CheckHeader()396 bool CPDF_DataAvail::CheckHeader() {
397   switch (CheckHeaderAndLinearized()) {
398     case DocAvailStatus::DataAvailable:
399       m_docStatus = m_pLinearized ? PDF_DATAAVAIL_FIRSTPAGE
400                                   : PDF_DATAAVAIL_LOADALLCROSSREF;
401       return true;
402     case DocAvailStatus::DataNotAvailable:
403       return false;
404     case DocAvailStatus::DataError:
405       m_docStatus = PDF_DATAAVAIL_ERROR;
406       return true;
407     default:
408       NOTREACHED();
409       return false;
410   }
411 }
412 
CheckFirstPage()413 bool CPDF_DataAvail::CheckFirstPage() {
414   if (!m_pLinearized->GetFirstPageEndOffset() ||
415       !m_pLinearized->GetFileSize() ||
416       !m_pLinearized->GetMainXRefTableFirstEntryOffset()) {
417     m_docStatus = PDF_DATAAVAIL_ERROR;
418     return false;
419   }
420 
421   uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset();
422   dwEnd += 512;
423   if ((FX_FILESIZE)dwEnd > m_dwFileLen)
424     dwEnd = (uint32_t)m_dwFileLen;
425 
426   const FX_FILESIZE start_pos = m_dwFileLen > 1024 ? 1024 : m_dwFileLen;
427   const size_t data_size = dwEnd > 1024 ? static_cast<size_t>(dwEnd - 1024) : 0;
428   if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(start_pos,
429                                                              data_size))
430     return false;
431 
432   m_docStatus =
433       m_bSupportHintTable ? PDF_DATAAVAIL_HINTTABLE : PDF_DATAAVAIL_DONE;
434   return true;
435 }
436 
CheckHintTables()437 bool CPDF_DataAvail::CheckHintTables() {
438   const CPDF_ReadValidator::Session read_session(GetValidator());
439   m_pHintTables =
440       CPDF_HintTables::Parse(GetSyntaxParser(), m_pLinearized.get());
441 
442   if (GetValidator()->read_error()) {
443     m_docStatus = PDF_DATAAVAIL_ERROR;
444     return true;
445   }
446   if (GetValidator()->has_unavailable_data())
447     return false;
448 
449   m_docStatus = PDF_DATAAVAIL_DONE;
450   return true;
451 }
452 
ParseIndirectObjectAt(FX_FILESIZE pos,uint32_t objnum,CPDF_IndirectObjectHolder * pObjList) const453 RetainPtr<CPDF_Object> CPDF_DataAvail::ParseIndirectObjectAt(
454     FX_FILESIZE pos,
455     uint32_t objnum,
456     CPDF_IndirectObjectHolder* pObjList) const {
457   const FX_FILESIZE SavedPos = GetSyntaxParser()->GetPos();
458   GetSyntaxParser()->SetPos(pos);
459   RetainPtr<CPDF_Object> result = GetSyntaxParser()->GetIndirectObject(
460       pObjList, CPDF_SyntaxParser::ParseType::kLoose);
461   GetSyntaxParser()->SetPos(SavedPos);
462   return (result && (!objnum || result->GetObjNum() == objnum))
463              ? std::move(result)
464              : nullptr;
465 }
466 
IsLinearizedPDF()467 CPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() {
468   switch (CheckHeaderAndLinearized()) {
469     case DocAvailStatus::DataAvailable:
470       return m_pLinearized ? DocLinearizationStatus::Linearized
471                            : DocLinearizationStatus::NotLinearized;
472     case DocAvailStatus::DataNotAvailable:
473       return DocLinearizationStatus::LinearizationUnknown;
474     case DocAvailStatus::DataError:
475       return DocLinearizationStatus::NotLinearized;
476     default:
477       NOTREACHED();
478       return DocLinearizationStatus::LinearizationUnknown;
479   }
480 }
481 
CheckHeaderAndLinearized()482 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckHeaderAndLinearized() {
483   if (m_bHeaderAvail)
484     return DocAvailStatus::DataAvailable;
485 
486   const CPDF_ReadValidator::Session read_session(GetValidator());
487   const Optional<FX_FILESIZE> header_offset = GetHeaderOffset(GetValidator());
488   if (GetValidator()->has_read_problems())
489     return DocAvailStatus::DataNotAvailable;
490 
491   if (!header_offset)
492     return DocAvailStatus::DataError;
493 
494   m_parser.m_pSyntax =
495       pdfium::MakeUnique<CPDF_SyntaxParser>(GetValidator(), *header_offset);
496   m_pLinearized = m_parser.ParseLinearizedHeader();
497   if (GetValidator()->has_read_problems())
498     return DocAvailStatus::DataNotAvailable;
499 
500   m_bHeaderAvail = true;
501   return DocAvailStatus::DataAvailable;
502 }
503 
CheckPage(uint32_t dwPage)504 bool CPDF_DataAvail::CheckPage(uint32_t dwPage) {
505   while (true) {
506     switch (m_docStatus) {
507       case PDF_DATAAVAIL_PAGETREE:
508         if (!LoadDocPages())
509           return false;
510         break;
511       case PDF_DATAAVAIL_PAGE:
512         if (!LoadDocPage(dwPage))
513           return false;
514         break;
515       case PDF_DATAAVAIL_ERROR:
516         return LoadAllFile();
517       default:
518         m_bPagesTreeLoad = true;
519         m_bPagesLoad = true;
520         m_bCurPageDictLoadOK = true;
521         m_docStatus = PDF_DATAAVAIL_PAGE;
522         return true;
523     }
524   }
525 }
526 
CheckArrayPageNode(uint32_t dwPageNo,PageNode * pPageNode)527 bool CPDF_DataAvail::CheckArrayPageNode(uint32_t dwPageNo,
528                                         PageNode* pPageNode) {
529   bool bExists = false;
530   RetainPtr<CPDF_Object> pPages = GetObject(dwPageNo, &bExists);
531   if (!bExists) {
532     m_docStatus = PDF_DATAAVAIL_ERROR;
533     return false;
534   }
535 
536   if (!pPages)
537     return false;
538 
539   CPDF_Array* pArray = pPages->AsArray();
540   if (!pArray) {
541     m_docStatus = PDF_DATAAVAIL_ERROR;
542     return false;
543   }
544 
545   pPageNode->m_type = PDF_PAGENODE_PAGES;
546   for (size_t i = 0; i < pArray->size(); ++i) {
547     CPDF_Reference* pKid = ToReference(pArray->GetObjectAt(i));
548     if (!pKid)
549       continue;
550 
551     auto pNode = pdfium::MakeUnique<PageNode>();
552     pNode->m_dwPageNo = pKid->GetRefObjNum();
553     pPageNode->m_ChildNodes.push_back(std::move(pNode));
554   }
555   return true;
556 }
557 
CheckUnknownPageNode(uint32_t dwPageNo,PageNode * pPageNode)558 bool CPDF_DataAvail::CheckUnknownPageNode(uint32_t dwPageNo,
559                                           PageNode* pPageNode) {
560   bool bExists = false;
561   RetainPtr<CPDF_Object> pPage = GetObject(dwPageNo, &bExists);
562   if (!bExists) {
563     m_docStatus = PDF_DATAAVAIL_ERROR;
564     return false;
565   }
566 
567   if (!pPage)
568     return false;
569 
570   if (pPage->IsArray()) {
571     pPageNode->m_dwPageNo = dwPageNo;
572     pPageNode->m_type = PDF_PAGENODE_ARRAY;
573     return true;
574   }
575 
576   if (!pPage->IsDictionary()) {
577     m_docStatus = PDF_DATAAVAIL_ERROR;
578     return false;
579   }
580 
581   pPageNode->m_dwPageNo = dwPageNo;
582   CPDF_Dictionary* pDict = pPage->GetDict();
583   const ByteString type = pDict->GetStringFor("Type");
584   if (type == "Page") {
585     pPageNode->m_type = PDF_PAGENODE_PAGE;
586     return true;
587   }
588 
589   if (type != "Pages") {
590     m_docStatus = PDF_DATAAVAIL_ERROR;
591     return false;
592   }
593 
594   pPageNode->m_type = PDF_PAGENODE_PAGES;
595   CPDF_Object* pKids = pDict->GetObjectFor("Kids");
596   if (!pKids) {
597     m_docStatus = PDF_DATAAVAIL_PAGE;
598     return true;
599   }
600 
601   switch (pKids->GetType()) {
602     case CPDF_Object::kReference: {
603       CPDF_Reference* pKid = pKids->AsReference();
604       auto pNode = pdfium::MakeUnique<PageNode>();
605       pNode->m_dwPageNo = pKid->GetRefObjNum();
606       pPageNode->m_ChildNodes.push_back(std::move(pNode));
607       break;
608     }
609     case CPDF_Object::kArray: {
610       CPDF_Array* pKidsArray = pKids->AsArray();
611       for (size_t i = 0; i < pKidsArray->size(); ++i) {
612         CPDF_Reference* pKid = ToReference(pKidsArray->GetObjectAt(i));
613         if (!pKid)
614           continue;
615 
616         auto pNode = pdfium::MakeUnique<PageNode>();
617         pNode->m_dwPageNo = pKid->GetRefObjNum();
618         pPageNode->m_ChildNodes.push_back(std::move(pNode));
619       }
620       break;
621     }
622     default:
623       break;
624   }
625   return true;
626 }
627 
CheckPageNode(const CPDF_DataAvail::PageNode & pageNode,int32_t iPage,int32_t & iCount,int level)628 bool CPDF_DataAvail::CheckPageNode(const CPDF_DataAvail::PageNode& pageNode,
629                                    int32_t iPage,
630                                    int32_t& iCount,
631                                    int level) {
632   if (level >= kMaxPageRecursionDepth)
633     return false;
634 
635   int32_t iSize = pdfium::CollectionSize<int32_t>(pageNode.m_ChildNodes);
636   if (iSize <= 0 || iPage >= iSize) {
637     m_docStatus = PDF_DATAAVAIL_ERROR;
638     return false;
639   }
640   for (int32_t i = 0; i < iSize; ++i) {
641     PageNode* pNode = pageNode.m_ChildNodes[i].get();
642     if (!pNode)
643       continue;
644 
645     if (pNode->m_type == PDF_PAGENODE_UNKNOWN) {
646       // Updates the type for the unknown page node.
647       if (!CheckUnknownPageNode(pNode->m_dwPageNo, pNode))
648         return false;
649     }
650     if (pNode->m_type == PDF_PAGENODE_ARRAY) {
651       // Updates a more specific type for the array page node.
652       if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode))
653         return false;
654     }
655     switch (pNode->m_type) {
656       case PDF_PAGENODE_PAGE:
657         iCount++;
658         if (iPage == iCount && m_pDocument)
659           m_pDocument->SetPageObjNum(iPage, pNode->m_dwPageNo);
660         break;
661       case PDF_PAGENODE_PAGES:
662         if (!CheckPageNode(*pNode, iPage, iCount, level + 1))
663           return false;
664         break;
665       case PDF_PAGENODE_UNKNOWN:
666       case PDF_PAGENODE_ARRAY:
667         // Already converted above, error if we get here.
668         return false;
669     }
670     if (iPage == iCount) {
671       m_docStatus = PDF_DATAAVAIL_DONE;
672       return true;
673     }
674   }
675   return true;
676 }
677 
LoadDocPage(uint32_t dwPage)678 bool CPDF_DataAvail::LoadDocPage(uint32_t dwPage) {
679   FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
680   int32_t iPage = safePage.ValueOrDie();
681   if (m_pDocument->GetPageCount() <= iPage ||
682       m_pDocument->IsPageLoaded(iPage)) {
683     m_docStatus = PDF_DATAAVAIL_DONE;
684     return true;
685   }
686   if (m_PageNode.m_type == PDF_PAGENODE_PAGE) {
687     m_docStatus = iPage == 0 ? PDF_DATAAVAIL_DONE : PDF_DATAAVAIL_ERROR;
688     return true;
689   }
690   int32_t iCount = -1;
691   return CheckPageNode(m_PageNode, iPage, iCount, 0);
692 }
693 
CheckPageCount()694 bool CPDF_DataAvail::CheckPageCount() {
695   bool bExists = false;
696   RetainPtr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
697   if (!bExists) {
698     m_docStatus = PDF_DATAAVAIL_ERROR;
699     return false;
700   }
701   if (!pPages)
702     return false;
703 
704   CPDF_Dictionary* pPagesDict = pPages->GetDict();
705   if (!pPagesDict) {
706     m_docStatus = PDF_DATAAVAIL_ERROR;
707     return false;
708   }
709   if (!pPagesDict->KeyExist("Kids"))
710     return true;
711 
712   return pPagesDict->GetIntegerFor("Count") > 0;
713 }
714 
LoadDocPages()715 bool CPDF_DataAvail::LoadDocPages() {
716   if (!CheckUnknownPageNode(m_PagesObjNum, &m_PageNode))
717     return false;
718 
719   if (CheckPageCount()) {
720     m_docStatus = PDF_DATAAVAIL_PAGE;
721     return true;
722   }
723 
724   m_bTotalLoadPageTree = true;
725   return false;
726 }
727 
LoadPages()728 bool CPDF_DataAvail::LoadPages() {
729   while (!m_bPagesTreeLoad) {
730     if (!CheckPageStatus())
731       return false;
732   }
733 
734   if (m_bPagesLoad)
735     return true;
736 
737   m_pDocument->LoadPages();
738   return false;
739 }
740 
CheckLinearizedData()741 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData() {
742   if (m_bLinearedDataOK)
743     return DataAvailable;
744   ASSERT(m_pLinearized);
745   if (!m_pLinearized->GetMainXRefTableFirstEntryOffset() || !m_pDocument ||
746       !m_pDocument->GetParser() || !m_pDocument->GetParser()->GetTrailer()) {
747     return DataError;
748   }
749 
750   if (!m_bMainXRefLoadTried) {
751     const FX_SAFE_FILESIZE prev =
752         m_pDocument->GetParser()->GetTrailer()->GetIntegerFor("Prev");
753     const FX_FILESIZE main_xref_offset = prev.ValueOrDefault(-1);
754     if (main_xref_offset < 0)
755       return DataError;
756 
757     if (main_xref_offset == 0)
758       return DataAvailable;
759 
760     FX_SAFE_SIZE_T data_size = m_dwFileLen;
761     data_size -= main_xref_offset;
762     if (!data_size.IsValid())
763       return DataError;
764 
765     if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
766             main_xref_offset, data_size.ValueOrDie()))
767       return DataNotAvailable;
768 
769     CPDF_Parser::Error eRet =
770         m_pDocument->GetParser()->LoadLinearizedMainXRefTable();
771     m_bMainXRefLoadTried = true;
772     if (eRet != CPDF_Parser::SUCCESS)
773       return DataError;
774 
775     if (!PreparePageItem())
776       return DataNotAvailable;
777 
778     m_bMainXRefLoadedOK = true;
779     m_bLinearedDataOK = true;
780   }
781 
782   return m_bLinearedDataOK ? DataAvailable : DataNotAvailable;
783 }
784 
IsPageAvail(uint32_t dwPage,DownloadHints * pHints)785 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail(
786     uint32_t dwPage,
787     DownloadHints* pHints) {
788   if (!m_pDocument)
789     return DataError;
790 
791   const FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
792   if (!safePage.IsValid())
793     return DataError;
794 
795   if (safePage.ValueOrDie() >= m_pDocument->GetPageCount()) {
796     // This is XFA page.
797     return DataAvailable;
798   }
799 
800   if (IsFirstCheck(dwPage)) {
801     m_bCurPageDictLoadOK = false;
802   }
803 
804   if (pdfium::ContainsKey(m_pagesLoadState, dwPage))
805     return DataAvailable;
806 
807   const HintsScope hints_scope(GetValidator(), pHints);
808   if (m_pLinearized) {
809     if (dwPage == m_pLinearized->GetFirstPageNo()) {
810       auto* pPageDict = m_pDocument->GetPageDictionary(safePage.ValueOrDie());
811       if (!pPageDict)
812         return DataError;
813 
814       auto page_num_obj = std::make_pair(
815           dwPage, pdfium::MakeUnique<CPDF_PageObjectAvail>(
816                       GetValidator(), m_pDocument.Get(), pPageDict));
817 
818       CPDF_PageObjectAvail* page_obj_avail =
819           m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
820       // TODO(art-snake): Check resources.
821       return page_obj_avail->CheckAvail();
822     }
823 
824     DocAvailStatus nResult = CheckLinearizedData();
825     if (nResult != DataAvailable)
826       return nResult;
827 
828     if (m_pHintTables) {
829       nResult = m_pHintTables->CheckPage(dwPage);
830       if (nResult != DataAvailable)
831         return nResult;
832       if (GetPageDictionary(dwPage)) {
833         m_pagesLoadState.insert(dwPage);
834         return DataAvailable;
835       }
836     }
837 
838     if (!m_bMainXRefLoadedOK) {
839       if (!LoadAllFile())
840         return DataNotAvailable;
841       m_pDocument->GetParser()->RebuildCrossRef();
842       ResetFirstCheck(dwPage);
843       return DataAvailable;
844     }
845     if (m_bTotalLoadPageTree) {
846       if (!LoadPages())
847         return DataNotAvailable;
848     } else {
849       if (!m_bCurPageDictLoadOK && !CheckPage(dwPage))
850         return DataNotAvailable;
851     }
852   } else {
853     if (!m_bTotalLoadPageTree && !m_bCurPageDictLoadOK && !CheckPage(dwPage)) {
854       return DataNotAvailable;
855     }
856   }
857 
858   if (CheckAcroForm() == DocFormStatus::FormNotAvailable)
859     return DataNotAvailable;
860 
861   auto* pPageDict = m_pDocument->GetPageDictionary(safePage.ValueOrDie());
862   if (!pPageDict)
863     return DataError;
864 
865   {
866     auto page_num_obj = std::make_pair(
867         dwPage, pdfium::MakeUnique<CPDF_PageObjectAvail>(
868                     GetValidator(), m_pDocument.Get(), pPageDict));
869     CPDF_PageObjectAvail* page_obj_avail =
870         m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
871     const DocAvailStatus status = page_obj_avail->CheckAvail();
872     if (status != DocAvailStatus::DataAvailable)
873       return status;
874   }
875 
876   const DocAvailStatus resources_status = CheckResources(pPageDict);
877   if (resources_status != DocAvailStatus::DataAvailable)
878     return resources_status;
879 
880   m_bCurPageDictLoadOK = false;
881   ResetFirstCheck(dwPage);
882   m_pagesLoadState.insert(dwPage);
883   return DataAvailable;
884 }
885 
CheckResources(CPDF_Dictionary * page)886 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckResources(
887     CPDF_Dictionary* page) {
888   ASSERT(page);
889   const CPDF_ReadValidator::Session read_session(GetValidator());
890   CPDF_Object* resources = GetResourceObject(page);
891   if (GetValidator()->has_read_problems())
892     return DocAvailStatus::DataNotAvailable;
893 
894   if (!resources)
895     return DocAvailStatus::DataAvailable;
896 
897   CPDF_PageObjectAvail* resource_avail =
898       m_PagesResourcesAvail
899           .insert(std::make_pair(
900               resources, pdfium::MakeUnique<CPDF_PageObjectAvail>(
901                              GetValidator(), m_pDocument.Get(), resources)))
902           .first->second.get();
903   return resource_avail->CheckAvail();
904 }
905 
GetValidator() const906 RetainPtr<CPDF_ReadValidator> CPDF_DataAvail::GetValidator() const {
907   return m_pFileRead;
908 }
909 
GetSyntaxParser() const910 CPDF_SyntaxParser* CPDF_DataAvail::GetSyntaxParser() const {
911   return m_pDocument ? m_pDocument->GetParser()->m_pSyntax.get()
912                      : m_parser.m_pSyntax.get();
913 }
914 
GetPageCount() const915 int CPDF_DataAvail::GetPageCount() const {
916   if (m_pLinearized)
917     return m_pLinearized->GetPageCount();
918   return m_pDocument ? m_pDocument->GetPageCount() : 0;
919 }
920 
GetPageDictionary(int index) const921 CPDF_Dictionary* CPDF_DataAvail::GetPageDictionary(int index) const {
922   if (!m_pDocument || index < 0 || index >= GetPageCount())
923     return nullptr;
924   CPDF_Dictionary* page = m_pDocument->GetPageDictionary(index);
925   if (page)
926     return page;
927   if (!m_pLinearized || !m_pHintTables)
928     return nullptr;
929 
930   if (index == static_cast<int>(m_pLinearized->GetFirstPageNo()))
931     return nullptr;
932   FX_FILESIZE szPageStartPos = 0;
933   FX_FILESIZE szPageLength = 0;
934   uint32_t dwObjNum = 0;
935   const bool bPagePosGot = m_pHintTables->GetPagePos(index, &szPageStartPos,
936                                                      &szPageLength, &dwObjNum);
937   if (!bPagePosGot || !dwObjNum)
938     return nullptr;
939   // We should say to the document, which object is the page.
940   m_pDocument->SetPageObjNum(index, dwObjNum);
941   // Page object already can be parsed in document.
942   if (!m_pDocument->GetIndirectObject(dwObjNum)) {
943     m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
944         dwObjNum,
945         ParseIndirectObjectAt(szPageStartPos, dwObjNum, m_pDocument.Get()));
946   }
947   if (!ValidatePage(index))
948     return nullptr;
949   return m_pDocument->GetPageDictionary(index);
950 }
951 
IsFormAvail(DownloadHints * pHints)952 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
953     DownloadHints* pHints) {
954   const HintsScope hints_scope(GetValidator(), pHints);
955   return CheckAcroForm();
956 }
957 
CheckAcroForm()958 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::CheckAcroForm() {
959   if (!m_pDocument)
960     return FormAvailable;
961 
962   if (m_pLinearized) {
963     DocAvailStatus nDocStatus = CheckLinearizedData();
964     if (nDocStatus == DataError)
965       return FormError;
966     if (nDocStatus == DataNotAvailable)
967       return FormNotAvailable;
968   }
969 
970   if (!m_pFormAvail) {
971     CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
972     if (!pRoot)
973       return FormAvailable;
974 
975     CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm");
976     if (!pAcroForm)
977       return FormNotExist;
978 
979     m_pFormAvail = pdfium::MakeUnique<CPDF_PageObjectAvail>(
980         GetValidator(), m_pDocument.Get(), pAcroForm);
981   }
982   switch (m_pFormAvail->CheckAvail()) {
983     case DocAvailStatus::DataError:
984       return DocFormStatus::FormError;
985     case DocAvailStatus::DataNotAvailable:
986       return DocFormStatus::FormNotAvailable;
987     case DocAvailStatus::DataAvailable:
988       return DocFormStatus::FormAvailable;
989     default:
990       NOTREACHED();
991   }
992   return DocFormStatus::FormError;
993 }
994 
ValidatePage(uint32_t dwPage) const995 bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) const {
996   FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
997   auto* pPageDict = m_pDocument->GetPageDictionary(safePage.ValueOrDie());
998   if (!pPageDict)
999     return false;
1000   CPDF_PageObjectAvail obj_avail(GetValidator(), m_pDocument.Get(), pPageDict);
1001   return obj_avail.CheckAvail() == DocAvailStatus::DataAvailable;
1002 }
1003 
1004 std::pair<CPDF_Parser::Error, std::unique_ptr<CPDF_Document>>
ParseDocument(std::unique_ptr<CPDF_Document::RenderDataIface> pRenderData,std::unique_ptr<CPDF_Document::PageDataIface> pPageData,const char * password)1005 CPDF_DataAvail::ParseDocument(
1006     std::unique_ptr<CPDF_Document::RenderDataIface> pRenderData,
1007     std::unique_ptr<CPDF_Document::PageDataIface> pPageData,
1008     const char* password) {
1009   if (m_pDocument) {
1010     // We already returned parsed document.
1011     return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
1012   }
1013   auto document = pdfium::MakeUnique<CPDF_Document>(std::move(pRenderData),
1014                                                     std::move(pPageData));
1015   document->AddObserver(this);
1016 
1017   CPDF_ReadValidator::Session read_session(GetValidator());
1018   CPDF_Parser::Error error =
1019       document->LoadLinearizedDoc(GetValidator(), password);
1020 
1021   // Additional check, that all ok.
1022   if (GetValidator()->has_read_problems()) {
1023     NOTREACHED();
1024     return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
1025   }
1026 
1027   if (error != CPDF_Parser::SUCCESS)
1028     return std::make_pair(error, nullptr);
1029 
1030   m_pDocument = document.get();
1031   return std::make_pair(CPDF_Parser::SUCCESS, std::move(document));
1032 }
1033 
PageNode()1034 CPDF_DataAvail::PageNode::PageNode() : m_type(PDF_PAGENODE_UNKNOWN) {}
1035 
~PageNode()1036 CPDF_DataAvail::PageNode::~PageNode() {}
1037