1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/parser/cpdf_data_avail.h"
8 
9 #include <algorithm>
10 #include <memory>
11 #include <utility>
12 
13 #include "core/fpdfapi/cpdf_modulemgr.h"
14 #include "core/fpdfapi/parser/cpdf_array.h"
15 #include "core/fpdfapi/parser/cpdf_cross_ref_avail.h"
16 #include "core/fpdfapi/parser/cpdf_dictionary.h"
17 #include "core/fpdfapi/parser/cpdf_document.h"
18 #include "core/fpdfapi/parser/cpdf_hint_tables.h"
19 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
20 #include "core/fpdfapi/parser/cpdf_name.h"
21 #include "core/fpdfapi/parser/cpdf_number.h"
22 #include "core/fpdfapi/parser/cpdf_page_object_avail.h"
23 #include "core/fpdfapi/parser/cpdf_read_validator.h"
24 #include "core/fpdfapi/parser/cpdf_reference.h"
25 #include "core/fpdfapi/parser/cpdf_stream.h"
26 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
27 #include "core/fxcrt/cfx_memorystream.h"
28 #include "core/fxcrt/fx_extension.h"
29 #include "core/fxcrt/fx_safe_types.h"
30 #include "third_party/base/numerics/safe_conversions.h"
31 #include "third_party/base/ptr_util.h"
32 #include "third_party/base/stl_util.h"
33 
34 namespace {
35 
36 // static
GetResourceObject(const CPDF_Dictionary * pDict)37 const CPDF_Object* GetResourceObject(const CPDF_Dictionary* pDict) {
38   constexpr size_t kMaxHierarchyDepth = 64;
39   size_t depth = 0;
40 
41   const CPDF_Dictionary* dictionary_to_check = pDict;
42   while (dictionary_to_check) {
43     const CPDF_Object* result = dictionary_to_check->GetObjectFor("Resources");
44     if (result)
45       return result;
46     const CPDF_Object* parent = dictionary_to_check->GetObjectFor("Parent");
47     dictionary_to_check = parent ? parent->GetDict() : nullptr;
48 
49     if (++depth > kMaxHierarchyDepth) {
50       // We have cycle in parents hierarchy.
51       return nullptr;
52     }
53   }
54   return nullptr;
55 }
56 
57 class HintsScope {
58  public:
HintsScope(CPDF_ReadValidator * validator,CPDF_DataAvail::DownloadHints * hints)59   HintsScope(CPDF_ReadValidator* validator,
60              CPDF_DataAvail::DownloadHints* hints)
61       : validator_(validator) {
62     ASSERT(validator_);
63     validator_->SetDownloadHints(hints);
64   }
65 
~HintsScope()66   ~HintsScope() { validator_->SetDownloadHints(nullptr); }
67 
68  private:
69   UnownedPtr<CPDF_ReadValidator> validator_;
70 };
71 
72 }  // namespace
73 
~FileAvail()74 CPDF_DataAvail::FileAvail::~FileAvail() {}
75 
~DownloadHints()76 CPDF_DataAvail::DownloadHints::~DownloadHints() {}
77 
CPDF_DataAvail(FileAvail * pFileAvail,const RetainPtr<IFX_SeekableReadStream> & pFileRead,bool bSupportHintTable)78 CPDF_DataAvail::CPDF_DataAvail(
79     FileAvail* pFileAvail,
80     const RetainPtr<IFX_SeekableReadStream>& pFileRead,
81     bool bSupportHintTable)
82     : m_pFileAvail(pFileAvail),
83       m_pFileRead(
84           pdfium::MakeRetain<CPDF_ReadValidator>(pFileRead, m_pFileAvail)),
85       m_dwFileLen(m_pFileRead->GetSize()),
86       m_bSupportHintTable(bSupportHintTable) {}
87 
~CPDF_DataAvail()88 CPDF_DataAvail::~CPDF_DataAvail() {
89   m_pHintTables.reset();
90 }
91 
IsDocAvail(DownloadHints * pHints)92 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail(
93     DownloadHints* pHints) {
94   if (!m_dwFileLen)
95     return DataError;
96 
97   const HintsScope hints_scope(m_pFileRead.Get(), pHints);
98 
99   while (!m_bDocAvail) {
100     if (!CheckDocStatus())
101       return DataNotAvailable;
102   }
103 
104   return DataAvailable;
105 }
106 
CheckDocStatus()107 bool CPDF_DataAvail::CheckDocStatus() {
108   switch (m_docStatus) {
109     case PDF_DATAAVAIL_HEADER:
110       return CheckHeader();
111     case PDF_DATAAVAIL_FIRSTPAGE:
112       return CheckFirstPage();
113     case PDF_DATAAVAIL_HINTTABLE:
114       return CheckHintTables();
115     case PDF_DATAAVAIL_LOADALLCROSSREF:
116       return CheckAndLoadAllXref();
117     case PDF_DATAAVAIL_LOADALLFILE:
118       return LoadAllFile();
119     case PDF_DATAAVAIL_ROOT:
120       return CheckRoot();
121     case PDF_DATAAVAIL_INFO:
122       return CheckInfo();
123     case PDF_DATAAVAIL_PAGETREE:
124       if (m_bTotalLoadPageTree)
125         return CheckPages();
126       return LoadDocPages();
127     case PDF_DATAAVAIL_PAGE:
128       if (m_bTotalLoadPageTree)
129         return CheckPage();
130       m_docStatus = PDF_DATAAVAIL_PAGE_LATERLOAD;
131       return true;
132     case PDF_DATAAVAIL_ERROR:
133       return LoadAllFile();
134     case PDF_DATAAVAIL_PAGE_LATERLOAD:
135       m_docStatus = PDF_DATAAVAIL_PAGE;
136     default:
137       m_bDocAvail = true;
138       return true;
139   }
140 }
141 
CheckPageStatus()142 bool CPDF_DataAvail::CheckPageStatus() {
143   switch (m_docStatus) {
144     case PDF_DATAAVAIL_PAGETREE:
145       return CheckPages();
146     case PDF_DATAAVAIL_PAGE:
147       return CheckPage();
148     case PDF_DATAAVAIL_ERROR:
149       return LoadAllFile();
150     default:
151       m_bPagesTreeLoad = true;
152       m_bPagesLoad = true;
153       return true;
154   }
155 }
156 
LoadAllFile()157 bool CPDF_DataAvail::LoadAllFile() {
158   if (GetValidator()->CheckWholeFileAndRequestIfUnavailable()) {
159     m_docStatus = PDF_DATAAVAIL_DONE;
160     return true;
161   }
162   return false;
163 }
164 
CheckAndLoadAllXref()165 bool CPDF_DataAvail::CheckAndLoadAllXref() {
166   if (!m_pCrossRefAvail) {
167     const CPDF_ReadValidator::Session read_session(GetValidator().Get());
168     const FX_FILESIZE last_xref_offset = m_parser.ParseStartXRef();
169     if (GetValidator()->has_read_problems())
170       return false;
171 
172     if (last_xref_offset <= 0) {
173       m_docStatus = PDF_DATAAVAIL_ERROR;
174       return false;
175     }
176 
177     m_pCrossRefAvail = pdfium::MakeUnique<CPDF_CrossRefAvail>(GetSyntaxParser(),
178                                                               last_xref_offset);
179   }
180 
181   switch (m_pCrossRefAvail->CheckAvail()) {
182     case DocAvailStatus::DataAvailable:
183       break;
184     case DocAvailStatus::DataNotAvailable:
185       return false;
186     case DocAvailStatus::DataError:
187       m_docStatus = PDF_DATAAVAIL_ERROR;
188       return false;
189     default:
190       NOTREACHED();
191       return false;
192   }
193 
194   if (!m_parser.LoadAllCrossRefV4(m_pCrossRefAvail->last_crossref_offset()) &&
195       !m_parser.LoadAllCrossRefV5(m_pCrossRefAvail->last_crossref_offset())) {
196     m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
197     return false;
198   }
199 
200   m_dwRootObjNum = m_parser.GetRootObjNum();
201   m_dwInfoObjNum = m_parser.GetInfoObjNum();
202   m_pCurrentParser = &m_parser;
203   m_docStatus = PDF_DATAAVAIL_ROOT;
204   return true;
205 }
206 
GetObject(uint32_t objnum,bool * pExistInFile)207 std::unique_ptr<CPDF_Object> CPDF_DataAvail::GetObject(uint32_t objnum,
208                                                        bool* pExistInFile) {
209   CPDF_Parser* pParser = nullptr;
210 
211   if (pExistInFile)
212     *pExistInFile = true;
213 
214   pParser = m_pDocument ? m_pDocument->GetParser() : &m_parser;
215 
216   std::unique_ptr<CPDF_Object> pRet;
217   if (pParser) {
218     const CPDF_ReadValidator::Session read_session(GetValidator().Get());
219     pRet = pParser->ParseIndirectObject(nullptr, objnum);
220     if (GetValidator()->has_read_problems())
221       return nullptr;
222   }
223 
224   if (!pRet && pExistInFile)
225     *pExistInFile = false;
226 
227   return pRet;
228 }
229 
CheckInfo()230 bool CPDF_DataAvail::CheckInfo() {
231   bool bExist = false;
232   std::unique_ptr<CPDF_Object> pInfo = GetObject(m_dwInfoObjNum, &bExist);
233   if (bExist && !pInfo) {
234     if (m_docStatus == PDF_DATAAVAIL_ERROR) {
235       m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
236       return true;
237     }
238     return false;
239   }
240   m_docStatus = PDF_DATAAVAIL_PAGETREE;
241   return true;
242 }
243 
CheckRoot()244 bool CPDF_DataAvail::CheckRoot() {
245   bool bExist = false;
246   m_pRoot = GetObject(m_dwRootObjNum, &bExist);
247   if (!bExist) {
248     m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
249     return true;
250   }
251 
252   if (!m_pRoot) {
253     if (m_docStatus == PDF_DATAAVAIL_ERROR) {
254       m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
255       return true;
256     }
257     return false;
258   }
259 
260   CPDF_Dictionary* pDict = m_pRoot->GetDict();
261   if (!pDict) {
262     m_docStatus = PDF_DATAAVAIL_ERROR;
263     return false;
264   }
265 
266   CPDF_Reference* pRef = ToReference(pDict->GetObjectFor("Pages"));
267   if (!pRef) {
268     m_docStatus = PDF_DATAAVAIL_ERROR;
269     return false;
270   }
271 
272   m_PagesObjNum = pRef->GetRefObjNum();
273 
274   m_docStatus = m_dwInfoObjNum ? PDF_DATAAVAIL_INFO : PDF_DATAAVAIL_PAGETREE;
275   return true;
276 }
277 
PreparePageItem()278 bool CPDF_DataAvail::PreparePageItem() {
279   const CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
280   CPDF_Reference* pRef =
281       ToReference(pRoot ? pRoot->GetObjectFor("Pages") : nullptr);
282   if (!pRef) {
283     m_docStatus = PDF_DATAAVAIL_ERROR;
284     return false;
285   }
286 
287   m_PagesObjNum = pRef->GetRefObjNum();
288   m_pCurrentParser = m_pDocument->GetParser();
289   m_docStatus = PDF_DATAAVAIL_PAGETREE;
290   return true;
291 }
292 
IsFirstCheck(uint32_t dwPage)293 bool CPDF_DataAvail::IsFirstCheck(uint32_t dwPage) {
294   return m_pageMapCheckState.insert(dwPage).second;
295 }
296 
ResetFirstCheck(uint32_t dwPage)297 void CPDF_DataAvail::ResetFirstCheck(uint32_t dwPage) {
298   m_pageMapCheckState.erase(dwPage);
299 }
300 
CheckPage()301 bool CPDF_DataAvail::CheckPage() {
302   std::vector<uint32_t> UnavailObjList;
303   for (uint32_t dwPageObjNum : m_PageObjList) {
304     bool bExists = false;
305     std::unique_ptr<CPDF_Object> pObj = GetObject(dwPageObjNum, &bExists);
306     if (!pObj) {
307       if (bExists)
308         UnavailObjList.push_back(dwPageObjNum);
309       continue;
310     }
311     CPDF_Array* pArray = ToArray(pObj.get());
312     if (pArray) {
313       for (const auto& pArrayObj : *pArray) {
314         if (CPDF_Reference* pRef = ToReference(pArrayObj.get()))
315           UnavailObjList.push_back(pRef->GetRefObjNum());
316       }
317     }
318     if (!pObj->IsDictionary())
319       continue;
320 
321     ByteString type = pObj->GetDict()->GetStringFor("Type");
322     if (type == "Pages") {
323       m_PagesArray.push_back(std::move(pObj));
324       continue;
325     }
326   }
327   m_PageObjList.clear();
328   if (!UnavailObjList.empty()) {
329     m_PageObjList = std::move(UnavailObjList);
330     return false;
331   }
332   size_t iPages = m_PagesArray.size();
333   for (size_t i = 0; i < iPages; ++i) {
334     std::unique_ptr<CPDF_Object> pPages = std::move(m_PagesArray[i]);
335     if (pPages && !GetPageKids(m_pCurrentParser, pPages.get())) {
336       m_PagesArray.clear();
337       m_docStatus = PDF_DATAAVAIL_ERROR;
338       return false;
339     }
340   }
341   m_PagesArray.clear();
342   if (m_PageObjList.empty())
343     m_docStatus = PDF_DATAAVAIL_DONE;
344 
345   return true;
346 }
347 
GetPageKids(CPDF_Parser * pParser,CPDF_Object * pPages)348 bool CPDF_DataAvail::GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages) {
349   if (!pParser) {
350     m_docStatus = PDF_DATAAVAIL_ERROR;
351     return false;
352   }
353 
354   CPDF_Dictionary* pDict = pPages->GetDict();
355   CPDF_Object* pKids = pDict ? pDict->GetObjectFor("Kids") : nullptr;
356   if (!pKids)
357     return true;
358 
359   switch (pKids->GetType()) {
360     case CPDF_Object::REFERENCE:
361       m_PageObjList.push_back(pKids->AsReference()->GetRefObjNum());
362       break;
363     case CPDF_Object::ARRAY: {
364       CPDF_Array* pKidsArray = pKids->AsArray();
365       for (size_t i = 0; i < pKidsArray->GetCount(); ++i) {
366         if (CPDF_Reference* pRef = ToReference(pKidsArray->GetObjectAt(i)))
367           m_PageObjList.push_back(pRef->GetRefObjNum());
368       }
369       break;
370     }
371     default:
372       m_docStatus = PDF_DATAAVAIL_ERROR;
373       return false;
374   }
375   return true;
376 }
377 
CheckPages()378 bool CPDF_DataAvail::CheckPages() {
379   bool bExists = false;
380   std::unique_ptr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
381   if (!bExists) {
382     m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
383     return true;
384   }
385 
386   if (!pPages) {
387     if (m_docStatus == PDF_DATAAVAIL_ERROR) {
388       m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
389       return true;
390     }
391     return false;
392   }
393 
394   if (!GetPageKids(m_pCurrentParser, pPages.get())) {
395     m_docStatus = PDF_DATAAVAIL_ERROR;
396     return false;
397   }
398 
399   m_docStatus = PDF_DATAAVAIL_PAGE;
400   return true;
401 }
402 
CheckHeader()403 bool CPDF_DataAvail::CheckHeader() {
404   switch (CheckHeaderAndLinearized()) {
405     case DocAvailStatus::DataAvailable:
406       m_docStatus = m_pLinearized ? PDF_DATAAVAIL_FIRSTPAGE
407                                   : PDF_DATAAVAIL_LOADALLCROSSREF;
408       return true;
409     case DocAvailStatus::DataNotAvailable:
410       return false;
411     case DocAvailStatus::DataError:
412       m_docStatus = PDF_DATAAVAIL_ERROR;
413       return true;
414     default:
415       NOTREACHED();
416       return false;
417   }
418 }
419 
CheckFirstPage()420 bool CPDF_DataAvail::CheckFirstPage() {
421   if (!m_pLinearized->GetFirstPageEndOffset() ||
422       !m_pLinearized->GetFileSize() ||
423       !m_pLinearized->GetMainXRefTableFirstEntryOffset()) {
424     m_docStatus = PDF_DATAAVAIL_ERROR;
425     return false;
426   }
427 
428   uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset();
429   dwEnd += 512;
430   if ((FX_FILESIZE)dwEnd > m_dwFileLen)
431     dwEnd = (uint32_t)m_dwFileLen;
432 
433   const FX_FILESIZE start_pos = m_dwFileLen > 1024 ? 1024 : m_dwFileLen;
434   const size_t data_size = dwEnd > 1024 ? static_cast<size_t>(dwEnd - 1024) : 0;
435   if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(start_pos,
436                                                              data_size))
437     return false;
438 
439   m_docStatus =
440       m_bSupportHintTable ? PDF_DATAAVAIL_HINTTABLE : PDF_DATAAVAIL_DONE;
441   return true;
442 }
443 
CheckHintTables()444 bool CPDF_DataAvail::CheckHintTables() {
445   if (m_pLinearized->GetPageCount() <= 1) {
446     m_docStatus = PDF_DATAAVAIL_DONE;
447     return true;
448   }
449   if (!m_pLinearized->HasHintTable()) {
450     m_docStatus = PDF_DATAAVAIL_ERROR;
451     return false;
452   }
453 
454   const FX_FILESIZE szHintStart = m_pLinearized->GetHintStart();
455   const uint32_t szHintLength = m_pLinearized->GetHintLength();
456 
457   if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(szHintStart,
458                                                              szHintLength))
459     return false;
460 
461   auto pHintTables = pdfium::MakeUnique<CPDF_HintTables>(GetValidator().Get(),
462                                                          m_pLinearized.get());
463   std::unique_ptr<CPDF_Object> pHintStream =
464       ParseIndirectObjectAt(szHintStart, 0);
465   CPDF_Stream* pStream = ToStream(pHintStream.get());
466   if (pStream && pHintTables->LoadHintStream(pStream))
467     m_pHintTables = std::move(pHintTables);
468 
469   m_docStatus = PDF_DATAAVAIL_DONE;
470   return true;
471 }
472 
ParseIndirectObjectAt(FX_FILESIZE pos,uint32_t objnum,CPDF_IndirectObjectHolder * pObjList)473 std::unique_ptr<CPDF_Object> CPDF_DataAvail::ParseIndirectObjectAt(
474     FX_FILESIZE pos,
475     uint32_t objnum,
476     CPDF_IndirectObjectHolder* pObjList) {
477   const FX_FILESIZE SavedPos = GetSyntaxParser()->GetPos();
478   GetSyntaxParser()->SetPos(pos);
479   std::unique_ptr<CPDF_Object> result = GetSyntaxParser()->GetIndirectObject(
480       pObjList, CPDF_SyntaxParser::ParseType::kLoose);
481   GetSyntaxParser()->SetPos(SavedPos);
482   return (result && (!objnum || result->GetObjNum() == objnum))
483              ? std::move(result)
484              : nullptr;
485 }
486 
IsLinearizedPDF()487 CPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() {
488   switch (CheckHeaderAndLinearized()) {
489     case DocAvailStatus::DataAvailable:
490       return m_pLinearized ? DocLinearizationStatus::Linearized
491                            : DocLinearizationStatus::NotLinearized;
492     case DocAvailStatus::DataNotAvailable:
493       return DocLinearizationStatus::LinearizationUnknown;
494     case DocAvailStatus::DataError:
495       return DocLinearizationStatus::NotLinearized;
496     default:
497       NOTREACHED();
498       return DocLinearizationStatus::LinearizationUnknown;
499   }
500 }
501 
CheckHeaderAndLinearized()502 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckHeaderAndLinearized() {
503   if (m_bHeaderAvail)
504     return DocAvailStatus::DataAvailable;
505 
506   const CPDF_ReadValidator::Session read_session(GetValidator().Get());
507   const int32_t header_offset = GetHeaderOffset(GetValidator());
508   if (GetValidator()->has_read_problems())
509     return DocAvailStatus::DataNotAvailable;
510 
511   if (header_offset == kInvalidHeaderOffset)
512     return DocAvailStatus::DataError;
513 
514   m_parser.m_pSyntax->InitParserWithValidator(GetValidator(), header_offset);
515   m_pLinearized = m_parser.ParseLinearizedHeader();
516   if (GetValidator()->has_read_problems())
517     return DocAvailStatus::DataNotAvailable;
518 
519   m_bHeaderAvail = true;
520   return DocAvailStatus::DataAvailable;
521 }
522 
CheckPage(uint32_t dwPage)523 bool CPDF_DataAvail::CheckPage(uint32_t dwPage) {
524   while (true) {
525     switch (m_docStatus) {
526       case PDF_DATAAVAIL_PAGETREE:
527         if (!LoadDocPages())
528           return false;
529         break;
530       case PDF_DATAAVAIL_PAGE:
531         if (!LoadDocPage(dwPage))
532           return false;
533         break;
534       case PDF_DATAAVAIL_ERROR:
535         return LoadAllFile();
536       default:
537         m_bPagesTreeLoad = true;
538         m_bPagesLoad = true;
539         m_bCurPageDictLoadOK = true;
540         m_docStatus = PDF_DATAAVAIL_PAGE;
541         return true;
542     }
543   }
544 }
545 
CheckArrayPageNode(uint32_t dwPageNo,PageNode * pPageNode)546 bool CPDF_DataAvail::CheckArrayPageNode(uint32_t dwPageNo,
547                                         PageNode* pPageNode) {
548   bool bExists = false;
549   std::unique_ptr<CPDF_Object> pPages = GetObject(dwPageNo, &bExists);
550   if (!bExists) {
551     m_docStatus = PDF_DATAAVAIL_ERROR;
552     return false;
553   }
554 
555   if (!pPages)
556     return false;
557 
558   CPDF_Array* pArray = pPages->AsArray();
559   if (!pArray) {
560     m_docStatus = PDF_DATAAVAIL_ERROR;
561     return false;
562   }
563 
564   pPageNode->m_type = PDF_PAGENODE_PAGES;
565   for (size_t i = 0; i < pArray->GetCount(); ++i) {
566     CPDF_Reference* pKid = ToReference(pArray->GetObjectAt(i));
567     if (!pKid)
568       continue;
569 
570     auto pNode = pdfium::MakeUnique<PageNode>();
571     pNode->m_dwPageNo = pKid->GetRefObjNum();
572     pPageNode->m_ChildNodes.push_back(std::move(pNode));
573   }
574   return true;
575 }
576 
CheckUnknownPageNode(uint32_t dwPageNo,PageNode * pPageNode)577 bool CPDF_DataAvail::CheckUnknownPageNode(uint32_t dwPageNo,
578                                           PageNode* pPageNode) {
579   bool bExists = false;
580   std::unique_ptr<CPDF_Object> pPage = GetObject(dwPageNo, &bExists);
581   if (!bExists) {
582     m_docStatus = PDF_DATAAVAIL_ERROR;
583     return false;
584   }
585 
586   if (!pPage)
587     return false;
588 
589   if (pPage->IsArray()) {
590     pPageNode->m_dwPageNo = dwPageNo;
591     pPageNode->m_type = PDF_PAGENODE_ARRAY;
592     return true;
593   }
594 
595   if (!pPage->IsDictionary()) {
596     m_docStatus = PDF_DATAAVAIL_ERROR;
597     return false;
598   }
599 
600   pPageNode->m_dwPageNo = dwPageNo;
601   CPDF_Dictionary* pDict = pPage->GetDict();
602   const ByteString type = pDict->GetStringFor("Type");
603   if (type == "Page") {
604     pPageNode->m_type = PDF_PAGENODE_PAGE;
605     return true;
606   }
607 
608   if (type != "Pages") {
609     m_docStatus = PDF_DATAAVAIL_ERROR;
610     return false;
611   }
612 
613   pPageNode->m_type = PDF_PAGENODE_PAGES;
614   CPDF_Object* pKids = pDict->GetObjectFor("Kids");
615   if (!pKids) {
616     m_docStatus = PDF_DATAAVAIL_PAGE;
617     return true;
618   }
619 
620   switch (pKids->GetType()) {
621     case CPDF_Object::REFERENCE: {
622       CPDF_Reference* pKid = pKids->AsReference();
623       auto pNode = pdfium::MakeUnique<PageNode>();
624       pNode->m_dwPageNo = pKid->GetRefObjNum();
625       pPageNode->m_ChildNodes.push_back(std::move(pNode));
626       break;
627     }
628     case CPDF_Object::ARRAY: {
629       CPDF_Array* pKidsArray = pKids->AsArray();
630       for (size_t i = 0; i < pKidsArray->GetCount(); ++i) {
631         CPDF_Reference* pKid = ToReference(pKidsArray->GetObjectAt(i));
632         if (!pKid)
633           continue;
634 
635         auto pNode = pdfium::MakeUnique<PageNode>();
636         pNode->m_dwPageNo = pKid->GetRefObjNum();
637         pPageNode->m_ChildNodes.push_back(std::move(pNode));
638       }
639       break;
640     }
641     default:
642       break;
643   }
644   return true;
645 }
646 
CheckPageNode(const CPDF_DataAvail::PageNode & pageNode,int32_t iPage,int32_t & iCount,int level)647 bool CPDF_DataAvail::CheckPageNode(const CPDF_DataAvail::PageNode& pageNode,
648                                    int32_t iPage,
649                                    int32_t& iCount,
650                                    int level) {
651   if (level >= kMaxPageRecursionDepth)
652     return false;
653 
654   int32_t iSize = pdfium::CollectionSize<int32_t>(pageNode.m_ChildNodes);
655   if (iSize <= 0 || iPage >= iSize) {
656     m_docStatus = PDF_DATAAVAIL_ERROR;
657     return false;
658   }
659   for (int32_t i = 0; i < iSize; ++i) {
660     PageNode* pNode = pageNode.m_ChildNodes[i].get();
661     if (!pNode)
662       continue;
663 
664     if (pNode->m_type == PDF_PAGENODE_UNKNOWN) {
665       // Updates the type for the unknown page node.
666       if (!CheckUnknownPageNode(pNode->m_dwPageNo, pNode))
667         return false;
668     }
669     if (pNode->m_type == PDF_PAGENODE_ARRAY) {
670       // Updates a more specific type for the array page node.
671       if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode))
672         return false;
673     }
674     switch (pNode->m_type) {
675       case PDF_PAGENODE_PAGE:
676         iCount++;
677         if (iPage == iCount && m_pDocument)
678           m_pDocument->SetPageObjNum(iPage, pNode->m_dwPageNo);
679         break;
680       case PDF_PAGENODE_PAGES:
681         if (!CheckPageNode(*pNode, iPage, iCount, level + 1))
682           return false;
683         break;
684       case PDF_PAGENODE_UNKNOWN:
685       case PDF_PAGENODE_ARRAY:
686         // Already converted above, error if we get here.
687         return false;
688     }
689     if (iPage == iCount) {
690       m_docStatus = PDF_DATAAVAIL_DONE;
691       return true;
692     }
693   }
694   return true;
695 }
696 
LoadDocPage(uint32_t dwPage)697 bool CPDF_DataAvail::LoadDocPage(uint32_t dwPage) {
698   FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
699   int32_t iPage = safePage.ValueOrDie();
700   if (m_pDocument->GetPageCount() <= iPage ||
701       m_pDocument->IsPageLoaded(iPage)) {
702     m_docStatus = PDF_DATAAVAIL_DONE;
703     return true;
704   }
705   if (m_PageNode.m_type == PDF_PAGENODE_PAGE) {
706     m_docStatus = iPage == 0 ? PDF_DATAAVAIL_DONE : PDF_DATAAVAIL_ERROR;
707     return true;
708   }
709   int32_t iCount = -1;
710   return CheckPageNode(m_PageNode, iPage, iCount, 0);
711 }
712 
CheckPageCount()713 bool CPDF_DataAvail::CheckPageCount() {
714   bool bExists = false;
715   std::unique_ptr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
716   if (!bExists) {
717     m_docStatus = PDF_DATAAVAIL_ERROR;
718     return false;
719   }
720   if (!pPages)
721     return false;
722 
723   CPDF_Dictionary* pPagesDict = pPages->GetDict();
724   if (!pPagesDict) {
725     m_docStatus = PDF_DATAAVAIL_ERROR;
726     return false;
727   }
728   if (!pPagesDict->KeyExist("Kids"))
729     return true;
730 
731   return pPagesDict->GetIntegerFor("Count") > 0;
732 }
733 
LoadDocPages()734 bool CPDF_DataAvail::LoadDocPages() {
735   if (!CheckUnknownPageNode(m_PagesObjNum, &m_PageNode))
736     return false;
737 
738   if (CheckPageCount()) {
739     m_docStatus = PDF_DATAAVAIL_PAGE;
740     return true;
741   }
742 
743   m_bTotalLoadPageTree = true;
744   return false;
745 }
746 
LoadPages()747 bool CPDF_DataAvail::LoadPages() {
748   while (!m_bPagesTreeLoad) {
749     if (!CheckPageStatus())
750       return false;
751   }
752 
753   if (m_bPagesLoad)
754     return true;
755 
756   m_pDocument->LoadPages();
757   return false;
758 }
759 
CheckLinearizedData()760 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData() {
761   if (m_bLinearedDataOK)
762     return DataAvailable;
763   ASSERT(m_pLinearized);
764   if (!m_pLinearized->GetMainXRefTableFirstEntryOffset() || !m_pDocument ||
765       !m_pDocument->GetParser() || !m_pDocument->GetParser()->GetTrailer()) {
766     return DataError;
767   }
768 
769   if (!m_bMainXRefLoadTried) {
770     const FX_SAFE_FILESIZE main_xref_offset =
771         m_pDocument->GetParser()->GetTrailer()->GetIntegerFor("Prev");
772     if (!main_xref_offset.IsValid())
773       return DataError;
774 
775     if (main_xref_offset.ValueOrDie() == 0)
776       return DataAvailable;
777 
778     FX_SAFE_SIZE_T data_size = m_dwFileLen;
779     data_size -= main_xref_offset.ValueOrDie();
780     if (!data_size.IsValid())
781       return DataError;
782 
783     if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
784             main_xref_offset.ValueOrDie(), data_size.ValueOrDie()))
785       return DataNotAvailable;
786 
787     CPDF_Parser::Error eRet =
788         m_pDocument->GetParser()->LoadLinearizedMainXRefTable();
789     m_bMainXRefLoadTried = true;
790     if (eRet != CPDF_Parser::SUCCESS)
791       return DataError;
792 
793     if (!PreparePageItem())
794       return DataNotAvailable;
795 
796     m_bMainXRefLoadedOK = true;
797     m_bLinearedDataOK = true;
798   }
799 
800   return m_bLinearedDataOK ? DataAvailable : DataNotAvailable;
801 }
802 
IsPageAvail(uint32_t dwPage,DownloadHints * pHints)803 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail(
804     uint32_t dwPage,
805     DownloadHints* pHints) {
806   if (!m_pDocument)
807     return DataError;
808 
809   const FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
810   if (!safePage.IsValid())
811     return DataError;
812 
813   if (safePage.ValueOrDie() >= m_pDocument->GetPageCount()) {
814     // This is XFA page.
815     return DataAvailable;
816   }
817 
818   if (IsFirstCheck(dwPage)) {
819     m_bCurPageDictLoadOK = false;
820   }
821 
822   if (pdfium::ContainsKey(m_pagesLoadState, dwPage))
823     return DataAvailable;
824 
825   const HintsScope hints_scope(GetValidator().Get(), pHints);
826 
827   if (m_pLinearized) {
828     if (dwPage == m_pLinearized->GetFirstPageNo()) {
829       CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
830       if (!pPageDict)
831         return DataError;
832 
833       auto page_num_obj = std::make_pair(
834           dwPage, pdfium::MakeUnique<CPDF_PageObjectAvail>(
835                       GetValidator().Get(), m_pDocument, pPageDict));
836 
837       CPDF_PageObjectAvail* page_obj_avail =
838           m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
839       // TODO(art-snake): Check resources.
840       return page_obj_avail->CheckAvail();
841     }
842 
843     DocAvailStatus nResult = CheckLinearizedData();
844     if (nResult != DataAvailable)
845       return nResult;
846 
847     if (m_pHintTables) {
848       nResult = m_pHintTables->CheckPage(dwPage);
849       if (nResult != DataAvailable)
850         return nResult;
851       if (GetPage(dwPage)) {
852         m_pagesLoadState.insert(dwPage);
853         return DataAvailable;
854       }
855     }
856 
857     if (!m_bMainXRefLoadedOK) {
858       if (!LoadAllFile())
859         return DataNotAvailable;
860       m_pDocument->GetParser()->RebuildCrossRef();
861       ResetFirstCheck(dwPage);
862       return DataAvailable;
863     }
864     if (m_bTotalLoadPageTree) {
865       if (!LoadPages())
866         return DataNotAvailable;
867     } else {
868       if (!m_bCurPageDictLoadOK && !CheckPage(dwPage))
869         return DataNotAvailable;
870     }
871   } else {
872     if (!m_bTotalLoadPageTree && !m_bCurPageDictLoadOK && !CheckPage(dwPage)) {
873       return DataNotAvailable;
874     }
875   }
876 
877   if (CheckAcroForm() == DocFormStatus::FormNotAvailable)
878     return DataNotAvailable;
879 
880   CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
881   if (!pPageDict)
882     return DataError;
883 
884   {
885     auto page_num_obj = std::make_pair(
886         dwPage, pdfium::MakeUnique<CPDF_PageObjectAvail>(
887                     GetValidator().Get(), m_pDocument, pPageDict));
888     CPDF_PageObjectAvail* page_obj_avail =
889         m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
890     const DocAvailStatus status = page_obj_avail->CheckAvail();
891     if (status != DocAvailStatus::DataAvailable)
892       return status;
893   }
894 
895   const DocAvailStatus resources_status = CheckResources(pPageDict);
896   if (resources_status != DocAvailStatus::DataAvailable)
897     return resources_status;
898 
899   m_bCurPageDictLoadOK = false;
900   ResetFirstCheck(dwPage);
901   m_pagesLoadState.insert(dwPage);
902   return DataAvailable;
903 }
904 
CheckResources(const CPDF_Dictionary * page)905 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckResources(
906     const CPDF_Dictionary* page) {
907   ASSERT(page);
908   const CPDF_ReadValidator::Session read_session(GetValidator().Get());
909   const CPDF_Object* resources = GetResourceObject(page);
910   if (GetValidator()->has_read_problems())
911     return DocAvailStatus::DataNotAvailable;
912 
913   if (!resources)
914     return DocAvailStatus::DataAvailable;
915 
916   CPDF_PageObjectAvail* resource_avail =
917       m_PagesResourcesAvail
918           .insert(std::make_pair(
919               resources, pdfium::MakeUnique<CPDF_PageObjectAvail>(
920                              GetValidator().Get(), m_pDocument, resources)))
921           .first->second.get();
922   return resource_avail->CheckAvail();
923 }
924 
GetFileRead() const925 RetainPtr<IFX_SeekableReadStream> CPDF_DataAvail::GetFileRead() const {
926   return m_pFileRead;
927 }
928 
GetValidator() const929 RetainPtr<CPDF_ReadValidator> CPDF_DataAvail::GetValidator() const {
930   return m_pFileRead;
931 }
932 
GetSyntaxParser() const933 CPDF_SyntaxParser* CPDF_DataAvail::GetSyntaxParser() const {
934   return m_pDocument ? m_pDocument->GetParser()->m_pSyntax.get()
935                      : m_parser.m_pSyntax.get();
936 }
937 
GetPageCount() const938 int CPDF_DataAvail::GetPageCount() const {
939   if (m_pLinearized)
940     return m_pLinearized->GetPageCount();
941   return m_pDocument ? m_pDocument->GetPageCount() : 0;
942 }
943 
GetPage(int index)944 CPDF_Dictionary* CPDF_DataAvail::GetPage(int index) {
945   if (!m_pDocument || index < 0 || index >= GetPageCount())
946     return nullptr;
947   CPDF_Dictionary* page = m_pDocument->GetPage(index);
948   if (page)
949     return page;
950   if (!m_pLinearized || !m_pHintTables)
951     return nullptr;
952 
953   if (index == static_cast<int>(m_pLinearized->GetFirstPageNo()))
954     return nullptr;
955   FX_FILESIZE szPageStartPos = 0;
956   FX_FILESIZE szPageLength = 0;
957   uint32_t dwObjNum = 0;
958   const bool bPagePosGot = m_pHintTables->GetPagePos(index, &szPageStartPos,
959                                                      &szPageLength, &dwObjNum);
960   if (!bPagePosGot || !dwObjNum)
961     return nullptr;
962   // We should say to the document, which object is the page.
963   m_pDocument->SetPageObjNum(index, dwObjNum);
964   // Page object already can be parsed in document.
965   if (!m_pDocument->GetIndirectObject(dwObjNum)) {
966     m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
967         dwObjNum, ParseIndirectObjectAt(szPageStartPos, dwObjNum, m_pDocument));
968   }
969   if (!ValidatePage(index))
970     return nullptr;
971   return m_pDocument->GetPage(index);
972 }
973 
IsFormAvail(DownloadHints * pHints)974 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
975     DownloadHints* pHints) {
976   const HintsScope hints_scope(GetValidator().Get(), pHints);
977   return CheckAcroForm();
978 }
979 
CheckAcroForm()980 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::CheckAcroForm() {
981   if (!m_pDocument)
982     return FormAvailable;
983 
984   if (m_pLinearized) {
985     DocAvailStatus nDocStatus = CheckLinearizedData();
986     if (nDocStatus == DataError)
987       return FormError;
988     if (nDocStatus == DataNotAvailable)
989       return FormNotAvailable;
990   }
991 
992   if (!m_pFormAvail) {
993     const CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
994     if (!pRoot)
995       return FormAvailable;
996 
997     CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm");
998     if (!pAcroForm)
999       return FormNotExist;
1000 
1001     m_pFormAvail = pdfium::MakeUnique<CPDF_PageObjectAvail>(
1002         GetValidator().Get(), m_pDocument, pAcroForm);
1003   }
1004   switch (m_pFormAvail->CheckAvail()) {
1005     case DocAvailStatus::DataError:
1006       return DocFormStatus::FormError;
1007     case DocAvailStatus::DataNotAvailable:
1008       return DocFormStatus::FormNotAvailable;
1009     case DocAvailStatus::DataAvailable:
1010       return DocFormStatus::FormAvailable;
1011     default:
1012       NOTREACHED();
1013   }
1014   return DocFormStatus::FormError;
1015 }
1016 
ValidatePage(uint32_t dwPage)1017 bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) {
1018   FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
1019   CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
1020   if (!pPageDict)
1021     return false;
1022   CPDF_PageObjectAvail obj_avail(GetValidator().Get(), m_pDocument, pPageDict);
1023   return obj_avail.CheckAvail() == DocAvailStatus::DataAvailable;
1024 }
1025 
1026 std::pair<CPDF_Parser::Error, std::unique_ptr<CPDF_Document>>
ParseDocument(const char * password)1027 CPDF_DataAvail::ParseDocument(const char* password) {
1028   if (m_pDocument) {
1029     // We already returned parsed document.
1030     return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
1031   }
1032   auto parser = pdfium::MakeUnique<CPDF_Parser>();
1033   parser->SetPassword(password);
1034   auto document = pdfium::MakeUnique<CPDF_Document>(std::move(parser));
1035 
1036   CPDF_ReadValidator::Session read_session(GetValidator().Get());
1037   CPDF_Parser::Error error = document->GetParser()->StartLinearizedParse(
1038       GetFileRead(), document.get());
1039 
1040   // Additional check, that all ok.
1041   if (GetValidator()->has_read_problems()) {
1042     NOTREACHED();
1043     return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
1044   }
1045 
1046   if (error != CPDF_Parser::SUCCESS)
1047     return std::make_pair(error, nullptr);
1048 
1049   m_pDocument = document.get();
1050   return std::make_pair(CPDF_Parser::SUCCESS, std::move(document));
1051 }
1052 
PageNode()1053 CPDF_DataAvail::PageNode::PageNode() : m_type(PDF_PAGENODE_UNKNOWN) {}
1054 
~PageNode()1055 CPDF_DataAvail::PageNode::~PageNode() {}
1056