1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/include/fpdfapi/fpdf_parser.h"
8 
9 #include <algorithm>
10 #include <memory>
11 #include <set>
12 #include <utility>
13 #include <vector>
14 
15 #include "core/include/fpdfapi/fpdf_module.h"
16 #include "core/include/fpdfapi/fpdf_page.h"
17 #include "core/include/fxcrt/fx_ext.h"
18 #include "core/include/fxcrt/fx_safe_types.h"
19 #include "core/src/fpdfapi/fpdf_page/pageint.h"
20 #include "core/src/fpdfapi/fpdf_parser/parser_int.h"
21 #include "third_party/base/stl_util.h"
22 
23 namespace {
24 
// Largest trailer /Size value this parser accepts (2^20). The format itself
// allows bigger tables; this is a practical sanity cap.
const int32_t kMaxXRefSize = 1 << 20;
28 
29 // A limit on the maximum object number in the xref table. Theoretical limits
30 // are higher, but this may be large enough in practice.
31 const FX_DWORD kMaxObjectNumber = 1048576;
32 
// Bookkeeping record for one tag that is being searched for.
// NOTE(review): no use of this struct is visible in this part of the file; the
// field notes below are inferred from the member names only - confirm.
struct SearchTagRecord {
  const char* m_pTag;  // presumably the tag text
  FX_DWORD m_Len;      // presumably the length of |m_pTag|
  FX_DWORD m_Offset;   // presumably a position associated with the tag
};
38 
// Scope guard that adds |elem| to |org_set| for the lifetime of the guard.
//
// The element is removed again on destruction, but only if this guard was the
// one that actually inserted it: an element that was already present (for
// example because an enclosing guard added it) is left untouched, so nested
// guards on the same value cannot remove each other's entry early.
//
// |org_set| must be non-null and must outlive the guard. The guard is not
// copyable, since a copy would erase the element while the original guard is
// still alive.
template <typename T>
class ScopedSetInsertion {
 public:
  ScopedSetInsertion(std::set<T>* org_set, T elem)
      : m_Set(org_set),
        m_Entry(elem),
        m_bInserted(m_Set->insert(m_Entry).second) {}
  ~ScopedSetInsertion() {
    if (m_bInserted)
      m_Set->erase(m_Entry);
  }

  ScopedSetInsertion(const ScopedSetInsertion&) = delete;
  ScopedSetInsertion& operator=(const ScopedSetInsertion&) = delete;

 private:
  // Declaration order matters: |m_bInserted| is initialized from the two
  // members above it.
  std::set<T>* const m_Set;
  const T m_Entry;
  const bool m_bInserted;
};
52 
CompareFileSize(const void * p1,const void * p2)53 int CompareFileSize(const void* p1, const void* p2) {
54   return *(FX_FILESIZE*)p1 - *(FX_FILESIZE*)p2;
55 }
56 
GetHeaderOffset(IFX_FileRead * pFile)57 int32_t GetHeaderOffset(IFX_FileRead* pFile) {
58   const FX_DWORD tag = FXDWORD_FROM_LSBFIRST(0x46445025);
59   const size_t kBufSize = 4;
60   uint8_t buf[kBufSize];
61   int32_t offset = 0;
62   while (offset <= 1024) {
63     if (!pFile->ReadBlock(buf, offset, kBufSize))
64       return -1;
65 
66     if (*(FX_DWORD*)buf == tag)
67       return offset;
68 
69     ++offset;
70   }
71   return -1;
72 }
73 
GetDirectInteger(CPDF_Dictionary * pDict,const CFX_ByteStringC & key)74 int32_t GetDirectInteger(CPDF_Dictionary* pDict, const CFX_ByteStringC& key) {
75   CPDF_Number* pObj = ToNumber(pDict->GetElement(key));
76   return pObj ? pObj->GetInteger() : 0;
77 }
78 
GetVarInt(const uint8_t * p,int32_t n)79 FX_DWORD GetVarInt(const uint8_t* p, int32_t n) {
80   FX_DWORD result = 0;
81   for (int32_t i = 0; i < n; ++i)
82     result = result * 256 + p[i];
83   return result;
84 }
85 
GetStreamNCount(CPDF_StreamAcc * pObjStream)86 int32_t GetStreamNCount(CPDF_StreamAcc* pObjStream) {
87   return pObjStream->GetDict()->GetInteger("N");
88 }
89 
GetStreamFirst(CPDF_StreamAcc * pObjStream)90 int32_t GetStreamFirst(CPDF_StreamAcc* pObjStream) {
91   return pObjStream->GetDict()->GetInteger("First");
92 }
93 
CanReadFromBitStream(const CFX_BitStream * hStream,const FX_SAFE_DWORD & num_bits)94 bool CanReadFromBitStream(const CFX_BitStream* hStream,
95                           const FX_SAFE_DWORD& num_bits) {
96   return (num_bits.IsValid() &&
97           hStream->BitsRemaining() >= num_bits.ValueOrDie());
98 }
99 
100 }  // namespace
101 
// Owning handle for an IFX_FileStream; the stream is disposed of through
// ReleaseDeleter rather than operator delete.
// TODO(thestig): Using unique_ptr with ReleaseDeleter is still not ideal.
// Come up with, or wait for, something better.
using ScopedFileStream =
    std::unique_ptr<IFX_FileStream, ReleaseDeleter<IFX_FileStream>>;
106 
IsSignatureDict(const CPDF_Dictionary * pDict)107 bool IsSignatureDict(const CPDF_Dictionary* pDict) {
108   CPDF_Object* pType = pDict->GetElementValue("Type");
109   if (!pType)
110     pType = pDict->GetElementValue("FT");
111   return pType && pType->GetString() == "Sig";
112 }
113 
CPDF_Parser()114 CPDF_Parser::CPDF_Parser() {
115   m_pDocument = NULL;
116   m_pTrailer = NULL;
117   m_pEncryptDict = NULL;
118   m_pLinearized = NULL;
119   m_dwFirstPageNo = 0;
120   m_dwXrefStartObjNum = 0;
121   m_bOwnFileRead = TRUE;
122   m_FileVersion = 0;
123   m_bForceUseSecurityHandler = FALSE;
124 }
~CPDF_Parser()125 CPDF_Parser::~CPDF_Parser() {
126   CloseParser(FALSE);
127 }
128 
GetLastObjNum() const129 FX_DWORD CPDF_Parser::GetLastObjNum() const {
130   return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first;
131 }
132 
IsValidObjectNumber(FX_DWORD objnum) const133 bool CPDF_Parser::IsValidObjectNumber(FX_DWORD objnum) const {
134   return !m_ObjectInfo.empty() && objnum <= m_ObjectInfo.rbegin()->first;
135 }
136 
SetEncryptDictionary(CPDF_Dictionary * pDict)137 void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) {
138   m_pEncryptDict = pDict;
139 }
140 
GetObjectPositionOrZero(FX_DWORD objnum) const141 FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(FX_DWORD objnum) const {
142   auto it = m_ObjectInfo.find(objnum);
143   return it != m_ObjectInfo.end() ? it->second.pos : 0;
144 }
145 
ShrinkObjectMap(FX_DWORD objnum)146 void CPDF_Parser::ShrinkObjectMap(FX_DWORD objnum) {
147   if (objnum == 0) {
148     m_ObjectInfo.clear();
149     return;
150   }
151 
152   auto it = m_ObjectInfo.lower_bound(objnum);
153   while (it != m_ObjectInfo.end()) {
154     auto saved_it = it++;
155     m_ObjectInfo.erase(saved_it);
156   }
157 
158   if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1))
159     m_ObjectInfo[objnum - 1].pos = 0;
160 }
161 
CloseParser(FX_BOOL bReParse)162 void CPDF_Parser::CloseParser(FX_BOOL bReParse) {
163   m_bVersionUpdated = FALSE;
164   if (!bReParse) {
165     delete m_pDocument;
166     m_pDocument = NULL;
167   }
168   if (m_pTrailer) {
169     m_pTrailer->Release();
170     m_pTrailer = NULL;
171   }
172   ReleaseEncryptHandler();
173   SetEncryptDictionary(NULL);
174   if (m_bOwnFileRead && m_Syntax.m_pFileAccess) {
175     m_Syntax.m_pFileAccess->Release();
176     m_Syntax.m_pFileAccess = NULL;
177   }
178   FX_POSITION pos = m_ObjectStreamMap.GetStartPosition();
179   while (pos) {
180     void* objnum;
181     CPDF_StreamAcc* pStream;
182     m_ObjectStreamMap.GetNextAssoc(pos, objnum, (void*&)pStream);
183     delete pStream;
184   }
185   m_ObjectStreamMap.RemoveAll();
186   m_ObjCache.clear();
187 
188   m_SortedOffset.RemoveAll();
189   m_ObjectInfo.clear();
190   m_V5Type.RemoveAll();
191   m_ObjVersion.RemoveAll();
192   int32_t iLen = m_Trailers.GetSize();
193   for (int32_t i = 0; i < iLen; ++i) {
194     if (CPDF_Dictionary* trailer = m_Trailers.GetAt(i))
195       trailer->Release();
196   }
197   m_Trailers.RemoveAll();
198   if (m_pLinearized) {
199     m_pLinearized->Release();
200     m_pLinearized = NULL;
201   }
202 }

// Security-handler factories; declared here, defined outside this part of the
// file. FPDF_CreateStandardSecurityHandler() is used by SetEncryptHandler()
// for the "Standard" filter.
CPDF_SecurityHandler* FPDF_CreateStandardSecurityHandler();
// NOTE(review): no call to FPDF_CreatePubKeyHandler() is visible in this part
// of the file - confirm it is still needed.
CPDF_SecurityHandler* FPDF_CreatePubKeyHandler(void*);
StartParse(IFX_FileRead * pFileAccess,FX_BOOL bReParse,FX_BOOL bOwnFileRead)205 FX_DWORD CPDF_Parser::StartParse(IFX_FileRead* pFileAccess,
206                                  FX_BOOL bReParse,
207                                  FX_BOOL bOwnFileRead) {
208   CloseParser(bReParse);
209   m_bXRefStream = FALSE;
210   m_LastXRefOffset = 0;
211   m_bOwnFileRead = bOwnFileRead;
212 
213   int32_t offset = GetHeaderOffset(pFileAccess);
214   if (offset == -1) {
215     if (bOwnFileRead && pFileAccess)
216       pFileAccess->Release();
217     return PDFPARSE_ERROR_FORMAT;
218   }
219   m_Syntax.InitParser(pFileAccess, offset);
220 
221   uint8_t ch;
222   if (!m_Syntax.GetCharAt(5, ch))
223     return PDFPARSE_ERROR_FORMAT;
224   if (std::isdigit(ch))
225     m_FileVersion = FXSYS_toDecimalDigit(ch) * 10;
226 
227   if (!m_Syntax.GetCharAt(7, ch))
228     return PDFPARSE_ERROR_FORMAT;
229   if (std::isdigit(ch))
230     m_FileVersion += FXSYS_toDecimalDigit(ch);
231 
232   if (m_Syntax.m_FileLen < m_Syntax.m_HeaderOffset + 9)
233     return PDFPARSE_ERROR_FORMAT;
234 
235   m_Syntax.RestorePos(m_Syntax.m_FileLen - m_Syntax.m_HeaderOffset - 9);
236   if (!bReParse)
237     m_pDocument = new CPDF_Document(this);
238 
239   FX_BOOL bXRefRebuilt = FALSE;
240   if (m_Syntax.SearchWord("startxref", TRUE, FALSE, 4096)) {
241     FX_FILESIZE startxref_offset = m_Syntax.SavePos();
242     void* pResult = FXSYS_bsearch(&startxref_offset, m_SortedOffset.GetData(),
243                                   m_SortedOffset.GetSize(), sizeof(FX_FILESIZE),
244                                   CompareFileSize);
245     if (!pResult)
246       m_SortedOffset.Add(startxref_offset);
247 
248     m_Syntax.GetKeyword();
249     bool bNumber;
250     CFX_ByteString xrefpos_str = m_Syntax.GetNextWord(&bNumber);
251     if (!bNumber)
252       return PDFPARSE_ERROR_FORMAT;
253 
254     m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str);
255     if (!LoadAllCrossRefV4(m_LastXRefOffset) &&
256         !LoadAllCrossRefV5(m_LastXRefOffset)) {
257       if (!RebuildCrossRef())
258         return PDFPARSE_ERROR_FORMAT;
259 
260       bXRefRebuilt = TRUE;
261       m_LastXRefOffset = 0;
262     }
263   } else {
264     if (!RebuildCrossRef())
265       return PDFPARSE_ERROR_FORMAT;
266 
267     bXRefRebuilt = TRUE;
268   }
269   FX_DWORD dwRet = SetEncryptHandler();
270   if (dwRet != PDFPARSE_ERROR_SUCCESS)
271     return dwRet;
272 
273   m_pDocument->LoadDoc();
274   if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
275     if (bXRefRebuilt)
276       return PDFPARSE_ERROR_FORMAT;
277 
278     ReleaseEncryptHandler();
279     if (!RebuildCrossRef())
280       return PDFPARSE_ERROR_FORMAT;
281 
282     dwRet = SetEncryptHandler();
283     if (dwRet != PDFPARSE_ERROR_SUCCESS)
284       return dwRet;
285 
286     m_pDocument->LoadDoc();
287     if (!m_pDocument->GetRoot())
288       return PDFPARSE_ERROR_FORMAT;
289   }
290   FXSYS_qsort(m_SortedOffset.GetData(), m_SortedOffset.GetSize(),
291               sizeof(FX_FILESIZE), CompareFileSize);
292   if (GetRootObjNum() == 0) {
293     ReleaseEncryptHandler();
294     if (!RebuildCrossRef() || GetRootObjNum() == 0)
295       return PDFPARSE_ERROR_FORMAT;
296 
297     dwRet = SetEncryptHandler();
298     if (dwRet != PDFPARSE_ERROR_SUCCESS)
299       return dwRet;
300   }
301   if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
302     CPDF_Reference* pMetadata =
303         ToReference(m_pDocument->GetRoot()->GetElement("Metadata"));
304     if (pMetadata)
305       m_Syntax.m_MetadataObjnum = pMetadata->GetRefObjNum();
306   }
307   return PDFPARSE_ERROR_SUCCESS;
308 }
SetEncryptHandler()309 FX_DWORD CPDF_Parser::SetEncryptHandler() {
310   ReleaseEncryptHandler();
311   SetEncryptDictionary(NULL);
312   if (!m_pTrailer) {
313     return PDFPARSE_ERROR_FORMAT;
314   }
315   CPDF_Object* pEncryptObj = m_pTrailer->GetElement("Encrypt");
316   if (pEncryptObj) {
317     if (CPDF_Dictionary* pEncryptDict = pEncryptObj->AsDictionary()) {
318       SetEncryptDictionary(pEncryptDict);
319     } else if (CPDF_Reference* pRef = pEncryptObj->AsReference()) {
320       pEncryptObj =
321           m_pDocument->GetIndirectObject(pRef->GetRefObjNum(), nullptr);
322       if (pEncryptObj)
323         SetEncryptDictionary(pEncryptObj->GetDict());
324     }
325   }
326   if (m_bForceUseSecurityHandler) {
327     FX_DWORD err = PDFPARSE_ERROR_HANDLER;
328     if (!m_pSecurityHandler) {
329       return PDFPARSE_ERROR_HANDLER;
330     }
331     if (!m_pSecurityHandler->OnInit(this, m_pEncryptDict)) {
332       return err;
333     }
334     std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler(
335         m_pSecurityHandler->CreateCryptoHandler());
336     if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get())) {
337       return PDFPARSE_ERROR_HANDLER;
338     }
339     m_Syntax.SetEncrypt(pCryptoHandler.release());
340   } else if (m_pEncryptDict) {
341     CFX_ByteString filter = m_pEncryptDict->GetString("Filter");
342     std::unique_ptr<CPDF_SecurityHandler> pSecurityHandler;
343     FX_DWORD err = PDFPARSE_ERROR_HANDLER;
344     if (filter == "Standard") {
345       pSecurityHandler.reset(FPDF_CreateStandardSecurityHandler());
346       err = PDFPARSE_ERROR_PASSWORD;
347     }
348     if (!pSecurityHandler) {
349       return PDFPARSE_ERROR_HANDLER;
350     }
351     if (!pSecurityHandler->OnInit(this, m_pEncryptDict)) {
352       return err;
353     }
354     m_pSecurityHandler = std::move(pSecurityHandler);
355     std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler(
356         m_pSecurityHandler->CreateCryptoHandler());
357     if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get())) {
358       return PDFPARSE_ERROR_HANDLER;
359     }
360     m_Syntax.SetEncrypt(pCryptoHandler.release());
361   }
362   return PDFPARSE_ERROR_SUCCESS;
363 }
ReleaseEncryptHandler()364 void CPDF_Parser::ReleaseEncryptHandler() {
365   m_Syntax.m_pCryptoHandler.reset();
366   if (!m_bForceUseSecurityHandler) {
367     m_pSecurityHandler.reset();
368   }
369 }
370 
GetObjectOffset(FX_DWORD objnum) const371 FX_FILESIZE CPDF_Parser::GetObjectOffset(FX_DWORD objnum) const {
372   if (!IsValidObjectNumber(objnum))
373     return 0;
374 
375   if (m_V5Type[objnum] == 1)
376     return GetObjectPositionOrZero(objnum);
377 
378   if (m_V5Type[objnum] == 2) {
379     FX_FILESIZE pos = GetObjectPositionOrZero(objnum);
380     return GetObjectPositionOrZero(pos);
381   }
382   return 0;
383 }
384 
LoadAllCrossRefV4(FX_FILESIZE xrefpos)385 FX_BOOL CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) {
386   if (!LoadCrossRefV4(xrefpos, 0, TRUE)) {
387     return FALSE;
388   }
389   m_pTrailer = LoadTrailerV4();
390   if (!m_pTrailer) {
391     return FALSE;
392   }
393 
394   int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size");
395   if (xrefsize <= 0 || xrefsize > kMaxXRefSize) {
396     return FALSE;
397   }
398   ShrinkObjectMap(xrefsize);
399   m_V5Type.SetSize(xrefsize);
400   CFX_FileSizeArray CrossRefList;
401   CFX_FileSizeArray XRefStreamList;
402   CrossRefList.Add(xrefpos);
403   XRefStreamList.Add(GetDirectInteger(m_pTrailer, "XRefStm"));
404 
405   std::set<FX_FILESIZE> seen_xrefpos;
406   seen_xrefpos.insert(xrefpos);
407   // When |m_pTrailer| doesn't have Prev entry or Prev entry value is not
408   // numerical, GetDirectInteger() returns 0. Loading will end.
409   xrefpos = GetDirectInteger(m_pTrailer, "Prev");
410   while (xrefpos) {
411     // Check for circular references.
412     if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
413       return FALSE;
414     seen_xrefpos.insert(xrefpos);
415     CrossRefList.InsertAt(0, xrefpos);
416     LoadCrossRefV4(xrefpos, 0, TRUE);
417     std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
418         LoadTrailerV4());
419     if (!pDict)
420       return FALSE;
421     xrefpos = GetDirectInteger(pDict.get(), "Prev");
422 
423     XRefStreamList.InsertAt(0, pDict->GetInteger("XRefStm"));
424     m_Trailers.Add(pDict.release());
425   }
426   for (int32_t i = 0; i < CrossRefList.GetSize(); i++) {
427     if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE))
428       return FALSE;
429   }
430   return TRUE;
431 }
LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos,FX_DWORD dwObjCount)432 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos,
433                                                  FX_DWORD dwObjCount) {
434   if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount)) {
435     return FALSE;
436   }
437   m_pTrailer = LoadTrailerV4();
438   if (!m_pTrailer) {
439     return FALSE;
440   }
441   int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size");
442   if (xrefsize == 0) {
443     return FALSE;
444   }
445   CFX_FileSizeArray CrossRefList, XRefStreamList;
446   CrossRefList.Add(xrefpos);
447   XRefStreamList.Add(GetDirectInteger(m_pTrailer, "XRefStm"));
448 
449   std::set<FX_FILESIZE> seen_xrefpos;
450   seen_xrefpos.insert(xrefpos);
451   xrefpos = GetDirectInteger(m_pTrailer, "Prev");
452   while (xrefpos) {
453     // Check for circular references.
454     if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
455       return FALSE;
456     seen_xrefpos.insert(xrefpos);
457     CrossRefList.InsertAt(0, xrefpos);
458     LoadCrossRefV4(xrefpos, 0, TRUE);
459     std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
460         LoadTrailerV4());
461     if (!pDict) {
462       return FALSE;
463     }
464     xrefpos = GetDirectInteger(pDict.get(), "Prev");
465 
466     XRefStreamList.InsertAt(0, pDict->GetInteger("XRefStm"));
467     m_Trailers.Add(pDict.release());
468   }
469   for (int32_t i = 1; i < CrossRefList.GetSize(); i++)
470     if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE)) {
471       return FALSE;
472     }
473   return TRUE;
474 }
LoadLinearizedCrossRefV4(FX_FILESIZE pos,FX_DWORD dwObjCount)475 FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos,
476                                               FX_DWORD dwObjCount) {
477   FX_FILESIZE dwStartPos = pos - m_Syntax.m_HeaderOffset;
478   m_Syntax.RestorePos(dwStartPos);
479   void* pResult =
480       FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(),
481                     sizeof(FX_FILESIZE), CompareFileSize);
482   if (!pResult) {
483     m_SortedOffset.Add(pos);
484   }
485   FX_DWORD start_objnum = 0;
486   FX_DWORD count = dwObjCount;
487   FX_FILESIZE SavedPos = m_Syntax.SavePos();
488   const int32_t recordsize = 20;
489   std::vector<char> buf(1024 * recordsize + 1);
490   buf[1024 * recordsize] = '\0';
491   int32_t nBlocks = count / 1024 + 1;
492   for (int32_t block = 0; block < nBlocks; block++) {
493     int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
494     FX_DWORD dwReadSize = block_size * recordsize;
495     if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_Syntax.m_FileLen) {
496       return FALSE;
497     }
498     if (!m_Syntax.ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
499                             dwReadSize)) {
500       return FALSE;
501     }
502     for (int32_t i = 0; i < block_size; i++) {
503       FX_DWORD objnum = start_objnum + block * 1024 + i;
504       char* pEntry = &buf[i * recordsize];
505       if (pEntry[17] == 'f') {
506         m_ObjectInfo[objnum].pos = 0;
507         m_V5Type.SetAtGrow(objnum, 0);
508       } else {
509         int32_t offset = FXSYS_atoi(pEntry);
510         if (offset == 0) {
511           for (int32_t c = 0; c < 10; c++) {
512             if (!std::isdigit(pEntry[c]))
513               return FALSE;
514           }
515         }
516         m_ObjectInfo[objnum].pos = offset;
517         int32_t version = FXSYS_atoi(pEntry + 11);
518         if (version >= 1) {
519           m_bVersionUpdated = TRUE;
520         }
521         m_ObjVersion.SetAtGrow(objnum, version);
522         if (m_ObjectInfo[objnum].pos < m_Syntax.m_FileLen) {
523           void* pResult = FXSYS_bsearch(
524               &m_ObjectInfo[objnum].pos, m_SortedOffset.GetData(),
525               m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), CompareFileSize);
526           if (!pResult) {
527             m_SortedOffset.Add(m_ObjectInfo[objnum].pos);
528           }
529         }
530         m_V5Type.SetAtGrow(objnum, 1);
531       }
532     }
533   }
534   m_Syntax.RestorePos(SavedPos + count * recordsize);
535   return TRUE;
536 }
537 
FindPosInOffsets(FX_FILESIZE pos) const538 bool CPDF_Parser::FindPosInOffsets(FX_FILESIZE pos) const {
539   return FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(),
540                        sizeof(FX_FILESIZE), CompareFileSize);
541 }
542 
LoadCrossRefV4(FX_FILESIZE pos,FX_FILESIZE streampos,FX_BOOL bSkip)543 bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,
544                                  FX_FILESIZE streampos,
545                                  FX_BOOL bSkip) {
546   m_Syntax.RestorePos(pos);
547   if (m_Syntax.GetKeyword() != "xref")
548     return false;
549 
550   if (!FindPosInOffsets(pos))
551     m_SortedOffset.Add(pos);
552 
553   if (streampos && !FindPosInOffsets(streampos))
554       m_SortedOffset.Add(streampos);
555 
556   while (1) {
557     FX_FILESIZE SavedPos = m_Syntax.SavePos();
558     bool bIsNumber;
559     CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber);
560     if (word.IsEmpty())
561       return false;
562 
563     if (!bIsNumber) {
564       m_Syntax.RestorePos(SavedPos);
565       break;
566     }
567     FX_DWORD start_objnum = FXSYS_atoi(word);
568     if (start_objnum >= kMaxObjectNumber)
569       return false;
570 
571     FX_DWORD count = m_Syntax.GetDirectNum();
572     m_Syntax.ToNextWord();
573     SavedPos = m_Syntax.SavePos();
574     const int32_t recordsize = 20;
575     m_dwXrefStartObjNum = start_objnum;
576     if (!bSkip) {
577       std::vector<char> buf(1024 * recordsize + 1);
578       buf[1024 * recordsize] = '\0';
579       int32_t nBlocks = count / 1024 + 1;
580       for (int32_t block = 0; block < nBlocks; block++) {
581         int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
582         m_Syntax.ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
583                            block_size * recordsize);
584         for (int32_t i = 0; i < block_size; i++) {
585           FX_DWORD objnum = start_objnum + block * 1024 + i;
586           char* pEntry = &buf[i * recordsize];
587           if (pEntry[17] == 'f') {
588             m_ObjectInfo[objnum].pos = 0;
589             m_V5Type.SetAtGrow(objnum, 0);
590           } else {
591             FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);
592             if (offset == 0) {
593               for (int32_t c = 0; c < 10; c++) {
594                 if (!std::isdigit(pEntry[c]))
595                   return false;
596               }
597             }
598             m_ObjectInfo[objnum].pos = offset;
599             int32_t version = FXSYS_atoi(pEntry + 11);
600             if (version >= 1) {
601               m_bVersionUpdated = TRUE;
602             }
603             m_ObjVersion.SetAtGrow(objnum, version);
604             if (m_ObjectInfo[objnum].pos < m_Syntax.m_FileLen &&
605                 !FindPosInOffsets(m_ObjectInfo[objnum].pos)) {
606               m_SortedOffset.Add(m_ObjectInfo[objnum].pos);
607             }
608             m_V5Type.SetAtGrow(objnum, 1);
609           }
610         }
611       }
612     }
613     m_Syntax.RestorePos(SavedPos + count * recordsize);
614   }
615   return !streampos || LoadCrossRefV5(&streampos, FALSE);
616 }
617 
LoadAllCrossRefV5(FX_FILESIZE xrefpos)618 FX_BOOL CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) {
619   if (!LoadCrossRefV5(&xrefpos, TRUE)) {
620     return FALSE;
621   }
622   std::set<FX_FILESIZE> seen_xrefpos;
623   while (xrefpos) {
624     seen_xrefpos.insert(xrefpos);
625     if (!LoadCrossRefV5(&xrefpos, FALSE)) {
626       return FALSE;
627     }
628     // Check for circular references.
629     if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) {
630       return FALSE;
631     }
632   }
633   m_ObjectStreamMap.InitHashTable(101, FALSE);
634   m_bXRefStream = TRUE;
635   return TRUE;
636 }
637 
RebuildCrossRef()638 FX_BOOL CPDF_Parser::RebuildCrossRef() {
639   m_ObjectInfo.clear();
640   m_V5Type.RemoveAll();
641   m_SortedOffset.RemoveAll();
642   m_ObjVersion.RemoveAll();
643   if (m_pTrailer) {
644     m_pTrailer->Release();
645     m_pTrailer = NULL;
646   }
647   int32_t status = 0;
648   int32_t inside_index = 0;
649   FX_DWORD objnum = 0;
650   FX_DWORD gennum = 0;
651   int32_t depth = 0;
652   const FX_DWORD kBufferSize = 4096;
653   std::vector<uint8_t> buffer(kBufferSize);
654   FX_FILESIZE pos = m_Syntax.m_HeaderOffset;
655   FX_FILESIZE start_pos = 0;
656   FX_FILESIZE start_pos1 = 0;
657   FX_FILESIZE last_obj = -1;
658   FX_FILESIZE last_xref = -1;
659   FX_FILESIZE last_trailer = -1;
660   while (pos < m_Syntax.m_FileLen) {
661     const FX_FILESIZE saved_pos = pos;
662     bool bOverFlow = false;
663     FX_DWORD size = std::min((FX_DWORD)(m_Syntax.m_FileLen - pos), kBufferSize);
664     if (!m_Syntax.m_pFileAccess->ReadBlock(buffer.data(), pos, size))
665       break;
666 
667     for (FX_DWORD i = 0; i < size; i++) {
668       uint8_t byte = buffer[i];
669       switch (status) {
670         case 0:
671           if (PDFCharIsWhitespace(byte))
672             status = 1;
673 
674           if (std::isdigit(byte)) {
675             --i;
676             status = 1;
677           }
678 
679           if (byte == '%') {
680             inside_index = 0;
681             status = 9;
682           }
683 
684           if (byte == '(') {
685             status = 10;
686             depth = 1;
687           }
688 
689           if (byte == '<') {
690             inside_index = 1;
691             status = 11;
692           }
693 
694           if (byte == '\\')
695             status = 13;
696 
697           if (byte == 't') {
698             status = 7;
699             inside_index = 1;
700           }
701           break;
702         case 1:
703           if (PDFCharIsWhitespace(byte)) {
704             break;
705           } else if (std::isdigit(byte)) {
706             start_pos = pos + i;
707             status = 2;
708             objnum = FXSYS_toDecimalDigit(byte);
709           } else if (byte == 't') {
710             status = 7;
711             inside_index = 1;
712           } else if (byte == 'x') {
713             status = 8;
714             inside_index = 1;
715           } else {
716             --i;
717             status = 0;
718           }
719           break;
720         case 2:
721           if (std::isdigit(byte)) {
722             objnum = objnum * 10 + FXSYS_toDecimalDigit(byte);
723             break;
724           } else if (PDFCharIsWhitespace(byte)) {
725             status = 3;
726           } else {
727             --i;
728             status = 14;
729             inside_index = 0;
730           }
731           break;
732         case 3:
733           if (std::isdigit(byte)) {
734             start_pos1 = pos + i;
735             status = 4;
736             gennum = FXSYS_toDecimalDigit(byte);
737           } else if (PDFCharIsWhitespace(byte)) {
738             break;
739           } else if (byte == 't') {
740             status = 7;
741             inside_index = 1;
742           } else {
743             --i;
744             status = 0;
745           }
746           break;
747         case 4:
748           if (std::isdigit(byte)) {
749             gennum = gennum * 10 + FXSYS_toDecimalDigit(byte);
750             break;
751           } else if (PDFCharIsWhitespace(byte)) {
752             status = 5;
753           } else {
754             --i;
755             status = 0;
756           }
757           break;
758         case 5:
759           if (byte == 'o') {
760             status = 6;
761             inside_index = 1;
762           } else if (PDFCharIsWhitespace(byte)) {
763             break;
764           } else if (std::isdigit(byte)) {
765             objnum = gennum;
766             gennum = FXSYS_toDecimalDigit(byte);
767             start_pos = start_pos1;
768             start_pos1 = pos + i;
769             status = 4;
770           } else if (byte == 't') {
771             status = 7;
772             inside_index = 1;
773           } else {
774             --i;
775             status = 0;
776           }
777           break;
778         case 6:
779           switch (inside_index) {
780             case 1:
781               if (byte != 'b') {
782                 --i;
783                 status = 0;
784               } else {
785                 inside_index++;
786               }
787               break;
788             case 2:
789               if (byte != 'j') {
790                 --i;
791                 status = 0;
792               } else {
793                 inside_index++;
794               }
795               break;
796             case 3:
797               if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
798                 if (objnum > 0x1000000) {
799                   status = 0;
800                   break;
801                 }
802                 FX_FILESIZE obj_pos = start_pos - m_Syntax.m_HeaderOffset;
803                 last_obj = start_pos;
804                 void* pResult =
805                     FXSYS_bsearch(&obj_pos, m_SortedOffset.GetData(),
806                                   m_SortedOffset.GetSize(), sizeof(FX_FILESIZE),
807                                   CompareFileSize);
808                 if (!pResult) {
809                   m_SortedOffset.Add(obj_pos);
810                 }
811                 FX_FILESIZE obj_end = 0;
812                 CPDF_Object* pObject = ParseIndirectObjectAtByStrict(
813                     m_pDocument, obj_pos, objnum, NULL, &obj_end);
814                 if (CPDF_Stream* pStream = ToStream(pObject)) {
815                   if (CPDF_Dictionary* pDict = pStream->GetDict()) {
816                     if ((pDict->KeyExist("Type")) &&
817                         (pDict->GetString("Type") == "XRef" &&
818                          pDict->KeyExist("Size"))) {
819                       CPDF_Object* pRoot = pDict->GetElement("Root");
820                       if (pRoot && pRoot->GetDict() &&
821                           pRoot->GetDict()->GetElement("Pages")) {
822                         if (m_pTrailer)
823                           m_pTrailer->Release();
824                         m_pTrailer = ToDictionary(pDict->Clone());
825                         }
826                       }
827                   }
828                 }
829                 FX_FILESIZE offset = 0;
830                 m_Syntax.RestorePos(obj_pos);
831                 offset = m_Syntax.FindTag("obj", 0);
832                 if (offset == -1) {
833                   offset = 0;
834                 } else {
835                   offset += 3;
836                 }
837                 FX_FILESIZE nLen = obj_end - obj_pos - offset;
838                 if ((FX_DWORD)nLen > size - i) {
839                   pos = obj_end + m_Syntax.m_HeaderOffset;
840                   bOverFlow = true;
841                 } else {
842                   i += (FX_DWORD)nLen;
843                 }
844                 if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) &&
845                     m_ObjectInfo[objnum].pos) {
846                   if (pObject) {
847                     FX_DWORD oldgen = m_ObjVersion.GetAt(objnum);
848                     m_ObjectInfo[objnum].pos = obj_pos;
849                     m_ObjVersion.SetAt(objnum, (int16_t)gennum);
850                     if (oldgen != gennum) {
851                       m_bVersionUpdated = TRUE;
852                     }
853                   }
854                 } else {
855                   m_ObjectInfo[objnum].pos = obj_pos;
856                   m_V5Type.SetAtGrow(objnum, 1);
857                   m_ObjVersion.SetAtGrow(objnum, (int16_t)gennum);
858                 }
859                 if (pObject) {
860                   pObject->Release();
861                 }
862               }
863               --i;
864               status = 0;
865               break;
866           }
867           break;
868         case 7:
869           if (inside_index == 7) {
870             if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
871               last_trailer = pos + i - 7;
872               m_Syntax.RestorePos(pos + i - m_Syntax.m_HeaderOffset);
873               CPDF_Object* pObj =
874                   m_Syntax.GetObject(m_pDocument, 0, 0, nullptr, true);
875               if (pObj) {
876                 if (!pObj->IsDictionary() && !pObj->AsStream()) {
877                   pObj->Release();
878                 } else {
879                   CPDF_Stream* pStream = pObj->AsStream();
880                   if (CPDF_Dictionary* pTrailer =
881                           pStream ? pStream->GetDict() : pObj->AsDictionary()) {
882                     if (m_pTrailer) {
883                       CPDF_Object* pRoot = pTrailer->GetElement("Root");
884                       CPDF_Reference* pRef = ToReference(pRoot);
885                       if (!pRoot ||
886                           (pRef && IsValidObjectNumber(pRef->GetRefObjNum()) &&
887                            m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) {
888                         auto it = pTrailer->begin();
889                         while (it != pTrailer->end()) {
890                           const CFX_ByteString& key = it->first;
891                           CPDF_Object* pElement = it->second;
892                           ++it;
893                           FX_DWORD dwObjNum =
894                               pElement ? pElement->GetObjNum() : 0;
895                           if (dwObjNum) {
896                             m_pTrailer->SetAtReference(key, m_pDocument,
897                                                        dwObjNum);
898                           } else {
899                             m_pTrailer->SetAt(key, pElement->Clone());
900                           }
901                         }
902                         pObj->Release();
903                       } else {
904                         pObj->Release();
905                       }
906                     } else {
907                       if (pObj->IsStream()) {
908                         m_pTrailer = ToDictionary(pTrailer->Clone());
909                         pObj->Release();
910                       } else {
911                         m_pTrailer = pTrailer;
912                       }
913                       FX_FILESIZE dwSavePos = m_Syntax.SavePos();
914                       CFX_ByteString strWord = m_Syntax.GetKeyword();
915                       if (!strWord.Compare("startxref")) {
916                         bool bNumber;
917                         CFX_ByteString bsOffset =
918                             m_Syntax.GetNextWord(&bNumber);
919                         if (bNumber) {
920                           m_LastXRefOffset = FXSYS_atoi(bsOffset);
921                         }
922                       }
923                       m_Syntax.RestorePos(dwSavePos);
924                     }
925                   } else {
926                     pObj->Release();
927                   }
928                 }
929               }
930             }
931             --i;
932             status = 0;
933           } else if (byte == "trailer"[inside_index]) {
934             inside_index++;
935           } else {
936             --i;
937             status = 0;
938           }
939           break;
940         case 8:
941           if (inside_index == 4) {
942             last_xref = pos + i - 4;
943             status = 1;
944           } else if (byte == "xref"[inside_index]) {
945             inside_index++;
946           } else {
947             --i;
948             status = 0;
949           }
950           break;
951         case 9:
952           if (byte == '\r' || byte == '\n') {
953             status = 0;
954           }
955           break;
956         case 10:
957           if (byte == ')') {
958             if (depth > 0) {
959               depth--;
960             }
961           } else if (byte == '(') {
962             depth++;
963           }
964           if (!depth) {
965             status = 0;
966           }
967           break;
968         case 11:
969           if (byte == '>' || (byte == '<' && inside_index == 1))
970             status = 0;
971           inside_index = 0;
972           break;
973         case 13:
974           if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) {
975             --i;
976             status = 0;
977           }
978           break;
979         case 14:
980           if (PDFCharIsWhitespace(byte)) {
981             status = 0;
982           } else if (byte == '%' || byte == '(' || byte == '<' ||
983                      byte == '\\') {
984             status = 0;
985             --i;
986           } else if (inside_index == 6) {
987             status = 0;
988             --i;
989           } else if (byte == "endobj"[inside_index]) {
990             inside_index++;
991           }
992           break;
993       }
994       if (bOverFlow) {
995         size = 0;
996         break;
997       }
998     }
999     pos += size;
1000 
1001     // If the position has not changed at all in a loop iteration, then break
1002     // out to prevent infinite looping.
1003     if (pos == saved_pos)
1004       break;
1005   }
1006   if (last_xref != -1 && last_xref > last_obj) {
1007     last_trailer = last_xref;
1008   } else if (last_trailer == -1 || last_xref < last_obj) {
1009     last_trailer = m_Syntax.m_FileLen;
1010   }
1011   FX_FILESIZE offset = last_trailer - m_Syntax.m_HeaderOffset;
1012   void* pResult =
1013       FXSYS_bsearch(&offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(),
1014                     sizeof(FX_FILESIZE), CompareFileSize);
1015   if (!pResult) {
1016     m_SortedOffset.Add(offset);
1017   }
1018   return m_pTrailer && !m_ObjectInfo.empty();
1019 }
1020 
LoadCrossRefV5(FX_FILESIZE * pos,FX_BOOL bMainXRef)1021 FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef) {
1022   CPDF_Object* pObject = ParseIndirectObjectAt(m_pDocument, *pos, 0, nullptr);
1023   if (!pObject)
1024     return FALSE;
1025   if (m_pDocument) {
1026     FX_BOOL bInserted = FALSE;
1027     CPDF_Dictionary* pDict = m_pDocument->GetRoot();
1028     if (!pDict || pDict->GetObjNum() != pObject->m_ObjNum) {
1029       bInserted = m_pDocument->InsertIndirectObject(pObject->m_ObjNum, pObject);
1030     } else {
1031       if (pObject->IsStream())
1032         pObject->Release();
1033     }
1034     if (!bInserted)
1035       return FALSE;
1036   }
1037 
1038   CPDF_Stream* pStream = pObject->AsStream();
1039   if (!pStream)
1040     return FALSE;
1041 
1042   *pos = pStream->GetDict()->GetInteger("Prev");
1043   int32_t size = pStream->GetDict()->GetInteger("Size");
1044   if (size < 0) {
1045     pStream->Release();
1046     return FALSE;
1047   }
1048   if (bMainXRef) {
1049     m_pTrailer = ToDictionary(pStream->GetDict()->Clone());
1050     ShrinkObjectMap(size);
1051     if (m_V5Type.SetSize(size)) {
1052       FXSYS_memset(m_V5Type.GetData(), 0, size);
1053     }
1054   } else {
1055     m_Trailers.Add(ToDictionary(pStream->GetDict()->Clone()));
1056   }
1057   std::vector<std::pair<int32_t, int32_t> > arrIndex;
1058   CPDF_Array* pArray = pStream->GetDict()->GetArray("Index");
1059   if (pArray) {
1060     FX_DWORD nPairSize = pArray->GetCount() / 2;
1061     for (FX_DWORD i = 0; i < nPairSize; i++) {
1062       CPDF_Object* pStartNumObj = pArray->GetElement(i * 2);
1063       CPDF_Object* pCountObj = pArray->GetElement(i * 2 + 1);
1064       if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) {
1065         int nStartNum = pStartNumObj->GetInteger();
1066         int nCount = pCountObj->GetInteger();
1067         if (nStartNum >= 0 && nCount > 0) {
1068           arrIndex.push_back(std::make_pair(nStartNum, nCount));
1069         }
1070       }
1071     }
1072   }
1073   if (arrIndex.size() == 0) {
1074     arrIndex.push_back(std::make_pair(0, size));
1075   }
1076   pArray = pStream->GetDict()->GetArray("W");
1077   if (!pArray) {
1078     pStream->Release();
1079     return FALSE;
1080   }
1081   CFX_DWordArray WidthArray;
1082   FX_SAFE_DWORD dwAccWidth = 0;
1083   for (FX_DWORD i = 0; i < pArray->GetCount(); i++) {
1084     WidthArray.Add(pArray->GetInteger(i));
1085     dwAccWidth += WidthArray[i];
1086   }
1087   if (!dwAccWidth.IsValid() || WidthArray.GetSize() < 3) {
1088     pStream->Release();
1089     return FALSE;
1090   }
1091   FX_DWORD totalWidth = dwAccWidth.ValueOrDie();
1092   CPDF_StreamAcc acc;
1093   acc.LoadAllData(pStream);
1094   const uint8_t* pData = acc.GetData();
1095   FX_DWORD dwTotalSize = acc.GetSize();
1096   FX_DWORD segindex = 0;
1097   for (FX_DWORD i = 0; i < arrIndex.size(); i++) {
1098     int32_t startnum = arrIndex[i].first;
1099     if (startnum < 0) {
1100       continue;
1101     }
1102     m_dwXrefStartObjNum =
1103         pdfium::base::checked_cast<FX_DWORD, int32_t>(startnum);
1104     FX_DWORD count =
1105         pdfium::base::checked_cast<FX_DWORD, int32_t>(arrIndex[i].second);
1106     FX_SAFE_DWORD dwCaculatedSize = segindex;
1107     dwCaculatedSize += count;
1108     dwCaculatedSize *= totalWidth;
1109     if (!dwCaculatedSize.IsValid() ||
1110         dwCaculatedSize.ValueOrDie() > dwTotalSize) {
1111       continue;
1112     }
1113     const uint8_t* segstart = pData + segindex * totalWidth;
1114     FX_SAFE_DWORD dwMaxObjNum = startnum;
1115     dwMaxObjNum += count;
1116     FX_DWORD dwV5Size =
1117         pdfium::base::checked_cast<FX_DWORD, int32_t>(m_V5Type.GetSize());
1118     if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size) {
1119       continue;
1120     }
1121     for (FX_DWORD j = 0; j < count; j++) {
1122       int32_t type = 1;
1123       const uint8_t* entrystart = segstart + j * totalWidth;
1124       if (WidthArray[0]) {
1125         type = GetVarInt(entrystart, WidthArray[0]);
1126       }
1127       if (m_V5Type[startnum + j] == 255) {
1128         FX_FILESIZE offset =
1129             GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
1130         m_ObjectInfo[startnum + j].pos = offset;
1131         void* pResult = FXSYS_bsearch(&offset, m_SortedOffset.GetData(),
1132                                       m_SortedOffset.GetSize(),
1133                                       sizeof(FX_FILESIZE), CompareFileSize);
1134         if (!pResult) {
1135           m_SortedOffset.Add(offset);
1136         }
1137         continue;
1138       }
1139       if (m_V5Type[startnum + j]) {
1140         continue;
1141       }
1142       m_V5Type[startnum + j] = type;
1143       if (type == 0) {
1144         m_ObjectInfo[startnum + j].pos = 0;
1145       } else {
1146         FX_FILESIZE offset =
1147             GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
1148         m_ObjectInfo[startnum + j].pos = offset;
1149         if (type == 1) {
1150           void* pResult = FXSYS_bsearch(&offset, m_SortedOffset.GetData(),
1151                                         m_SortedOffset.GetSize(),
1152                                         sizeof(FX_FILESIZE), CompareFileSize);
1153           if (!pResult) {
1154             m_SortedOffset.Add(offset);
1155           }
1156         } else {
1157           if (offset < 0 || offset >= m_V5Type.GetSize()) {
1158             pStream->Release();
1159             return FALSE;
1160           }
1161           m_V5Type[offset] = 255;
1162         }
1163       }
1164     }
1165     segindex += count;
1166   }
1167   pStream->Release();
1168   return TRUE;
1169 }
GetIDArray()1170 CPDF_Array* CPDF_Parser::GetIDArray() {
1171   CPDF_Object* pID = m_pTrailer ? m_pTrailer->GetElement("ID") : NULL;
1172   if (!pID)
1173     return nullptr;
1174 
1175   if (CPDF_Reference* pRef = pID->AsReference()) {
1176     pID = ParseIndirectObject(nullptr, pRef->GetRefObjNum());
1177     m_pTrailer->SetAt("ID", pID);
1178   }
1179   return ToArray(pID);
1180 }
GetRootObjNum()1181 FX_DWORD CPDF_Parser::GetRootObjNum() {
1182   CPDF_Reference* pRef =
1183       ToReference(m_pTrailer ? m_pTrailer->GetElement("Root") : nullptr);
1184   return pRef ? pRef->GetRefObjNum() : 0;
1185 }
GetInfoObjNum()1186 FX_DWORD CPDF_Parser::GetInfoObjNum() {
1187   CPDF_Reference* pRef =
1188       ToReference(m_pTrailer ? m_pTrailer->GetElement("Info") : nullptr);
1189   return pRef ? pRef->GetRefObjNum() : 0;
1190 }
IsFormStream(FX_DWORD objnum,FX_BOOL & bForm)1191 FX_BOOL CPDF_Parser::IsFormStream(FX_DWORD objnum, FX_BOOL& bForm) {
1192   bForm = FALSE;
1193   if (!IsValidObjectNumber(objnum))
1194     return TRUE;
1195   if (m_V5Type[objnum] == 0)
1196     return TRUE;
1197   if (m_V5Type[objnum] == 2)
1198     return TRUE;
1199   FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
1200   void* pResult =
1201       FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(),
1202                     sizeof(FX_FILESIZE), CompareFileSize);
1203   if (!pResult) {
1204     return TRUE;
1205   }
1206   if ((FX_FILESIZE*)pResult - (FX_FILESIZE*)m_SortedOffset.GetData() ==
1207       m_SortedOffset.GetSize() - 1) {
1208     return FALSE;
1209   }
1210   FX_FILESIZE size = ((FX_FILESIZE*)pResult)[1] - pos;
1211   FX_FILESIZE SavedPos = m_Syntax.SavePos();
1212   m_Syntax.RestorePos(pos);
1213   const char kFormStream[] = "/Form\0stream";
1214   const CFX_ByteStringC kFormStreamStr(kFormStream, sizeof(kFormStream) - 1);
1215   bForm = m_Syntax.SearchMultiWord(kFormStreamStr, TRUE, size) == 0;
1216   m_Syntax.RestorePos(SavedPos);
1217   return TRUE;
1218 }
1219 
ParseIndirectObject(CPDF_IndirectObjectHolder * pObjList,FX_DWORD objnum,PARSE_CONTEXT * pContext)1220 CPDF_Object* CPDF_Parser::ParseIndirectObject(
1221     CPDF_IndirectObjectHolder* pObjList,
1222     FX_DWORD objnum,
1223     PARSE_CONTEXT* pContext) {
1224   if (!IsValidObjectNumber(objnum))
1225     return nullptr;
1226 
1227   // Prevent circular parsing the same object.
1228   if (pdfium::ContainsKey(m_ParsingObjNums, objnum))
1229     return nullptr;
1230   ScopedSetInsertion<FX_DWORD> local_insert(&m_ParsingObjNums, objnum);
1231 
1232   if (m_V5Type[objnum] == 1 || m_V5Type[objnum] == 255) {
1233     FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
1234     if (pos <= 0)
1235       return nullptr;
1236     return ParseIndirectObjectAt(pObjList, pos, objnum, pContext);
1237   }
1238   if (m_V5Type[objnum] != 2)
1239     return nullptr;
1240 
1241   CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos);
1242   if (!pObjStream)
1243     return nullptr;
1244 
1245   ScopedFileStream file(FX_CreateMemoryStream(
1246       (uint8_t*)pObjStream->GetData(), (size_t)pObjStream->GetSize(), FALSE));
1247   CPDF_SyntaxParser syntax;
1248   syntax.InitParser(file.get(), 0);
1249   const int32_t offset = GetStreamFirst(pObjStream);
1250 
1251   // Read object numbers from |pObjStream| into a cache.
1252   if (!pdfium::ContainsKey(m_ObjCache, pObjStream)) {
1253     for (int32_t i = GetStreamNCount(pObjStream); i > 0; --i) {
1254       FX_DWORD thisnum = syntax.GetDirectNum();
1255       FX_DWORD thisoff = syntax.GetDirectNum();
1256       m_ObjCache[pObjStream][thisnum] = thisoff;
1257     }
1258   }
1259 
1260   const auto it = m_ObjCache[pObjStream].find(objnum);
1261   if (it == m_ObjCache[pObjStream].end())
1262     return nullptr;
1263 
1264   syntax.RestorePos(offset + it->second);
1265   return syntax.GetObject(pObjList, 0, 0, pContext, true);
1266 }
1267 
GetObjectStream(FX_DWORD objnum)1268 CPDF_StreamAcc* CPDF_Parser::GetObjectStream(FX_DWORD objnum) {
1269   CPDF_StreamAcc* pStreamAcc = nullptr;
1270   if (m_ObjectStreamMap.Lookup((void*)(uintptr_t)objnum, (void*&)pStreamAcc))
1271     return pStreamAcc;
1272 
1273   const CPDF_Stream* pStream = ToStream(
1274       m_pDocument ? m_pDocument->GetIndirectObject(objnum, nullptr) : nullptr);
1275   if (!pStream)
1276     return nullptr;
1277 
1278   pStreamAcc = new CPDF_StreamAcc;
1279   pStreamAcc->LoadAllData(pStream);
1280   m_ObjectStreamMap.SetAt((void*)(uintptr_t)objnum, pStreamAcc);
1281   return pStreamAcc;
1282 }
1283 
GetObjectSize(FX_DWORD objnum) const1284 FX_FILESIZE CPDF_Parser::GetObjectSize(FX_DWORD objnum) const {
1285   if (!IsValidObjectNumber(objnum))
1286     return 0;
1287 
1288   if (m_V5Type[objnum] == 2)
1289     objnum = GetObjectPositionOrZero(objnum);
1290 
1291   if (m_V5Type[objnum] == 1 || m_V5Type[objnum] == 255) {
1292     FX_FILESIZE offset = GetObjectPositionOrZero(objnum);
1293     if (offset == 0)
1294       return 0;
1295 
1296     FX_FILESIZE* pResult = static_cast<FX_FILESIZE*>(FXSYS_bsearch(
1297         &offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(),
1298         sizeof(FX_FILESIZE), CompareFileSize));
1299     if (!pResult)
1300       return 0;
1301 
1302     if (pResult - m_SortedOffset.GetData() == m_SortedOffset.GetSize() - 1)
1303       return 0;
1304 
1305     return pResult[1] - offset;
1306   }
1307   return 0;
1308 }
1309 
GetIndirectBinary(FX_DWORD objnum,uint8_t * & pBuffer,FX_DWORD & size)1310 void CPDF_Parser::GetIndirectBinary(FX_DWORD objnum,
1311                                     uint8_t*& pBuffer,
1312                                     FX_DWORD& size) {
1313   pBuffer = NULL;
1314   size = 0;
1315   if (!IsValidObjectNumber(objnum))
1316     return;
1317 
1318   if (m_V5Type[objnum] == 2) {
1319     CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos);
1320     if (!pObjStream)
1321       return;
1322 
1323     int32_t offset = GetStreamFirst(pObjStream);
1324     const uint8_t* pData = pObjStream->GetData();
1325     FX_DWORD totalsize = pObjStream->GetSize();
1326     ScopedFileStream file(
1327         FX_CreateMemoryStream((uint8_t*)pData, (size_t)totalsize, FALSE));
1328     CPDF_SyntaxParser syntax;
1329     syntax.InitParser(file.get(), 0);
1330     for (int i = GetStreamNCount(pObjStream); i > 0; --i) {
1331       FX_DWORD thisnum = syntax.GetDirectNum();
1332       FX_DWORD thisoff = syntax.GetDirectNum();
1333       if (thisnum != objnum)
1334         continue;
1335 
1336       if (i == 1) {
1337         size = totalsize - (thisoff + offset);
1338       } else {
1339         syntax.GetDirectNum();  // Skip nextnum.
1340         FX_DWORD nextoff = syntax.GetDirectNum();
1341         size = nextoff - thisoff;
1342       }
1343       pBuffer = FX_Alloc(uint8_t, size);
1344       FXSYS_memcpy(pBuffer, pData + thisoff + offset, size);
1345       return;
1346     }
1347     return;
1348   }
1349 
1350   if (m_V5Type[objnum] != 1)
1351     return;
1352 
1353   FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
1354   if (pos == 0) {
1355     return;
1356   }
1357   FX_FILESIZE SavedPos = m_Syntax.SavePos();
1358   m_Syntax.RestorePos(pos);
1359   bool bIsNumber;
1360   CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber);
1361   if (!bIsNumber) {
1362     m_Syntax.RestorePos(SavedPos);
1363     return;
1364   }
1365   FX_DWORD parser_objnum = FXSYS_atoi(word);
1366   if (parser_objnum && parser_objnum != objnum) {
1367     m_Syntax.RestorePos(SavedPos);
1368     return;
1369   }
1370   word = m_Syntax.GetNextWord(&bIsNumber);
1371   if (!bIsNumber) {
1372     m_Syntax.RestorePos(SavedPos);
1373     return;
1374   }
1375   if (m_Syntax.GetKeyword() != "obj") {
1376     m_Syntax.RestorePos(SavedPos);
1377     return;
1378   }
1379   void* pResult =
1380       FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(),
1381                     sizeof(FX_FILESIZE), CompareFileSize);
1382   if (!pResult) {
1383     m_Syntax.RestorePos(SavedPos);
1384     return;
1385   }
1386   FX_FILESIZE nextoff = ((FX_FILESIZE*)pResult)[1];
1387   FX_BOOL bNextOffValid = FALSE;
1388   if (nextoff != pos) {
1389     m_Syntax.RestorePos(nextoff);
1390     word = m_Syntax.GetNextWord(&bIsNumber);
1391     if (word == "xref") {
1392       bNextOffValid = TRUE;
1393     } else if (bIsNumber) {
1394       word = m_Syntax.GetNextWord(&bIsNumber);
1395       if (bIsNumber && m_Syntax.GetKeyword() == "obj") {
1396         bNextOffValid = TRUE;
1397       }
1398     }
1399   }
1400   if (!bNextOffValid) {
1401     m_Syntax.RestorePos(pos);
1402     while (1) {
1403       if (m_Syntax.GetKeyword() == "endobj") {
1404         break;
1405       }
1406       if (m_Syntax.SavePos() == m_Syntax.m_FileLen) {
1407         break;
1408       }
1409     }
1410     nextoff = m_Syntax.SavePos();
1411   }
1412   size = (FX_DWORD)(nextoff - pos);
1413   pBuffer = FX_Alloc(uint8_t, size);
1414   m_Syntax.RestorePos(pos);
1415   m_Syntax.ReadBlock(pBuffer, size);
1416   m_Syntax.RestorePos(SavedPos);
1417 }
1418 
ParseIndirectObjectAt(CPDF_IndirectObjectHolder * pObjList,FX_FILESIZE pos,FX_DWORD objnum,PARSE_CONTEXT * pContext)1419 CPDF_Object* CPDF_Parser::ParseIndirectObjectAt(
1420     CPDF_IndirectObjectHolder* pObjList,
1421     FX_FILESIZE pos,
1422     FX_DWORD objnum,
1423     PARSE_CONTEXT* pContext) {
1424   FX_FILESIZE SavedPos = m_Syntax.SavePos();
1425   m_Syntax.RestorePos(pos);
1426   bool bIsNumber;
1427   CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber);
1428   if (!bIsNumber) {
1429     m_Syntax.RestorePos(SavedPos);
1430     return NULL;
1431   }
1432   FX_FILESIZE objOffset = m_Syntax.SavePos();
1433   objOffset -= word.GetLength();
1434   FX_DWORD parser_objnum = FXSYS_atoi(word);
1435   if (objnum && parser_objnum != objnum) {
1436     m_Syntax.RestorePos(SavedPos);
1437     return NULL;
1438   }
1439   word = m_Syntax.GetNextWord(&bIsNumber);
1440   if (!bIsNumber) {
1441     m_Syntax.RestorePos(SavedPos);
1442     return NULL;
1443   }
1444   FX_DWORD parser_gennum = FXSYS_atoi(word);
1445   if (m_Syntax.GetKeyword() != "obj") {
1446     m_Syntax.RestorePos(SavedPos);
1447     return NULL;
1448   }
1449   CPDF_Object* pObj =
1450       m_Syntax.GetObject(pObjList, objnum, parser_gennum, pContext, true);
1451   m_Syntax.SavePos();
1452   CFX_ByteString bsWord = m_Syntax.GetKeyword();
1453   if (bsWord == "endobj") {
1454     m_Syntax.SavePos();
1455   }
1456   m_Syntax.RestorePos(SavedPos);
1457   if (pObj) {
1458     if (!objnum)
1459       pObj->m_ObjNum = parser_objnum;
1460     pObj->m_GenNum = parser_gennum;
1461   }
1462   return pObj;
1463 }
ParseIndirectObjectAtByStrict(CPDF_IndirectObjectHolder * pObjList,FX_FILESIZE pos,FX_DWORD objnum,PARSE_CONTEXT * pContext,FX_FILESIZE * pResultPos)1464 CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict(
1465     CPDF_IndirectObjectHolder* pObjList,
1466     FX_FILESIZE pos,
1467     FX_DWORD objnum,
1468     PARSE_CONTEXT* pContext,
1469     FX_FILESIZE* pResultPos) {
1470   FX_FILESIZE SavedPos = m_Syntax.SavePos();
1471   m_Syntax.RestorePos(pos);
1472   bool bIsNumber;
1473   CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber);
1474   if (!bIsNumber) {
1475     m_Syntax.RestorePos(SavedPos);
1476     return NULL;
1477   }
1478   FX_DWORD parser_objnum = FXSYS_atoi(word);
1479   if (objnum && parser_objnum != objnum) {
1480     m_Syntax.RestorePos(SavedPos);
1481     return NULL;
1482   }
1483   word = m_Syntax.GetNextWord(&bIsNumber);
1484   if (!bIsNumber) {
1485     m_Syntax.RestorePos(SavedPos);
1486     return NULL;
1487   }
1488   FX_DWORD gennum = FXSYS_atoi(word);
1489   if (m_Syntax.GetKeyword() != "obj") {
1490     m_Syntax.RestorePos(SavedPos);
1491     return NULL;
1492   }
1493   CPDF_Object* pObj =
1494       m_Syntax.GetObjectByStrict(pObjList, objnum, gennum, pContext);
1495   if (pResultPos) {
1496     *pResultPos = m_Syntax.m_Pos;
1497   }
1498   m_Syntax.RestorePos(SavedPos);
1499   return pObj;
1500 }
1501 
LoadTrailerV4()1502 CPDF_Dictionary* CPDF_Parser::LoadTrailerV4() {
1503   if (m_Syntax.GetKeyword() != "trailer")
1504     return nullptr;
1505 
1506   std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pObj(
1507       m_Syntax.GetObject(m_pDocument, 0, 0, nullptr, true));
1508   if (!ToDictionary(pObj.get()))
1509     return nullptr;
1510   return pObj.release()->AsDictionary();
1511 }
1512 
GetPermissions(FX_BOOL bCheckRevision)1513 FX_DWORD CPDF_Parser::GetPermissions(FX_BOOL bCheckRevision) {
1514   if (!m_pSecurityHandler) {
1515     return (FX_DWORD)-1;
1516   }
1517   FX_DWORD dwPermission = m_pSecurityHandler->GetPermissions();
1518   if (m_pEncryptDict && m_pEncryptDict->GetString("Filter") == "Standard") {
1519     dwPermission &= 0xFFFFFFFC;
1520     dwPermission |= 0xFFFFF0C0;
1521     if (bCheckRevision && m_pEncryptDict->GetInteger("R") == 2) {
1522       dwPermission &= 0xFFFFF0FF;
1523     }
1524   }
1525   return dwPermission;
1526 }
IsOwner()1527 FX_BOOL CPDF_Parser::IsOwner() {
1528   return !m_pSecurityHandler || m_pSecurityHandler->IsOwner();
1529 }
SetSecurityHandler(CPDF_SecurityHandler * pSecurityHandler,FX_BOOL bForced)1530 void CPDF_Parser::SetSecurityHandler(CPDF_SecurityHandler* pSecurityHandler,
1531                                      FX_BOOL bForced) {
1532   m_bForceUseSecurityHandler = bForced;
1533   m_pSecurityHandler.reset(pSecurityHandler);
1534   if (m_bForceUseSecurityHandler) {
1535     return;
1536   }
1537   m_Syntax.m_pCryptoHandler.reset(pSecurityHandler->CreateCryptoHandler());
1538   m_Syntax.m_pCryptoHandler->Init(NULL, pSecurityHandler);
1539 }
IsLinearizedFile(IFX_FileRead * pFileAccess,FX_DWORD offset)1540 FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess,
1541                                       FX_DWORD offset) {
1542   m_Syntax.InitParser(pFileAccess, offset);
1543   m_Syntax.RestorePos(m_Syntax.m_HeaderOffset + 9);
1544   FX_FILESIZE SavedPos = m_Syntax.SavePos();
1545   bool bIsNumber;
1546   CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber);
1547   if (!bIsNumber) {
1548     return FALSE;
1549   }
1550   FX_DWORD objnum = FXSYS_atoi(word);
1551   word = m_Syntax.GetNextWord(&bIsNumber);
1552   if (!bIsNumber) {
1553     return FALSE;
1554   }
1555   FX_DWORD gennum = FXSYS_atoi(word);
1556   if (m_Syntax.GetKeyword() != "obj") {
1557     m_Syntax.RestorePos(SavedPos);
1558     return FALSE;
1559   }
1560   m_pLinearized = m_Syntax.GetObject(nullptr, objnum, gennum, nullptr, true);
1561   if (!m_pLinearized) {
1562     return FALSE;
1563   }
1564 
1565   CPDF_Dictionary* pDict = m_pLinearized->GetDict();
1566   if (pDict && pDict->GetElement("Linearized")) {
1567     m_Syntax.GetNextWord(nullptr);
1568 
1569     CPDF_Object* pLen = pDict->GetElement("L");
1570     if (!pLen) {
1571       m_pLinearized->Release();
1572       m_pLinearized = NULL;
1573       return FALSE;
1574     }
1575     if (pLen->GetInteger() != (int)pFileAccess->GetSize()) {
1576       return FALSE;
1577     }
1578 
1579     if (CPDF_Number* pNo = ToNumber(pDict->GetElement("P")))
1580       m_dwFirstPageNo = pNo->GetInteger();
1581 
1582     if (CPDF_Number* pTable = ToNumber(pDict->GetElement("T")))
1583       m_LastXRefOffset = pTable->GetInteger();
1584 
1585     return TRUE;
1586   }
1587   m_pLinearized->Release();
1588   m_pLinearized = NULL;
1589   return FALSE;
1590 }
StartAsynParse(IFX_FileRead * pFileAccess,FX_BOOL bReParse,FX_BOOL bOwnFileRead)1591 FX_DWORD CPDF_Parser::StartAsynParse(IFX_FileRead* pFileAccess,
1592                                      FX_BOOL bReParse,
1593                                      FX_BOOL bOwnFileRead) {
1594   CloseParser(bReParse);
1595   m_bXRefStream = FALSE;
1596   m_LastXRefOffset = 0;
1597   m_bOwnFileRead = bOwnFileRead;
1598   int32_t offset = GetHeaderOffset(pFileAccess);
1599   if (offset == -1) {
1600     return PDFPARSE_ERROR_FORMAT;
1601   }
1602   if (!IsLinearizedFile(pFileAccess, offset)) {
1603     m_Syntax.m_pFileAccess = NULL;
1604     return StartParse(pFileAccess, bReParse, bOwnFileRead);
1605   }
1606   if (!bReParse) {
1607     m_pDocument = new CPDF_Document(this);
1608   }
1609   FX_FILESIZE dwFirstXRefOffset = m_Syntax.SavePos();
1610   FX_BOOL bXRefRebuilt = FALSE;
1611   FX_BOOL bLoadV4 = FALSE;
1612   if (!(bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, FALSE)) &&
1613       !LoadCrossRefV5(&dwFirstXRefOffset, TRUE)) {
1614     if (!RebuildCrossRef()) {
1615       return PDFPARSE_ERROR_FORMAT;
1616     }
1617     bXRefRebuilt = TRUE;
1618     m_LastXRefOffset = 0;
1619   }
1620   if (bLoadV4) {
1621     m_pTrailer = LoadTrailerV4();
1622     if (!m_pTrailer) {
1623       return PDFPARSE_ERROR_SUCCESS;
1624     }
1625 
1626     int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size");
1627     if (xrefsize > 0) {
1628       ShrinkObjectMap(xrefsize);
1629       m_V5Type.SetSize(xrefsize);
1630     }
1631   }
1632   FX_DWORD dwRet = SetEncryptHandler();
1633   if (dwRet != PDFPARSE_ERROR_SUCCESS) {
1634     return dwRet;
1635   }
1636   m_pDocument->LoadAsynDoc(m_pLinearized->GetDict());
1637   if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
1638     if (bXRefRebuilt) {
1639       return PDFPARSE_ERROR_FORMAT;
1640     }
1641     ReleaseEncryptHandler();
1642     if (!RebuildCrossRef()) {
1643       return PDFPARSE_ERROR_FORMAT;
1644     }
1645     dwRet = SetEncryptHandler();
1646     if (dwRet != PDFPARSE_ERROR_SUCCESS) {
1647       return dwRet;
1648     }
1649     m_pDocument->LoadAsynDoc(m_pLinearized->GetDict());
1650     if (!m_pDocument->GetRoot()) {
1651       return PDFPARSE_ERROR_FORMAT;
1652     }
1653   }
1654   FXSYS_qsort(m_SortedOffset.GetData(), m_SortedOffset.GetSize(),
1655               sizeof(FX_FILESIZE), CompareFileSize);
1656   if (GetRootObjNum() == 0) {
1657     ReleaseEncryptHandler();
1658     if (!RebuildCrossRef() || GetRootObjNum() == 0)
1659       return PDFPARSE_ERROR_FORMAT;
1660 
1661     dwRet = SetEncryptHandler();
1662     if (dwRet != PDFPARSE_ERROR_SUCCESS) {
1663       return dwRet;
1664     }
1665   }
1666   if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
1667     if (CPDF_Reference* pMetadata =
1668             ToReference(m_pDocument->GetRoot()->GetElement("Metadata")))
1669       m_Syntax.m_MetadataObjnum = pMetadata->GetRefObjNum();
1670   }
1671   return PDFPARSE_ERROR_SUCCESS;
1672 }
LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos)1673 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) {
1674   if (!LoadCrossRefV5(&xrefpos, FALSE)) {
1675     return FALSE;
1676   }
1677   std::set<FX_FILESIZE> seen_xrefpos;
1678   while (xrefpos) {
1679     seen_xrefpos.insert(xrefpos);
1680     if (!LoadCrossRefV5(&xrefpos, FALSE)) {
1681       return FALSE;
1682     }
1683     // Check for circular references.
1684     if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) {
1685       return FALSE;
1686     }
1687   }
1688   m_ObjectStreamMap.InitHashTable(101, FALSE);
1689   m_bXRefStream = TRUE;
1690   return TRUE;
1691 }
LoadLinearizedMainXRefTable()1692 FX_DWORD CPDF_Parser::LoadLinearizedMainXRefTable() {
1693   FX_DWORD dwSaveMetadataObjnum = m_Syntax.m_MetadataObjnum;
1694   m_Syntax.m_MetadataObjnum = 0;
1695   if (m_pTrailer) {
1696     m_pTrailer->Release();
1697     m_pTrailer = NULL;
1698   }
1699   m_Syntax.RestorePos(m_LastXRefOffset - m_Syntax.m_HeaderOffset);
1700   uint8_t ch = 0;
1701   FX_DWORD dwCount = 0;
1702   m_Syntax.GetNextChar(ch);
1703   while (PDFCharIsWhitespace(ch)) {
1704     ++dwCount;
1705     if (m_Syntax.m_FileLen >=
1706         (FX_FILESIZE)(m_Syntax.SavePos() + m_Syntax.m_HeaderOffset)) {
1707       break;
1708     }
1709     m_Syntax.GetNextChar(ch);
1710   }
1711   m_LastXRefOffset += dwCount;
1712   FX_POSITION pos = m_ObjectStreamMap.GetStartPosition();
1713   while (pos) {
1714     void* objnum;
1715     CPDF_StreamAcc* pStream;
1716     m_ObjectStreamMap.GetNextAssoc(pos, objnum, (void*&)pStream);
1717     delete pStream;
1718   }
1719   m_ObjectStreamMap.RemoveAll();
1720   m_ObjCache.clear();
1721 
1722   if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) &&
1723       !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) {
1724     m_LastXRefOffset = 0;
1725     m_Syntax.m_MetadataObjnum = dwSaveMetadataObjnum;
1726     return PDFPARSE_ERROR_FORMAT;
1727   }
1728   FXSYS_qsort(m_SortedOffset.GetData(), m_SortedOffset.GetSize(),
1729               sizeof(FX_FILESIZE), CompareFileSize);
1730   m_Syntax.m_MetadataObjnum = dwSaveMetadataObjnum;
1731   return PDFPARSE_ERROR_SUCCESS;
1732 }
1733 
// static
// Definition of the class-wide counter, starting at zero.
// NOTE(review): presumably tracks how deeply object parsing is currently
// nested; confirm against the code that increments it.
int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;
1736 
CPDF_SyntaxParser()1737 CPDF_SyntaxParser::CPDF_SyntaxParser() {
1738   m_pFileAccess = NULL;
1739   m_pFileBuf = NULL;
1740   m_BufSize = CPDF_ModuleMgr::kFileBufSize;
1741   m_pFileBuf = NULL;
1742   m_MetadataObjnum = 0;
1743   m_dwWordPos = 0;
1744   m_bFileStream = FALSE;
1745 }
1746 
// Frees the read buffer allocated by InitParser(). |m_pFileAccess| is not
// released here.
CPDF_SyntaxParser::~CPDF_SyntaxParser() {
  FX_Free(m_pFileBuf);
}
1750 
GetCharAt(FX_FILESIZE pos,uint8_t & ch)1751 FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {
1752   CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
1753   m_Pos = pos;
1754   return GetNextChar(ch);
1755 }
1756 
GetNextChar(uint8_t & ch)1757 FX_BOOL CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {
1758   FX_FILESIZE pos = m_Pos + m_HeaderOffset;
1759   if (pos >= m_FileLen) {
1760     return FALSE;
1761   }
1762   if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
1763     FX_FILESIZE read_pos = pos;
1764     FX_DWORD read_size = m_BufSize;
1765     if ((FX_FILESIZE)read_size > m_FileLen) {
1766       read_size = (FX_DWORD)m_FileLen;
1767     }
1768     if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
1769       if (m_FileLen < (FX_FILESIZE)read_size) {
1770         read_pos = 0;
1771         read_size = (FX_DWORD)m_FileLen;
1772       } else {
1773         read_pos = m_FileLen - read_size;
1774       }
1775     }
1776     if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) {
1777       return FALSE;
1778     }
1779     m_BufOffset = read_pos;
1780   }
1781   ch = m_pFileBuf[pos - m_BufOffset];
1782   m_Pos++;
1783   return TRUE;
1784 }
GetCharAtBackward(FX_FILESIZE pos,uint8_t & ch)1785 FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) {
1786   pos += m_HeaderOffset;
1787   if (pos >= m_FileLen) {
1788     return FALSE;
1789   }
1790   if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
1791     FX_FILESIZE read_pos;
1792     if (pos < (FX_FILESIZE)m_BufSize) {
1793       read_pos = 0;
1794     } else {
1795       read_pos = pos - m_BufSize + 1;
1796     }
1797     FX_DWORD read_size = m_BufSize;
1798     if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
1799       if (m_FileLen < (FX_FILESIZE)read_size) {
1800         read_pos = 0;
1801         read_size = (FX_DWORD)m_FileLen;
1802       } else {
1803         read_pos = m_FileLen - read_size;
1804       }
1805     }
1806     if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) {
1807       return FALSE;
1808     }
1809     m_BufOffset = read_pos;
1810   }
1811   ch = m_pFileBuf[pos - m_BufOffset];
1812   return TRUE;
1813 }
ReadBlock(uint8_t * pBuf,FX_DWORD size)1814 FX_BOOL CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, FX_DWORD size) {
1815   if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size)) {
1816     return FALSE;
1817   }
1818   m_Pos += size;
1819   return TRUE;
1820 }
1821 
// Reads the next token at the current position into |m_WordBuffer| and sets
// |m_WordSize| to its length. Leading whitespace and '%' comments are skipped.
//
// |bIsNumber| may be null. When given, it is set to true up front and cleared
// as soon as the token turns out to be a delimiter token or contains a
// non-numeric character. It therefore stays true if the end of the file is
// reached before any token character is read.
//
// On return |m_Pos| is just past the token. If the file ends while reading,
// the function returns with whatever has been collected so far.
void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {
  m_WordSize = 0;
  if (bIsNumber)
    *bIsNumber = true;
  uint8_t ch;
  if (!GetNextChar(ch)) {
    return;
  }
  // Skip any run of whitespace and comments in front of the token.
  while (1) {
    while (PDFCharIsWhitespace(ch)) {
      if (!GetNextChar(ch))
        return;
    }
    if (ch != '%')
      break;

    // A comment runs up to the next line-ending character.
    while (1) {
      if (!GetNextChar(ch))
        return;
      if (PDFCharIsLineEnding(ch))
        break;
    }
  }

  if (PDFCharIsDelimiter(ch)) {
    if (bIsNumber)
      *bIsNumber = false;
    m_WordBuffer[m_WordSize++] = ch;
    if (ch == '/') {
      // Name token: '/' followed by regular and numeric characters.
      while (1) {
        if (!GetNextChar(ch))
          return;

        if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
          // Push the terminating character back.
          m_Pos--;
          return;
        }

        // Characters that no longer fit are consumed but not stored.
        if (m_WordSize < sizeof(m_WordBuffer) - 1)
          m_WordBuffer[m_WordSize++] = ch;
      }
    } else if (ch == '<') {
      // "<<" is a single token; a lone '<' is returned by itself.
      if (!GetNextChar(ch))
        return;
      if (ch == '<')
        m_WordBuffer[m_WordSize++] = ch;
      else
        m_Pos--;
    } else if (ch == '>') {
      // ">>" is a single token; a lone '>' is returned by itself.
      if (!GetNextChar(ch))
        return;
      if (ch == '>')
        m_WordBuffer[m_WordSize++] = ch;
      else
        m_Pos--;
    }
    // Every other delimiter is a one-character token.
    return;
  }

  // Regular token: collect up to the next delimiter or whitespace character.
  while (1) {
    // Characters that no longer fit are consumed but not stored.
    if (m_WordSize < sizeof(m_WordBuffer) - 1)
      m_WordBuffer[m_WordSize++] = ch;

    if (!PDFCharIsNumeric(ch))
      if (bIsNumber)
        *bIsNumber = false;
    if (!GetNextChar(ch))
      return;

    if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
      // Push the terminating character back.
      m_Pos--;
      break;
    }
  }
}
1897 
ReadString()1898 CFX_ByteString CPDF_SyntaxParser::ReadString() {
1899   uint8_t ch;
1900   if (!GetNextChar(ch)) {
1901     return CFX_ByteString();
1902   }
1903   CFX_ByteTextBuf buf;
1904   int32_t parlevel = 0;
1905   int32_t status = 0, iEscCode = 0;
1906   while (1) {
1907     switch (status) {
1908       case 0:
1909         if (ch == ')') {
1910           if (parlevel == 0) {
1911             return buf.GetByteString();
1912           }
1913           parlevel--;
1914           buf.AppendChar(')');
1915         } else if (ch == '(') {
1916           parlevel++;
1917           buf.AppendChar('(');
1918         } else if (ch == '\\') {
1919           status = 1;
1920         } else {
1921           buf.AppendChar(ch);
1922         }
1923         break;
1924       case 1:
1925         if (ch >= '0' && ch <= '7') {
1926           iEscCode = FXSYS_toDecimalDigit(ch);
1927           status = 2;
1928           break;
1929         }
1930         if (ch == 'n') {
1931           buf.AppendChar('\n');
1932         } else if (ch == 'r') {
1933           buf.AppendChar('\r');
1934         } else if (ch == 't') {
1935           buf.AppendChar('\t');
1936         } else if (ch == 'b') {
1937           buf.AppendChar('\b');
1938         } else if (ch == 'f') {
1939           buf.AppendChar('\f');
1940         } else if (ch == '\r') {
1941           status = 4;
1942           break;
1943         } else if (ch == '\n') {
1944         } else {
1945           buf.AppendChar(ch);
1946         }
1947         status = 0;
1948         break;
1949       case 2:
1950         if (ch >= '0' && ch <= '7') {
1951           iEscCode = iEscCode * 8 + FXSYS_toDecimalDigit(ch);
1952           status = 3;
1953         } else {
1954           buf.AppendChar(iEscCode);
1955           status = 0;
1956           continue;
1957         }
1958         break;
1959       case 3:
1960         if (ch >= '0' && ch <= '7') {
1961           iEscCode = iEscCode * 8 + FXSYS_toDecimalDigit(ch);
1962           buf.AppendChar(iEscCode);
1963           status = 0;
1964         } else {
1965           buf.AppendChar(iEscCode);
1966           status = 0;
1967           continue;
1968         }
1969         break;
1970       case 4:
1971         status = 0;
1972         if (ch != '\n') {
1973           continue;
1974         }
1975         break;
1976     }
1977     if (!GetNextChar(ch)) {
1978       break;
1979     }
1980   }
1981   GetNextChar(ch);
1982   return buf.GetByteString();
1983 }
ReadHexString()1984 CFX_ByteString CPDF_SyntaxParser::ReadHexString() {
1985   uint8_t ch;
1986   if (!GetNextChar(ch))
1987     return CFX_ByteString();
1988 
1989   CFX_BinaryBuf buf;
1990   bool bFirst = true;
1991   uint8_t code = 0;
1992   while (1) {
1993     if (ch == '>')
1994       break;
1995 
1996     if (std::isxdigit(ch)) {
1997       int val = FXSYS_toHexDigit(ch);
1998       if (bFirst) {
1999         code = val * 16;
2000       } else {
2001         code += val;
2002         buf.AppendByte((uint8_t)code);
2003       }
2004       bFirst = !bFirst;
2005     }
2006 
2007     if (!GetNextChar(ch))
2008       break;
2009   }
2010   if (!bFirst)
2011     buf.AppendByte((uint8_t)code);
2012 
2013   return buf.GetByteString();
2014 }
ToNextLine()2015 void CPDF_SyntaxParser::ToNextLine() {
2016   uint8_t ch;
2017   while (GetNextChar(ch)) {
2018     if (ch == '\n') {
2019       break;
2020     }
2021     if (ch == '\r') {
2022       GetNextChar(ch);
2023       if (ch != '\n') {
2024         --m_Pos;
2025       }
2026       break;
2027     }
2028   }
2029 }
ToNextWord()2030 void CPDF_SyntaxParser::ToNextWord() {
2031   uint8_t ch;
2032   if (!GetNextChar(ch))
2033     return;
2034 
2035   while (1) {
2036     while (PDFCharIsWhitespace(ch)) {
2037       m_dwWordPos = m_Pos;
2038       if (!GetNextChar(ch))
2039         return;
2040     }
2041 
2042     if (ch != '%')
2043       break;
2044 
2045     while (1) {
2046       if (!GetNextChar(ch))
2047         return;
2048       if (PDFCharIsLineEnding(ch))
2049         break;
2050     }
2051   }
2052   m_Pos--;
2053 }
2054 
GetNextWord(bool * bIsNumber)2055 CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {
2056   GetNextWordInternal(bIsNumber);
2057   return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize);
2058 }
2059 
// Returns the next token, for callers that do not need to know whether it is
// numeric.
CFX_ByteString CPDF_SyntaxParser::GetKeyword() {
  return GetNextWord(nullptr);
}
2063 
// Parses the object that starts at the current position, leniently.
//
// |pObjList| is handed to any CPDF_Reference created.
// |objnum| / |gennum| identify the enclosing indirect object; they are passed
//     to the crypto handler and to ReadStream().
// |pContext| is optional. With PDFPARSE_TYPEONLY set, no object is built and a
//     PDFOBJ_* type code cast to a pointer is returned instead. For
//     dictionaries, m_DictStart / m_DictEnd are recorded, and
//     PDFPARSE_NOSTREAM stops parsing before any stream data.
// |bDecrypt| controls whether strings read directly by this call are
//     decrypted when a crypto handler is installed.
//
// Returns a newly allocated object, or null when nothing could be parsed, the
// file ended, or the nesting limit kParserMaxRecursionDepth was exceeded.
CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList,
                                          FX_DWORD objnum,
                                          FX_DWORD gennum,
                                          PARSE_CONTEXT* pContext,
                                          FX_BOOL bDecrypt) {
  // Counts nesting for the duration of this call.
  CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
  if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) {
    return NULL;
  }
  FX_FILESIZE SavedPos = m_Pos;
  FX_BOOL bTypeOnly = pContext && (pContext->m_Flags & PDFPARSE_TYPEONLY);
  bool bIsNumber;
  CFX_ByteString word = GetNextWord(&bIsNumber);
  if (word.GetLength() == 0) {
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_INVALID;
    return NULL;
  }
  if (bIsNumber) {
    // Either a plain number or the start of an "<objnum> <gennum> R"
    // reference. Look ahead two tokens and rewind if it is not a reference.
    FX_FILESIZE SavedPos = m_Pos;
    CFX_ByteString nextword = GetNextWord(&bIsNumber);
    if (bIsNumber) {
      CFX_ByteString nextword2 = GetNextWord(nullptr);
      if (nextword2 == "R") {
        // Only the object number is kept; the generation token is discarded.
        FX_DWORD objnum = FXSYS_atoi(word);
        if (bTypeOnly)
          return (CPDF_Object*)PDFOBJ_REFERENCE;
        return new CPDF_Reference(pObjList, objnum);
      }
    }
    m_Pos = SavedPos;
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_NUMBER;
    return new CPDF_Number(word);
  }
  if (word == "true" || word == "false") {
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_BOOLEAN;
    return new CPDF_Boolean(word == "true");
  }
  if (word == "null") {
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_NULL;
    return new CPDF_Null;
  }
  if (word == "(") {
    // Literal string.
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_STRING;
    CFX_ByteString str = ReadString();
    if (m_pCryptoHandler && bDecrypt) {
      m_pCryptoHandler->Decrypt(objnum, gennum, str);
    }
    return new CPDF_String(str, FALSE);
  }
  if (word == "<") {
    // Hexadecimal string.
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_STRING;
    CFX_ByteString str = ReadHexString();
    if (m_pCryptoHandler && bDecrypt) {
      m_pCryptoHandler->Decrypt(objnum, gennum, str);
    }
    return new CPDF_String(str, TRUE);
  }
  if (word == "[") {
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_ARRAY;
    CPDF_Array* pArray = new CPDF_Array;
    // Elements are collected until a nested parse yields null. That happens
    // on the closing ']' but also on any token that is not an object, so an
    // unterminated array is accepted here.
    while (CPDF_Object* pObj =
               GetObject(pObjList, objnum, gennum, nullptr, true)) {
      pArray->Add(pObj);
    }
    return pArray;
  }
  if (word[0] == '/') {
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_NAME;
    // The name is taken from the token buffer without its leading '/'.
    return new CPDF_Name(
        PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
  }
  if (word == "<<") {
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_DICTIONARY;

    if (pContext)
      pContext->m_DictStart = SavedPos;

    // NOTE(review): |nKeys| is incremented below but never read.
    int32_t nKeys = 0;
    // Position of the value of a "/Contents" key, or 0 if none was seen.
    FX_FILESIZE dwSignValuePos = 0;
    std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
        new CPDF_Dictionary);
    while (1) {
      CFX_ByteString key = GetNextWord(nullptr);
      if (key.IsEmpty())
        return nullptr;

      FX_FILESIZE SavedPos = m_Pos - key.GetLength();
      if (key == ">>")
        break;

      // A dictionary missing its ">>": stop and leave "endobj" unread.
      if (key == "endobj") {
        m_Pos = SavedPos;
        break;
      }
      // Tokens that are not names are skipped.
      if (key[0] != '/')
        continue;

      ++nKeys;
      key = PDF_NameDecode(key);
      if (key.IsEmpty())
        continue;

      if (key == "/Contents")
        dwSignValuePos = m_Pos;

      // Entries whose value cannot be parsed are dropped.
      CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, nullptr, true);
      if (!pObj)
        continue;

      CFX_ByteStringC keyNoSlash(key.c_str() + 1, key.GetLength() - 1);
      pDict->SetAt(keyNoSlash, pObj);
    }

    // Only when this is a signature dictionary and has contents, we reset the
    // contents to the un-decrypted form.
    if (IsSignatureDict(pDict.get()) && dwSignValuePos) {
      CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
      m_Pos = dwSignValuePos;
      CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, nullptr, FALSE);
      pDict->SetAt("Contents", pObj);
    }
    if (pContext) {
      pContext->m_DictEnd = m_Pos;
      if (pContext->m_Flags & PDFPARSE_NOSTREAM) {
        return pDict.release();
      }
    }
    // A dictionary followed by the "stream" keyword is a stream object.
    FX_FILESIZE SavedPos = m_Pos;
    CFX_ByteString nextword = GetNextWord(nullptr);
    if (nextword != "stream") {
      m_Pos = SavedPos;
      return pDict.release();
    }

    return ReadStream(pDict.release(), pContext, objnum, gennum);
  }
  if (word == ">>") {
    // Not an object: rewind so the enclosing dictionary sees its terminator.
    m_Pos = SavedPos;
    return nullptr;
  }
  if (bTypeOnly)
    return (CPDF_Object*)PDFOBJ_INVALID;

  return nullptr;
}
2218 
// Stricter variant of GetObject(). Differences visible in this function:
//   - strings are always decrypted when a crypto handler is installed;
//   - an array is rejected unless the token that ended it is ']';
//   - a dictionary is rejected as soon as one of its values fails to parse.
// Nested values are still parsed with the lenient GetObject().
//
// |pObjList|, |objnum|, |gennum| and |pContext| have the same meaning as in
// GetObject(). Returns a newly allocated object, a PDFOBJ_* type code cast to
// a pointer in PDFPARSE_TYPEONLY mode, or null on failure.
CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict(
    CPDF_IndirectObjectHolder* pObjList,
    FX_DWORD objnum,
    FX_DWORD gennum,
    PARSE_CONTEXT* pContext) {
  // Counts nesting for the duration of this call.
  CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
  if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) {
    return NULL;
  }
  FX_FILESIZE SavedPos = m_Pos;
  FX_BOOL bTypeOnly = pContext && (pContext->m_Flags & PDFPARSE_TYPEONLY);
  bool bIsNumber;
  CFX_ByteString word = GetNextWord(&bIsNumber);
  if (word.GetLength() == 0) {
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_INVALID;
    return nullptr;
  }
  if (bIsNumber) {
    // Either a plain number or the start of an "<objnum> <gennum> R"
    // reference. Look ahead two tokens and rewind if it is not a reference.
    FX_FILESIZE SavedPos = m_Pos;
    CFX_ByteString nextword = GetNextWord(&bIsNumber);
    if (bIsNumber) {
      CFX_ByteString nextword2 = GetNextWord(nullptr);
      if (nextword2 == "R") {
        if (bTypeOnly)
          return (CPDF_Object*)PDFOBJ_REFERENCE;
        // Only the object number is kept; the generation token is discarded.
        FX_DWORD objnum = FXSYS_atoi(word);
        return new CPDF_Reference(pObjList, objnum);
      }
    }
    m_Pos = SavedPos;
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_NUMBER;
    return new CPDF_Number(word);
  }
  if (word == "true" || word == "false") {
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_BOOLEAN;
    return new CPDF_Boolean(word == "true");
  }
  if (word == "null") {
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_NULL;
    return new CPDF_Null;
  }
  if (word == "(") {
    // Literal string.
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_STRING;
    CFX_ByteString str = ReadString();
    if (m_pCryptoHandler)
      m_pCryptoHandler->Decrypt(objnum, gennum, str);
    return new CPDF_String(str, FALSE);
  }
  if (word == "<") {
    // Hexadecimal string.
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_STRING;
    CFX_ByteString str = ReadHexString();
    if (m_pCryptoHandler)
      m_pCryptoHandler->Decrypt(objnum, gennum, str);
    return new CPDF_String(str, TRUE);
  }
  if (word == "[") {
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_ARRAY;
    std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>> pArray(
        new CPDF_Array);
    while (CPDF_Object* pObj =
               GetObject(pObjList, objnum, gennum, nullptr, true)) {
      pArray->Add(pObj);
    }
    // |m_WordBuffer| still holds the token that ended the loop; the array is
    // only accepted when that token starts with ']'.
    return m_WordBuffer[0] == ']' ? pArray.release() : nullptr;
  }
  if (word[0] == '/') {
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_NAME;
    // The name is taken from the token buffer without its leading '/'.
    return new CPDF_Name(
        PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
  }
  if (word == "<<") {
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_DICTIONARY;
    if (pContext)
      pContext->m_DictStart = SavedPos;

    std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
        new CPDF_Dictionary);
    while (1) {
      FX_FILESIZE SavedPos = m_Pos;
      CFX_ByteString key = GetNextWord(nullptr);
      if (key.IsEmpty())
        return nullptr;

      if (key == ">>")
        break;

      // A dictionary missing its ">>": stop and leave "endobj" unread.
      if (key == "endobj") {
        m_Pos = SavedPos;
        break;
      }
      // Tokens that are not names are skipped.
      if (key[0] != '/')
        continue;

      key = PDF_NameDecode(key);
      std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> obj(
          GetObject(pObjList, objnum, gennum, nullptr, true));
      if (!obj) {
        // Unparsable value: consume input up to and including the next CR or
        // LF (or to the end of the file) and fail the whole dictionary.
        uint8_t ch;
        while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) {
        }
        return nullptr;
      }
      // A key consisting of just "/" has no name; its value is discarded.
      if (key.GetLength() > 1) {
        pDict->SetAt(CFX_ByteStringC(key.c_str() + 1, key.GetLength() - 1),
                     obj.release());
      }
    }
    if (pContext) {
      pContext->m_DictEnd = m_Pos;
      if (pContext->m_Flags & PDFPARSE_NOSTREAM) {
        return pDict.release();
      }
    }
    // A dictionary followed by the "stream" keyword is a stream object.
    FX_FILESIZE SavedPos = m_Pos;
    CFX_ByteString nextword = GetNextWord(nullptr);
    if (nextword != "stream") {
      m_Pos = SavedPos;
      return pDict.release();
    }

    return ReadStream(pDict.release(), pContext, objnum, gennum);
  }
  if (word == ">>") {
    // Not an object: rewind so the enclosing dictionary sees its terminator.
    m_Pos = SavedPos;
    return nullptr;
  }
  if (bTypeOnly)
    return (CPDF_Object*)PDFOBJ_INVALID;
  return nullptr;
}
2358 
ReadEOLMarkers(FX_FILESIZE pos)2359 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {
2360   unsigned char byte1 = 0;
2361   unsigned char byte2 = 0;
2362   GetCharAt(pos, byte1);
2363   GetCharAt(pos + 1, byte2);
2364   unsigned int markers = 0;
2365   if (byte1 == '\r' && byte2 == '\n') {
2366     markers = 2;
2367   } else if (byte1 == '\r' || byte1 == '\n') {
2368     markers = 1;
2369   }
2370   return markers;
2371 }
// Reads the data of a stream object. The "stream" keyword must already have
// been consumed and |pDict| is the stream dictionary parsed just before it.
//
// |pDict| is either handed to the returned CPDF_Stream or released on
//     failure.
// |pContext| is optional; m_DataStart and m_DataEnd are filled in when given.
// |objnum| / |gennum| identify the object for decryption.
//
// When no crypto handler applies, the declared /Length is checked by looking
// for "endstream" right after the data. If that fails, the length is derived
// by searching for "endstream" / "endobj" and written back into |pDict|.
// When a crypto handler applies, the declared length is used as is.
//
// Returns the new stream, or null when no usable length could be determined.
CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
                                           PARSE_CONTEXT* pContext,
                                           FX_DWORD objnum,
                                           FX_DWORD gennum) {
  CPDF_Object* pLenObj = pDict->GetElement("Length");
  FX_FILESIZE len = -1;
  CPDF_Reference* pLenObjRef = ToReference(pLenObj);

  // A /Length given as a reference is only used when the reference has an
  // object list and does not point back at this very object.
  bool differingObjNum = !pLenObjRef || (pLenObjRef->GetObjList() &&
                                         pLenObjRef->GetRefObjNum() != objnum);
  if (pLenObj && differingObjNum)
    len = pLenObj->GetInteger();

  // Locate the start of stream.
  ToNextLine();
  FX_FILESIZE streamStartPos = m_Pos;
  if (pContext) {
    pContext->m_DataStart = streamStartPos;
  }

  const CFX_ByteStringC kEndStreamStr("endstream");
  const CFX_ByteStringC kEndObjStr("endobj");
  // The object whose number equals |m_MetadataObjnum| is never decrypted.
  CPDF_CryptoHandler* pCryptoHandler =
      objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get();
  if (!pCryptoHandler) {
    FX_BOOL bSearchForKeyword = TRUE;
    if (len >= 0) {
      // Jump over the declared number of bytes, unless that would overflow or
      // land at or past the end of the file.
      pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
      pos += len;
      if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) {
        m_Pos = pos.ValueOrDie();
      }
      m_Pos += ReadEOLMarkers(m_Pos);
      // Clear the part of the token buffer compared below, so that a shorter
      // token cannot match because of stale bytes.
      FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);
      GetNextWordInternal(nullptr);
      // Earlier version of PDF specification doesn't require EOL marker before
      // 'endstream' keyword. If keyword 'endstream' follows the bytes in
      // specified length, it signals the end of stream.
      if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.GetPtr(),
                       kEndStreamStr.GetLength()) == 0) {
        bSearchForKeyword = FALSE;
      }
    }
    if (bSearchForKeyword) {
      // If len is not available, len needs to be calculated
      // by searching the keywords "endstream" or "endobj".
      m_Pos = streamStartPos;
      FX_FILESIZE endStreamOffset = 0;
      while (endStreamOffset >= 0) {
        endStreamOffset = FindTag(kEndStreamStr, 0);
        if (endStreamOffset < 0) {
          // Can't find any "endstream".
          break;
        }
        // FindTag() leaves |m_Pos| just past the match.
        if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen,
                        kEndStreamStr, TRUE)) {
          // Stop searching when the keyword "endstream" is found.
          endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength();
          break;
        }
      }
      m_Pos = streamStartPos;
      FX_FILESIZE endObjOffset = 0;
      while (endObjOffset >= 0) {
        endObjOffset = FindTag(kEndObjStr, 0);
        if (endObjOffset < 0) {
          // Can't find any "endobj".
          break;
        }
        if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr,
                        TRUE)) {
          // Stop searching when the keyword "endobj" is found.
          endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength();
          break;
        }
      }
      if (endStreamOffset < 0 && endObjOffset < 0) {
        // Can't find "endstream" or "endobj".
        pDict->Release();
        return nullptr;
      }
      // Use whichever keyword comes first as the end of the data.
      if (endStreamOffset < 0 && endObjOffset >= 0) {
        // Correct the position of end stream.
        endStreamOffset = endObjOffset;
      } else if (endStreamOffset >= 0 && endObjOffset < 0) {
        // Correct the position of end obj.
        endObjOffset = endStreamOffset;
      } else if (endStreamOffset > endObjOffset) {
        endStreamOffset = endObjOffset;
      }
      len = endStreamOffset;
      // An end-of-line marker directly before the keyword is not part of the
      // data: drop a two-byte "\r\n", otherwise a single '\r' or '\n'.
      int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
      if (numMarkers == 2) {
        len -= 2;
      } else {
        numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);
        if (numMarkers == 1) {
          len -= 1;
        }
      }
      if (len < 0) {
        pDict->Release();
        return nullptr;
      }
      // Record the corrected length in the dictionary.
      pDict->SetAtInteger("Length", len);
    }
    m_Pos = streamStartPos;
  }
  if (len < 0) {
    pDict->Release();
    return nullptr;
  }
  uint8_t* pData = nullptr;
  if (len > 0) {
    pData = FX_Alloc(uint8_t, len);
    // NOTE(review): the result of ReadBlock() is not checked, and on the
    // crypto path |len| has not been validated against the file size.
    ReadBlock(pData, len);
    if (pCryptoHandler) {
      // Decrypt into a fresh buffer and take ownership of it.
      CFX_BinaryBuf dest_buf;
      dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len));
      void* context = pCryptoHandler->DecryptStart(objnum, gennum);
      pCryptoHandler->DecryptStream(context, pData, len, dest_buf);
      pCryptoHandler->DecryptFinish(context, dest_buf);
      FX_Free(pData);
      pData = dest_buf.GetBuffer();
      len = dest_buf.GetSize();
      dest_buf.DetachBuffer();
    }
  }
  CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict);
  if (pContext) {
    pContext->m_DataEnd = pContext->m_DataStart + len;
  }
  // Consume the token after the data. If it is exactly "endobj" followed by
  // an end-of-line marker, rewind so that the caller still sees "endobj".
  streamStartPos = m_Pos;
  FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);
  GetNextWordInternal(nullptr);
  int numMarkers = ReadEOLMarkers(m_Pos);
  if (m_WordSize == kEndObjStr.GetLength() && numMarkers != 0 &&
      FXSYS_memcmp(m_WordBuffer, kEndObjStr.GetPtr(), kEndObjStr.GetLength()) ==
          0) {
    m_Pos = streamStartPos;
  }
  return pStream;
}
InitParser(IFX_FileRead * pFileAccess,FX_DWORD HeaderOffset)2515 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess,
2516                                    FX_DWORD HeaderOffset) {
2517   FX_Free(m_pFileBuf);
2518   m_pFileBuf = FX_Alloc(uint8_t, m_BufSize);
2519   m_HeaderOffset = HeaderOffset;
2520   m_FileLen = pFileAccess->GetSize();
2521   m_Pos = 0;
2522   m_pFileAccess = pFileAccess;
2523   m_BufOffset = 0;
2524   pFileAccess->ReadBlock(
2525       m_pFileBuf, 0,
2526       (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize));
2527 }
GetDirectNum()2528 int32_t CPDF_SyntaxParser::GetDirectNum() {
2529   bool bIsNumber;
2530   GetNextWordInternal(&bIsNumber);
2531   if (!bIsNumber)
2532     return 0;
2533 
2534   m_WordBuffer[m_WordSize] = 0;
2535   return FXSYS_atoi(reinterpret_cast<const FX_CHAR*>(m_WordBuffer));
2536 }
2537 
IsWholeWord(FX_FILESIZE startpos,FX_FILESIZE limit,const CFX_ByteStringC & tag,FX_BOOL checkKeyword)2538 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
2539                                     FX_FILESIZE limit,
2540                                     const CFX_ByteStringC& tag,
2541                                     FX_BOOL checkKeyword) {
2542   const FX_DWORD taglen = tag.GetLength();
2543   bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);
2544   bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&
2545                      !PDFCharIsWhitespace(tag[taglen - 1]);
2546   uint8_t ch;
2547   if (bCheckRight && startpos + (int32_t)taglen <= limit &&
2548       GetCharAt(startpos + (int32_t)taglen, ch)) {
2549     if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
2550         (checkKeyword && PDFCharIsDelimiter(ch))) {
2551       return false;
2552     }
2553   }
2554 
2555   if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {
2556     if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
2557         (checkKeyword && PDFCharIsDelimiter(ch))) {
2558       return false;
2559     }
2560   }
2561   return true;
2562 }
2563 
// Searches for |tag| starting at the current position, forwards or backwards.
//
// |bWholeWord| requires the match to pass IsWholeWord().
// |bForward| selects the direction. Going backwards, the tag is matched from
//     its last byte towards its first.
// |limit| is the maximum distance from the starting position to examine;
//     0 means no limit.
//
// On success |m_Pos| is set to the first byte of the match and TRUE is
// returned. Otherwise |m_Pos| is unchanged and FALSE is returned. An empty
// |tag| is never found.
FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,
                                      FX_BOOL bWholeWord,
                                      FX_BOOL bForward,
                                      FX_FILESIZE limit) {
  int32_t taglen = tag.GetLength();
  if (taglen == 0) {
    return FALSE;
  }
  FX_FILESIZE pos = m_Pos;
  // Index of the tag byte expected next.
  int32_t offset = 0;
  if (!bForward) {
    offset = taglen - 1;
  }
  const uint8_t* tag_data = tag.GetPtr();
  uint8_t byte;
  while (1) {
    if (bForward) {
      if (limit) {
        if (pos >= m_Pos + limit) {
          return FALSE;
        }
      }
      if (!GetCharAt(pos, byte)) {
        return FALSE;
      }
    } else {
      if (limit) {
        if (pos <= m_Pos - limit) {
          return FALSE;
        }
      }
      if (!GetCharAtBackward(pos, byte)) {
        return FALSE;
      }
    }
    if (byte == tag_data[offset]) {
      // Keep matching while there are tag bytes left.
      if (bForward) {
        offset++;
        if (offset < taglen) {
          pos++;
          continue;
        }
      } else {
        offset--;
        if (offset >= 0) {
          pos--;
          continue;
        }
      }
      // Complete match. |pos| is on the last byte compared: the tag's last
      // byte going forwards, its first byte going backwards.
      // NOTE(review): |limit| is a distance here, but IsWholeWord() compares
      // it against an absolute position. Confirm this is intended.
      FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;
      if (!bWholeWord || IsWholeWord(startpos, limit, tag, FALSE)) {
        m_Pos = startpos;
        return TRUE;
      }
    }
    // Mismatch or rejected match: restart, reusing the current byte when it
    // equals the first byte expected in this direction.
    if (bForward) {
      offset = byte == tag_data[0] ? 1 : 0;
      pos++;
    } else {
      offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1;
      pos--;
    }
    if (pos < 0) {
      return FALSE;
    }
  }
  return FALSE;
}
2632 
SearchMultiWord(const CFX_ByteStringC & tags,FX_BOOL bWholeWord,FX_FILESIZE limit)2633 int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags,
2634                                            FX_BOOL bWholeWord,
2635                                            FX_FILESIZE limit) {
2636   int32_t ntags = 1;
2637   for (int i = 0; i < tags.GetLength(); ++i) {
2638     if (tags[i] == 0) {
2639       ++ntags;
2640     }
2641   }
2642 
2643   std::vector<SearchTagRecord> patterns(ntags);
2644   FX_DWORD start = 0;
2645   FX_DWORD itag = 0;
2646   FX_DWORD max_len = 0;
2647   for (int i = 0; i <= tags.GetLength(); ++i) {
2648     if (tags[i] == 0) {
2649       FX_DWORD len = i - start;
2650       max_len = std::max(len, max_len);
2651       patterns[itag].m_pTag = tags.GetCStr() + start;
2652       patterns[itag].m_Len = len;
2653       patterns[itag].m_Offset = 0;
2654       start = i + 1;
2655       ++itag;
2656     }
2657   }
2658 
2659   const FX_FILESIZE pos_limit = m_Pos + limit;
2660   for (FX_FILESIZE pos = m_Pos; !limit || pos < pos_limit; ++pos) {
2661     uint8_t byte;
2662     if (!GetCharAt(pos, byte))
2663       break;
2664 
2665     for (int i = 0; i < ntags; ++i) {
2666       SearchTagRecord& pat = patterns[i];
2667       if (pat.m_pTag[pat.m_Offset] != byte) {
2668         pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;
2669         continue;
2670       }
2671 
2672       ++pat.m_Offset;
2673       if (pat.m_Offset != pat.m_Len)
2674         continue;
2675 
2676       if (!bWholeWord ||
2677           IsWholeWord(pos - pat.m_Len, limit,
2678                       CFX_ByteStringC(pat.m_pTag, pat.m_Len), FALSE)) {
2679         return i;
2680       }
2681 
2682       pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;
2683     }
2684   }
2685   return -1;
2686 }
2687 
FindTag(const CFX_ByteStringC & tag,FX_FILESIZE limit)2688 FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag,
2689                                        FX_FILESIZE limit) {
2690   int32_t taglen = tag.GetLength();
2691   int32_t match = 0;
2692   limit += m_Pos;
2693   FX_FILESIZE startpos = m_Pos;
2694   while (1) {
2695     uint8_t ch;
2696     if (!GetNextChar(ch)) {
2697       return -1;
2698     }
2699     if (ch == tag[match]) {
2700       match++;
2701       if (match == taglen) {
2702         return m_Pos - startpos - taglen;
2703       }
2704     } else {
2705       match = ch == tag[0] ? 1 : 0;
2706     }
2707     if (limit && m_Pos == limit) {
2708       return -1;
2709     }
2710   }
2711   return -1;
2712 }
GetBinary(uint8_t * buffer,FX_DWORD size)2713 void CPDF_SyntaxParser::GetBinary(uint8_t* buffer, FX_DWORD size) {
2714   FX_DWORD offset = 0;
2715   uint8_t ch;
2716   while (1) {
2717     if (!GetNextChar(ch)) {
2718       return;
2719     }
2720     buffer[offset++] = ch;
2721     if (offset == size) {
2722       break;
2723     }
2724   }
2725 }
2726 
// Concrete IPDF_DataAvail implementation; IPDF_DataAvail::Create() returns an
// instance of this class. Availability checking is organised as a state
// machine: m_docStatus selects which Check*/Load* step CheckDocStatus() runs
// next, and each step either advances the status or records the byte ranges
// it still needs through the supplied IFX_DownloadHints.
class CPDF_DataAvail final : public IPDF_DataAvail {
 public:
  CPDF_DataAvail(IFX_FileAvail* pFileAvail,
                 IFX_FileRead* pFileRead,
                 FX_BOOL bSupportHintTable);
  ~CPDF_DataAvail() override;

  // IPDF_DataAvail:
  DocAvailStatus IsDocAvail(IFX_DownloadHints* pHints) override;
  void SetDocument(CPDF_Document* pDoc) override;
  DocAvailStatus IsPageAvail(int iPage, IFX_DownloadHints* pHints) override;
  DocFormStatus IsFormAvail(IFX_DownloadHints* pHints) override;
  DocLinearizationStatus IsLinearizedPDF() override;
  FX_BOOL IsLinearized() override { return m_bLinearized; }
  void GetLinearizedMainXRefInfo(FX_FILESIZE* pPos, FX_DWORD* pSize) override;

  int GetPageCount() const;
  CPDF_Dictionary* GetPage(int index);

  friend class CPDF_HintTables;

 protected:
  // Recursion limits; presumably enforced by the page/data availability
  // walkers defined elsewhere in this file -- TODO confirm.
  static const int kMaxDataAvailRecursionDepth = 64;
  static int s_CurrentDataAvailRecursionDepth;
  static const int kMaxPageRecursionDepth = 1024;

  // Returns the byte length of object |objnum| and stores its file offset in
  // |offset|; returns 0 when the size cannot be determined.
  FX_DWORD GetObjectSize(FX_DWORD objnum, FX_FILESIZE& offset);
  // Walks the objects reachable from |obj_array|. Returns TRUE when all of
  // them are available; otherwise the still-pending objects are appended to
  // |ret_array| and FALSE is returned.
  FX_BOOL IsObjectsAvail(CFX_ArrayTemplate<CPDF_Object*>& obj_array,
                         FX_BOOL bParsePage,
                         IFX_DownloadHints* pHints,
                         CFX_ArrayTemplate<CPDF_Object*>& ret_array);
  // Runs the step selected by m_docStatus.
  FX_BOOL CheckDocStatus(IFX_DownloadHints* pHints);
  FX_BOOL CheckHeader(IFX_DownloadHints* pHints);
  FX_BOOL CheckFirstPage(IFX_DownloadHints* pHints);
  FX_BOOL CheckHintTables(IFX_DownloadHints* pHints);
  FX_BOOL CheckEnd(IFX_DownloadHints* pHints);
  FX_BOOL CheckCrossRef(IFX_DownloadHints* pHints);
  FX_BOOL CheckCrossRefItem(IFX_DownloadHints* pHints);
  FX_BOOL CheckTrailer(IFX_DownloadHints* pHints);
  FX_BOOL CheckRoot(IFX_DownloadHints* pHints);
  FX_BOOL CheckInfo(IFX_DownloadHints* pHints);
  FX_BOOL CheckPages(IFX_DownloadHints* pHints);
  FX_BOOL CheckPage(IFX_DownloadHints* pHints);
  FX_BOOL CheckResources(IFX_DownloadHints* pHints);
  FX_BOOL CheckAnnots(IFX_DownloadHints* pHints);
  FX_BOOL CheckAcroForm(IFX_DownloadHints* pHints);
  FX_BOOL CheckAcroFormSubObject(IFX_DownloadHints* pHints);
  FX_BOOL CheckTrailerAppend(IFX_DownloadHints* pHints);
  FX_BOOL CheckPageStatus(IFX_DownloadHints* pHints);
  FX_BOOL CheckAllCrossRefStream(IFX_DownloadHints* pHints);

  int32_t CheckCrossRefStream(IFX_DownloadHints* pHints,
                              FX_FILESIZE& xref_offset);
  FX_BOOL IsLinearizedFile(uint8_t* pData, FX_DWORD dwLen);
  void SetStartOffset(FX_FILESIZE dwOffset);
  FX_BOOL GetNextToken(CFX_ByteString& token);
  FX_BOOL GetNextChar(uint8_t& ch);
  CPDF_Object* ParseIndirectObjectAt(
      FX_FILESIZE pos,
      FX_DWORD objnum,
      CPDF_IndirectObjectHolder* pObjList = NULL);
  // Parses object |objnum| once its bytes are available. |*pExistInFile| is
  // set to FALSE when the bytes were available but no object could be parsed.
  CPDF_Object* GetObject(FX_DWORD objnum,
                         IFX_DownloadHints* pHints,
                         FX_BOOL* pExistInFile);
  // Appends the object numbers referenced by the "Kids" entry of |pPages| to
  // m_PageObjList.
  FX_BOOL GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages);
  FX_BOOL PreparePageItem();
  FX_BOOL LoadPages(IFX_DownloadHints* pHints);
  FX_BOOL LoadAllXref(IFX_DownloadHints* pHints);
  FX_BOOL LoadAllFile(IFX_DownloadHints* pHints);
  DocAvailStatus CheckLinearizedData(IFX_DownloadHints* pHints);
  FX_BOOL CheckPageAnnots(int iPage, IFX_DownloadHints* pHints);

  DocAvailStatus CheckLinearizedFirstPage(int iPage, IFX_DownloadHints* pHints);
  FX_BOOL HaveResourceAncestor(CPDF_Dictionary* pDict);
  FX_BOOL CheckPage(int32_t iPage, IFX_DownloadHints* pHints);
  FX_BOOL LoadDocPages(IFX_DownloadHints* pHints);
  FX_BOOL LoadDocPage(int32_t iPage, IFX_DownloadHints* pHints);
  FX_BOOL CheckPageNode(CPDF_PageNode& pageNodes,
                        int32_t iPage,
                        int32_t& iCount,
                        IFX_DownloadHints* pHints,
                        int level);
  FX_BOOL CheckUnkownPageNode(FX_DWORD dwPageNo,
                              CPDF_PageNode* pPageNode,
                              IFX_DownloadHints* pHints);
  FX_BOOL CheckArrayPageNode(FX_DWORD dwPageNo,
                             CPDF_PageNode* pPageNode,
                             IFX_DownloadHints* pHints);
  FX_BOOL CheckPageCount(IFX_DownloadHints* pHints);
  // Returns true the first time it is called for |iPage| (until reset).
  bool IsFirstCheck(int iPage);
  void ResetFirstCheck(int iPage);
  // Returns TRUE when the requested range (plus some slack) is available;
  // otherwise records the range in |pHints| and returns FALSE.
  FX_BOOL IsDataAvail(FX_FILESIZE offset,
                      FX_DWORD size,
                      IFX_DownloadHints* pHints);

  // Parser used while no document has been attached via SetDocument().
  CPDF_Parser m_parser;

  CPDF_SyntaxParser m_syntaxParser;

  // Released in the destructor when non-null.
  CPDF_Object* m_pRoot;

  FX_DWORD m_dwRootObjNum;

  FX_DWORD m_dwInfoObjNum;

  // Released in the destructor when non-null.
  CPDF_Object* m_pLinearized;

  // Released in the destructor when non-null.
  CPDF_Object* m_pTrailer;

  // Set once the state machine has run to completion; ends IsDocAvail().
  FX_BOOL m_bDocAvail;

  FX_FILESIZE m_dwHeaderOffset;

  FX_FILESIZE m_dwLastXRefOffset;

  FX_FILESIZE m_dwXRefOffset;

  FX_FILESIZE m_dwTrailerOffset;

  FX_FILESIZE m_dwCurrentOffset;

  // Current state of the availability state machine.
  PDF_DATAAVAIL_STATUS m_docStatus;

  FX_FILESIZE m_dwFileLen;

  // Document attached through SetDocument(); null until then.
  CPDF_Document* m_pDocument;

  // Object numbers already visited by IsObjectsAvail().
  std::set<FX_DWORD> m_ObjectSet;

  // Objects still pending between successive CheckAcroFormSubObject() calls.
  CFX_ArrayTemplate<CPDF_Object*> m_objs_array;

  FX_FILESIZE m_Pos;

  FX_FILESIZE m_bufferOffset;

  FX_DWORD m_bufferSize;

  CFX_ByteString m_WordBuf;

  uint8_t m_bufferData[512];

  CFX_FileSizeArray m_CrossOffset;

  CFX_DWordArray m_XRefStreamList;

  // Object numbers of page-tree nodes still to be examined by CheckPage().
  CFX_DWordArray m_PageObjList;

  FX_DWORD m_PagesObjNum;

  FX_BOOL m_bLinearized;

  FX_DWORD m_dwFirstPageNo;

  FX_BOOL m_bLinearedDataOK;

  FX_BOOL m_bMainXRefLoadTried;

  FX_BOOL m_bMainXRefLoadedOK;

  FX_BOOL m_bPagesTreeLoad;

  FX_BOOL m_bPagesLoad;

  CPDF_Parser* m_pCurrentParser;

  FX_FILESIZE m_dwCurrentXRefSteam;

  FX_BOOL m_bAnnotsLoad;

  FX_BOOL m_bHaveAcroForm;

  FX_DWORD m_dwAcroFormObjNum;

  FX_BOOL m_bAcroFormLoad;

  CPDF_Object* m_pAcroForm;

  // Parsed AcroForm objects; each entry is released in the destructor.
  CFX_ArrayTemplate<CPDF_Object*> m_arrayAcroforms;

  CPDF_Dictionary* m_pPageDict;

  CPDF_Object* m_pPageResource;

  FX_BOOL m_bNeedDownLoadResource;

  FX_BOOL m_bPageLoadedOK;

  FX_BOOL m_bLinearizedFormParamLoad;

  // "Pages" dictionaries collected by CheckPage() until their kids are read.
  CFX_ArrayTemplate<CPDF_Object*> m_PagesArray;

  FX_DWORD m_dwEncryptObjNum;

  FX_FILESIZE m_dwPrevXRefOffset;

  FX_BOOL m_bTotalLoadPageTree;

  FX_BOOL m_bCurPageDictLoadOK;

  CPDF_PageNode m_pageNodes;

  // Page indices recorded by IsFirstCheck() / cleared by ResetFirstCheck().
  std::set<FX_DWORD> m_pageMapCheckState;
  std::set<FX_DWORD> m_pagesLoadState;

  // Populated by CheckHintTables() when the hint stream loads successfully.
  std::unique_ptr<CPDF_HintTables> m_pHintTables;
  FX_BOOL m_bSupportHintTable;
};
2934 
// Stores the availability and read interfaces for later use; neither pointer
// is validated here.
IPDF_DataAvail::IPDF_DataAvail(IFX_FileAvail* pFileAvail,
                               IFX_FileRead* pFileRead)
    : m_pFileAvail(pFileAvail), m_pFileRead(pFileRead) {}
2938 
// static
// Factory for the concrete implementation. The returned object is allocated
// with new and always has hint-table support enabled; the caller is
// presumably responsible for deleting it -- confirm against the interface
// declaration.
IPDF_DataAvail* IPDF_DataAvail::Create(IFX_FileAvail* pFileAvail,
                                       IFX_FileRead* pFileRead) {
  return new CPDF_DataAvail(pFileAvail, pFileRead, TRUE);
}
2944 
// static
// Out-of-class definition of the class-wide recursion depth counter; it
// starts at zero.
int CPDF_DataAvail::s_CurrentDataAvailRecursionDepth = 0;
2947 
CPDF_DataAvail(IFX_FileAvail * pFileAvail,IFX_FileRead * pFileRead,FX_BOOL bSupportHintTable)2948 CPDF_DataAvail::CPDF_DataAvail(IFX_FileAvail* pFileAvail,
2949                                IFX_FileRead* pFileRead,
2950                                FX_BOOL bSupportHintTable)
2951     : IPDF_DataAvail(pFileAvail, pFileRead) {
2952   m_Pos = 0;
2953   m_dwFileLen = 0;
2954   if (m_pFileRead) {
2955     m_dwFileLen = (FX_DWORD)m_pFileRead->GetSize();
2956   }
2957   m_dwCurrentOffset = 0;
2958   m_dwXRefOffset = 0;
2959   m_bufferOffset = 0;
2960   m_dwFirstPageNo = 0;
2961   m_bufferSize = 0;
2962   m_PagesObjNum = 0;
2963   m_dwCurrentXRefSteam = 0;
2964   m_dwAcroFormObjNum = 0;
2965   m_dwInfoObjNum = 0;
2966   m_pDocument = 0;
2967   m_dwEncryptObjNum = 0;
2968   m_dwPrevXRefOffset = 0;
2969   m_dwLastXRefOffset = 0;
2970   m_bDocAvail = FALSE;
2971   m_bMainXRefLoadTried = FALSE;
2972   m_bDocAvail = FALSE;
2973   m_bLinearized = FALSE;
2974   m_bPagesLoad = FALSE;
2975   m_bPagesTreeLoad = FALSE;
2976   m_bMainXRefLoadedOK = FALSE;
2977   m_bAnnotsLoad = FALSE;
2978   m_bHaveAcroForm = FALSE;
2979   m_bAcroFormLoad = FALSE;
2980   m_bPageLoadedOK = FALSE;
2981   m_bNeedDownLoadResource = FALSE;
2982   m_bLinearizedFormParamLoad = FALSE;
2983   m_pLinearized = NULL;
2984   m_pRoot = NULL;
2985   m_pTrailer = NULL;
2986   m_pCurrentParser = NULL;
2987   m_pAcroForm = NULL;
2988   m_pPageDict = NULL;
2989   m_pPageResource = NULL;
2990   m_docStatus = PDF_DATAAVAIL_HEADER;
2991   m_parser.m_bOwnFileRead = FALSE;
2992   m_bTotalLoadPageTree = FALSE;
2993   m_bCurPageDictLoadOK = FALSE;
2994   m_bLinearedDataOK = FALSE;
2995   m_bSupportHintTable = bSupportHintTable;
2996 }
~CPDF_DataAvail()2997 CPDF_DataAvail::~CPDF_DataAvail() {
2998   if (m_pLinearized) {
2999     m_pLinearized->Release();
3000   }
3001   if (m_pRoot) {
3002     m_pRoot->Release();
3003   }
3004   if (m_pTrailer) {
3005     m_pTrailer->Release();
3006   }
3007 
3008   int iSize = m_arrayAcroforms.GetSize();
3009   for (int i = 0; i < iSize; ++i) {
3010     m_arrayAcroforms.GetAt(i)->Release();
3011   }
3012 }
// Attaches the document whose parser GetObject() and GetObjectSize() consult
// from then on. The pointer is stored as-is; it is not released by the
// destructor visible in this file.
void CPDF_DataAvail::SetDocument(CPDF_Document* pDoc) {
  m_pDocument = pDoc;
}
GetObjectSize(FX_DWORD objnum,FX_FILESIZE & offset)3016 FX_DWORD CPDF_DataAvail::GetObjectSize(FX_DWORD objnum, FX_FILESIZE& offset) {
3017   CPDF_Parser* pParser = (CPDF_Parser*)(m_pDocument->GetParser());
3018   if (!pParser || !pParser->IsValidObjectNumber(objnum))
3019     return 0;
3020 
3021   if (pParser->m_V5Type[objnum] == 2)
3022     objnum = pParser->m_ObjectInfo[objnum].pos;
3023 
3024   if (pParser->m_V5Type[objnum] == 1 || pParser->m_V5Type[objnum] == 255) {
3025     offset = pParser->m_ObjectInfo[objnum].pos;
3026     if (offset == 0) {
3027       return 0;
3028     }
3029     void* pResult = FXSYS_bsearch(&offset, pParser->m_SortedOffset.GetData(),
3030                                   pParser->m_SortedOffset.GetSize(),
3031                                   sizeof(FX_FILESIZE), CompareFileSize);
3032     if (!pResult) {
3033       return 0;
3034     }
3035     if ((FX_FILESIZE*)pResult -
3036             (FX_FILESIZE*)pParser->m_SortedOffset.GetData() ==
3037         pParser->m_SortedOffset.GetSize() - 1) {
3038       return 0;
3039     }
3040     return (FX_DWORD)(((FX_FILESIZE*)pResult)[1] - offset);
3041   }
3042   return 0;
3043 }
IsObjectsAvail(CFX_ArrayTemplate<CPDF_Object * > & obj_array,FX_BOOL bParsePage,IFX_DownloadHints * pHints,CFX_ArrayTemplate<CPDF_Object * > & ret_array)3044 FX_BOOL CPDF_DataAvail::IsObjectsAvail(
3045     CFX_ArrayTemplate<CPDF_Object*>& obj_array,
3046     FX_BOOL bParsePage,
3047     IFX_DownloadHints* pHints,
3048     CFX_ArrayTemplate<CPDF_Object*>& ret_array) {
3049   if (!obj_array.GetSize()) {
3050     return TRUE;
3051   }
3052   FX_DWORD count = 0;
3053   CFX_ArrayTemplate<CPDF_Object*> new_obj_array;
3054   int32_t i = 0;
3055   for (i = 0; i < obj_array.GetSize(); i++) {
3056     CPDF_Object* pObj = obj_array[i];
3057     if (!pObj)
3058       continue;
3059 
3060     int32_t type = pObj->GetType();
3061     switch (type) {
3062       case PDFOBJ_ARRAY: {
3063         CPDF_Array* pArray = pObj->GetArray();
3064         for (FX_DWORD k = 0; k < pArray->GetCount(); k++) {
3065           new_obj_array.Add(pArray->GetElement(k));
3066         }
3067       } break;
3068       case PDFOBJ_STREAM:
3069         pObj = pObj->GetDict();
3070       case PDFOBJ_DICTIONARY: {
3071         CPDF_Dictionary* pDict = pObj->GetDict();
3072         if (pDict && pDict->GetString("Type") == "Page" && !bParsePage) {
3073           continue;
3074         }
3075         for (const auto& it : *pDict) {
3076           const CFX_ByteString& key = it.first;
3077           CPDF_Object* value = it.second;
3078           if (key != "Parent") {
3079             new_obj_array.Add(value);
3080           }
3081         }
3082       } break;
3083       case PDFOBJ_REFERENCE: {
3084         CPDF_Reference* pRef = pObj->AsReference();
3085         FX_DWORD dwNum = pRef->GetRefObjNum();
3086         FX_FILESIZE offset;
3087         FX_DWORD size = GetObjectSize(dwNum, offset);
3088         if (size == 0 || offset < 0 || offset >= m_dwFileLen) {
3089           break;
3090         }
3091         if (!IsDataAvail(offset, size, pHints)) {
3092           ret_array.Add(pObj);
3093           count++;
3094         } else if (!pdfium::ContainsKey(m_ObjectSet, dwNum)) {
3095           m_ObjectSet.insert(dwNum);
3096           CPDF_Object* pReferred =
3097               m_pDocument->GetIndirectObject(pRef->GetRefObjNum(), nullptr);
3098           if (pReferred) {
3099             new_obj_array.Add(pReferred);
3100           }
3101         }
3102       } break;
3103     }
3104   }
3105   if (count > 0) {
3106     int32_t iSize = new_obj_array.GetSize();
3107     for (i = 0; i < iSize; ++i) {
3108       CPDF_Object* pObj = new_obj_array[i];
3109       if (CPDF_Reference* pRef = pObj->AsReference()) {
3110         FX_DWORD dwNum = pRef->GetRefObjNum();
3111         if (!pdfium::ContainsKey(m_ObjectSet, dwNum))
3112           ret_array.Add(pObj);
3113       } else {
3114         ret_array.Add(pObj);
3115       }
3116     }
3117     return FALSE;
3118   }
3119   obj_array.RemoveAll();
3120   obj_array.Append(new_obj_array);
3121   return IsObjectsAvail(obj_array, FALSE, pHints, ret_array);
3122 }
3123 
IsDocAvail(IFX_DownloadHints * pHints)3124 IPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail(
3125     IFX_DownloadHints* pHints) {
3126   if (!m_dwFileLen && m_pFileRead) {
3127     m_dwFileLen = (FX_DWORD)m_pFileRead->GetSize();
3128     if (!m_dwFileLen) {
3129       return DataError;
3130     }
3131   }
3132   while (!m_bDocAvail) {
3133     if (!CheckDocStatus(pHints)) {
3134       return DataNotAvailable;
3135     }
3136   }
3137   return DataAvailable;
3138 }
3139 
CheckAcroFormSubObject(IFX_DownloadHints * pHints)3140 FX_BOOL CPDF_DataAvail::CheckAcroFormSubObject(IFX_DownloadHints* pHints) {
3141   if (!m_objs_array.GetSize()) {
3142     m_objs_array.RemoveAll();
3143     m_ObjectSet.clear();
3144     CFX_ArrayTemplate<CPDF_Object*> obj_array;
3145     obj_array.Append(m_arrayAcroforms);
3146     FX_BOOL bRet = IsObjectsAvail(obj_array, FALSE, pHints, m_objs_array);
3147     if (bRet) {
3148       m_objs_array.RemoveAll();
3149     }
3150     return bRet;
3151   }
3152   CFX_ArrayTemplate<CPDF_Object*> new_objs_array;
3153   FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
3154   if (bRet) {
3155     int32_t iSize = m_arrayAcroforms.GetSize();
3156     for (int32_t i = 0; i < iSize; ++i) {
3157       m_arrayAcroforms.GetAt(i)->Release();
3158     }
3159     m_arrayAcroforms.RemoveAll();
3160   } else {
3161     m_objs_array.RemoveAll();
3162     m_objs_array.Append(new_objs_array);
3163   }
3164   return bRet;
3165 }
CheckAcroForm(IFX_DownloadHints * pHints)3166 FX_BOOL CPDF_DataAvail::CheckAcroForm(IFX_DownloadHints* pHints) {
3167   FX_BOOL bExist = FALSE;
3168   m_pAcroForm = GetObject(m_dwAcroFormObjNum, pHints, &bExist);
3169   if (!bExist) {
3170     m_docStatus = PDF_DATAAVAIL_PAGETREE;
3171     return TRUE;
3172   }
3173   if (!m_pAcroForm) {
3174     if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3175       m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3176       return TRUE;
3177     }
3178     return FALSE;
3179   }
3180   m_arrayAcroforms.Add(m_pAcroForm);
3181   m_docStatus = PDF_DATAAVAIL_PAGETREE;
3182   return TRUE;
3183 }
CheckDocStatus(IFX_DownloadHints * pHints)3184 FX_BOOL CPDF_DataAvail::CheckDocStatus(IFX_DownloadHints* pHints) {
3185   switch (m_docStatus) {
3186     case PDF_DATAAVAIL_HEADER:
3187       return CheckHeader(pHints);
3188     case PDF_DATAAVAIL_FIRSTPAGE:
3189     case PDF_DATAAVAIL_FIRSTPAGE_PREPARE:
3190       return CheckFirstPage(pHints);
3191     case PDF_DATAAVAIL_HINTTABLE:
3192       return CheckHintTables(pHints);
3193     case PDF_DATAAVAIL_END:
3194       return CheckEnd(pHints);
3195     case PDF_DATAAVAIL_CROSSREF:
3196       return CheckCrossRef(pHints);
3197     case PDF_DATAAVAIL_CROSSREF_ITEM:
3198       return CheckCrossRefItem(pHints);
3199     case PDF_DATAAVAIL_CROSSREF_STREAM:
3200       return CheckAllCrossRefStream(pHints);
3201     case PDF_DATAAVAIL_TRAILER:
3202       return CheckTrailer(pHints);
3203     case PDF_DATAAVAIL_TRAILER_APPEND:
3204       return CheckTrailerAppend(pHints);
3205     case PDF_DATAAVAIL_LOADALLCROSSREF:
3206       return LoadAllXref(pHints);
3207     case PDF_DATAAVAIL_LOADALLFILE:
3208       return LoadAllFile(pHints);
3209     case PDF_DATAAVAIL_ROOT:
3210       return CheckRoot(pHints);
3211     case PDF_DATAAVAIL_INFO:
3212       return CheckInfo(pHints);
3213     case PDF_DATAAVAIL_ACROFORM:
3214       return CheckAcroForm(pHints);
3215     case PDF_DATAAVAIL_PAGETREE:
3216       if (m_bTotalLoadPageTree) {
3217         return CheckPages(pHints);
3218       }
3219       return LoadDocPages(pHints);
3220     case PDF_DATAAVAIL_PAGE:
3221       if (m_bTotalLoadPageTree) {
3222         return CheckPage(pHints);
3223       }
3224       m_docStatus = PDF_DATAAVAIL_PAGE_LATERLOAD;
3225       return TRUE;
3226     case PDF_DATAAVAIL_ERROR:
3227       return LoadAllFile(pHints);
3228     case PDF_DATAAVAIL_PAGE_LATERLOAD:
3229       m_docStatus = PDF_DATAAVAIL_PAGE;
3230     default:
3231       m_bDocAvail = TRUE;
3232       return TRUE;
3233   }
3234 }
CheckPageStatus(IFX_DownloadHints * pHints)3235 FX_BOOL CPDF_DataAvail::CheckPageStatus(IFX_DownloadHints* pHints) {
3236   switch (m_docStatus) {
3237     case PDF_DATAAVAIL_PAGETREE:
3238       return CheckPages(pHints);
3239     case PDF_DATAAVAIL_PAGE:
3240       return CheckPage(pHints);
3241     case PDF_DATAAVAIL_ERROR:
3242       return LoadAllFile(pHints);
3243     default:
3244       m_bPagesTreeLoad = TRUE;
3245       m_bPagesLoad = TRUE;
3246       return TRUE;
3247   }
3248 }
LoadAllFile(IFX_DownloadHints * pHints)3249 FX_BOOL CPDF_DataAvail::LoadAllFile(IFX_DownloadHints* pHints) {
3250   if (m_pFileAvail->IsDataAvail(0, (FX_DWORD)m_dwFileLen)) {
3251     m_docStatus = PDF_DATAAVAIL_DONE;
3252     return TRUE;
3253   }
3254   pHints->AddSegment(0, (FX_DWORD)m_dwFileLen);
3255   return FALSE;
3256 }
LoadAllXref(IFX_DownloadHints * pHints)3257 FX_BOOL CPDF_DataAvail::LoadAllXref(IFX_DownloadHints* pHints) {
3258   m_parser.m_Syntax.InitParser(m_pFileRead, (FX_DWORD)m_dwHeaderOffset);
3259   m_parser.m_bOwnFileRead = FALSE;
3260   if (!m_parser.LoadAllCrossRefV4(m_dwLastXRefOffset) &&
3261       !m_parser.LoadAllCrossRefV5(m_dwLastXRefOffset)) {
3262     m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3263     return FALSE;
3264   }
3265   FXSYS_qsort(m_parser.m_SortedOffset.GetData(),
3266               m_parser.m_SortedOffset.GetSize(), sizeof(FX_FILESIZE),
3267               CompareFileSize);
3268   m_dwRootObjNum = m_parser.GetRootObjNum();
3269   m_dwInfoObjNum = m_parser.GetInfoObjNum();
3270   m_pCurrentParser = &m_parser;
3271   m_docStatus = PDF_DATAAVAIL_ROOT;
3272   return TRUE;
3273 }
GetObject(FX_DWORD objnum,IFX_DownloadHints * pHints,FX_BOOL * pExistInFile)3274 CPDF_Object* CPDF_DataAvail::GetObject(FX_DWORD objnum,
3275                                        IFX_DownloadHints* pHints,
3276                                        FX_BOOL* pExistInFile) {
3277   CPDF_Object* pRet = nullptr;
3278   FX_DWORD size = 0;
3279   FX_FILESIZE offset = 0;
3280   CPDF_Parser* pParser = nullptr;
3281   if (pExistInFile)
3282     *pExistInFile = TRUE;
3283 
3284   if (m_pDocument) {
3285     size = GetObjectSize(objnum, offset);
3286     pParser = (CPDF_Parser*)(m_pDocument->GetParser());
3287   } else {
3288     size = (FX_DWORD)m_parser.GetObjectSize(objnum);
3289     offset = m_parser.GetObjectOffset(objnum);
3290     pParser = &m_parser;
3291   }
3292   if (!IsDataAvail(offset, size, pHints)) {
3293     return nullptr;
3294   }
3295   if (pParser) {
3296     pRet = pParser->ParseIndirectObject(NULL, objnum, NULL);
3297   }
3298 
3299   if (!pRet && pExistInFile) {
3300     *pExistInFile = FALSE;
3301   }
3302 
3303   return pRet;
3304 }
3305 
CheckInfo(IFX_DownloadHints * pHints)3306 FX_BOOL CPDF_DataAvail::CheckInfo(IFX_DownloadHints* pHints) {
3307   FX_BOOL bExist = FALSE;
3308   CPDF_Object* pInfo = GetObject(m_dwInfoObjNum, pHints, &bExist);
3309   if (!bExist) {
3310     if (m_bHaveAcroForm) {
3311       m_docStatus = PDF_DATAAVAIL_ACROFORM;
3312     } else {
3313       m_docStatus = PDF_DATAAVAIL_PAGETREE;
3314     }
3315     return TRUE;
3316   }
3317   if (!pInfo) {
3318     if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3319       m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3320       return TRUE;
3321     }
3322     if (m_Pos == m_dwFileLen) {
3323       m_docStatus = PDF_DATAAVAIL_ERROR;
3324     }
3325     return FALSE;
3326   }
3327   if (pInfo) {
3328     pInfo->Release();
3329   }
3330   if (m_bHaveAcroForm) {
3331     m_docStatus = PDF_DATAAVAIL_ACROFORM;
3332   } else {
3333     m_docStatus = PDF_DATAAVAIL_PAGETREE;
3334   }
3335   return TRUE;
3336 }
CheckRoot(IFX_DownloadHints * pHints)3337 FX_BOOL CPDF_DataAvail::CheckRoot(IFX_DownloadHints* pHints) {
3338   FX_BOOL bExist = FALSE;
3339   m_pRoot = GetObject(m_dwRootObjNum, pHints, &bExist);
3340   if (!bExist) {
3341     m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3342     return TRUE;
3343   }
3344   if (!m_pRoot) {
3345     if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3346       m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3347       return TRUE;
3348     }
3349     return FALSE;
3350   }
3351   CPDF_Dictionary* pDict = m_pRoot->GetDict();
3352   if (!pDict) {
3353     m_docStatus = PDF_DATAAVAIL_ERROR;
3354     return FALSE;
3355   }
3356   CPDF_Reference* pRef = ToReference(pDict->GetElement("Pages"));
3357   if (!pRef) {
3358     m_docStatus = PDF_DATAAVAIL_ERROR;
3359     return FALSE;
3360   }
3361 
3362   m_PagesObjNum = pRef->GetRefObjNum();
3363   CPDF_Reference* pAcroFormRef =
3364       ToReference(m_pRoot->GetDict()->GetElement("AcroForm"));
3365   if (pAcroFormRef) {
3366     m_bHaveAcroForm = TRUE;
3367     m_dwAcroFormObjNum = pAcroFormRef->GetRefObjNum();
3368   }
3369 
3370   if (m_dwInfoObjNum) {
3371     m_docStatus = PDF_DATAAVAIL_INFO;
3372   } else {
3373     m_docStatus =
3374         m_bHaveAcroForm ? PDF_DATAAVAIL_ACROFORM : PDF_DATAAVAIL_PAGETREE;
3375   }
3376   return TRUE;
3377 }
PreparePageItem()3378 FX_BOOL CPDF_DataAvail::PreparePageItem() {
3379   CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
3380   CPDF_Reference* pRef =
3381       ToReference(pRoot ? pRoot->GetElement("Pages") : nullptr);
3382   if (!pRef) {
3383     m_docStatus = PDF_DATAAVAIL_ERROR;
3384     return FALSE;
3385   }
3386 
3387   m_PagesObjNum = pRef->GetRefObjNum();
3388   m_pCurrentParser = (CPDF_Parser*)m_pDocument->GetParser();
3389   m_docStatus = PDF_DATAAVAIL_PAGETREE;
3390   return TRUE;
3391 }
// Records |iPage| in m_pageMapCheckState and returns true only if it was not
// already recorded, i.e. on the first call for that page since the last
// ResetFirstCheck().
bool CPDF_DataAvail::IsFirstCheck(int iPage) {
  return m_pageMapCheckState.insert(iPage).second;
}
// Forgets |iPage| so that the next IsFirstCheck() call for it returns true
// again. Erasing a page that was never recorded is a no-op.
void CPDF_DataAvail::ResetFirstCheck(int iPage) {
  m_pageMapCheckState.erase(iPage);
}
CheckPage(IFX_DownloadHints * pHints)3398 FX_BOOL CPDF_DataAvail::CheckPage(IFX_DownloadHints* pHints) {
3399   FX_DWORD iPageObjs = m_PageObjList.GetSize();
3400   CFX_DWordArray UnavailObjList;
3401   for (FX_DWORD i = 0; i < iPageObjs; ++i) {
3402     FX_DWORD dwPageObjNum = m_PageObjList.GetAt(i);
3403     FX_BOOL bExist = FALSE;
3404     CPDF_Object* pObj = GetObject(dwPageObjNum, pHints, &bExist);
3405     if (!pObj) {
3406       if (bExist) {
3407         UnavailObjList.Add(dwPageObjNum);
3408       }
3409       continue;
3410     }
3411     if (pObj->IsArray()) {
3412       CPDF_Array* pArray = pObj->GetArray();
3413       if (pArray) {
3414         int32_t iSize = pArray->GetCount();
3415         for (int32_t j = 0; j < iSize; ++j) {
3416           if (CPDF_Reference* pRef = ToReference(pArray->GetElement(j)))
3417             UnavailObjList.Add(pRef->GetRefObjNum());
3418         }
3419       }
3420     }
3421     if (!pObj->IsDictionary()) {
3422       pObj->Release();
3423       continue;
3424     }
3425     CFX_ByteString type = pObj->GetDict()->GetString("Type");
3426     if (type == "Pages") {
3427       m_PagesArray.Add(pObj);
3428       continue;
3429     }
3430     pObj->Release();
3431   }
3432   m_PageObjList.RemoveAll();
3433   if (UnavailObjList.GetSize()) {
3434     m_PageObjList.Append(UnavailObjList);
3435     return FALSE;
3436   }
3437   FX_DWORD iPages = m_PagesArray.GetSize();
3438   for (FX_DWORD i = 0; i < iPages; i++) {
3439     CPDF_Object* pPages = m_PagesArray.GetAt(i);
3440     if (!pPages)
3441       continue;
3442 
3443     if (!GetPageKids(m_pCurrentParser, pPages)) {
3444       pPages->Release();
3445       while (++i < iPages) {
3446         pPages = m_PagesArray.GetAt(i);
3447         pPages->Release();
3448       }
3449       m_PagesArray.RemoveAll();
3450       m_docStatus = PDF_DATAAVAIL_ERROR;
3451       return FALSE;
3452     }
3453     pPages->Release();
3454   }
3455   m_PagesArray.RemoveAll();
3456   if (!m_PageObjList.GetSize()) {
3457     m_docStatus = PDF_DATAAVAIL_DONE;
3458   }
3459   return TRUE;
3460 }
GetPageKids(CPDF_Parser * pParser,CPDF_Object * pPages)3461 FX_BOOL CPDF_DataAvail::GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages) {
3462   if (!pParser) {
3463     m_docStatus = PDF_DATAAVAIL_ERROR;
3464     return FALSE;
3465   }
3466   CPDF_Dictionary* pDict = pPages->GetDict();
3467   CPDF_Object* pKids = pDict ? pDict->GetElement("Kids") : NULL;
3468   if (!pKids) {
3469     return TRUE;
3470   }
3471   switch (pKids->GetType()) {
3472     case PDFOBJ_REFERENCE:
3473       m_PageObjList.Add(pKids->AsReference()->GetRefObjNum());
3474       break;
3475     case PDFOBJ_ARRAY: {
3476       CPDF_Array* pKidsArray = pKids->AsArray();
3477       for (FX_DWORD i = 0; i < pKidsArray->GetCount(); ++i) {
3478         if (CPDF_Reference* pRef = ToReference(pKidsArray->GetElement(i)))
3479           m_PageObjList.Add(pRef->GetRefObjNum());
3480       }
3481     } break;
3482     default:
3483       m_docStatus = PDF_DATAAVAIL_ERROR;
3484       return FALSE;
3485   }
3486   return TRUE;
3487 }
CheckPages(IFX_DownloadHints * pHints)3488 FX_BOOL CPDF_DataAvail::CheckPages(IFX_DownloadHints* pHints) {
3489   FX_BOOL bExist = FALSE;
3490   CPDF_Object* pPages = GetObject(m_PagesObjNum, pHints, &bExist);
3491   if (!bExist) {
3492     m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3493     return TRUE;
3494   }
3495   if (!pPages) {
3496     if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3497       m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3498       return TRUE;
3499     }
3500     return FALSE;
3501   }
3502   if (!GetPageKids(m_pCurrentParser, pPages)) {
3503     pPages->Release();
3504     m_docStatus = PDF_DATAAVAIL_ERROR;
3505     return FALSE;
3506   }
3507   pPages->Release();
3508   m_docStatus = PDF_DATAAVAIL_PAGE;
3509   return TRUE;
3510 }
CheckHeader(IFX_DownloadHints * pHints)3511 FX_BOOL CPDF_DataAvail::CheckHeader(IFX_DownloadHints* pHints) {
3512   FX_DWORD req_size = 1024;
3513   if ((FX_FILESIZE)req_size > m_dwFileLen) {
3514     req_size = (FX_DWORD)m_dwFileLen;
3515   }
3516   if (m_pFileAvail->IsDataAvail(0, req_size)) {
3517     uint8_t buffer[1024];
3518     m_pFileRead->ReadBlock(buffer, 0, req_size);
3519     if (IsLinearizedFile(buffer, req_size)) {
3520       m_docStatus = PDF_DATAAVAIL_FIRSTPAGE;
3521     } else {
3522       if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3523         return FALSE;
3524       }
3525       m_docStatus = PDF_DATAAVAIL_END;
3526     }
3527     return TRUE;
3528   }
3529   pHints->AddSegment(0, req_size);
3530   return FALSE;
3531 }
// First-page step for linearized files. Using the "E", "T" and "L" entries of
// the linearization dictionary it checks two byte ranges: the data from
// offset 1024 up to "E" + 512, and the tail of the file starting at "T".
// Missing ranges are requested through |pHints|.
//
// Returns TRUE (moving to HINTTABLE, or DONE without hint-table support) only
// when both ranges are available. Otherwise it returns FALSE with the status
// set to FIRSTPAGE_PREPARE, or to ERROR if one of the three entries is
// missing.
// NOTE(review): m_pLinearized is dereferenced without a null check; this
// relies on the state only being entered after the linearization object has
// been set -- confirm against IsLinearizedFile().
FX_BOOL CPDF_DataAvail::CheckFirstPage(IFX_DownloadHints* pHints) {
  CPDF_Dictionary* pDict = m_pLinearized->GetDict();
  CPDF_Object* pEndOffSet = pDict ? pDict->GetElement("E") : NULL;
  if (!pEndOffSet) {
    m_docStatus = PDF_DATAAVAIL_ERROR;
    return FALSE;
  }
  CPDF_Object* pXRefOffset = pDict ? pDict->GetElement("T") : NULL;
  if (!pXRefOffset) {
    m_docStatus = PDF_DATAAVAIL_ERROR;
    return FALSE;
  }
  CPDF_Object* pFileLen = pDict ? pDict->GetElement("L") : NULL;
  if (!pFileLen) {
    m_docStatus = PDF_DATAAVAIL_ERROR;
    return FALSE;
  }
  FX_BOOL bNeedDownLoad = FALSE;
  if (pEndOffSet->IsNumber()) {
    // Pad the end offset by 512 bytes, clamped to the file length.
    FX_DWORD dwEnd = pEndOffSet->GetInteger();
    dwEnd += 512;
    if ((FX_FILESIZE)dwEnd > m_dwFileLen) {
      dwEnd = (FX_DWORD)m_dwFileLen;
    }
    // The first 1024 bytes were already handled by CheckHeader(), so the
    // range checked here starts right after them.
    int32_t iStartPos = (int32_t)(m_dwFileLen > 1024 ? 1024 : m_dwFileLen);
    int32_t iSize = dwEnd > 1024 ? dwEnd - 1024 : 0;
    if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) {
      pHints->AddSegment(iStartPos, iSize);
      bNeedDownLoad = TRUE;
    }
  }
  m_dwLastXRefOffset = 0;
  FX_FILESIZE dwFileLen = 0;
  if (pXRefOffset->IsNumber())
    m_dwLastXRefOffset = pXRefOffset->GetInteger();

  if (pFileLen->IsNumber())
    dwFileLen = pFileLen->GetInteger();

  // Second range: from the "T" offset to the file length declared in "L".
  if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset,
                                 (FX_DWORD)(dwFileLen - m_dwLastXRefOffset))) {
    // The hint is only added on the first visit (status still FIRSTPAGE).
    if (m_docStatus == PDF_DATAAVAIL_FIRSTPAGE) {
      FX_DWORD dwSize = (FX_DWORD)(dwFileLen - m_dwLastXRefOffset);
      FX_FILESIZE offset = m_dwLastXRefOffset;
      // Request at least the last 512 bytes of the file.
      if (dwSize < 512 && dwFileLen > 512) {
        dwSize = 512;
        offset = dwFileLen - 512;
      }
      pHints->AddSegment(offset, dwSize);
    }
  } else {
    // Tail is available; FIRSTPAGE_PREPARE doubles as the "tail OK" marker
    // for the test below.
    m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE;
  }
  if (bNeedDownLoad || m_docStatus != PDF_DATAAVAIL_FIRSTPAGE_PREPARE) {
    // Something is still missing: stay in (or enter) the prepare state.
    m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE;
    return FALSE;
  }
  m_docStatus =
      m_bSupportHintTable ? PDF_DATAAVAIL_HINTTABLE : PDF_DATAAVAIL_DONE;
  return TRUE;
}
IsDataAvail(FX_FILESIZE offset,FX_DWORD size,IFX_DownloadHints * pHints)3593 FX_BOOL CPDF_DataAvail::IsDataAvail(FX_FILESIZE offset,
3594                                     FX_DWORD size,
3595                                     IFX_DownloadHints* pHints) {
3596   if (offset > m_dwFileLen)
3597     return TRUE;
3598   FX_SAFE_DWORD safeSize = pdfium::base::checked_cast<FX_DWORD>(offset);
3599   safeSize += size;
3600   safeSize += 512;
3601   if (!safeSize.IsValid() || safeSize.ValueOrDie() > m_dwFileLen)
3602     size = m_dwFileLen - offset;
3603   else
3604     size += 512;
3605   if (!m_pFileAvail->IsDataAvail(offset, size)) {
3606     pHints->AddSegment(offset, size);
3607     return FALSE;
3608   }
3609   return TRUE;
3610 }
CheckHintTables(IFX_DownloadHints * pHints)3611 FX_BOOL CPDF_DataAvail::CheckHintTables(IFX_DownloadHints* pHints) {
3612   CPDF_Dictionary* pDict = m_pLinearized->GetDict();
3613   if (!pDict) {
3614     m_docStatus = PDF_DATAAVAIL_ERROR;
3615     return FALSE;
3616   }
3617   if (!pDict->KeyExist("H") || !pDict->KeyExist("O") || !pDict->KeyExist("N")) {
3618     m_docStatus = PDF_DATAAVAIL_ERROR;
3619     return FALSE;
3620   }
3621   int nPageCount = pDict->GetElementValue("N")->GetInteger();
3622   if (nPageCount <= 1) {
3623     m_docStatus = PDF_DATAAVAIL_DONE;
3624     return TRUE;
3625   }
3626   CPDF_Array* pHintStreamRange = pDict->GetArray("H");
3627   FX_FILESIZE szHSStart =
3628       pHintStreamRange->GetElementValue(0)
3629           ? pHintStreamRange->GetElementValue(0)->GetInteger()
3630           : 0;
3631   FX_FILESIZE szHSLength =
3632       pHintStreamRange->GetElementValue(1)
3633           ? pHintStreamRange->GetElementValue(1)->GetInteger()
3634           : 0;
3635   if (szHSStart < 0 || szHSLength <= 0) {
3636     m_docStatus = PDF_DATAAVAIL_ERROR;
3637     return FALSE;
3638   }
3639   if (!IsDataAvail(szHSStart, szHSLength, pHints)) {
3640     return FALSE;
3641   }
3642   m_syntaxParser.InitParser(m_pFileRead, m_dwHeaderOffset);
3643   std::unique_ptr<CPDF_HintTables> pHintTables(
3644       new CPDF_HintTables(this, pDict));
3645   std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pHintStream(
3646       ParseIndirectObjectAt(szHSStart, 0));
3647   CPDF_Stream* pStream = ToStream(pHintStream.get());
3648   if (pStream && pHintTables->LoadHintStream(pStream))
3649     m_pHintTables = std::move(pHintTables);
3650 
3651   m_docStatus = PDF_DATAAVAIL_DONE;
3652   return TRUE;
3653 }
ParseIndirectObjectAt(FX_FILESIZE pos,FX_DWORD objnum,CPDF_IndirectObjectHolder * pObjList)3654 CPDF_Object* CPDF_DataAvail::ParseIndirectObjectAt(
3655     FX_FILESIZE pos,
3656     FX_DWORD objnum,
3657     CPDF_IndirectObjectHolder* pObjList) {
3658   FX_FILESIZE SavedPos = m_syntaxParser.SavePos();
3659   m_syntaxParser.RestorePos(pos);
3660   bool bIsNumber;
3661   CFX_ByteString word = m_syntaxParser.GetNextWord(&bIsNumber);
3662   if (!bIsNumber)
3663     return nullptr;
3664 
3665   FX_DWORD parser_objnum = FXSYS_atoi(word);
3666   if (objnum && parser_objnum != objnum)
3667     return nullptr;
3668 
3669   word = m_syntaxParser.GetNextWord(&bIsNumber);
3670   if (!bIsNumber)
3671     return nullptr;
3672 
3673   FX_DWORD gennum = FXSYS_atoi(word);
3674   if (m_syntaxParser.GetKeyword() != "obj") {
3675     m_syntaxParser.RestorePos(SavedPos);
3676     return nullptr;
3677   }
3678   CPDF_Object* pObj =
3679       m_syntaxParser.GetObject(pObjList, parser_objnum, gennum, nullptr, true);
3680   m_syntaxParser.RestorePos(SavedPos);
3681   return pObj;
3682 }
IsLinearizedPDF()3683 IPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() {
3684   FX_DWORD req_size = 1024;
3685   if (!m_pFileAvail->IsDataAvail(0, req_size)) {
3686     return LinearizationUnknown;
3687   }
3688   if (!m_pFileRead) {
3689     return NotLinearized;
3690   }
3691   FX_FILESIZE dwSize = m_pFileRead->GetSize();
3692   if (dwSize < (FX_FILESIZE)req_size) {
3693     return LinearizationUnknown;
3694   }
3695   uint8_t buffer[1024];
3696   m_pFileRead->ReadBlock(buffer, 0, req_size);
3697   if (IsLinearizedFile(buffer, req_size)) {
3698     return Linearized;
3699   }
3700   return NotLinearized;
3701 }
IsLinearizedFile(uint8_t * pData,FX_DWORD dwLen)3702 FX_BOOL CPDF_DataAvail::IsLinearizedFile(uint8_t* pData, FX_DWORD dwLen) {
3703   ScopedFileStream file(FX_CreateMemoryStream(pData, (size_t)dwLen, FALSE));
3704   int32_t offset = GetHeaderOffset(file.get());
3705   if (offset == -1) {
3706     m_docStatus = PDF_DATAAVAIL_ERROR;
3707     return FALSE;
3708   }
3709   m_dwHeaderOffset = offset;
3710   m_syntaxParser.InitParser(file.get(), offset);
3711   m_syntaxParser.RestorePos(m_syntaxParser.m_HeaderOffset + 9);
3712   bool bNumber;
3713   CFX_ByteString wordObjNum = m_syntaxParser.GetNextWord(&bNumber);
3714   if (!bNumber)
3715     return FALSE;
3716 
3717   FX_DWORD objnum = FXSYS_atoi(wordObjNum);
3718   if (m_pLinearized) {
3719     m_pLinearized->Release();
3720     m_pLinearized = NULL;
3721   }
3722   m_pLinearized =
3723       ParseIndirectObjectAt(m_syntaxParser.m_HeaderOffset + 9, objnum);
3724   if (!m_pLinearized) {
3725     return FALSE;
3726   }
3727 
3728   CPDF_Dictionary* pDict = m_pLinearized->GetDict();
3729   if (pDict && pDict->GetElement("Linearized")) {
3730     CPDF_Object* pLen = pDict->GetElement("L");
3731     if (!pLen) {
3732       return FALSE;
3733     }
3734     if ((FX_FILESIZE)pLen->GetInteger() != m_pFileRead->GetSize()) {
3735       return FALSE;
3736     }
3737     m_bLinearized = TRUE;
3738 
3739     if (CPDF_Number* pNo = ToNumber(pDict->GetElement("P")))
3740       m_dwFirstPageNo = pNo->GetInteger();
3741 
3742     return TRUE;
3743   }
3744   return FALSE;
3745 }
CheckEnd(IFX_DownloadHints * pHints)3746 FX_BOOL CPDF_DataAvail::CheckEnd(IFX_DownloadHints* pHints) {
3747   FX_DWORD req_pos = (FX_DWORD)(m_dwFileLen > 1024 ? m_dwFileLen - 1024 : 0);
3748   FX_DWORD dwSize = (FX_DWORD)(m_dwFileLen - req_pos);
3749   if (m_pFileAvail->IsDataAvail(req_pos, dwSize)) {
3750     uint8_t buffer[1024];
3751     m_pFileRead->ReadBlock(buffer, req_pos, dwSize);
3752     ScopedFileStream file(FX_CreateMemoryStream(buffer, (size_t)dwSize, FALSE));
3753     m_syntaxParser.InitParser(file.get(), 0);
3754     m_syntaxParser.RestorePos(dwSize - 1);
3755     if (m_syntaxParser.SearchWord("startxref", TRUE, FALSE, dwSize)) {
3756       m_syntaxParser.GetNextWord(nullptr);
3757       bool bNumber;
3758       CFX_ByteString xrefpos_str = m_syntaxParser.GetNextWord(&bNumber);
3759       if (!bNumber) {
3760         m_docStatus = PDF_DATAAVAIL_ERROR;
3761         return FALSE;
3762       }
3763       m_dwXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str);
3764       if (!m_dwXRefOffset || m_dwXRefOffset > m_dwFileLen) {
3765         m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3766         return TRUE;
3767       }
3768       m_dwLastXRefOffset = m_dwXRefOffset;
3769       SetStartOffset(m_dwXRefOffset);
3770       m_docStatus = PDF_DATAAVAIL_CROSSREF;
3771       return TRUE;
3772     }
3773     m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3774     return TRUE;
3775   }
3776   pHints->AddSegment(req_pos, dwSize);
3777   return FALSE;
3778 }
CheckCrossRefStream(IFX_DownloadHints * pHints,FX_FILESIZE & xref_offset)3779 int32_t CPDF_DataAvail::CheckCrossRefStream(IFX_DownloadHints* pHints,
3780                                             FX_FILESIZE& xref_offset) {
3781   xref_offset = 0;
3782   FX_DWORD req_size =
3783       (FX_DWORD)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
3784   if (m_pFileAvail->IsDataAvail(m_Pos, req_size)) {
3785     int32_t iSize = (int32_t)(m_Pos + req_size - m_dwCurrentXRefSteam);
3786     CFX_BinaryBuf buf(iSize);
3787     uint8_t* pBuf = buf.GetBuffer();
3788     m_pFileRead->ReadBlock(pBuf, m_dwCurrentXRefSteam, iSize);
3789     ScopedFileStream file(FX_CreateMemoryStream(pBuf, (size_t)iSize, FALSE));
3790     m_parser.m_Syntax.InitParser(file.get(), 0);
3791     bool bNumber;
3792     CFX_ByteString objnum = m_parser.m_Syntax.GetNextWord(&bNumber);
3793     if (!bNumber)
3794       return -1;
3795 
3796     FX_DWORD objNum = FXSYS_atoi(objnum);
3797     CPDF_Object* pObj = m_parser.ParseIndirectObjectAt(NULL, 0, objNum, NULL);
3798     if (!pObj) {
3799       m_Pos += m_parser.m_Syntax.SavePos();
3800       return 0;
3801     }
3802     CPDF_Dictionary* pDict = pObj->GetDict();
3803     CPDF_Name* pName = ToName(pDict ? pDict->GetElement("Type") : nullptr);
3804     if (pName) {
3805       if (pName->GetString() == "XRef") {
3806         m_Pos += m_parser.m_Syntax.SavePos();
3807         xref_offset = pObj->GetDict()->GetInteger("Prev");
3808         pObj->Release();
3809         return 1;
3810       }
3811     }
3812     pObj->Release();
3813     return -1;
3814   }
3815   pHints->AddSegment(m_Pos, req_size);
3816   return 0;
3817 }
SetStartOffset(FX_FILESIZE dwOffset)3818 inline void CPDF_DataAvail::SetStartOffset(FX_FILESIZE dwOffset) {
3819   m_Pos = dwOffset;
3820 }
3821 
GetNextToken(CFX_ByteString & token)3822 FX_BOOL CPDF_DataAvail::GetNextToken(CFX_ByteString& token) {
3823   uint8_t ch;
3824   if (!GetNextChar(ch))
3825     return FALSE;
3826 
3827   while (1) {
3828     while (PDFCharIsWhitespace(ch)) {
3829       if (!GetNextChar(ch))
3830         return FALSE;
3831     }
3832 
3833     if (ch != '%')
3834       break;
3835 
3836     while (1) {
3837       if (!GetNextChar(ch))
3838         return FALSE;
3839       if (PDFCharIsLineEnding(ch))
3840         break;
3841     }
3842   }
3843 
3844   uint8_t buffer[256];
3845   FX_DWORD index = 0;
3846   if (PDFCharIsDelimiter(ch)) {
3847     buffer[index++] = ch;
3848     if (ch == '/') {
3849       while (1) {
3850         if (!GetNextChar(ch))
3851           return FALSE;
3852 
3853         if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
3854           m_Pos--;
3855           CFX_ByteString ret(buffer, index);
3856           token = ret;
3857           return TRUE;
3858         }
3859 
3860         if (index < sizeof(buffer))
3861           buffer[index++] = ch;
3862       }
3863     } else if (ch == '<') {
3864       if (!GetNextChar(ch))
3865         return FALSE;
3866 
3867       if (ch == '<')
3868         buffer[index++] = ch;
3869       else
3870         m_Pos--;
3871     } else if (ch == '>') {
3872       if (!GetNextChar(ch))
3873         return FALSE;
3874 
3875       if (ch == '>')
3876         buffer[index++] = ch;
3877       else
3878         m_Pos--;
3879     }
3880 
3881     CFX_ByteString ret(buffer, index);
3882     token = ret;
3883     return TRUE;
3884   }
3885 
3886   while (1) {
3887     if (index < sizeof(buffer))
3888       buffer[index++] = ch;
3889 
3890     if (!GetNextChar(ch))
3891       return FALSE;
3892 
3893     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
3894       m_Pos--;
3895       break;
3896     }
3897   }
3898 
3899   token = CFX_ByteString(buffer, index);
3900   return TRUE;
3901 }
3902 
GetNextChar(uint8_t & ch)3903 FX_BOOL CPDF_DataAvail::GetNextChar(uint8_t& ch) {
3904   FX_FILESIZE pos = m_Pos;
3905   if (pos >= m_dwFileLen) {
3906     return FALSE;
3907   }
3908   if (m_bufferOffset >= pos ||
3909       (FX_FILESIZE)(m_bufferOffset + m_bufferSize) <= pos) {
3910     FX_FILESIZE read_pos = pos;
3911     FX_DWORD read_size = 512;
3912     if ((FX_FILESIZE)read_size > m_dwFileLen) {
3913       read_size = (FX_DWORD)m_dwFileLen;
3914     }
3915     if ((FX_FILESIZE)(read_pos + read_size) > m_dwFileLen) {
3916       read_pos = m_dwFileLen - read_size;
3917     }
3918     if (!m_pFileRead->ReadBlock(m_bufferData, read_pos, read_size)) {
3919       return FALSE;
3920     }
3921     m_bufferOffset = read_pos;
3922     m_bufferSize = read_size;
3923   }
3924   ch = m_bufferData[pos - m_bufferOffset];
3925   m_Pos++;
3926   return TRUE;
3927 }
CheckCrossRefItem(IFX_DownloadHints * pHints)3928 FX_BOOL CPDF_DataAvail::CheckCrossRefItem(IFX_DownloadHints* pHints) {
3929   int32_t iSize = 0;
3930   CFX_ByteString token;
3931   while (1) {
3932     if (!GetNextToken(token)) {
3933       iSize = (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
3934       pHints->AddSegment(m_Pos, iSize);
3935       return FALSE;
3936     }
3937     if (token == "trailer") {
3938       m_dwTrailerOffset = m_Pos;
3939       m_docStatus = PDF_DATAAVAIL_TRAILER;
3940       return TRUE;
3941     }
3942   }
3943 }
CheckAllCrossRefStream(IFX_DownloadHints * pHints)3944 FX_BOOL CPDF_DataAvail::CheckAllCrossRefStream(IFX_DownloadHints* pHints) {
3945   FX_FILESIZE xref_offset = 0;
3946   int32_t nRet = CheckCrossRefStream(pHints, xref_offset);
3947   if (nRet == 1) {
3948     if (!xref_offset) {
3949       m_docStatus = PDF_DATAAVAIL_LOADALLCROSSREF;
3950     } else {
3951       m_dwCurrentXRefSteam = xref_offset;
3952       m_Pos = xref_offset;
3953     }
3954     return TRUE;
3955   }
3956   if (nRet == -1) {
3957     m_docStatus = PDF_DATAAVAIL_ERROR;
3958   }
3959   return FALSE;
3960 }
CheckCrossRef(IFX_DownloadHints * pHints)3961 FX_BOOL CPDF_DataAvail::CheckCrossRef(IFX_DownloadHints* pHints) {
3962   int32_t iSize = 0;
3963   CFX_ByteString token;
3964   if (!GetNextToken(token)) {
3965     iSize = (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
3966     pHints->AddSegment(m_Pos, iSize);
3967     return FALSE;
3968   }
3969   if (token == "xref") {
3970     m_CrossOffset.InsertAt(0, m_dwXRefOffset);
3971     while (1) {
3972       if (!GetNextToken(token)) {
3973         iSize =
3974             (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
3975         pHints->AddSegment(m_Pos, iSize);
3976         m_docStatus = PDF_DATAAVAIL_CROSSREF_ITEM;
3977         return FALSE;
3978       }
3979       if (token == "trailer") {
3980         m_dwTrailerOffset = m_Pos;
3981         m_docStatus = PDF_DATAAVAIL_TRAILER;
3982         return TRUE;
3983       }
3984     }
3985   } else {
3986     m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3987     return TRUE;
3988   }
3989   return FALSE;
3990 }
CheckTrailerAppend(IFX_DownloadHints * pHints)3991 FX_BOOL CPDF_DataAvail::CheckTrailerAppend(IFX_DownloadHints* pHints) {
3992   if (m_Pos < m_dwFileLen) {
3993     FX_FILESIZE dwAppendPos = m_Pos + m_syntaxParser.SavePos();
3994     int32_t iSize = (int32_t)(
3995         dwAppendPos + 512 > m_dwFileLen ? m_dwFileLen - dwAppendPos : 512);
3996     if (!m_pFileAvail->IsDataAvail(dwAppendPos, iSize)) {
3997       pHints->AddSegment(dwAppendPos, iSize);
3998       return FALSE;
3999     }
4000   }
4001   if (m_dwPrevXRefOffset) {
4002     SetStartOffset(m_dwPrevXRefOffset);
4003     m_docStatus = PDF_DATAAVAIL_CROSSREF;
4004   } else {
4005     m_docStatus = PDF_DATAAVAIL_LOADALLCROSSREF;
4006   }
4007   return TRUE;
4008 }
4009 
CheckTrailer(IFX_DownloadHints * pHints)4010 FX_BOOL CPDF_DataAvail::CheckTrailer(IFX_DownloadHints* pHints) {
4011   int32_t iTrailerSize =
4012       (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
4013   if (m_pFileAvail->IsDataAvail(m_Pos, iTrailerSize)) {
4014     int32_t iSize = (int32_t)(m_Pos + iTrailerSize - m_dwTrailerOffset);
4015     CFX_BinaryBuf buf(iSize);
4016     uint8_t* pBuf = buf.GetBuffer();
4017     if (!pBuf) {
4018       m_docStatus = PDF_DATAAVAIL_ERROR;
4019       return FALSE;
4020     }
4021     if (!m_pFileRead->ReadBlock(pBuf, m_dwTrailerOffset, iSize)) {
4022       return FALSE;
4023     }
4024     ScopedFileStream file(FX_CreateMemoryStream(pBuf, (size_t)iSize, FALSE));
4025     m_syntaxParser.InitParser(file.get(), 0);
4026     std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pTrailer(
4027         m_syntaxParser.GetObject(nullptr, 0, 0, nullptr, true));
4028     if (!pTrailer) {
4029       m_Pos += m_syntaxParser.SavePos();
4030       pHints->AddSegment(m_Pos, iTrailerSize);
4031       return FALSE;
4032     }
4033     if (!pTrailer->IsDictionary())
4034       return FALSE;
4035 
4036     CPDF_Dictionary* pTrailerDict = pTrailer->GetDict();
4037     CPDF_Object* pEncrypt = pTrailerDict->GetElement("Encrypt");
4038     if (ToReference(pEncrypt)) {
4039       m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
4040       return TRUE;
4041     }
4042 
4043     FX_DWORD xrefpos = GetDirectInteger(pTrailerDict, "Prev");
4044     if (xrefpos) {
4045       m_dwPrevXRefOffset = GetDirectInteger(pTrailerDict, "XRefStm");
4046       if (m_dwPrevXRefOffset) {
4047         m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
4048       } else {
4049         m_dwPrevXRefOffset = xrefpos;
4050         if (m_dwPrevXRefOffset >= m_dwFileLen) {
4051           m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
4052         } else {
4053           SetStartOffset(m_dwPrevXRefOffset);
4054           m_docStatus = PDF_DATAAVAIL_TRAILER_APPEND;
4055         }
4056       }
4057       return TRUE;
4058     }
4059     m_dwPrevXRefOffset = 0;
4060     m_docStatus = PDF_DATAAVAIL_TRAILER_APPEND;
4061     return TRUE;
4062   }
4063   pHints->AddSegment(m_Pos, iTrailerSize);
4064   return FALSE;
4065 }
4066 
CheckPage(int32_t iPage,IFX_DownloadHints * pHints)4067 FX_BOOL CPDF_DataAvail::CheckPage(int32_t iPage, IFX_DownloadHints* pHints) {
4068   while (TRUE) {
4069     switch (m_docStatus) {
4070       case PDF_DATAAVAIL_PAGETREE:
4071         if (!LoadDocPages(pHints)) {
4072           return FALSE;
4073         }
4074         break;
4075       case PDF_DATAAVAIL_PAGE:
4076         if (!LoadDocPage(iPage, pHints)) {
4077           return FALSE;
4078         }
4079         break;
4080       case PDF_DATAAVAIL_ERROR:
4081         return LoadAllFile(pHints);
4082       default:
4083         m_bPagesTreeLoad = TRUE;
4084         m_bPagesLoad = TRUE;
4085         m_bCurPageDictLoadOK = TRUE;
4086         m_docStatus = PDF_DATAAVAIL_PAGE;
4087         return TRUE;
4088     }
4089   }
4090 }
CheckArrayPageNode(FX_DWORD dwPageNo,CPDF_PageNode * pPageNode,IFX_DownloadHints * pHints)4091 FX_BOOL CPDF_DataAvail::CheckArrayPageNode(FX_DWORD dwPageNo,
4092                                            CPDF_PageNode* pPageNode,
4093                                            IFX_DownloadHints* pHints) {
4094   FX_BOOL bExist = FALSE;
4095   CPDF_Object* pPages = GetObject(dwPageNo, pHints, &bExist);
4096   if (!bExist) {
4097     m_docStatus = PDF_DATAAVAIL_ERROR;
4098     return FALSE;
4099   }
4100   if (!pPages) {
4101     if (m_docStatus == PDF_DATAAVAIL_ERROR) {
4102       m_docStatus = PDF_DATAAVAIL_ERROR;
4103       return FALSE;
4104     }
4105     return FALSE;
4106   }
4107 
4108   CPDF_Array* pArray = pPages->AsArray();
4109   if (!pArray) {
4110     pPages->Release();
4111     m_docStatus = PDF_DATAAVAIL_ERROR;
4112     return FALSE;
4113   }
4114 
4115   pPageNode->m_type = PDF_PAGENODE_PAGES;
4116   for (FX_DWORD i = 0; i < pArray->GetCount(); ++i) {
4117     CPDF_Reference* pKid = ToReference(pArray->GetElement(i));
4118     if (!pKid)
4119       continue;
4120 
4121     CPDF_PageNode* pNode = new CPDF_PageNode();
4122     pPageNode->m_childNode.Add(pNode);
4123     pNode->m_dwPageNo = pKid->GetRefObjNum();
4124   }
4125   pPages->Release();
4126   return TRUE;
4127 }
CheckUnkownPageNode(FX_DWORD dwPageNo,CPDF_PageNode * pPageNode,IFX_DownloadHints * pHints)4128 FX_BOOL CPDF_DataAvail::CheckUnkownPageNode(FX_DWORD dwPageNo,
4129                                             CPDF_PageNode* pPageNode,
4130                                             IFX_DownloadHints* pHints) {
4131   FX_BOOL bExist = FALSE;
4132   CPDF_Object* pPage = GetObject(dwPageNo, pHints, &bExist);
4133   if (!bExist) {
4134     m_docStatus = PDF_DATAAVAIL_ERROR;
4135     return FALSE;
4136   }
4137   if (!pPage) {
4138     if (m_docStatus == PDF_DATAAVAIL_ERROR) {
4139       m_docStatus = PDF_DATAAVAIL_ERROR;
4140       return FALSE;
4141     }
4142     return FALSE;
4143   }
4144   if (pPage->IsArray()) {
4145     pPageNode->m_dwPageNo = dwPageNo;
4146     pPageNode->m_type = PDF_PAGENODE_ARRAY;
4147     pPage->Release();
4148     return TRUE;
4149   }
4150   if (!pPage->IsDictionary()) {
4151     pPage->Release();
4152     m_docStatus = PDF_DATAAVAIL_ERROR;
4153     return FALSE;
4154   }
4155   pPageNode->m_dwPageNo = dwPageNo;
4156   CPDF_Dictionary* pDict = pPage->GetDict();
4157   CFX_ByteString type = pDict->GetString("Type");
4158   if (type == "Pages") {
4159     pPageNode->m_type = PDF_PAGENODE_PAGES;
4160     CPDF_Object* pKids = pDict->GetElement("Kids");
4161     if (!pKids) {
4162       m_docStatus = PDF_DATAAVAIL_PAGE;
4163       return TRUE;
4164     }
4165     switch (pKids->GetType()) {
4166       case PDFOBJ_REFERENCE: {
4167         CPDF_Reference* pKid = pKids->AsReference();
4168         CPDF_PageNode* pNode = new CPDF_PageNode();
4169         pPageNode->m_childNode.Add(pNode);
4170         pNode->m_dwPageNo = pKid->GetRefObjNum();
4171       } break;
4172       case PDFOBJ_ARRAY: {
4173         CPDF_Array* pKidsArray = pKids->AsArray();
4174         for (FX_DWORD i = 0; i < pKidsArray->GetCount(); ++i) {
4175           CPDF_Reference* pKid = ToReference(pKidsArray->GetElement(i));
4176           if (!pKid)
4177             continue;
4178 
4179           CPDF_PageNode* pNode = new CPDF_PageNode();
4180           pPageNode->m_childNode.Add(pNode);
4181           pNode->m_dwPageNo = pKid->GetRefObjNum();
4182         }
4183       } break;
4184       default:
4185         break;
4186     }
4187   } else if (type == "Page") {
4188     pPageNode->m_type = PDF_PAGENODE_PAGE;
4189   } else {
4190     pPage->Release();
4191     m_docStatus = PDF_DATAAVAIL_ERROR;
4192     return FALSE;
4193   }
4194   pPage->Release();
4195   return TRUE;
4196 }
CheckPageNode(CPDF_PageNode & pageNodes,int32_t iPage,int32_t & iCount,IFX_DownloadHints * pHints,int level)4197 FX_BOOL CPDF_DataAvail::CheckPageNode(CPDF_PageNode& pageNodes,
4198                                       int32_t iPage,
4199                                       int32_t& iCount,
4200                                       IFX_DownloadHints* pHints,
4201                                       int level) {
4202   if (level >= kMaxPageRecursionDepth) {
4203     return FALSE;
4204   }
4205   int32_t iSize = pageNodes.m_childNode.GetSize();
4206   if (iSize <= 0 || iPage >= iSize) {
4207     m_docStatus = PDF_DATAAVAIL_ERROR;
4208     return FALSE;
4209   }
4210   for (int32_t i = 0; i < iSize; ++i) {
4211     CPDF_PageNode* pNode = pageNodes.m_childNode.GetAt(i);
4212     if (!pNode) {
4213       continue;
4214     }
4215     switch (pNode->m_type) {
4216       case PDF_PAGENODE_UNKOWN:
4217         if (!CheckUnkownPageNode(pNode->m_dwPageNo, pNode, pHints)) {
4218           return FALSE;
4219         }
4220         --i;
4221         break;
4222       case PDF_PAGENODE_PAGE:
4223         iCount++;
4224         if (iPage == iCount && m_pDocument) {
4225           m_pDocument->m_PageList.SetAt(iPage, pNode->m_dwPageNo);
4226         }
4227         break;
4228       case PDF_PAGENODE_PAGES:
4229         if (!CheckPageNode(*pNode, iPage, iCount, pHints, level + 1)) {
4230           return FALSE;
4231         }
4232         break;
4233       case PDF_PAGENODE_ARRAY:
4234         if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode, pHints)) {
4235           return FALSE;
4236         }
4237         --i;
4238         break;
4239     }
4240     if (iPage == iCount) {
4241       m_docStatus = PDF_DATAAVAIL_DONE;
4242       return TRUE;
4243     }
4244   }
4245   return TRUE;
4246 }
LoadDocPage(int32_t iPage,IFX_DownloadHints * pHints)4247 FX_BOOL CPDF_DataAvail::LoadDocPage(int32_t iPage, IFX_DownloadHints* pHints) {
4248   if (m_pDocument->GetPageCount() <= iPage ||
4249       m_pDocument->m_PageList.GetAt(iPage)) {
4250     m_docStatus = PDF_DATAAVAIL_DONE;
4251     return TRUE;
4252   }
4253   if (m_pageNodes.m_type == PDF_PAGENODE_PAGE) {
4254     if (iPage == 0) {
4255       m_docStatus = PDF_DATAAVAIL_DONE;
4256       return TRUE;
4257     }
4258     m_docStatus = PDF_DATAAVAIL_ERROR;
4259     return TRUE;
4260   }
4261   int32_t iCount = -1;
4262   return CheckPageNode(m_pageNodes, iPage, iCount, pHints, 0);
4263 }
CheckPageCount(IFX_DownloadHints * pHints)4264 FX_BOOL CPDF_DataAvail::CheckPageCount(IFX_DownloadHints* pHints) {
4265   FX_BOOL bExist = FALSE;
4266   CPDF_Object* pPages = GetObject(m_PagesObjNum, pHints, &bExist);
4267   if (!bExist) {
4268     m_docStatus = PDF_DATAAVAIL_ERROR;
4269     return FALSE;
4270   }
4271   if (!pPages) {
4272     return FALSE;
4273   }
4274   CPDF_Dictionary* pPagesDict = pPages->GetDict();
4275   if (!pPagesDict) {
4276     pPages->Release();
4277     m_docStatus = PDF_DATAAVAIL_ERROR;
4278     return FALSE;
4279   }
4280   if (!pPagesDict->KeyExist("Kids")) {
4281     pPages->Release();
4282     return TRUE;
4283   }
4284   int count = pPagesDict->GetInteger("Count");
4285   if (count > 0) {
4286     pPages->Release();
4287     return TRUE;
4288   }
4289   pPages->Release();
4290   return FALSE;
4291 }
LoadDocPages(IFX_DownloadHints * pHints)4292 FX_BOOL CPDF_DataAvail::LoadDocPages(IFX_DownloadHints* pHints) {
4293   if (!CheckUnkownPageNode(m_PagesObjNum, &m_pageNodes, pHints)) {
4294     return FALSE;
4295   }
4296   if (CheckPageCount(pHints)) {
4297     m_docStatus = PDF_DATAAVAIL_PAGE;
4298     return TRUE;
4299   }
4300   m_bTotalLoadPageTree = TRUE;
4301   return FALSE;
4302 }
LoadPages(IFX_DownloadHints * pHints)4303 FX_BOOL CPDF_DataAvail::LoadPages(IFX_DownloadHints* pHints) {
4304   while (!m_bPagesTreeLoad) {
4305     if (!CheckPageStatus(pHints)) {
4306       return FALSE;
4307     }
4308   }
4309   if (m_bPagesLoad) {
4310     return TRUE;
4311   }
4312   m_pDocument->LoadPages();
4313   return FALSE;
4314 }
CheckLinearizedData(IFX_DownloadHints * pHints)4315 IPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData(
4316     IFX_DownloadHints* pHints) {
4317   if (m_bLinearedDataOK) {
4318     return DataAvailable;
4319   }
4320 
4321   if (!m_bMainXRefLoadTried) {
4322     FX_SAFE_DWORD data_size = m_dwFileLen;
4323     data_size -= m_dwLastXRefOffset;
4324     if (!data_size.IsValid()) {
4325       return DataError;
4326     }
4327     if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset,
4328                                    data_size.ValueOrDie())) {
4329       pHints->AddSegment(m_dwLastXRefOffset, data_size.ValueOrDie());
4330       return DataNotAvailable;
4331     }
4332     FX_DWORD dwRet = m_pDocument->GetParser()->LoadLinearizedMainXRefTable();
4333     m_bMainXRefLoadTried = TRUE;
4334     if (dwRet != PDFPARSE_ERROR_SUCCESS) {
4335       return DataError;
4336     }
4337     if (!PreparePageItem()) {
4338       return DataNotAvailable;
4339     }
4340     m_bMainXRefLoadedOK = TRUE;
4341     m_bLinearedDataOK = TRUE;
4342   }
4343 
4344   return m_bLinearedDataOK ? DataAvailable : DataNotAvailable;
4345 }
CheckPageAnnots(int32_t iPage,IFX_DownloadHints * pHints)4346 FX_BOOL CPDF_DataAvail::CheckPageAnnots(int32_t iPage,
4347                                         IFX_DownloadHints* pHints) {
4348   if (!m_objs_array.GetSize()) {
4349     m_objs_array.RemoveAll();
4350     m_ObjectSet.clear();
4351     CPDF_Dictionary* pPageDict = m_pDocument->GetPage(iPage);
4352     if (!pPageDict) {
4353       return TRUE;
4354     }
4355     CPDF_Object* pAnnots = pPageDict->GetElement("Annots");
4356     if (!pAnnots) {
4357       return TRUE;
4358     }
4359     CFX_ArrayTemplate<CPDF_Object*> obj_array;
4360     obj_array.Add(pAnnots);
4361     FX_BOOL bRet = IsObjectsAvail(obj_array, FALSE, pHints, m_objs_array);
4362     if (bRet) {
4363       m_objs_array.RemoveAll();
4364     }
4365     return bRet;
4366   }
4367   CFX_ArrayTemplate<CPDF_Object*> new_objs_array;
4368   FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
4369   m_objs_array.RemoveAll();
4370   if (!bRet) {
4371     m_objs_array.Append(new_objs_array);
4372   }
4373   return bRet;
4374 }
CheckLinearizedFirstPage(int32_t iPage,IFX_DownloadHints * pHints)4375 IPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedFirstPage(
4376     int32_t iPage,
4377     IFX_DownloadHints* pHints) {
4378   if (!m_bAnnotsLoad) {
4379     if (!CheckPageAnnots(iPage, pHints)) {
4380       return DataNotAvailable;
4381     }
4382     m_bAnnotsLoad = TRUE;
4383   }
4384 
4385   DocAvailStatus nRet = CheckLinearizedData(pHints);
4386   if (nRet == DataAvailable)
4387     m_bPageLoadedOK = FALSE;
4388   return nRet;
4389 }
HaveResourceAncestor(CPDF_Dictionary * pDict)4390 FX_BOOL CPDF_DataAvail::HaveResourceAncestor(CPDF_Dictionary* pDict) {
4391   CFX_AutoRestorer<int> restorer(&s_CurrentDataAvailRecursionDepth);
4392   if (++s_CurrentDataAvailRecursionDepth > kMaxDataAvailRecursionDepth) {
4393     return FALSE;
4394   }
4395   CPDF_Object* pParent = pDict->GetElement("Parent");
4396   if (!pParent) {
4397     return FALSE;
4398   }
4399   CPDF_Dictionary* pParentDict = pParent->GetDict();
4400   if (!pParentDict) {
4401     return FALSE;
4402   }
4403   CPDF_Object* pRet = pParentDict->GetElement("Resources");
4404   if (pRet) {
4405     m_pPageResource = pRet;
4406     return TRUE;
4407   }
4408   return HaveResourceAncestor(pParentDict);
4409 }
IsPageAvail(int32_t iPage,IFX_DownloadHints * pHints)4410 IPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail(
4411     int32_t iPage,
4412     IFX_DownloadHints* pHints) {
4413   if (!m_pDocument) {
4414     return DataError;
4415   }
4416   if (IsFirstCheck(iPage)) {
4417     m_bCurPageDictLoadOK = FALSE;
4418     m_bPageLoadedOK = FALSE;
4419     m_bAnnotsLoad = FALSE;
4420     m_bNeedDownLoadResource = FALSE;
4421     m_objs_array.RemoveAll();
4422     m_ObjectSet.clear();
4423   }
4424   if (pdfium::ContainsKey(m_pagesLoadState, iPage))
4425     return DataAvailable;
4426 
4427   if (m_bLinearized) {
4428     if ((FX_DWORD)iPage == m_dwFirstPageNo) {
4429       DocAvailStatus nRet = CheckLinearizedFirstPage(iPage, pHints);
4430       if (nRet == DataAvailable)
4431         m_pagesLoadState.insert(iPage);
4432       return nRet;
4433     }
4434     DocAvailStatus nResult = CheckLinearizedData(pHints);
4435     if (nResult != DataAvailable) {
4436       return nResult;
4437     }
4438     if (m_pHintTables) {
4439       nResult = m_pHintTables->CheckPage(iPage, pHints);
4440       if (nResult != DataAvailable)
4441         return nResult;
4442       m_pagesLoadState.insert(iPage);
4443       return DataAvailable;
4444     }
4445     if (m_bMainXRefLoadedOK) {
4446       if (m_bTotalLoadPageTree) {
4447         if (!LoadPages(pHints)) {
4448           return DataNotAvailable;
4449         }
4450       } else {
4451         if (!m_bCurPageDictLoadOK && !CheckPage(iPage, pHints)) {
4452           return DataNotAvailable;
4453         }
4454       }
4455     } else {
4456       if (!LoadAllFile(pHints)) {
4457         return DataNotAvailable;
4458       }
4459       ((CPDF_Parser*)m_pDocument->GetParser())->RebuildCrossRef();
4460       ResetFirstCheck(iPage);
4461       return DataAvailable;
4462     }
4463   } else {
4464     if (!m_bTotalLoadPageTree) {
4465       if (!m_bCurPageDictLoadOK && !CheckPage(iPage, pHints)) {
4466         return DataNotAvailable;
4467       }
4468     }
4469   }
4470   if (m_bHaveAcroForm && !m_bAcroFormLoad) {
4471     if (!CheckAcroFormSubObject(pHints)) {
4472       return DataNotAvailable;
4473     }
4474     m_bAcroFormLoad = TRUE;
4475   }
4476   if (!m_bPageLoadedOK) {
4477     if (!m_objs_array.GetSize()) {
4478       m_objs_array.RemoveAll();
4479       m_ObjectSet.clear();
4480       m_pPageDict = m_pDocument->GetPage(iPage);
4481       if (!m_pPageDict) {
4482         ResetFirstCheck(iPage);
4483         return DataAvailable;
4484       }
4485       CFX_ArrayTemplate<CPDF_Object*> obj_array;
4486       obj_array.Add(m_pPageDict);
4487       FX_BOOL bRet = IsObjectsAvail(obj_array, TRUE, pHints, m_objs_array);
4488       if (!bRet)
4489         return DataNotAvailable;
4490 
4491       m_objs_array.RemoveAll();
4492     } else {
4493       CFX_ArrayTemplate<CPDF_Object*> new_objs_array;
4494       FX_BOOL bRet =
4495           IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
4496       m_objs_array.RemoveAll();
4497       if (!bRet) {
4498         m_objs_array.Append(new_objs_array);
4499         return DataNotAvailable;
4500       }
4501     }
4502     m_bPageLoadedOK = TRUE;
4503   }
4504 
4505   if (!m_bAnnotsLoad) {
4506     if (!CheckPageAnnots(iPage, pHints)) {
4507       return DataNotAvailable;
4508     }
4509     m_bAnnotsLoad = TRUE;
4510   }
4511 
4512   if (m_pPageDict && !m_bNeedDownLoadResource) {
4513     m_pPageResource = m_pPageDict->GetElement("Resources");
4514     if (!m_pPageResource) {
4515       m_bNeedDownLoadResource = HaveResourceAncestor(m_pPageDict);
4516     } else {
4517       m_bNeedDownLoadResource = TRUE;
4518     }
4519   }
4520   if (m_bNeedDownLoadResource) {
4521     FX_BOOL bRet = CheckResources(pHints);
4522     if (!bRet) {
4523       return DataNotAvailable;
4524     }
4525     m_bNeedDownLoadResource = FALSE;
4526   }
4527   m_bPageLoadedOK = FALSE;
4528   m_bAnnotsLoad = FALSE;
4529   m_bCurPageDictLoadOK = FALSE;
4530   ResetFirstCheck(iPage);
4531   m_pagesLoadState.insert(iPage);
4532   return DataAvailable;
4533 }
CheckResources(IFX_DownloadHints * pHints)4534 FX_BOOL CPDF_DataAvail::CheckResources(IFX_DownloadHints* pHints) {
4535   if (!m_objs_array.GetSize()) {
4536     m_objs_array.RemoveAll();
4537     CFX_ArrayTemplate<CPDF_Object*> obj_array;
4538     obj_array.Add(m_pPageResource);
4539     FX_BOOL bRet = IsObjectsAvail(obj_array, TRUE, pHints, m_objs_array);
4540     if (bRet) {
4541       m_objs_array.RemoveAll();
4542     }
4543     return bRet;
4544   }
4545   CFX_ArrayTemplate<CPDF_Object*> new_objs_array;
4546   FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
4547   m_objs_array.RemoveAll();
4548   if (!bRet) {
4549     m_objs_array.Append(new_objs_array);
4550   }
4551   return bRet;
4552 }
GetLinearizedMainXRefInfo(FX_FILESIZE * pPos,FX_DWORD * pSize)4553 void CPDF_DataAvail::GetLinearizedMainXRefInfo(FX_FILESIZE* pPos,
4554                                                FX_DWORD* pSize) {
4555   if (pPos) {
4556     *pPos = m_dwLastXRefOffset;
4557   }
4558   if (pSize) {
4559     *pSize = (FX_DWORD)(m_dwFileLen - m_dwLastXRefOffset);
4560   }
4561 }
GetPageCount() const4562 int CPDF_DataAvail::GetPageCount() const {
4563   if (m_pLinearized) {
4564     CPDF_Dictionary* pDict = m_pLinearized->GetDict();
4565     CPDF_Object* pObj = pDict ? pDict->GetElementValue("N") : nullptr;
4566     return pObj ? pObj->GetInteger() : 0;
4567   }
4568   return m_pDocument ? m_pDocument->GetPageCount() : 0;
4569 }
GetPage(int index)4570 CPDF_Dictionary* CPDF_DataAvail::GetPage(int index) {
4571   if (!m_pDocument || index < 0 || index >= this->GetPageCount()) {
4572     return nullptr;
4573   }
4574   if (m_pLinearized) {
4575     CPDF_Dictionary* pDict = m_pLinearized->GetDict();
4576     CPDF_Object* pObj = pDict ? pDict->GetElementValue("P") : nullptr;
4577     int pageNum = pObj ? pObj->GetInteger() : 0;
4578     if (m_pHintTables && index != pageNum) {
4579       FX_FILESIZE szPageStartPos = 0;
4580       FX_FILESIZE szPageLength = 0;
4581       FX_DWORD dwObjNum = 0;
4582       FX_BOOL bPagePosGot = m_pHintTables->GetPagePos(index, szPageStartPos,
4583                                                       szPageLength, dwObjNum);
4584       if (!bPagePosGot) {
4585         return nullptr;
4586       }
4587       m_syntaxParser.InitParser(m_pFileRead, (FX_DWORD)szPageStartPos);
4588       CPDF_Object* pPageDict = ParseIndirectObjectAt(0, dwObjNum, m_pDocument);
4589       if (!pPageDict) {
4590         return nullptr;
4591       }
4592       if (!m_pDocument->InsertIndirectObject(dwObjNum, pPageDict))
4593         return nullptr;
4594       return pPageDict->GetDict();
4595     }
4596   }
4597   return m_pDocument->GetPage(index);
4598 }
IsFormAvail(IFX_DownloadHints * pHints)4599 IPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
4600     IFX_DownloadHints* pHints) {
4601   if (!m_pDocument) {
4602     return FormAvailable;
4603   }
4604   if (!m_bLinearizedFormParamLoad) {
4605     CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
4606     if (!pRoot) {
4607       return FormAvailable;
4608     }
4609     CPDF_Object* pAcroForm = pRoot->GetElement("AcroForm");
4610     if (!pAcroForm) {
4611       return FormNotExist;
4612     }
4613     DocAvailStatus nDocStatus = CheckLinearizedData(pHints);
4614     if (nDocStatus == DataError)
4615       return FormError;
4616     if (nDocStatus == DataNotAvailable)
4617       return FormNotAvailable;
4618 
4619     if (!m_objs_array.GetSize()) {
4620       m_objs_array.Add(pAcroForm->GetDict());
4621     }
4622     m_bLinearizedFormParamLoad = TRUE;
4623   }
4624   CFX_ArrayTemplate<CPDF_Object*> new_objs_array;
4625   FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
4626   m_objs_array.RemoveAll();
4627   if (!bRet) {
4628     m_objs_array.Append(new_objs_array);
4629     return FormNotAvailable;
4630   }
4631   return FormAvailable;
4632 }
4633 
~CPDF_PageNode()4634 CPDF_PageNode::~CPDF_PageNode() {
4635   for (int32_t i = 0; i < m_childNode.GetSize(); ++i) {
4636     delete m_childNode[i];
4637   }
4638   m_childNode.RemoveAll();
4639 }
~CPDF_HintTables()4640 CPDF_HintTables::~CPDF_HintTables() {
4641   m_dwDeltaNObjsArray.RemoveAll();
4642   m_dwNSharedObjsArray.RemoveAll();
4643   m_dwSharedObjNumArray.RemoveAll();
4644   m_dwIdentifierArray.RemoveAll();
4645   m_szPageOffsetArray.RemoveAll();
4646   m_szSharedObjOffsetArray.RemoveAll();
4647 }
GetItemLength(int index,const CFX_FileSizeArray & szArray)4648 FX_DWORD CPDF_HintTables::GetItemLength(int index,
4649                                         const CFX_FileSizeArray& szArray) {
4650   if (index < 0 || szArray.GetSize() < 2 || index > szArray.GetSize() - 2 ||
4651       szArray[index] > szArray[index + 1])
4652     return 0;
4653   return szArray[index + 1] - szArray[index];
4654 }
ReadPageHintTable(CFX_BitStream * hStream)4655 FX_BOOL CPDF_HintTables::ReadPageHintTable(CFX_BitStream* hStream) {
4656   if (!hStream || hStream->IsEOF())
4657     return FALSE;
4658   int nStreamOffset = ReadPrimaryHintStreamOffset();
4659   int nStreamLen = ReadPrimaryHintStreamLength();
4660   if (nStreamOffset < 0 || nStreamLen < 1)
4661     return FALSE;
4662 
4663   const FX_DWORD kHeaderSize = 288;
4664   if (hStream->BitsRemaining() < kHeaderSize)
4665     return FALSE;
4666   // Item 1: The least number of objects in a page.
4667   FX_DWORD dwObjLeastNum = hStream->GetBits(32);
4668   // Item 2: The location of the first page's page object.
4669   FX_DWORD dwFirstObjLoc = hStream->GetBits(32);
4670   if (dwFirstObjLoc > nStreamOffset) {
4671     FX_SAFE_DWORD safeLoc = pdfium::base::checked_cast<FX_DWORD>(nStreamLen);
4672     safeLoc += dwFirstObjLoc;
4673     if (!safeLoc.IsValid())
4674       return FALSE;
4675     m_szFirstPageObjOffset =
4676         pdfium::base::checked_cast<FX_FILESIZE>(safeLoc.ValueOrDie());
4677   } else {
4678     m_szFirstPageObjOffset =
4679         pdfium::base::checked_cast<FX_FILESIZE>(dwFirstObjLoc);
4680   }
4681   // Item 3: The number of bits needed to represent the difference
4682   // between the greatest and least number of objects in a page.
4683   FX_DWORD dwDeltaObjectsBits = hStream->GetBits(16);
4684   // Item 4: The least length of a page in bytes.
4685   FX_DWORD dwPageLeastLen = hStream->GetBits(32);
4686   // Item 5: The number of bits needed to represent the difference
4687   // between the greatest and least length of a page, in bytes.
4688   FX_DWORD dwDeltaPageLenBits = hStream->GetBits(16);
4689   // Skip Item 6, 7, 8, 9 total 96 bits.
4690   hStream->SkipBits(96);
4691   // Item 10: The number of bits needed to represent the greatest
4692   // number of shared object references.
4693   FX_DWORD dwSharedObjBits = hStream->GetBits(16);
4694   // Item 11: The number of bits needed to represent the numerically
4695   // greatest shared object identifier used by the pages.
4696   FX_DWORD dwSharedIdBits = hStream->GetBits(16);
4697   // Item 12: The number of bits needed to represent the numerator of
4698   // the fractional position for each shared object reference. For each
4699   // shared object referenced from a page, there is an indication of
4700   // where in the page's content stream the object is first referenced.
4701   FX_DWORD dwSharedNumeratorBits = hStream->GetBits(16);
4702   // Item 13: Skip Item 13 which has 16 bits.
4703   hStream->SkipBits(16);
4704   CPDF_Object* pPageNum = m_pLinearizedDict->GetElementValue("N");
4705   int nPages = pPageNum ? pPageNum->GetInteger() : 0;
4706   if (nPages < 1)
4707     return FALSE;
4708 
4709   FX_SAFE_DWORD required_bits = dwDeltaObjectsBits;
4710   required_bits *= pdfium::base::checked_cast<FX_DWORD>(nPages);
4711   if (!CanReadFromBitStream(hStream, required_bits))
4712     return FALSE;
4713   for (int i = 0; i < nPages; ++i) {
4714     FX_SAFE_DWORD safeDeltaObj = hStream->GetBits(dwDeltaObjectsBits);
4715     safeDeltaObj += dwObjLeastNum;
4716     if (!safeDeltaObj.IsValid())
4717       return FALSE;
4718     m_dwDeltaNObjsArray.Add(safeDeltaObj.ValueOrDie());
4719   }
4720   hStream->ByteAlign();
4721 
4722   required_bits = dwDeltaPageLenBits;
4723   required_bits *= pdfium::base::checked_cast<FX_DWORD>(nPages);
4724   if (!CanReadFromBitStream(hStream, required_bits))
4725     return FALSE;
4726   CFX_DWordArray dwPageLenArray;
4727   for (int i = 0; i < nPages; ++i) {
4728     FX_SAFE_DWORD safePageLen = hStream->GetBits(dwDeltaPageLenBits);
4729     safePageLen += dwPageLeastLen;
4730     if (!safePageLen.IsValid())
4731       return FALSE;
4732     dwPageLenArray.Add(safePageLen.ValueOrDie());
4733   }
4734   CPDF_Object* pOffsetE = m_pLinearizedDict->GetElementValue("E");
4735   int nOffsetE = pOffsetE ? pOffsetE->GetInteger() : -1;
4736   if (nOffsetE < 0)
4737     return FALSE;
4738   CPDF_Object* pFirstPageNum = m_pLinearizedDict->GetElementValue("P");
4739   int nFirstPageNum = pFirstPageNum ? pFirstPageNum->GetInteger() : 0;
4740   for (int i = 0; i < nPages; ++i) {
4741     if (i == nFirstPageNum) {
4742       m_szPageOffsetArray.Add(m_szFirstPageObjOffset);
4743     } else if (i == nFirstPageNum + 1) {
4744       if (i == 1) {
4745         m_szPageOffsetArray.Add(nOffsetE);
4746       } else {
4747         m_szPageOffsetArray.Add(m_szPageOffsetArray[i - 2] +
4748                                 dwPageLenArray[i - 2]);
4749       }
4750     } else {
4751       if (i == 0) {
4752         m_szPageOffsetArray.Add(nOffsetE);
4753       } else {
4754         m_szPageOffsetArray.Add(m_szPageOffsetArray[i - 1] +
4755                                 dwPageLenArray[i - 1]);
4756       }
4757     }
4758   }
4759   if (nPages > 0) {
4760     m_szPageOffsetArray.Add(m_szPageOffsetArray[nPages - 1] +
4761                             dwPageLenArray[nPages - 1]);
4762   }
4763   hStream->ByteAlign();
4764 
4765   // number of shared objects
4766   required_bits = dwSharedObjBits;
4767   required_bits *= pdfium::base::checked_cast<FX_DWORD>(nPages);
4768   if (!CanReadFromBitStream(hStream, required_bits))
4769     return FALSE;
4770   for (int i = 0; i < nPages; i++) {
4771     m_dwNSharedObjsArray.Add(hStream->GetBits(dwSharedObjBits));
4772   }
4773   hStream->ByteAlign();
4774 
4775   // array of identifier, sizes = nshared_objects
4776   for (int i = 0; i < nPages; i++) {
4777     required_bits = dwSharedIdBits;
4778     required_bits *= m_dwNSharedObjsArray[i];
4779     if (!CanReadFromBitStream(hStream, required_bits))
4780       return FALSE;
4781     for (int j = 0; j < m_dwNSharedObjsArray[i]; j++) {
4782       m_dwIdentifierArray.Add(hStream->GetBits(dwSharedIdBits));
4783     }
4784   }
4785   hStream->ByteAlign();
4786 
4787   for (int i = 0; i < nPages; i++) {
4788     FX_SAFE_DWORD safeSize = m_dwNSharedObjsArray[i];
4789     safeSize *= dwSharedNumeratorBits;
4790     if (!CanReadFromBitStream(hStream, safeSize))
4791       return FALSE;
4792     hStream->SkipBits(safeSize.ValueOrDie());
4793   }
4794   hStream->ByteAlign();
4795 
4796   FX_SAFE_DWORD safeTotalPageLen = pdfium::base::checked_cast<FX_DWORD>(nPages);
4797   safeTotalPageLen *= dwDeltaPageLenBits;
4798   if (!CanReadFromBitStream(hStream, safeTotalPageLen))
4799     return FALSE;
4800   hStream->SkipBits(safeTotalPageLen.ValueOrDie());
4801   hStream->ByteAlign();
4802   return TRUE;
4803 }
ReadSharedObjHintTable(CFX_BitStream * hStream,FX_DWORD offset)4804 FX_BOOL CPDF_HintTables::ReadSharedObjHintTable(CFX_BitStream* hStream,
4805                                                 FX_DWORD offset) {
4806   if (!hStream || hStream->IsEOF())
4807     return FALSE;
4808   int nStreamOffset = ReadPrimaryHintStreamOffset();
4809   int nStreamLen = ReadPrimaryHintStreamLength();
4810   if (nStreamOffset < 0 || nStreamLen < 1)
4811     return FALSE;
4812 
4813   FX_SAFE_DWORD bit_offset = offset;
4814   bit_offset *= 8;
4815   if (!bit_offset.IsValid() || hStream->GetPos() > bit_offset.ValueOrDie())
4816     return FALSE;
4817   hStream->SkipBits(bit_offset.ValueOrDie() - hStream->GetPos());
4818 
4819   const FX_DWORD kHeaderSize = 192;
4820   if (hStream->BitsRemaining() < kHeaderSize)
4821     return FALSE;
4822   // Item 1: The object number of the first object in the shared objects
4823   // section.
4824   FX_DWORD dwFirstSharedObjNum = hStream->GetBits(32);
4825   // Item 2: The location of the first object in the shared objects section.
4826   FX_DWORD dwFirstSharedObjLoc = hStream->GetBits(32);
4827   if (dwFirstSharedObjLoc > nStreamOffset)
4828     dwFirstSharedObjLoc += nStreamLen;
4829   // Item 3: The number of shared object entries for the first page.
4830   m_nFirstPageSharedObjs = hStream->GetBits(32);
4831   // Item 4: The number of shared object entries for the shared objects
4832   // section, including the number of shared object entries for the first page.
4833   FX_DWORD dwSharedObjTotal = hStream->GetBits(32);
4834   // Item 5: The number of bits needed to represent the greatest number of
4835   // objects in a shared object group. Skipped.
4836   hStream->SkipBits(16);
4837   // Item 6: The least length of a shared object group in bytes.
4838   FX_DWORD dwGroupLeastLen = hStream->GetBits(32);
4839   // Item 7: The number of bits needed to represent the difference between the
4840   // greatest and least length of a shared object group, in bytes.
4841   FX_DWORD dwDeltaGroupLen = hStream->GetBits(16);
4842   CPDF_Object* pFirstPageObj = m_pLinearizedDict->GetElementValue("O");
4843   int nFirstPageObjNum = pFirstPageObj ? pFirstPageObj->GetInteger() : -1;
4844   if (nFirstPageObjNum < 0)
4845     return FALSE;
4846   FX_DWORD dwPrevObjLen = 0;
4847   FX_DWORD dwCurObjLen = 0;
4848   FX_SAFE_DWORD required_bits = dwSharedObjTotal;
4849   required_bits *= dwDeltaGroupLen;
4850   if (!CanReadFromBitStream(hStream, required_bits))
4851     return FALSE;
4852 
4853   for (int i = 0; i < dwSharedObjTotal; ++i) {
4854     dwPrevObjLen = dwCurObjLen;
4855     FX_SAFE_DWORD safeObjLen = hStream->GetBits(dwDeltaGroupLen);
4856     safeObjLen += dwGroupLeastLen;
4857     if (!safeObjLen.IsValid())
4858       return FALSE;
4859     dwCurObjLen = safeObjLen.ValueOrDie();
4860     if (i < m_nFirstPageSharedObjs) {
4861       m_dwSharedObjNumArray.Add(nFirstPageObjNum + i);
4862       if (i == 0)
4863         m_szSharedObjOffsetArray.Add(m_szFirstPageObjOffset);
4864     } else {
4865       FX_SAFE_DWORD safeObjNum = dwFirstSharedObjNum;
4866       safeObjNum += i - m_nFirstPageSharedObjs;
4867       if (!safeObjNum.IsValid())
4868         return FALSE;
4869       m_dwSharedObjNumArray.Add(safeObjNum.ValueOrDie());
4870       if (i == m_nFirstPageSharedObjs)
4871         m_szSharedObjOffsetArray.Add(
4872             pdfium::base::checked_cast<int32_t>(dwFirstSharedObjLoc));
4873     }
4874     if (i != 0 && i != m_nFirstPageSharedObjs) {
4875       FX_SAFE_INT32 safeLoc = pdfium::base::checked_cast<int32_t>(dwPrevObjLen);
4876       safeLoc += m_szSharedObjOffsetArray[i - 1];
4877       if (!safeLoc.IsValid())
4878         return FALSE;
4879       m_szSharedObjOffsetArray.Add(safeLoc.ValueOrDie());
4880     }
4881   }
4882   if (dwSharedObjTotal > 0) {
4883     FX_SAFE_INT32 safeLoc = pdfium::base::checked_cast<int32_t>(dwCurObjLen);
4884     safeLoc += m_szSharedObjOffsetArray[dwSharedObjTotal - 1];
4885     if (!safeLoc.IsValid())
4886       return FALSE;
4887     m_szSharedObjOffsetArray.Add(safeLoc.ValueOrDie());
4888   }
4889   hStream->ByteAlign();
4890   if (hStream->BitsRemaining() < dwSharedObjTotal)
4891     return FALSE;
4892   hStream->SkipBits(dwSharedObjTotal);
4893   hStream->ByteAlign();
4894   return TRUE;
4895 }
GetPagePos(int index,FX_FILESIZE & szPageStartPos,FX_FILESIZE & szPageLength,FX_DWORD & dwObjNum)4896 FX_BOOL CPDF_HintTables::GetPagePos(int index,
4897                                     FX_FILESIZE& szPageStartPos,
4898                                     FX_FILESIZE& szPageLength,
4899                                     FX_DWORD& dwObjNum) {
4900   if (!m_pLinearizedDict)
4901     return FALSE;
4902   szPageStartPos = m_szPageOffsetArray[index];
4903   szPageLength = GetItemLength(index, m_szPageOffsetArray);
4904   CPDF_Object* pFirstPageNum = m_pLinearizedDict->GetElementValue("P");
4905   int nFirstPageNum = pFirstPageNum ? pFirstPageNum->GetInteger() : 0;
4906   CPDF_Object* pFirstPageObjNum = m_pLinearizedDict->GetElementValue("O");
4907   if (!pFirstPageObjNum)
4908     return FALSE;
4909   int nFirstPageObjNum = pFirstPageObjNum->GetInteger();
4910   if (index == nFirstPageNum) {
4911     dwObjNum = nFirstPageObjNum;
4912     return TRUE;
4913   }
4914   // The object number of remaining pages starts from 1.
4915   dwObjNum = 1;
4916   for (int i = 0; i < index; ++i) {
4917     if (i == nFirstPageNum)
4918       continue;
4919     dwObjNum += m_dwDeltaNObjsArray[i];
4920   }
4921   return TRUE;
4922 }
CheckPage(int index,IFX_DownloadHints * pHints)4923 IPDF_DataAvail::DocAvailStatus CPDF_HintTables::CheckPage(
4924     int index,
4925     IFX_DownloadHints* pHints) {
4926   if (!m_pLinearizedDict || !pHints)
4927     return IPDF_DataAvail::DataError;
4928   CPDF_Object* pFirstAvailPage = m_pLinearizedDict->GetElementValue("P");
4929   int nFirstAvailPage = pFirstAvailPage ? pFirstAvailPage->GetInteger() : 0;
4930   if (index == nFirstAvailPage)
4931     return IPDF_DataAvail::DataAvailable;
4932   FX_DWORD dwLength = GetItemLength(index, m_szPageOffsetArray);
4933   // If two pages have the same offset, it should be treated as an error.
4934   if (!dwLength)
4935     return IPDF_DataAvail::DataError;
4936   if (!m_pDataAvail->IsDataAvail(m_szPageOffsetArray[index], dwLength, pHints))
4937     return IPDF_DataAvail::DataNotAvailable;
4938   // Download data of shared objects in the page.
4939   FX_DWORD offset = 0;
4940   for (int i = 0; i < index; ++i) {
4941     offset += m_dwNSharedObjsArray[i];
4942   }
4943   CPDF_Object* pFirstPageObj = m_pLinearizedDict->GetElementValue("O");
4944   int nFirstPageObjNum = pFirstPageObj ? pFirstPageObj->GetInteger() : -1;
4945   if (nFirstPageObjNum < 0)
4946     return IPDF_DataAvail::DataError;
4947   FX_DWORD dwIndex = 0;
4948   FX_DWORD dwObjNum = 0;
4949   for (int j = 0; j < m_dwNSharedObjsArray[index]; ++j) {
4950     dwIndex = m_dwIdentifierArray[offset + j];
4951     if (dwIndex >= m_dwSharedObjNumArray.GetSize())
4952       return IPDF_DataAvail::DataNotAvailable;
4953     dwObjNum = m_dwSharedObjNumArray[dwIndex];
4954     if (dwObjNum >= nFirstPageObjNum &&
4955         dwObjNum < nFirstPageObjNum + m_nFirstPageSharedObjs) {
4956       continue;
4957     }
4958     dwLength = GetItemLength(dwIndex, m_szSharedObjOffsetArray);
4959     // If two objects have the same offset, it should be treated as an error.
4960     if (!dwLength)
4961       return IPDF_DataAvail::DataError;
4962     if (!m_pDataAvail->IsDataAvail(m_szSharedObjOffsetArray[dwIndex], dwLength,
4963                                    pHints)) {
4964       return IPDF_DataAvail::DataNotAvailable;
4965     }
4966   }
4967   return IPDF_DataAvail::DataAvailable;
4968 }
4969 
LoadHintStream(CPDF_Stream * pHintStream)4970 FX_BOOL CPDF_HintTables::LoadHintStream(CPDF_Stream* pHintStream) {
4971   if (!pHintStream || !m_pLinearizedDict)
4972     return FALSE;
4973   CPDF_Dictionary* pDict = pHintStream->GetDict();
4974   CPDF_Object* pOffset = pDict ? pDict->GetElement("S") : nullptr;
4975   if (!pOffset || pOffset->GetType() != PDFOBJ_NUMBER)
4976     return FALSE;
4977   int shared_hint_table_offset = pOffset->GetInteger();
4978   CPDF_StreamAcc acc;
4979   acc.LoadAllData(pHintStream);
4980   FX_DWORD size = acc.GetSize();
4981   // The header section of page offset hint table is 36 bytes.
4982   // The header section of shared object hint table is 24 bytes.
4983   // Hint table has at least 60 bytes.
4984   const FX_DWORD MIN_STREAM_LEN = 60;
4985   if (size < MIN_STREAM_LEN || shared_hint_table_offset <= 0 ||
4986       size < shared_hint_table_offset) {
4987     return FALSE;
4988   }
4989   CFX_BitStream bs;
4990   bs.Init(acc.GetData(), size);
4991   return ReadPageHintTable(&bs) &&
4992          ReadSharedObjHintTable(&bs, pdfium::base::checked_cast<FX_DWORD>(
4993                                          shared_hint_table_offset));
4994 }
4995 
ReadPrimaryHintStreamOffset() const4996 int CPDF_HintTables::ReadPrimaryHintStreamOffset() const {
4997   if (!m_pLinearizedDict)
4998     return -1;
4999   CPDF_Array* pRange = m_pLinearizedDict->GetArray("H");
5000   if (!pRange)
5001     return -1;
5002   CPDF_Object* pStreamOffset = pRange->GetElementValue(0);
5003   if (!pStreamOffset)
5004     return -1;
5005   return pStreamOffset->GetInteger();
5006 }
ReadPrimaryHintStreamLength() const5007 int CPDF_HintTables::ReadPrimaryHintStreamLength() const {
5008   if (!m_pLinearizedDict)
5009     return -1;
5010   CPDF_Array* pRange = m_pLinearizedDict->GetArray("H");
5011   if (!pRange)
5012     return -1;
5013   CPDF_Object* pStreamLen = pRange->GetElementValue(1);
5014   if (!pStreamLen)
5015     return -1;
5016   return pStreamLen->GetInteger();
5017 }
5018