1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/parser/cpdf_parser.h"
8 
9 #include <algorithm>
10 #include <utility>
11 #include <vector>
12 
13 #include "core/fpdfapi/parser/cpdf_array.h"
14 #include "core/fpdfapi/parser/cpdf_crypto_handler.h"
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_document.h"
17 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
18 #include "core/fpdfapi/parser/cpdf_number.h"
19 #include "core/fpdfapi/parser/cpdf_reference.h"
20 #include "core/fpdfapi/parser/cpdf_security_handler.h"
21 #include "core/fpdfapi/parser/cpdf_stream.h"
22 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
23 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
24 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
25 #include "core/fxcrt/autorestorer.h"
26 #include "core/fxcrt/cfx_memorystream.h"
27 #include "core/fxcrt/fx_extension.h"
28 #include "core/fxcrt/fx_safe_types.h"
29 #include "third_party/base/ptr_util.h"
30 #include "third_party/base/stl_util.h"
31 
32 namespace {
33 
34 // A limit on the size of the xref table. Theoretical limits are higher, but
35 // this may be large enough in practice.
36 const int32_t kMaxXRefSize = 1048576;
37 
38 constexpr FX_FILESIZE kPDFHeaderSize = 9;
39 
// Decodes the first |n| bytes at |p| as an unsigned big-endian integer.
// Returns 0 when |n| is not positive. The accumulator is 32 bits wide, so
// only the low 32 bits of the value survive when |n| is greater than 4.
uint32_t GetVarInt(const uint8_t* p, int32_t n) {
  uint32_t value = 0;
  for (int32_t idx = 0; idx < n; ++idx)
    value = (value << 8) | p[idx];
  return value;
}
46 
GetStreamNCount(const RetainPtr<CPDF_StreamAcc> & pObjStream)47 int32_t GetStreamNCount(const RetainPtr<CPDF_StreamAcc>& pObjStream) {
48   return pObjStream->GetDict()->GetIntegerFor("N");
49 }
50 
GetStreamFirst(const RetainPtr<CPDF_StreamAcc> & pObjStream)51 int32_t GetStreamFirst(const RetainPtr<CPDF_StreamAcc>& pObjStream) {
52   return pObjStream->GetDict()->GetIntegerFor("First");
53 }
54 
55 }  // namespace
56 
57 class CPDF_Parser::TrailerData {
58  public:
TrailerData()59   TrailerData() {}
~TrailerData()60   ~TrailerData() {}
61 
GetMainTrailer() const62   CPDF_Dictionary* GetMainTrailer() const { return main_trailer_.get(); }
63 
GetCombinedTrailer() const64   std::unique_ptr<CPDF_Dictionary> GetCombinedTrailer() const {
65     std::unique_ptr<CPDF_Dictionary> result =
66         ToDictionary(main_trailer_->Clone());
67 
68     // Info is optional.
69     uint32_t info_obj_num = GetInfoObjNum();
70     if (info_obj_num > 0)
71       result->SetNewFor<CPDF_Reference>("Info", nullptr, GetInfoObjNum());
72 
73     // Root is required.
74     result->SetNewFor<CPDF_Reference>("Root", nullptr, GetRootObjNum());
75     return result;
76   }
77 
SetMainTrailer(std::unique_ptr<CPDF_Dictionary> trailer)78   void SetMainTrailer(std::unique_ptr<CPDF_Dictionary> trailer) {
79     ASSERT(trailer);
80     main_trailer_ = std::move(trailer);
81     ApplyTrailer(main_trailer_.get());
82   }
83 
AppendTrailer(std::unique_ptr<CPDF_Dictionary> trailer)84   void AppendTrailer(std::unique_ptr<CPDF_Dictionary> trailer) {
85     ASSERT(trailer);
86     ApplyTrailer(trailer.get());
87   }
88 
Clear()89   void Clear() {
90     main_trailer_.reset();
91     last_info_obj_num_ = 0;
92     last_root_obj_num_ = 0;
93   }
94 
GetInfoObjNum() const95   uint32_t GetInfoObjNum() const {
96     const CPDF_Reference* pRef = ToReference(
97         GetMainTrailer() ? GetMainTrailer()->GetObjectFor("Info") : nullptr);
98     return pRef ? pRef->GetRefObjNum() : last_info_obj_num_;
99   }
100 
GetRootObjNum() const101   uint32_t GetRootObjNum() const {
102     const CPDF_Reference* pRef = ToReference(
103         GetMainTrailer() ? GetMainTrailer()->GetObjectFor("Root") : nullptr);
104     return pRef ? pRef->GetRefObjNum() : last_root_obj_num_;
105   }
106 
107  private:
ApplyTrailer(const CPDF_Dictionary * dict)108   void ApplyTrailer(const CPDF_Dictionary* dict) {
109     // The most recent Info object number contained in last added trailer.
110     // See PDF 1.7 spec, section 3.4.5 - Incremental Updates.
111     const auto* pRef = ToReference(dict->GetObjectFor("Info"));
112     if (pRef)
113       last_info_obj_num_ = pRef->GetRefObjNum();
114 
115     const auto* pRoot = ToReference(dict->GetObjectFor("Root"));
116     if (pRoot)
117       last_root_obj_num_ = pRoot->GetRefObjNum();
118   }
119 
120   std::unique_ptr<CPDF_Dictionary> main_trailer_;
121   uint32_t last_info_obj_num_ = 0;
122   uint32_t last_root_obj_num_ = 0;
123 };
124 
// Creates a parser that has not parsed anything yet: the syntax parser and
// the trailer bookkeeping object are allocated, the "has parsed" and
// "xref stream" flags are cleared and the file version is set to 0.
CPDF_Parser::CPDF_Parser()
    : m_pSyntax(pdfium::MakeUnique<CPDF_SyntaxParser>()),
      m_bHasParsed(false),
      m_bXRefStream(false),
      m_FileVersion(0),
      m_TrailerData(pdfium::MakeUnique<TrailerData>()) {}
131 
// Releases the security handler and clears the encrypt dictionary pointer
// (see ReleaseEncryptHandler()) before the members are destroyed.
CPDF_Parser::~CPDF_Parser() {
  ReleaseEncryptHandler();
}
135 
GetLastObjNum() const136 uint32_t CPDF_Parser::GetLastObjNum() const {
137   return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first;
138 }
139 
IsValidObjectNumber(uint32_t objnum) const140 bool CPDF_Parser::IsValidObjectNumber(uint32_t objnum) const {
141   return !m_ObjectInfo.empty() && objnum <= m_ObjectInfo.rbegin()->first;
142 }
143 
GetObjectPositionOrZero(uint32_t objnum) const144 FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(uint32_t objnum) const {
145   auto it = m_ObjectInfo.find(objnum);
146   return it != m_ObjectInfo.end() ? it->second.pos : 0;
147 }
148 
GetObjectType(uint32_t objnum) const149 CPDF_Parser::ObjectType CPDF_Parser::GetObjectType(uint32_t objnum) const {
150   ASSERT(IsValidObjectNumber(objnum));
151   auto it = m_ObjectInfo.find(objnum);
152   return it != m_ObjectInfo.end() ? it->second.type : ObjectType::kFree;
153 }
154 
GetObjectGenNum(uint32_t objnum) const155 uint16_t CPDF_Parser::GetObjectGenNum(uint32_t objnum) const {
156   ASSERT(IsValidObjectNumber(objnum));
157   auto it = m_ObjectInfo.find(objnum);
158   return it != m_ObjectInfo.end() ? it->second.gennum : 0;
159 }
160 
IsObjectFreeOrNull(uint32_t objnum) const161 bool CPDF_Parser::IsObjectFreeOrNull(uint32_t objnum) const {
162   switch (GetObjectType(objnum)) {
163     case ObjectType::kFree:
164     case ObjectType::kNull:
165       return true;
166     case ObjectType::kNotCompressed:
167     case ObjectType::kCompressed:
168       return false;
169   }
170   ASSERT(false);  // NOTREACHED();
171   return false;
172 }
173 
IsObjectFree(uint32_t objnum) const174 bool CPDF_Parser::IsObjectFree(uint32_t objnum) const {
175   return GetObjectType(objnum) == ObjectType::kFree;
176 }
177 
// Stores |pDict| as the encrypt dictionary consulted by SetEncryptHandler().
// Passing nullptr clears it.
void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) {
  m_pEncryptDict = pDict;
}
181 
// Returns the file stream held by the syntax parser.
RetainPtr<IFX_SeekableReadStream> CPDF_Parser::GetFileAccess() const {
  return m_pSyntax->GetFileAccess();
}
185 
ShrinkObjectMap(uint32_t objnum)186 void CPDF_Parser::ShrinkObjectMap(uint32_t objnum) {
187   if (objnum == 0) {
188     m_ObjectInfo.clear();
189     return;
190   }
191 
192   auto it = m_ObjectInfo.lower_bound(objnum);
193   while (it != m_ObjectInfo.end()) {
194     auto saved_it = it++;
195     m_ObjectInfo.erase(saved_it);
196   }
197 
198   if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1))
199     m_ObjectInfo[objnum - 1].pos = 0;
200 }
201 
InitSyntaxParser(const RetainPtr<IFX_SeekableReadStream> & file_access)202 bool CPDF_Parser::InitSyntaxParser(
203     const RetainPtr<IFX_SeekableReadStream>& file_access) {
204   const int32_t header_offset = GetHeaderOffset(file_access);
205   if (header_offset == kInvalidHeaderOffset)
206     return false;
207   if (file_access->GetSize() < header_offset + kPDFHeaderSize)
208     return false;
209 
210   m_pSyntax->InitParser(file_access, header_offset);
211   return ParseFileVersion();
212 }
213 
ParseFileVersion()214 bool CPDF_Parser::ParseFileVersion() {
215   m_FileVersion = 0;
216   uint8_t ch;
217   if (!m_pSyntax->GetCharAt(5, ch))
218     return false;
219 
220   if (std::isdigit(ch))
221     m_FileVersion = FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch)) * 10;
222 
223   if (!m_pSyntax->GetCharAt(7, ch))
224     return false;
225 
226   if (std::isdigit(ch))
227     m_FileVersion += FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
228   return true;
229 }
230 
StartParse(const RetainPtr<IFX_SeekableReadStream> & pFileAccess,CPDF_Document * pDocument)231 CPDF_Parser::Error CPDF_Parser::StartParse(
232     const RetainPtr<IFX_SeekableReadStream>& pFileAccess,
233     CPDF_Document* pDocument) {
234   if (!InitSyntaxParser(pFileAccess))
235     return FORMAT_ERROR;
236   return StartParseInternal(pDocument);
237 }
238 
StartParseInternal(CPDF_Document * pDocument)239 CPDF_Parser::Error CPDF_Parser::StartParseInternal(CPDF_Document* pDocument) {
240   ASSERT(!m_bHasParsed);
241   m_bHasParsed = true;
242   m_bXRefStream = false;
243 
244   m_pDocument = pDocument;
245 
246   bool bXRefRebuilt = false;
247 
248   m_LastXRefOffset = ParseStartXRef();
249 
250   if (m_LastXRefOffset > 0) {
251     if (!LoadAllCrossRefV4(m_LastXRefOffset) &&
252         !LoadAllCrossRefV5(m_LastXRefOffset)) {
253       if (!RebuildCrossRef())
254         return FORMAT_ERROR;
255 
256       bXRefRebuilt = true;
257       m_LastXRefOffset = 0;
258     }
259   } else {
260     if (!RebuildCrossRef())
261       return FORMAT_ERROR;
262 
263     bXRefRebuilt = true;
264   }
265   Error eRet = SetEncryptHandler();
266   if (eRet != SUCCESS)
267     return eRet;
268 
269   m_pDocument->LoadDoc();
270   if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
271     if (bXRefRebuilt)
272       return FORMAT_ERROR;
273 
274     ReleaseEncryptHandler();
275     if (!RebuildCrossRef())
276       return FORMAT_ERROR;
277 
278     eRet = SetEncryptHandler();
279     if (eRet != SUCCESS)
280       return eRet;
281 
282     m_pDocument->LoadDoc();
283     if (!m_pDocument->GetRoot())
284       return FORMAT_ERROR;
285   }
286   if (GetRootObjNum() == 0) {
287     ReleaseEncryptHandler();
288     if (!RebuildCrossRef() || GetRootObjNum() == 0)
289       return FORMAT_ERROR;
290 
291     eRet = SetEncryptHandler();
292     if (eRet != SUCCESS)
293       return eRet;
294   }
295   if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
296     CPDF_Reference* pMetadata =
297         ToReference(m_pDocument->GetRoot()->GetObjectFor("Metadata"));
298     if (pMetadata)
299       m_MetadataObjnum = pMetadata->GetRefObjNum();
300   }
301   return SUCCESS;
302 }
303 
ParseStartXRef()304 FX_FILESIZE CPDF_Parser::ParseStartXRef() {
305   static constexpr char kStartXRefKeyword[] = "startxref";
306   m_pSyntax->SetPos(m_pSyntax->m_FileLen - m_pSyntax->m_HeaderOffset -
307                     strlen(kStartXRefKeyword));
308   if (!m_pSyntax->BackwardsSearchToWord(kStartXRefKeyword, 4096))
309     return 0;
310 
311   // Skip "startxref" keyword.
312   m_pSyntax->GetKeyword();
313 
314   // Read XRef offset.
315   bool bNumber;
316   const ByteString xrefpos_str = m_pSyntax->GetNextWord(&bNumber);
317   if (!bNumber || xrefpos_str.IsEmpty())
318     return 0;
319 
320   const FX_SAFE_FILESIZE result = FXSYS_atoi64(xrefpos_str.c_str());
321   if (!result.IsValid() || result.ValueOrDie() >= GetFileAccess()->GetSize())
322     return 0;
323 
324   return result.ValueOrDie();
325 }
326 
SetEncryptHandler()327 CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() {
328   ReleaseEncryptHandler();
329   if (!GetTrailer())
330     return FORMAT_ERROR;
331 
332   CPDF_Object* pEncryptObj = GetTrailer()->GetObjectFor("Encrypt");
333   if (pEncryptObj) {
334     if (CPDF_Dictionary* pEncryptDict = pEncryptObj->AsDictionary()) {
335       SetEncryptDictionary(pEncryptDict);
336     } else if (CPDF_Reference* pRef = pEncryptObj->AsReference()) {
337       pEncryptObj = m_pDocument->GetOrParseIndirectObject(pRef->GetRefObjNum());
338       if (pEncryptObj)
339         SetEncryptDictionary(pEncryptObj->GetDict());
340     }
341   }
342 
343   if (m_pEncryptDict) {
344     ByteString filter = m_pEncryptDict->GetStringFor("Filter");
345     if (filter != "Standard")
346       return HANDLER_ERROR;
347 
348     std::unique_ptr<CPDF_SecurityHandler> pSecurityHandler =
349         pdfium::MakeUnique<CPDF_SecurityHandler>();
350     if (!pSecurityHandler->OnInit(m_pEncryptDict.Get(), GetIDArray(),
351                                   m_Password))
352       return PASSWORD_ERROR;
353 
354     m_pSecurityHandler = std::move(pSecurityHandler);
355   }
356   return SUCCESS;
357 }
358 
ReleaseEncryptHandler()359 void CPDF_Parser::ReleaseEncryptHandler() {
360   m_pSecurityHandler.reset();
361   SetEncryptDictionary(nullptr);
362 }
363 
GetObjectOffset(uint32_t objnum) const364 FX_FILESIZE CPDF_Parser::GetObjectOffset(uint32_t objnum) const {
365   if (!IsValidObjectNumber(objnum))
366     return 0;
367 
368   if (GetObjectType(objnum) == ObjectType::kNotCompressed)
369     return GetObjectPositionOrZero(objnum);
370 
371   if (GetObjectType(objnum) == ObjectType::kCompressed) {
372     FX_FILESIZE pos = GetObjectPositionOrZero(objnum);
373     return GetObjectPositionOrZero(pos);
374   }
375   return 0;
376 }
377 
378 // Ideally, all the cross reference entries should be verified.
379 // In reality, we rarely see well-formed cross references don't match
380 // with the objects. crbug/602650 showed a case where object numbers
381 // in the cross reference table are all off by one.
VerifyCrossRefV4()382 bool CPDF_Parser::VerifyCrossRefV4() {
383   for (const auto& it : m_ObjectInfo) {
384     if (it.second.pos == 0)
385       continue;
386     // Find the first non-zero position.
387     FX_FILESIZE SavedPos = m_pSyntax->GetPos();
388     m_pSyntax->SetPos(it.second.pos);
389     bool is_num = false;
390     ByteString num_str = m_pSyntax->GetNextWord(&is_num);
391     m_pSyntax->SetPos(SavedPos);
392     if (!is_num || num_str.IsEmpty() ||
393         FXSYS_atoui(num_str.c_str()) != it.first) {
394       // If the object number read doesn't match the one stored,
395       // something is wrong with the cross reference table.
396       return false;
397     }
398     return true;
399   }
400   return true;
401 }
402 
LoadAllCrossRefV4(FX_FILESIZE xrefpos)403 bool CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) {
404   if (!LoadCrossRefV4(xrefpos, true))
405     return false;
406 
407   std::unique_ptr<CPDF_Dictionary> trailer = LoadTrailerV4();
408   if (!trailer)
409     return false;
410 
411   m_TrailerData->SetMainTrailer(std::move(trailer));
412   int32_t xrefsize = GetDirectInteger(GetTrailer(), "Size");
413   if (xrefsize > 0 && xrefsize <= kMaxXRefSize)
414     ShrinkObjectMap(xrefsize);
415 
416   std::vector<FX_FILESIZE> CrossRefList;
417   std::vector<FX_FILESIZE> XRefStreamList;
418   std::set<FX_FILESIZE> seen_xrefpos;
419 
420   CrossRefList.push_back(xrefpos);
421   XRefStreamList.push_back(GetDirectInteger(GetTrailer(), "XRefStm"));
422   seen_xrefpos.insert(xrefpos);
423 
424   // When the trailer doesn't have Prev entry or Prev entry value is not
425   // numerical, GetDirectInteger() returns 0. Loading will end.
426   xrefpos = GetDirectInteger(GetTrailer(), "Prev");
427   while (xrefpos) {
428     // Check for circular references.
429     if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
430       return false;
431 
432     seen_xrefpos.insert(xrefpos);
433 
434     // SLOW ...
435     CrossRefList.insert(CrossRefList.begin(), xrefpos);
436     LoadCrossRefV4(xrefpos, true);
437 
438     std::unique_ptr<CPDF_Dictionary> pDict(LoadTrailerV4());
439     if (!pDict)
440       return false;
441 
442     xrefpos = GetDirectInteger(pDict.get(), "Prev");
443 
444     // SLOW ...
445     XRefStreamList.insert(XRefStreamList.begin(),
446                           pDict->GetIntegerFor("XRefStm"));
447     m_TrailerData->AppendTrailer(std::move(pDict));
448   }
449 
450   for (size_t i = 0; i < CrossRefList.size(); ++i) {
451     if (!LoadCrossRefV4(CrossRefList[i], false))
452       return false;
453 
454     if (XRefStreamList[i] && !LoadCrossRefV5(&XRefStreamList[i], false))
455       return false;
456 
457     if (i == 0 && !VerifyCrossRefV4())
458       return false;
459   }
460   return true;
461 }
462 
LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos)463 bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos) {
464   if (!LoadCrossRefV4(xrefpos, false))
465     return false;
466 
467   std::unique_ptr<CPDF_Dictionary> trailer = LoadTrailerV4();
468   if (!trailer)
469     return false;
470 
471   m_TrailerData->SetMainTrailer(std::move(trailer));
472   int32_t xrefsize = GetDirectInteger(GetTrailer(), "Size");
473   if (xrefsize == 0)
474     return false;
475 
476   std::vector<FX_FILESIZE> CrossRefList;
477   std::vector<FX_FILESIZE> XRefStreamList;
478   std::set<FX_FILESIZE> seen_xrefpos;
479 
480   CrossRefList.push_back(xrefpos);
481   XRefStreamList.push_back(GetDirectInteger(GetTrailer(), "XRefStm"));
482   seen_xrefpos.insert(xrefpos);
483 
484   xrefpos = GetDirectInteger(GetTrailer(), "Prev");
485   while (xrefpos) {
486     // Check for circular references.
487     if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
488       return false;
489 
490     seen_xrefpos.insert(xrefpos);
491 
492     // SLOW ...
493     CrossRefList.insert(CrossRefList.begin(), xrefpos);
494     LoadCrossRefV4(xrefpos, true);
495 
496     std::unique_ptr<CPDF_Dictionary> pDict(LoadTrailerV4());
497     if (!pDict)
498       return false;
499 
500     xrefpos = GetDirectInteger(pDict.get(), "Prev");
501 
502     // SLOW ...
503     XRefStreamList.insert(XRefStreamList.begin(),
504                           pDict->GetIntegerFor("XRefStm"));
505     m_TrailerData->AppendTrailer(std::move(pDict));
506   }
507 
508   for (size_t i = 1; i < CrossRefList.size(); ++i) {
509     if (!LoadCrossRefV4(CrossRefList[i], false))
510       return false;
511 
512     if (XRefStreamList[i] && !LoadCrossRefV5(&XRefStreamList[i], false))
513       return false;
514   }
515   return true;
516 }
517 
ParseAndAppendCrossRefSubsectionData(uint32_t start_objnum,uint32_t count,std::vector<CrossRefObjData> * out_objects)518 bool CPDF_Parser::ParseAndAppendCrossRefSubsectionData(
519     uint32_t start_objnum,
520     uint32_t count,
521     std::vector<CrossRefObjData>* out_objects) {
522   // Each entry shall be exactly 20 byte.
523   // A sample entry looks like:
524   // "0000000000 00007 f\r\n"
525   static constexpr int32_t kEntryConstSize = 20;
526 
527   if (!out_objects) {
528     FX_SAFE_FILESIZE pos = count;
529     pos *= kEntryConstSize;
530     pos += m_pSyntax->GetPos();
531     if (!pos.IsValid())
532       return false;
533     m_pSyntax->SetPos(pos.ValueOrDie());
534     return true;
535   }
536   const size_t start_obj_index = out_objects->size();
537   FX_SAFE_SIZE_T new_size = start_obj_index;
538   new_size += count;
539   if (!new_size.IsValid())
540     return false;
541 
542   if (new_size.ValueOrDie() > kMaxXRefSize)
543     return false;
544 
545   const size_t max_entries_in_file =
546       m_pSyntax->GetFileAccess()->GetSize() / kEntryConstSize;
547   if (new_size.ValueOrDie() > max_entries_in_file)
548     return false;
549 
550   out_objects->resize(new_size.ValueOrDie());
551 
552   std::vector<char> buf(1024 * kEntryConstSize + 1);
553   buf.back() = '\0';
554 
555   int32_t nBlocks = count / 1024 + 1;
556   for (int32_t block = 0; block < nBlocks; block++) {
557     int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
558     if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
559                               block_size * kEntryConstSize)) {
560       return false;
561     }
562 
563     for (int32_t i = 0; i < block_size; i++) {
564       CrossRefObjData& obj_data =
565           (*out_objects)[start_obj_index + block * 1024 + i];
566 
567       const uint32_t objnum = start_objnum + block * 1024 + i;
568 
569       obj_data.obj_num = objnum;
570 
571       ObjectInfo& info = obj_data.info;
572 
573       char* pEntry = &buf[i * kEntryConstSize];
574       if (pEntry[17] == 'f') {
575         info.pos = 0;
576         info.type = ObjectType::kFree;
577       } else {
578         const FX_SAFE_FILESIZE offset = FXSYS_atoi64(pEntry);
579         if (!offset.IsValid())
580           return false;
581 
582         if (offset.ValueOrDie() == 0) {
583           for (int32_t c = 0; c < 10; c++) {
584             if (!std::isdigit(pEntry[c]))
585               return false;
586           }
587         }
588 
589         info.pos = offset.ValueOrDie();
590 
591         // TODO(art-snake): The info.gennum is uint16_t, but version may be
592         // greated than max<uint16_t>. Needs solve this issue.
593         const int32_t version = FXSYS_atoi(pEntry + 11);
594         info.gennum = version;
595         info.type = ObjectType::kNotCompressed;
596       }
597     }
598   }
599   return true;
600 }
601 
ParseCrossRefV4(std::vector<CrossRefObjData> * out_objects)602 bool CPDF_Parser::ParseCrossRefV4(std::vector<CrossRefObjData>* out_objects) {
603   if (out_objects)
604     out_objects->clear();
605 
606   if (m_pSyntax->GetKeyword() != "xref")
607     return false;
608   std::vector<CrossRefObjData> result_objects;
609   while (1) {
610     FX_FILESIZE SavedPos = m_pSyntax->GetPos();
611     bool bIsNumber;
612     ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
613     if (word.IsEmpty()) {
614       return false;
615     }
616 
617     if (!bIsNumber) {
618       m_pSyntax->SetPos(SavedPos);
619       break;
620     }
621 
622     uint32_t start_objnum = FXSYS_atoui(word.c_str());
623     if (start_objnum >= kMaxObjectNumber)
624       return false;
625 
626     uint32_t count = m_pSyntax->GetDirectNum();
627     m_pSyntax->ToNextWord();
628     SavedPos = m_pSyntax->GetPos();
629 
630     if (!ParseAndAppendCrossRefSubsectionData(
631             start_objnum, count, out_objects ? &result_objects : nullptr)) {
632       return false;
633     }
634   }
635   if (out_objects)
636     *out_objects = std::move(result_objects);
637   return true;
638 }
639 
LoadCrossRefV4(FX_FILESIZE pos,bool bSkip)640 bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,
641                                  bool bSkip) {
642   m_pSyntax->SetPos(pos);
643   std::vector<CrossRefObjData> objects;
644   if (!ParseCrossRefV4(bSkip ? nullptr : &objects))
645     return false;
646 
647   MergeCrossRefObjectsData(objects);
648 
649   return true;
650 }
651 
MergeCrossRefObjectsData(const std::vector<CrossRefObjData> & objects)652 void CPDF_Parser::MergeCrossRefObjectsData(
653     const std::vector<CrossRefObjData>& objects) {
654   for (const auto& obj : objects) {
655     m_ObjectInfo[obj.obj_num] = obj.info;
656   }
657 }
658 
LoadAllCrossRefV5(FX_FILESIZE xrefpos)659 bool CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) {
660   if (!LoadCrossRefV5(&xrefpos, true))
661     return false;
662 
663   std::set<FX_FILESIZE> seen_xrefpos;
664   while (xrefpos) {
665     seen_xrefpos.insert(xrefpos);
666     if (!LoadCrossRefV5(&xrefpos, false))
667       return false;
668 
669     // Check for circular references.
670     if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
671       return false;
672   }
673   m_ObjectStreamMap.clear();
674   m_bXRefStream = true;
675   return true;
676 }
677 
RebuildCrossRef()678 bool CPDF_Parser::RebuildCrossRef() {
679   m_ObjectInfo.clear();
680   m_TrailerData->Clear();
681 
682   ParserState state = ParserState::kDefault;
683   int32_t inside_index = 0;
684   uint32_t objnum = 0;
685   uint32_t gennum = 0;
686   int32_t depth = 0;
687   const uint32_t kBufferSize = 4096;
688   std::vector<uint8_t> buffer(kBufferSize);
689 
690   FX_FILESIZE pos = m_pSyntax->m_HeaderOffset;
691   FX_FILESIZE start_pos = 0;
692   FX_FILESIZE start_pos1 = 0;
693   FX_FILESIZE last_obj = -1;
694   FX_FILESIZE last_xref = -1;
695   FX_FILESIZE last_trailer = -1;
696 
697   while (pos < m_pSyntax->m_FileLen) {
698     const FX_FILESIZE saved_pos = pos;
699     bool bOverFlow = false;
700     uint32_t size =
701         std::min((uint32_t)(m_pSyntax->m_FileLen - pos), kBufferSize);
702     if (!m_pSyntax->GetFileAccess()->ReadBlock(buffer.data(), pos, size))
703       break;
704 
705     for (uint32_t i = 0; i < size; i++) {
706       uint8_t byte = buffer[i];
707       switch (state) {
708         case ParserState::kDefault:
709           if (PDFCharIsWhitespace(byte)) {
710             state = ParserState::kWhitespace;
711           } else if (std::isdigit(byte)) {
712             --i;
713             state = ParserState::kWhitespace;
714           } else if (byte == '%') {
715             inside_index = 0;
716             state = ParserState::kComment;
717           } else if (byte == '(') {
718             state = ParserState::kString;
719             depth = 1;
720           } else if (byte == '<') {
721             inside_index = 1;
722             state = ParserState::kHexString;
723           } else if (byte == '\\') {
724             state = ParserState::kEscapedString;
725           } else if (byte == 't') {
726             state = ParserState::kTrailer;
727             inside_index = 1;
728           }
729           break;
730 
731         case ParserState::kWhitespace:
732           if (std::isdigit(byte)) {
733             start_pos = pos + i;
734             state = ParserState::kObjNum;
735             objnum = FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte));
736           } else if (byte == 't') {
737             state = ParserState::kTrailer;
738             inside_index = 1;
739           } else if (byte == 'x') {
740             state = ParserState::kXref;
741             inside_index = 1;
742           } else if (!PDFCharIsWhitespace(byte)) {
743             --i;
744             state = ParserState::kDefault;
745           }
746           break;
747 
748         case ParserState::kObjNum:
749           if (std::isdigit(byte)) {
750             objnum = objnum * 10 +
751                      FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte));
752           } else if (PDFCharIsWhitespace(byte)) {
753             state = ParserState::kPostObjNum;
754           } else {
755             --i;
756             state = ParserState::kEndObj;
757             inside_index = 0;
758           }
759           break;
760 
761         case ParserState::kPostObjNum:
762           if (std::isdigit(byte)) {
763             start_pos1 = pos + i;
764             state = ParserState::kGenNum;
765             gennum = FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte));
766           } else if (byte == 't') {
767             state = ParserState::kTrailer;
768             inside_index = 1;
769           } else if (!PDFCharIsWhitespace(byte)) {
770             --i;
771             state = ParserState::kDefault;
772           }
773           break;
774 
775         case ParserState::kGenNum:
776           if (std::isdigit(byte)) {
777             gennum = gennum * 10 +
778                      FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte));
779           } else if (PDFCharIsWhitespace(byte)) {
780             state = ParserState::kPostGenNum;
781           } else {
782             --i;
783             state = ParserState::kDefault;
784           }
785           break;
786 
787         case ParserState::kPostGenNum:
788           if (byte == 'o') {
789             state = ParserState::kBeginObj;
790             inside_index = 1;
791           } else if (std::isdigit(byte)) {
792             objnum = gennum;
793             gennum = FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte));
794             start_pos = start_pos1;
795             start_pos1 = pos + i;
796             state = ParserState::kGenNum;
797           } else if (byte == 't') {
798             state = ParserState::kTrailer;
799             inside_index = 1;
800           } else if (!PDFCharIsWhitespace(byte)) {
801             --i;
802             state = ParserState::kDefault;
803           }
804           break;
805 
806         case ParserState::kBeginObj:
807           switch (inside_index) {
808             case 1:
809               if (byte != 'b') {
810                 --i;
811                 state = ParserState::kDefault;
812               } else {
813                 inside_index++;
814               }
815               break;
816             case 2:
817               if (byte != 'j') {
818                 --i;
819                 state = ParserState::kDefault;
820               } else {
821                 inside_index++;
822               }
823               break;
824             case 3:
825               if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
826                 FX_FILESIZE obj_pos = start_pos - m_pSyntax->m_HeaderOffset;
827                 last_obj = start_pos;
828                 FX_FILESIZE obj_end = 0;
829                 std::unique_ptr<CPDF_Object> pObject =
830                     ParseIndirectObjectAtByStrict(m_pDocument.Get(), obj_pos,
831                                                   objnum, &obj_end);
832                 if (CPDF_Stream* pStream = ToStream(pObject.get())) {
833                   if (CPDF_Dictionary* pDict = pStream->GetDict()) {
834                     if ((pDict->KeyExist("Type")) &&
835                         (pDict->GetStringFor("Type") == "XRef" &&
836                          pDict->KeyExist("Size"))) {
837                       CPDF_Object* pRoot = pDict->GetObjectFor("Root");
838                       if (pRoot && pRoot->GetDict() &&
839                           pRoot->GetDict()->GetObjectFor("Pages")) {
840                         m_TrailerData->SetMainTrailer(
841                             ToDictionary(pDict->Clone()));
842                       }
843                     }
844                   }
845                 }
846 
847                 FX_FILESIZE offset = 0;
848                 m_pSyntax->SetPos(obj_pos);
849                 offset = m_pSyntax->FindTag("obj", 0);
850                 if (offset == -1)
851                   offset = 0;
852                 else
853                   offset += 3;
854 
855                 FX_FILESIZE nLen = obj_end - obj_pos - offset;
856                 if ((uint32_t)nLen > size - i) {
857                   pos = obj_end + m_pSyntax->m_HeaderOffset;
858                   bOverFlow = true;
859                 } else {
860                   i += (uint32_t)nLen;
861                 }
862 
863                 if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) &&
864                     m_ObjectInfo[objnum].pos) {
865                   if (pObject) {
866                     m_ObjectInfo[objnum].pos = obj_pos;
867                     m_ObjectInfo[objnum].gennum = gennum;
868                   }
869                 } else {
870                   m_ObjectInfo[objnum].pos = obj_pos;
871                   m_ObjectInfo[objnum].type = ObjectType::kNotCompressed;
872                   m_ObjectInfo[objnum].gennum = gennum;
873                 }
874               }
875               --i;
876               state = ParserState::kDefault;
877               break;
878           }
879           break;
880 
881         case ParserState::kTrailer:
882           if (inside_index == 7) {
883             if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
884               last_trailer = pos + i - 7;
885               m_pSyntax->SetPos(pos + i - m_pSyntax->m_HeaderOffset);
886 
887               std::unique_ptr<CPDF_Object> pObj =
888                   m_pSyntax->GetObjectBody(m_pDocument.Get());
889               if (pObj) {
890                 if (pObj->IsDictionary() || pObj->AsStream()) {
891                   CPDF_Stream* pStream = pObj->AsStream();
892                   if (CPDF_Dictionary* pTrailer =
893                           pStream ? pStream->GetDict() : pObj->AsDictionary()) {
894                     if (GetTrailer()) {
895                       CPDF_Object* pRoot = pTrailer->GetObjectFor("Root");
896                       CPDF_Reference* pRef = ToReference(pRoot);
897                       if (!pRoot ||
898                           (pRef && IsValidObjectNumber(pRef->GetRefObjNum()) &&
899                            m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) {
900                         auto it = pTrailer->begin();
901                         while (it != pTrailer->end()) {
902                           const ByteString& key = it->first;
903                           CPDF_Object* pElement = it->second.get();
904                           ++it;
905                           uint32_t dwObjNum =
906                               pElement ? pElement->GetObjNum() : 0;
907                           if (dwObjNum) {
908                             GetTrailer()->SetNewFor<CPDF_Reference>(
909                                 key, m_pDocument.Get(), dwObjNum);
910                           } else {
911                             GetTrailer()->SetFor(key, pElement->Clone());
912                           }
913                         }
914                       }
915                     } else {
916                       m_TrailerData->SetMainTrailer(
917                           ToDictionary(pObj->IsStream() ? pTrailer->Clone()
918                                                         : std::move(pObj)));
919 
920                       FX_FILESIZE dwSavePos = m_pSyntax->GetPos();
921                       ByteString strWord = m_pSyntax->GetKeyword();
922                       if (!strWord.Compare("startxref")) {
923                         bool bNumber;
924                         ByteString bsOffset = m_pSyntax->GetNextWord(&bNumber);
925                         if (bNumber)
926                           m_LastXRefOffset = FXSYS_atoi(bsOffset.c_str());
927                       }
928                       m_pSyntax->SetPos(dwSavePos);
929                     }
930                   }
931                 }
932               }
933             }
934             --i;
935             state = ParserState::kDefault;
936           } else if (byte == "trailer"[inside_index]) {
937             inside_index++;
938           } else {
939             --i;
940             state = ParserState::kDefault;
941           }
942           break;
943 
944         case ParserState::kXref:
945           if (inside_index == 4) {
946             last_xref = pos + i - 4;
947             state = ParserState::kWhitespace;
948           } else if (byte == "xref"[inside_index]) {
949             inside_index++;
950           } else {
951             --i;
952             state = ParserState::kDefault;
953           }
954           break;
955 
956         case ParserState::kComment:
957           if (PDFCharIsLineEnding(byte))
958             state = ParserState::kDefault;
959           break;
960 
961         case ParserState::kString:
962           if (byte == ')') {
963             if (depth > 0)
964               depth--;
965           } else if (byte == '(') {
966             depth++;
967           }
968 
969           if (!depth)
970             state = ParserState::kDefault;
971           break;
972 
973         case ParserState::kHexString:
974           if (byte == '>' || (byte == '<' && inside_index == 1))
975             state = ParserState::kDefault;
976           inside_index = 0;
977           break;
978 
979         case ParserState::kEscapedString:
980           if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) {
981             --i;
982             state = ParserState::kDefault;
983           }
984           break;
985 
986         case ParserState::kEndObj:
987           if (PDFCharIsWhitespace(byte)) {
988             state = ParserState::kDefault;
989           } else if (byte == '%' || byte == '(' || byte == '<' ||
990                      byte == '\\') {
991             state = ParserState::kDefault;
992             --i;
993           } else if (inside_index == 6) {
994             state = ParserState::kDefault;
995             --i;
996           } else if (byte == "endobj"[inside_index]) {
997             inside_index++;
998           }
999           break;
1000       }
1001 
1002       if (bOverFlow) {
1003         size = 0;
1004         break;
1005       }
1006     }
1007     pos += size;
1008 
1009     // If the position has not changed at all or went backwards in a loop
1010     // iteration, then break out to prevent infinite looping.
1011     if (pos <= saved_pos)
1012       break;
1013   }
1014 
1015   if (last_xref != -1 && last_xref > last_obj)
1016     last_trailer = last_xref;
1017   else if (last_trailer == -1 || last_xref < last_obj)
1018     last_trailer = m_pSyntax->m_FileLen;
1019 
1020   return GetTrailer() && !m_ObjectInfo.empty();
1021 }
1022 
LoadCrossRefV5(FX_FILESIZE * pos,bool bMainXRef)1023 bool CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, bool bMainXRef) {
1024   std::unique_ptr<CPDF_Object> pObject(
1025       ParseIndirectObjectAt(m_pDocument.Get(), *pos, 0));
1026   if (!pObject)
1027     return false;
1028 
1029   uint32_t objnum = pObject->GetObjNum();
1030   if (!objnum)
1031     return false;
1032 
1033   CPDF_Object* pUnownedObject = pObject.get();
1034   if (m_pDocument) {
1035     const CPDF_Dictionary* pRootDict = m_pDocument->GetRoot();
1036     if (pRootDict && pRootDict->GetObjNum() == objnum)
1037       return false;
1038     if (!m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
1039             objnum, std::move(pObject))) {
1040       return false;
1041     }
1042   }
1043 
1044   CPDF_Stream* pStream = pUnownedObject->AsStream();
1045   if (!pStream)
1046     return false;
1047 
1048   CPDF_Dictionary* pDict = pStream->GetDict();
1049   *pos = pDict->GetIntegerFor("Prev");
1050   int32_t size = pDict->GetIntegerFor("Size");
1051   if (size < 0)
1052     return false;
1053 
1054   std::unique_ptr<CPDF_Dictionary> pNewTrailer = ToDictionary(pDict->Clone());
1055   if (bMainXRef) {
1056     m_TrailerData->SetMainTrailer(std::move(pNewTrailer));
1057     ShrinkObjectMap(size);
1058     for (auto& it : m_ObjectInfo)
1059       it.second.type = ObjectType::kFree;
1060   } else {
1061     m_TrailerData->AppendTrailer(std::move(pNewTrailer));
1062   }
1063 
1064   std::vector<std::pair<int32_t, int32_t>> arrIndex;
1065   CPDF_Array* pArray = pDict->GetArrayFor("Index");
1066   if (pArray) {
1067     for (size_t i = 0; i < pArray->GetCount() / 2; i++) {
1068       CPDF_Object* pStartNumObj = pArray->GetObjectAt(i * 2);
1069       CPDF_Object* pCountObj = pArray->GetObjectAt(i * 2 + 1);
1070 
1071       if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) {
1072         int nStartNum = pStartNumObj->GetInteger();
1073         int nCount = pCountObj->GetInteger();
1074         if (nStartNum >= 0 && nCount > 0)
1075           arrIndex.push_back(std::make_pair(nStartNum, nCount));
1076       }
1077     }
1078   }
1079 
1080   if (arrIndex.size() == 0)
1081     arrIndex.push_back(std::make_pair(0, size));
1082 
1083   pArray = pDict->GetArrayFor("W");
1084   if (!pArray)
1085     return false;
1086 
1087   std::vector<uint32_t> WidthArray;
1088   FX_SAFE_UINT32 dwAccWidth = 0;
1089   for (size_t i = 0; i < pArray->GetCount(); ++i) {
1090     WidthArray.push_back(pArray->GetIntegerAt(i));
1091     dwAccWidth += WidthArray[i];
1092   }
1093 
1094   if (!dwAccWidth.IsValid() || WidthArray.size() < 3)
1095     return false;
1096 
1097   uint32_t totalWidth = dwAccWidth.ValueOrDie();
1098   auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pStream);
1099   pAcc->LoadAllDataFiltered();
1100 
1101   const uint8_t* pData = pAcc->GetData();
1102   uint32_t dwTotalSize = pAcc->GetSize();
1103   uint32_t segindex = 0;
1104   for (uint32_t i = 0; i < arrIndex.size(); i++) {
1105     int32_t startnum = arrIndex[i].first;
1106     if (startnum < 0)
1107       continue;
1108 
1109     uint32_t count = pdfium::base::checked_cast<uint32_t>(arrIndex[i].second);
1110     FX_SAFE_UINT32 dwCaculatedSize = segindex;
1111     dwCaculatedSize += count;
1112     dwCaculatedSize *= totalWidth;
1113     if (!dwCaculatedSize.IsValid() ||
1114         dwCaculatedSize.ValueOrDie() > dwTotalSize) {
1115       continue;
1116     }
1117 
1118     const uint8_t* segstart = pData + segindex * totalWidth;
1119     FX_SAFE_UINT32 dwMaxObjNum = startnum;
1120     dwMaxObjNum += count;
1121     uint32_t dwV5Size = m_ObjectInfo.empty() ? 0 : GetLastObjNum() + 1;
1122     if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size)
1123       continue;
1124 
1125     for (uint32_t j = 0; j < count; j++) {
1126       ObjectType type = ObjectType::kNotCompressed;
1127       const uint8_t* entrystart = segstart + j * totalWidth;
1128       if (WidthArray[0]) {
1129         const int cross_ref_stream_obj_type =
1130             GetVarInt(entrystart, WidthArray[0]);
1131         type = GetObjectTypeFromCrossRefStreamType(cross_ref_stream_obj_type);
1132       }
1133 
1134       if (GetObjectType(startnum + j) == ObjectType::kNull) {
1135         FX_FILESIZE offset =
1136             GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
1137         m_ObjectInfo[startnum + j].pos = offset;
1138         continue;
1139       }
1140 
1141       if (GetObjectType(startnum + j) != ObjectType::kFree)
1142         continue;
1143 
1144       ObjectInfo& info = m_ObjectInfo[startnum + j];
1145 
1146       info.type = type;
1147       if (type == ObjectType::kFree) {
1148         info.pos = 0;
1149       } else {
1150         const FX_FILESIZE entry_value =
1151             GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
1152         if (type == ObjectType::kNotCompressed) {
1153           const auto object_offset = entry_value;
1154           info.pos = object_offset;
1155         } else {
1156           const auto archive_obj_num = entry_value;
1157           info.archive_obj_num = archive_obj_num;
1158           if (archive_obj_num < 0 || !IsValidObjectNumber(archive_obj_num))
1159             return false;
1160           m_ObjectInfo[archive_obj_num].type = ObjectType::kNull;
1161         }
1162       }
1163     }
1164     segindex += count;
1165   }
1166   return true;
1167 }
1168 
GetIDArray() const1169 const CPDF_Array* CPDF_Parser::GetIDArray() const {
1170   return GetTrailer() ? GetTrailer()->GetArrayFor("ID") : nullptr;
1171 }
1172 
GetTrailer() const1173 CPDF_Dictionary* CPDF_Parser::GetTrailer() const {
1174   return m_TrailerData->GetMainTrailer();
1175 }
1176 
GetCombinedTrailer() const1177 std::unique_ptr<CPDF_Dictionary> CPDF_Parser::GetCombinedTrailer() const {
1178   return m_TrailerData->GetCombinedTrailer();
1179 }
1180 
GetInfoObjNum()1181 uint32_t CPDF_Parser::GetInfoObjNum() {
1182   return m_TrailerData->GetInfoObjNum();
1183 }
1184 
GetRootObjNum()1185 uint32_t CPDF_Parser::GetRootObjNum() {
1186   return m_TrailerData->GetRootObjNum();
1187 }
1188 
ParseIndirectObject(CPDF_IndirectObjectHolder * pObjList,uint32_t objnum)1189 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObject(
1190     CPDF_IndirectObjectHolder* pObjList,
1191     uint32_t objnum) {
1192   if (!IsValidObjectNumber(objnum))
1193     return nullptr;
1194 
1195   // Prevent circular parsing the same object.
1196   if (pdfium::ContainsKey(m_ParsingObjNums, objnum))
1197     return nullptr;
1198 
1199   pdfium::ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums, objnum);
1200   if (GetObjectType(objnum) == ObjectType::kNotCompressed ||
1201       GetObjectType(objnum) == ObjectType::kNull) {
1202     FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
1203     if (pos <= 0)
1204       return nullptr;
1205     return ParseIndirectObjectAt(pObjList, pos, objnum);
1206   }
1207   if (GetObjectType(objnum) != ObjectType::kCompressed)
1208     return nullptr;
1209 
1210   RetainPtr<CPDF_StreamAcc> pObjStream =
1211       GetObjectStream(m_ObjectInfo[objnum].pos);
1212   if (!pObjStream)
1213     return nullptr;
1214 
1215   auto file = pdfium::MakeRetain<CFX_MemoryStream>(
1216       const_cast<uint8_t*>(pObjStream->GetData()),
1217       static_cast<size_t>(pObjStream->GetSize()), false);
1218   CPDF_SyntaxParser syntax;
1219   syntax.InitParser(file, 0);
1220   const int32_t offset = GetStreamFirst(pObjStream);
1221 
1222   // Read object numbers from |pObjStream| into a cache.
1223   if (!pdfium::ContainsKey(m_ObjCache, pObjStream)) {
1224     for (int32_t i = GetStreamNCount(pObjStream); i > 0; --i) {
1225       uint32_t thisnum = syntax.GetDirectNum();
1226       uint32_t thisoff = syntax.GetDirectNum();
1227       m_ObjCache[pObjStream][thisnum] = thisoff;
1228     }
1229   }
1230 
1231   const auto it = m_ObjCache[pObjStream].find(objnum);
1232   if (it == m_ObjCache[pObjStream].end())
1233     return nullptr;
1234 
1235   syntax.SetPos(offset + it->second);
1236   return syntax.GetObjectBody(pObjList);
1237 }
1238 
GetObjectStream(uint32_t objnum)1239 RetainPtr<CPDF_StreamAcc> CPDF_Parser::GetObjectStream(uint32_t objnum) {
1240   auto it = m_ObjectStreamMap.find(objnum);
1241   if (it != m_ObjectStreamMap.end())
1242     return it->second;
1243 
1244   if (!m_pDocument)
1245     return nullptr;
1246 
1247   const CPDF_Stream* pStream =
1248       ToStream(m_pDocument->GetOrParseIndirectObject(objnum));
1249   if (!pStream)
1250     return nullptr;
1251 
1252   auto pStreamAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pStream);
1253   pStreamAcc->LoadAllDataFiltered();
1254   m_ObjectStreamMap[objnum] = pStreamAcc;
1255   return pStreamAcc;
1256 }
1257 
ParseIndirectObjectAt(CPDF_IndirectObjectHolder * pObjList,FX_FILESIZE pos,uint32_t objnum)1258 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAt(
1259     CPDF_IndirectObjectHolder* pObjList,
1260     FX_FILESIZE pos,
1261     uint32_t objnum) {
1262   return ParseIndirectObjectAtInternal(
1263       pObjList, pos, objnum, CPDF_SyntaxParser::ParseType::kLoose, nullptr);
1264 }
1265 
ParseIndirectObjectAtInternal(CPDF_IndirectObjectHolder * pObjList,FX_FILESIZE pos,uint32_t objnum,CPDF_SyntaxParser::ParseType parse_type,FX_FILESIZE * pResultPos)1266 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAtInternal(
1267     CPDF_IndirectObjectHolder* pObjList,
1268     FX_FILESIZE pos,
1269     uint32_t objnum,
1270     CPDF_SyntaxParser::ParseType parse_type,
1271     FX_FILESIZE* pResultPos) {
1272   const FX_FILESIZE saved_pos = m_pSyntax->GetPos();
1273   m_pSyntax->SetPos(pos);
1274   auto result = m_pSyntax->GetIndirectObject(pObjList, parse_type);
1275 
1276   if (pResultPos)
1277     *pResultPos = m_pSyntax->GetPos();
1278   m_pSyntax->SetPos(saved_pos);
1279 
1280   if (result && objnum && result->GetObjNum() != objnum)
1281     return nullptr;
1282 
1283   const bool should_decrypt = m_pSecurityHandler &&
1284                               m_pSecurityHandler->GetCryptoHandler() &&
1285                               objnum != m_MetadataObjnum;
1286   if (should_decrypt)
1287     result = m_pSecurityHandler->GetCryptoHandler()->DecryptObjectTree(
1288         std::move(result));
1289 
1290   return result;
1291 }
1292 
ParseIndirectObjectAtByStrict(CPDF_IndirectObjectHolder * pObjList,FX_FILESIZE pos,uint32_t objnum,FX_FILESIZE * pResultPos)1293 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAtByStrict(
1294     CPDF_IndirectObjectHolder* pObjList,
1295     FX_FILESIZE pos,
1296     uint32_t objnum,
1297     FX_FILESIZE* pResultPos) {
1298   return ParseIndirectObjectAtInternal(
1299       pObjList, pos, objnum, CPDF_SyntaxParser::ParseType::kStrict, pResultPos);
1300 }
1301 
GetFirstPageNo() const1302 uint32_t CPDF_Parser::GetFirstPageNo() const {
1303   return m_pLinearized ? m_pLinearized->GetFirstPageNo() : 0;
1304 }
1305 
LoadTrailerV4()1306 std::unique_ptr<CPDF_Dictionary> CPDF_Parser::LoadTrailerV4() {
1307   if (m_pSyntax->GetKeyword() != "trailer")
1308     return nullptr;
1309 
1310   return ToDictionary(m_pSyntax->GetObjectBody(m_pDocument.Get()));
1311 }
1312 
GetPermissions() const1313 uint32_t CPDF_Parser::GetPermissions() const {
1314   if (!m_pSecurityHandler)
1315     return 0xFFFFFFFF;
1316 
1317   uint32_t dwPermission = m_pSecurityHandler->GetPermissions();
1318   if (m_pEncryptDict && m_pEncryptDict->GetStringFor("Filter") == "Standard") {
1319     // See PDF Reference 1.7, page 123, table 3.20.
1320     dwPermission &= 0xFFFFFFFC;
1321     dwPermission |= 0xFFFFF0C0;
1322   }
1323   return dwPermission;
1324 }
1325 
ParseLinearizedHeader()1326 std::unique_ptr<CPDF_LinearizedHeader> CPDF_Parser::ParseLinearizedHeader() {
1327   return CPDF_LinearizedHeader::Parse(m_pSyntax.get());
1328 }
1329 
StartLinearizedParse(const RetainPtr<IFX_SeekableReadStream> & pFileAccess,CPDF_Document * pDocument)1330 CPDF_Parser::Error CPDF_Parser::StartLinearizedParse(
1331     const RetainPtr<IFX_SeekableReadStream>& pFileAccess,
1332     CPDF_Document* pDocument) {
1333   ASSERT(!m_bHasParsed);
1334   m_bXRefStream = false;
1335   m_LastXRefOffset = 0;
1336 
1337   if (!InitSyntaxParser(pFileAccess))
1338     return FORMAT_ERROR;
1339 
1340   m_pLinearized = ParseLinearizedHeader();
1341   if (!m_pLinearized)
1342     return StartParseInternal(std::move(pDocument));
1343 
1344   m_bHasParsed = true;
1345   m_pDocument = pDocument;
1346 
1347   m_LastXRefOffset = m_pLinearized->GetLastXRefOffset();
1348   FX_FILESIZE dwFirstXRefOffset = m_LastXRefOffset;
1349   bool bXRefRebuilt = false;
1350   bool bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, false);
1351   if (!bLoadV4 && !LoadCrossRefV5(&dwFirstXRefOffset, true)) {
1352     if (!RebuildCrossRef())
1353       return FORMAT_ERROR;
1354 
1355     bXRefRebuilt = true;
1356     m_LastXRefOffset = 0;
1357   }
1358   if (bLoadV4) {
1359     std::unique_ptr<CPDF_Dictionary> trailer = LoadTrailerV4();
1360     if (!trailer)
1361       return SUCCESS;
1362 
1363     m_TrailerData->SetMainTrailer(std::move(trailer));
1364     int32_t xrefsize = GetDirectInteger(GetTrailer(), "Size");
1365     if (xrefsize > 0)
1366       ShrinkObjectMap(xrefsize);
1367   }
1368 
1369   Error eRet = SetEncryptHandler();
1370   if (eRet != SUCCESS)
1371     return eRet;
1372 
1373   m_pDocument->LoadLinearizedDoc(m_pLinearized.get());
1374   if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
1375     if (bXRefRebuilt)
1376       return FORMAT_ERROR;
1377 
1378     ReleaseEncryptHandler();
1379     if (!RebuildCrossRef())
1380       return FORMAT_ERROR;
1381 
1382     eRet = SetEncryptHandler();
1383     if (eRet != SUCCESS)
1384       return eRet;
1385 
1386     m_pDocument->LoadLinearizedDoc(m_pLinearized.get());
1387     if (!m_pDocument->GetRoot())
1388       return FORMAT_ERROR;
1389   }
1390 
1391   if (GetRootObjNum() == 0) {
1392     ReleaseEncryptHandler();
1393     if (!RebuildCrossRef() || GetRootObjNum() == 0)
1394       return FORMAT_ERROR;
1395 
1396     eRet = SetEncryptHandler();
1397     if (eRet != SUCCESS)
1398       return eRet;
1399   }
1400 
1401   if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
1402     if (CPDF_Reference* pMetadata =
1403             ToReference(m_pDocument->GetRoot()->GetObjectFor("Metadata")))
1404       m_MetadataObjnum = pMetadata->GetRefObjNum();
1405   }
1406   return SUCCESS;
1407 }
1408 
LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos)1409 bool CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) {
1410   if (!LoadCrossRefV5(&xrefpos, false))
1411     return false;
1412 
1413   std::set<FX_FILESIZE> seen_xrefpos;
1414   while (xrefpos) {
1415     seen_xrefpos.insert(xrefpos);
1416     if (!LoadCrossRefV5(&xrefpos, false))
1417       return false;
1418 
1419     // Check for circular references.
1420     if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
1421       return false;
1422   }
1423   m_ObjectStreamMap.clear();
1424   m_bXRefStream = true;
1425   return true;
1426 }
1427 
LoadLinearizedMainXRefTable()1428 CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
1429   const FX_SAFE_FILESIZE main_xref_offset = GetTrailer()->GetIntegerFor("Prev");
1430   if (!main_xref_offset.IsValid())
1431     return FORMAT_ERROR;
1432 
1433   if (main_xref_offset.ValueOrDie() == 0)
1434     return SUCCESS;
1435 
1436   const AutoRestorer<uint32_t> save_metadata_objnum(&m_MetadataObjnum);
1437   m_MetadataObjnum = 0;
1438   m_ObjectStreamMap.clear();
1439   m_ObjCache.clear();
1440 
1441   if (!LoadLinearizedAllCrossRefV4(main_xref_offset.ValueOrDie()) &&
1442       !LoadLinearizedAllCrossRefV5(main_xref_offset.ValueOrDie())) {
1443     m_LastXRefOffset = 0;
1444     return FORMAT_ERROR;
1445   }
1446 
1447   return SUCCESS;
1448 }
1449 
GetObjectTypeFromCrossRefStreamType(int cross_ref_stream_type) const1450 CPDF_Parser::ObjectType CPDF_Parser::GetObjectTypeFromCrossRefStreamType(
1451     int cross_ref_stream_type) const {
1452   switch (cross_ref_stream_type) {
1453     case 0:
1454       return CPDF_Parser::ObjectType::kFree;
1455     case 1:
1456       return CPDF_Parser::ObjectType::kNotCompressed;
1457     case 2:
1458       return CPDF_Parser::ObjectType::kCompressed;
1459     default:
1460       return CPDF_Parser::ObjectType::kNull;
1461   }
1462 }
1463