1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/parser/cpdf_parser.h"
8 
9 #include <algorithm>
10 #include <utility>
11 #include <vector>
12 
13 #include "core/fpdfapi/parser/cpdf_array.h"
14 #include "core/fpdfapi/parser/cpdf_crypto_handler.h"
15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
16 #include "core/fpdfapi/parser/cpdf_document.h"
17 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
18 #include "core/fpdfapi/parser/cpdf_number.h"
19 #include "core/fpdfapi/parser/cpdf_object_stream.h"
20 #include "core/fpdfapi/parser/cpdf_read_validator.h"
21 #include "core/fpdfapi/parser/cpdf_reference.h"
22 #include "core/fpdfapi/parser/cpdf_security_handler.h"
23 #include "core/fpdfapi/parser/cpdf_stream.h"
24 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
25 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
26 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
27 #include "core/fxcrt/autorestorer.h"
28 #include "core/fxcrt/fx_extension.h"
29 #include "core/fxcrt/fx_safe_types.h"
30 #include "third_party/base/ptr_util.h"
31 #include "third_party/base/stl_util.h"
32 
33 namespace {
34 
// Upper bound on the number of cross-reference entries this parser accepts
// (2^20). Theoretical limits are higher, but this may be large enough in
// practice.
constexpr int32_t kMaxXRefSize = 1 << 20;
38 
// Length, in bytes, of a PDF header line such as "%PDF-1.7\n". Used as the
// minimum file size after the header offset and as the smallest plausible
// cross-reference offset.
constexpr FX_FILESIZE kPDFHeaderSize = 9;
41 
// Decodes the first |n| bytes at |p| as a big-endian unsigned integer.
// Returns 0 when |n| is not positive. When |n| exceeds 4, the higher-order
// bytes are shifted out and only the low 32 bits are kept.
uint32_t GetVarInt(const uint8_t* p, int32_t n) {
  uint32_t value = 0;
  const uint8_t* cursor = p;
  for (int32_t remaining = n; remaining > 0; --remaining) {
    value = (value << 8) | *cursor;
    ++cursor;
  }
  return value;
}
48 
// Minimal ParsedObjectsHolder used when the parser is constructed without an
// external holder. Its TryInit() always reports success.
class ObjectsHolderStub final : public CPDF_Parser::ParsedObjectsHolder {
 public:
  ObjectsHolderStub() = default;
  ~ObjectsHolderStub() override = default;
  // Nothing to initialize for the stub.
  bool TryInit() override { return true; }
};
55 
56 }  // namespace
57 
CPDF_Parser(ParsedObjectsHolder * holder)58 CPDF_Parser::CPDF_Parser(ParsedObjectsHolder* holder)
59     : m_pObjectsHolder(holder),
60       m_CrossRefTable(pdfium::MakeUnique<CPDF_CrossRefTable>()) {
61   if (!holder) {
62     m_pOwnedObjectsHolder = pdfium::MakeUnique<ObjectsHolderStub>();
63     m_pObjectsHolder = m_pOwnedObjectsHolder.get();
64   }
65 }
66 
// Default constructor: delegates with a null holder, so the parser owns its
// own stub objects holder.
CPDF_Parser::CPDF_Parser() : CPDF_Parser(nullptr) {}
68 
// Drops the security handler before the remaining members are destroyed.
CPDF_Parser::~CPDF_Parser() {
  ReleaseEncryptHandler();
}
72 
GetLastObjNum() const73 uint32_t CPDF_Parser::GetLastObjNum() const {
74   return m_CrossRefTable->objects_info().empty()
75              ? 0
76              : m_CrossRefTable->objects_info().rbegin()->first;
77 }
78 
IsValidObjectNumber(uint32_t objnum) const79 bool CPDF_Parser::IsValidObjectNumber(uint32_t objnum) const {
80   return objnum <= GetLastObjNum();
81 }
82 
GetObjectPositionOrZero(uint32_t objnum) const83 FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(uint32_t objnum) const {
84   const auto* info = m_CrossRefTable->GetObjectInfo(objnum);
85   return (info && info->type == ObjectType::kNormal) ? info->pos : 0;
86 }
87 
GetObjectType(uint32_t objnum) const88 CPDF_Parser::ObjectType CPDF_Parser::GetObjectType(uint32_t objnum) const {
89   ASSERT(IsValidObjectNumber(objnum));
90   const auto* info = m_CrossRefTable->GetObjectInfo(objnum);
91   return info ? info->type : ObjectType::kFree;
92 }
93 
IsObjectFreeOrNull(uint32_t objnum) const94 bool CPDF_Parser::IsObjectFreeOrNull(uint32_t objnum) const {
95   switch (GetObjectType(objnum)) {
96     case ObjectType::kFree:
97     case ObjectType::kNull:
98       return true;
99     case ObjectType::kNotCompressed:
100     case ObjectType::kCompressed:
101       return false;
102   }
103   NOTREACHED();
104   return false;
105 }
106 
IsObjectFree(uint32_t objnum) const107 bool CPDF_Parser::IsObjectFree(uint32_t objnum) const {
108   return GetObjectType(objnum) == ObjectType::kFree;
109 }
110 
// Forwards |size| to the cross-reference table's ShrinkObjectMap().
void CPDF_Parser::ShrinkObjectMap(uint32_t size) {
  m_CrossRefTable->ShrinkObjectMap(size);
}
114 
InitSyntaxParser(const RetainPtr<CPDF_ReadValidator> & validator)115 bool CPDF_Parser::InitSyntaxParser(
116     const RetainPtr<CPDF_ReadValidator>& validator) {
117   const Optional<FX_FILESIZE> header_offset = GetHeaderOffset(validator);
118   if (!header_offset)
119     return false;
120   if (validator->GetSize() < *header_offset + kPDFHeaderSize)
121     return false;
122 
123   m_pSyntax = pdfium::MakeUnique<CPDF_SyntaxParser>(validator, *header_offset);
124   return ParseFileVersion();
125 }
126 
ParseFileVersion()127 bool CPDF_Parser::ParseFileVersion() {
128   m_FileVersion = 0;
129   uint8_t ch;
130   if (!m_pSyntax->GetCharAt(5, ch))
131     return false;
132 
133   if (std::isdigit(ch))
134     m_FileVersion = FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch)) * 10;
135 
136   if (!m_pSyntax->GetCharAt(7, ch))
137     return false;
138 
139   if (std::isdigit(ch))
140     m_FileVersion += FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
141   return true;
142 }
143 
StartParse(const RetainPtr<IFX_SeekableReadStream> & pFileAccess,const char * password)144 CPDF_Parser::Error CPDF_Parser::StartParse(
145     const RetainPtr<IFX_SeekableReadStream>& pFileAccess,
146     const char* password) {
147   if (!InitSyntaxParser(
148           pdfium::MakeRetain<CPDF_ReadValidator>(pFileAccess, nullptr)))
149     return FORMAT_ERROR;
150   SetPassword(password);
151   return StartParseInternal();
152 }
153 
StartParseInternal()154 CPDF_Parser::Error CPDF_Parser::StartParseInternal() {
155   ASSERT(!m_bHasParsed);
156   ASSERT(!m_bXRefTableRebuilt);
157   m_bHasParsed = true;
158   m_bXRefStream = false;
159 
160   m_LastXRefOffset = ParseStartXRef();
161   if (m_LastXRefOffset >= kPDFHeaderSize) {
162     if (!LoadAllCrossRefV4(m_LastXRefOffset) &&
163         !LoadAllCrossRefV5(m_LastXRefOffset)) {
164       if (!RebuildCrossRef())
165         return FORMAT_ERROR;
166 
167       m_bXRefTableRebuilt = true;
168       m_LastXRefOffset = 0;
169     }
170   } else {
171     if (!RebuildCrossRef())
172       return FORMAT_ERROR;
173 
174     m_bXRefTableRebuilt = true;
175   }
176   Error eRet = SetEncryptHandler();
177   if (eRet != SUCCESS)
178     return eRet;
179 
180   if (!GetRoot() || !m_pObjectsHolder->TryInit()) {
181     if (m_bXRefTableRebuilt)
182       return FORMAT_ERROR;
183 
184     ReleaseEncryptHandler();
185     if (!RebuildCrossRef())
186       return FORMAT_ERROR;
187 
188     eRet = SetEncryptHandler();
189     if (eRet != SUCCESS)
190       return eRet;
191 
192     m_pObjectsHolder->TryInit();
193     if (!GetRoot())
194       return FORMAT_ERROR;
195   }
196   if (GetRootObjNum() == CPDF_Object::kInvalidObjNum) {
197     ReleaseEncryptHandler();
198     if (!RebuildCrossRef() || GetRootObjNum() == CPDF_Object::kInvalidObjNum)
199       return FORMAT_ERROR;
200 
201     eRet = SetEncryptHandler();
202     if (eRet != SUCCESS)
203       return eRet;
204   }
205   if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
206     CPDF_Reference* pMetadata =
207         ToReference(GetRoot()->GetObjectFor("Metadata"));
208     if (pMetadata)
209       m_MetadataObjnum = pMetadata->GetRefObjNum();
210   }
211   return SUCCESS;
212 }
213 
ParseStartXRef()214 FX_FILESIZE CPDF_Parser::ParseStartXRef() {
215   static constexpr char kStartXRefKeyword[] = "startxref";
216   m_pSyntax->SetPos(m_pSyntax->GetDocumentSize() - strlen(kStartXRefKeyword));
217   if (!m_pSyntax->BackwardsSearchToWord(kStartXRefKeyword, 4096))
218     return 0;
219 
220   // Skip "startxref" keyword.
221   m_pSyntax->GetKeyword();
222 
223   // Read XRef offset.
224   bool bNumber;
225   const ByteString xref_offset_str = m_pSyntax->GetNextWord(&bNumber);
226   if (!bNumber || xref_offset_str.IsEmpty())
227     return 0;
228 
229   const FX_SAFE_FILESIZE result = FXSYS_atoi64(xref_offset_str.c_str());
230   if (!result.IsValid() || result.ValueOrDie() >= m_pSyntax->GetDocumentSize())
231     return 0;
232 
233   return result.ValueOrDie();
234 }
235 
SetEncryptHandler()236 CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() {
237   ReleaseEncryptHandler();
238   if (!GetTrailer())
239     return FORMAT_ERROR;
240 
241   const CPDF_Dictionary* pEncryptDict = GetEncryptDict();
242   if (!pEncryptDict)
243     return SUCCESS;
244 
245   if (pEncryptDict->GetStringFor("Filter") != "Standard")
246     return HANDLER_ERROR;
247 
248   auto pSecurityHandler = pdfium::MakeRetain<CPDF_SecurityHandler>();
249   if (!pSecurityHandler->OnInit(pEncryptDict, GetIDArray(), GetPassword()))
250     return PASSWORD_ERROR;
251 
252   m_pSecurityHandler = std::move(pSecurityHandler);
253   return SUCCESS;
254 }
255 
// Resets the parser's reference to the current security handler, if any.
void CPDF_Parser::ReleaseEncryptHandler() {
  m_pSecurityHandler.Reset();
}
259 
260 // Ideally, all the cross reference entries should be verified.
261 // In reality, we rarely see well-formed cross references don't match
262 // with the objects. crbug/602650 showed a case where object numbers
263 // in the cross reference table are all off by one.
VerifyCrossRefV4()264 bool CPDF_Parser::VerifyCrossRefV4() {
265   for (const auto& it : m_CrossRefTable->objects_info()) {
266     if (it.second.pos == 0)
267       continue;
268     // Find the first non-zero position.
269     FX_FILESIZE SavedPos = m_pSyntax->GetPos();
270     m_pSyntax->SetPos(it.second.pos);
271     bool is_num = false;
272     ByteString num_str = m_pSyntax->GetNextWord(&is_num);
273     m_pSyntax->SetPos(SavedPos);
274     if (!is_num || num_str.IsEmpty() ||
275         FXSYS_atoui(num_str.c_str()) != it.first) {
276       // If the object number read doesn't match the one stored,
277       // something is wrong with the cross reference table.
278       return false;
279     }
280     break;
281   }
282   return true;
283 }
284 
LoadAllCrossRefV4(FX_FILESIZE xref_offset)285 bool CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xref_offset) {
286   if (!LoadCrossRefV4(xref_offset, true))
287     return false;
288 
289   RetainPtr<CPDF_Dictionary> trailer = LoadTrailerV4();
290   if (!trailer)
291     return false;
292 
293   m_CrossRefTable->SetTrailer(std::move(trailer));
294   int32_t xrefsize = GetDirectInteger(GetTrailer(), "Size");
295   if (xrefsize > 0 && xrefsize <= kMaxXRefSize)
296     ShrinkObjectMap(xrefsize);
297 
298   std::vector<FX_FILESIZE> xref_stream_list{
299       GetDirectInteger(GetTrailer(), "XRefStm")};
300   std::vector<FX_FILESIZE> xref_list{xref_offset};
301   std::set<FX_FILESIZE> seen_xref_offset{xref_offset};
302 
303   // When the trailer doesn't have Prev entry or Prev entry value is not
304   // numerical, GetDirectInteger() returns 0. Loading will end.
305   xref_offset = GetDirectInteger(GetTrailer(), "Prev");
306   while (xref_offset) {
307     // Check for circular references.
308     if (pdfium::ContainsKey(seen_xref_offset, xref_offset))
309       return false;
310 
311     seen_xref_offset.insert(xref_offset);
312 
313     // SLOW ...
314     xref_list.insert(xref_list.begin(), xref_offset);
315     LoadCrossRefV4(xref_offset, true);
316 
317     RetainPtr<CPDF_Dictionary> pDict(LoadTrailerV4());
318     if (!pDict)
319       return false;
320 
321     xref_offset = GetDirectInteger(pDict.Get(), "Prev");
322 
323     // SLOW ...
324     xref_stream_list.insert(xref_stream_list.begin(),
325                             pDict->GetIntegerFor("XRefStm"));
326 
327     m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
328         pdfium::MakeUnique<CPDF_CrossRefTable>(std::move(pDict)),
329         std::move(m_CrossRefTable));
330   }
331 
332   for (size_t i = 0; i < xref_list.size(); ++i) {
333     if (!LoadCrossRefV4(xref_list[i], false))
334       return false;
335 
336     if (xref_stream_list[i] && !LoadCrossRefV5(&xref_stream_list[i], false))
337       return false;
338 
339     if (i == 0 && !VerifyCrossRefV4())
340       return false;
341   }
342   return true;
343 }
344 
LoadLinearizedAllCrossRefV4(FX_FILESIZE main_xref_offset)345 bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE main_xref_offset) {
346   if (!LoadCrossRefV4(main_xref_offset, false))
347     return false;
348 
349   RetainPtr<CPDF_Dictionary> main_trailer = LoadTrailerV4();
350   if (!main_trailer)
351     return false;
352 
353   // GetTrailer() currently returns the first-page trailer.
354   if (GetDirectInteger(GetTrailer(), "Size") == 0)
355     return false;
356 
357   // Read /XRefStm from the first-page trailer. No need to read /Prev for the
358   // first-page trailer, as the caller already did that and passed it in as
359   // |main_xref_offset|.
360   std::vector<FX_FILESIZE> xref_stream_list{
361       GetDirectInteger(GetTrailer(), "XRefStm")};
362   std::vector<FX_FILESIZE> xref_list{main_xref_offset};
363   std::set<FX_FILESIZE> seen_xref_offset{main_xref_offset};
364 
365   // Merge the trailers.
366   m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
367       pdfium::MakeUnique<CPDF_CrossRefTable>(std::move(main_trailer)),
368       std::move(m_CrossRefTable));
369 
370   // Now GetTrailer() returns the merged trailer, where /Prev is from the
371   // main-trailer.
372   FX_FILESIZE xref_offset = GetDirectInteger(GetTrailer(), "Prev");
373   while (xref_offset) {
374     // Check for circular references.
375     if (pdfium::ContainsKey(seen_xref_offset, xref_offset))
376       return false;
377 
378     seen_xref_offset.insert(xref_offset);
379 
380     // SLOW ...
381     xref_list.insert(xref_list.begin(), xref_offset);
382     LoadCrossRefV4(xref_offset, true);
383 
384     RetainPtr<CPDF_Dictionary> pDict(LoadTrailerV4());
385     if (!pDict)
386       return false;
387 
388     xref_offset = GetDirectInteger(pDict.Get(), "Prev");
389 
390     // SLOW ...
391     xref_stream_list.insert(xref_stream_list.begin(),
392                             pDict->GetIntegerFor("XRefStm"));
393 
394     m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
395         pdfium::MakeUnique<CPDF_CrossRefTable>(std::move(pDict)),
396         std::move(m_CrossRefTable));
397   }
398 
399   if (xref_stream_list[0] && !LoadCrossRefV5(&xref_stream_list[0], false))
400     return false;
401 
402   for (size_t i = 1; i < xref_list.size(); ++i) {
403     if (!LoadCrossRefV4(xref_list[i], false))
404       return false;
405 
406     if (xref_stream_list[i] && !LoadCrossRefV5(&xref_stream_list[i], false))
407       return false;
408   }
409   return true;
410 }
411 
ParseAndAppendCrossRefSubsectionData(uint32_t start_objnum,uint32_t count,std::vector<CrossRefObjData> * out_objects)412 bool CPDF_Parser::ParseAndAppendCrossRefSubsectionData(
413     uint32_t start_objnum,
414     uint32_t count,
415     std::vector<CrossRefObjData>* out_objects) {
416   if (!count)
417     return true;
418 
419   // Each entry shall be exactly 20 byte.
420   // A sample entry looks like:
421   // "0000000000 00007 f\r\n"
422   static constexpr int32_t kEntryConstSize = 20;
423 
424   if (!out_objects) {
425     FX_SAFE_FILESIZE pos = count;
426     pos *= kEntryConstSize;
427     pos += m_pSyntax->GetPos();
428     if (!pos.IsValid())
429       return false;
430     m_pSyntax->SetPos(pos.ValueOrDie());
431     return true;
432   }
433   const size_t start_obj_index = out_objects->size();
434   FX_SAFE_SIZE_T new_size = start_obj_index;
435   new_size += count;
436   if (!new_size.IsValid())
437     return false;
438 
439   if (new_size.ValueOrDie() > kMaxXRefSize)
440     return false;
441 
442   const size_t max_entries_in_file =
443       m_pSyntax->GetDocumentSize() / kEntryConstSize;
444   if (new_size.ValueOrDie() > max_entries_in_file)
445     return false;
446 
447   out_objects->resize(new_size.ValueOrDie());
448 
449   std::vector<char> buf(1024 * kEntryConstSize + 1);
450   buf.back() = '\0';
451 
452   uint32_t nBytesToRead = count;
453   while (nBytesToRead > 0) {
454     const uint32_t block_size = std::min(nBytesToRead, 1024u);
455     if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
456                               block_size * kEntryConstSize)) {
457       return false;
458     }
459 
460     for (uint32_t i = 0; i < block_size; i++) {
461       uint32_t iObjectIndex = count - nBytesToRead + i;
462       CrossRefObjData& obj_data =
463           (*out_objects)[start_obj_index + iObjectIndex];
464       const uint32_t objnum = start_objnum + iObjectIndex;
465       obj_data.obj_num = objnum;
466       ObjectInfo& info = obj_data.info;
467 
468       char* pEntry = &buf[i * kEntryConstSize];
469       if (pEntry[17] == 'f') {
470         info.pos = 0;
471         info.type = ObjectType::kFree;
472       } else {
473         const FX_SAFE_FILESIZE offset = FXSYS_atoi64(pEntry);
474         if (!offset.IsValid())
475           return false;
476 
477         if (offset.ValueOrDie() == 0) {
478           for (int32_t c = 0; c < 10; c++) {
479             if (!std::isdigit(pEntry[c]))
480               return false;
481           }
482         }
483 
484         info.pos = offset.ValueOrDie();
485 
486         // TODO(art-snake): The info.gennum is uint16_t, but version may be
487         // greated than max<uint16_t>. Needs solve this issue.
488         const int32_t version = FXSYS_atoi(pEntry + 11);
489         info.gennum = version;
490         info.type = ObjectType::kNotCompressed;
491       }
492     }
493     nBytesToRead -= block_size;
494   }
495   return true;
496 }
497 
ParseCrossRefV4(std::vector<CrossRefObjData> * out_objects)498 bool CPDF_Parser::ParseCrossRefV4(std::vector<CrossRefObjData>* out_objects) {
499   if (out_objects)
500     out_objects->clear();
501 
502   if (m_pSyntax->GetKeyword() != "xref")
503     return false;
504   std::vector<CrossRefObjData> result_objects;
505   while (1) {
506     FX_FILESIZE saved_pos = m_pSyntax->GetPos();
507     bool bIsNumber;
508     ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
509     if (word.IsEmpty())
510       return false;
511 
512     if (!bIsNumber) {
513       m_pSyntax->SetPos(saved_pos);
514       break;
515     }
516 
517     uint32_t start_objnum = FXSYS_atoui(word.c_str());
518     if (start_objnum >= kMaxObjectNumber)
519       return false;
520 
521     uint32_t count = m_pSyntax->GetDirectNum();
522     m_pSyntax->ToNextWord();
523 
524     if (!ParseAndAppendCrossRefSubsectionData(
525             start_objnum, count, out_objects ? &result_objects : nullptr)) {
526       return false;
527     }
528   }
529   if (out_objects)
530     *out_objects = std::move(result_objects);
531   return true;
532 }
533 
LoadCrossRefV4(FX_FILESIZE pos,bool bSkip)534 bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos, bool bSkip) {
535   m_pSyntax->SetPos(pos);
536   std::vector<CrossRefObjData> objects;
537   if (!ParseCrossRefV4(bSkip ? nullptr : &objects))
538     return false;
539 
540   MergeCrossRefObjectsData(objects);
541   return true;
542 }
543 
MergeCrossRefObjectsData(const std::vector<CrossRefObjData> & objects)544 void CPDF_Parser::MergeCrossRefObjectsData(
545     const std::vector<CrossRefObjData>& objects) {
546   for (const auto& obj : objects) {
547     switch (obj.info.type) {
548       case ObjectType::kFree:
549         if (obj.info.gennum > 0)
550           m_CrossRefTable->SetFree(obj.obj_num);
551         break;
552       case ObjectType::kNormal:
553       case ObjectType::kObjStream:
554         m_CrossRefTable->AddNormal(obj.obj_num, obj.info.gennum, obj.info.pos);
555         break;
556       case ObjectType::kCompressed:
557         m_CrossRefTable->AddCompressed(obj.obj_num, obj.info.archive_obj_num);
558         break;
559       default:
560         NOTREACHED();
561     }
562   }
563 }
564 
LoadAllCrossRefV5(FX_FILESIZE xref_offset)565 bool CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xref_offset) {
566   if (!LoadCrossRefV5(&xref_offset, true))
567     return false;
568 
569   std::set<FX_FILESIZE> seen_xref_offset;
570   while (xref_offset) {
571     seen_xref_offset.insert(xref_offset);
572     if (!LoadCrossRefV5(&xref_offset, false))
573       return false;
574 
575     // Check for circular references.
576     if (pdfium::ContainsKey(seen_xref_offset, xref_offset))
577       return false;
578   }
579   m_ObjectStreamMap.clear();
580   m_bXRefStream = true;
581   return true;
582 }
583 
RebuildCrossRef()584 bool CPDF_Parser::RebuildCrossRef() {
585   auto cross_ref_table = pdfium::MakeUnique<CPDF_CrossRefTable>();
586 
587   const uint32_t kBufferSize = 4096;
588   m_pSyntax->SetReadBufferSize(kBufferSize);
589   m_pSyntax->SetPos(0);
590 
591   bool bIsNumber;
592   std::vector<std::pair<uint32_t, FX_FILESIZE>> numbers;
593   for (ByteString word = m_pSyntax->GetNextWord(&bIsNumber); !word.IsEmpty();
594        word = m_pSyntax->GetNextWord(&bIsNumber)) {
595     if (bIsNumber) {
596       numbers.emplace_back(FXSYS_atoui(word.c_str()),
597                            m_pSyntax->GetPos() - word.GetLength());
598       if (numbers.size() > 2u)
599         numbers.erase(numbers.begin());
600       continue;
601     }
602 
603     if (word == "(") {
604       m_pSyntax->ReadString();
605     } else if (word == "<") {
606       m_pSyntax->ReadHexString();
607     } else if (word == "trailer") {
608       RetainPtr<CPDF_Object> pTrailer = m_pSyntax->GetObjectBody(nullptr);
609       if (pTrailer) {
610         cross_ref_table = CPDF_CrossRefTable::MergeUp(
611             std::move(cross_ref_table),
612             pdfium::MakeUnique<CPDF_CrossRefTable>(ToDictionary(
613                 pTrailer->IsStream() ? pTrailer->AsStream()->GetDict()->Clone()
614                                      : std::move(pTrailer))));
615       }
616     } else if (word == "obj" && numbers.size() == 2u) {
617       const FX_FILESIZE obj_pos = numbers[0].second;
618       const uint32_t obj_num = numbers[0].first;
619       const uint32_t gen_num = numbers[1].first;
620 
621       m_pSyntax->SetPos(obj_pos);
622       const RetainPtr<CPDF_Stream> pStream =
623           ToStream(m_pSyntax->GetIndirectObject(
624               nullptr, CPDF_SyntaxParser::ParseType::kStrict));
625 
626       if (pStream && pStream->GetDict()->GetStringFor("Type") == "XRef") {
627         cross_ref_table = CPDF_CrossRefTable::MergeUp(
628             std::move(cross_ref_table),
629             pdfium::MakeUnique<CPDF_CrossRefTable>(
630                 ToDictionary(pStream->GetDict()->Clone())));
631       }
632 
633       if (obj_num < kMaxObjectNumber) {
634         cross_ref_table->AddNormal(obj_num, gen_num, obj_pos);
635         if (const auto object_stream =
636                 CPDF_ObjectStream::Create(pStream.Get())) {
637           for (const auto& it : object_stream->objects_offsets()) {
638             if (it.first < kMaxObjectNumber)
639               cross_ref_table->AddCompressed(it.first, obj_num);
640           }
641         }
642       }
643     }
644     numbers.clear();
645   }
646 
647   m_CrossRefTable = CPDF_CrossRefTable::MergeUp(std::move(m_CrossRefTable),
648                                                 std::move(cross_ref_table));
649   // Resore default buffer size.
650   m_pSyntax->SetReadBufferSize(CPDF_Stream::kFileBufSize);
651 
652   return GetTrailer() && !m_CrossRefTable->objects_info().empty();
653 }
654 
LoadCrossRefV5(FX_FILESIZE * pos,bool bMainXRef)655 bool CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, bool bMainXRef) {
656   RetainPtr<CPDF_Object> pObject(ParseIndirectObjectAt(*pos, 0));
657   if (!pObject || !pObject->GetObjNum())
658     return false;
659 
660   CPDF_Stream* pStream = pObject->AsStream();
661   if (!pStream)
662     return false;
663 
664   CPDF_Dictionary* pDict = pStream->GetDict();
665   *pos = pDict->GetIntegerFor("Prev");
666   int32_t size = pDict->GetIntegerFor("Size");
667   if (size < 0)
668     return false;
669 
670   RetainPtr<CPDF_Dictionary> pNewTrailer = ToDictionary(pDict->Clone());
671   if (bMainXRef) {
672     m_CrossRefTable =
673         pdfium::MakeUnique<CPDF_CrossRefTable>(std::move(pNewTrailer));
674     m_CrossRefTable->ShrinkObjectMap(size);
675   } else {
676     m_CrossRefTable = CPDF_CrossRefTable::MergeUp(
677         pdfium::MakeUnique<CPDF_CrossRefTable>(std::move(pNewTrailer)),
678         std::move(m_CrossRefTable));
679   }
680 
681   std::vector<std::pair<int32_t, int32_t>> arrIndex;
682   CPDF_Array* pArray = pDict->GetArrayFor("Index");
683   if (pArray) {
684     for (size_t i = 0; i < pArray->size() / 2; i++) {
685       CPDF_Object* pStartNumObj = pArray->GetObjectAt(i * 2);
686       CPDF_Object* pCountObj = pArray->GetObjectAt(i * 2 + 1);
687 
688       if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) {
689         int nStartNum = pStartNumObj->GetInteger();
690         int nCount = pCountObj->GetInteger();
691         if (nStartNum >= 0 && nCount > 0)
692           arrIndex.push_back(std::make_pair(nStartNum, nCount));
693       }
694     }
695   }
696 
697   if (arrIndex.empty())
698     arrIndex.push_back(std::make_pair(0, size));
699 
700   pArray = pDict->GetArrayFor("W");
701   if (!pArray)
702     return false;
703 
704   std::vector<uint32_t> WidthArray;
705   FX_SAFE_UINT32 dwAccWidth = 0;
706   for (size_t i = 0; i < pArray->size(); ++i) {
707     WidthArray.push_back(pArray->GetIntegerAt(i));
708     dwAccWidth += WidthArray[i];
709   }
710 
711   if (!dwAccWidth.IsValid() || WidthArray.size() < 3)
712     return false;
713 
714   uint32_t totalWidth = dwAccWidth.ValueOrDie();
715   auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pStream);
716   pAcc->LoadAllDataFiltered();
717 
718   const uint8_t* pData = pAcc->GetData();
719   uint32_t dwTotalSize = pAcc->GetSize();
720   uint32_t segindex = 0;
721   for (const auto& index : arrIndex) {
722     const int32_t startnum = index.first;
723     if (startnum < 0)
724       continue;
725 
726     uint32_t count = pdfium::base::checked_cast<uint32_t>(index.second);
727     FX_SAFE_UINT32 dwCaculatedSize = segindex;
728     dwCaculatedSize += count;
729     dwCaculatedSize *= totalWidth;
730     if (!dwCaculatedSize.IsValid() ||
731         dwCaculatedSize.ValueOrDie() > dwTotalSize) {
732       continue;
733     }
734 
735     const uint8_t* segstart = pData + segindex * totalWidth;
736     FX_SAFE_UINT32 dwMaxObjNum = startnum;
737     dwMaxObjNum += count;
738     uint32_t dwV5Size =
739         m_CrossRefTable->objects_info().empty() ? 0 : GetLastObjNum() + 1;
740     if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size)
741       continue;
742 
743     for (uint32_t i = 0; i < count; i++) {
744       ObjectType type = ObjectType::kNotCompressed;
745       const uint8_t* entrystart = segstart + i * totalWidth;
746       if (WidthArray[0]) {
747         const uint32_t cross_ref_stream_obj_type =
748             GetVarInt(entrystart, WidthArray[0]);
749         type = GetObjectTypeFromCrossRefStreamType(cross_ref_stream_obj_type);
750         if (type == ObjectType::kNull)
751           continue;
752       }
753 
754       const uint32_t objnum = startnum + i;
755       if (objnum >= CPDF_Parser::kMaxObjectNumber)
756         continue;
757 
758       const ObjectType existing_type = GetObjectType(objnum);
759       if (existing_type == ObjectType::kNull) {
760         uint32_t offset = GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
761         if (pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(offset))
762           m_CrossRefTable->AddNormal(objnum, 0, offset);
763         continue;
764       }
765 
766       if (existing_type != ObjectType::kFree)
767         continue;
768 
769       if (type == ObjectType::kFree) {
770         m_CrossRefTable->SetFree(objnum);
771         continue;
772       }
773 
774       const uint32_t entry_value =
775           GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
776       if (type == ObjectType::kNotCompressed) {
777         const uint32_t offset = entry_value;
778         if (pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(offset))
779           m_CrossRefTable->AddNormal(objnum, 0, offset);
780         continue;
781       }
782 
783       ASSERT(type == ObjectType::kCompressed);
784       const uint32_t archive_obj_num = entry_value;
785       if (!IsValidObjectNumber(archive_obj_num))
786         return false;
787 
788       m_CrossRefTable->AddCompressed(objnum, archive_obj_num);
789     }
790     segindex += count;
791   }
792   return true;
793 }
794 
GetIDArray() const795 const CPDF_Array* CPDF_Parser::GetIDArray() const {
796   return GetTrailer() ? GetTrailer()->GetArrayFor("ID") : nullptr;
797 }
798 
GetRoot() const799 CPDF_Dictionary* CPDF_Parser::GetRoot() const {
800   CPDF_Object* obj =
801       m_pObjectsHolder->GetOrParseIndirectObject(GetRootObjNum());
802   return obj ? obj->GetDict() : nullptr;
803 }
804 
GetEncryptDict() const805 const CPDF_Dictionary* CPDF_Parser::GetEncryptDict() const {
806   if (!GetTrailer())
807     return nullptr;
808 
809   const CPDF_Object* pEncryptObj = GetTrailer()->GetObjectFor("Encrypt");
810   if (!pEncryptObj)
811     return nullptr;
812 
813   if (pEncryptObj->IsDictionary())
814     return ToDictionary(pEncryptObj);
815 
816   if (pEncryptObj->IsReference()) {
817     return ToDictionary(m_pObjectsHolder->GetOrParseIndirectObject(
818         pEncryptObj->AsReference()->GetRefObjNum()));
819   }
820   return nullptr;
821 }
822 
GetEncodedPassword() const823 ByteString CPDF_Parser::GetEncodedPassword() const {
824   return GetSecurityHandler()->GetEncodedPassword(GetPassword().AsStringView());
825 }
826 
// Returns the trailer dictionary held by the cross-reference table. Callers
// in this file treat a null result as "no trailer".
const CPDF_Dictionary* CPDF_Parser::GetTrailer() const {
  return m_CrossRefTable->trailer();
}
830 
// Test-only accessor: forwards to the cross-reference table to obtain a
// mutable trailer dictionary.
CPDF_Dictionary* CPDF_Parser::GetMutableTrailerForTesting() {
  return m_CrossRefTable->GetMutableTrailerForTesting();
}
834 
GetCombinedTrailer() const835 RetainPtr<CPDF_Dictionary> CPDF_Parser::GetCombinedTrailer() const {
836   return m_CrossRefTable->trailer()
837              ? ToDictionary(m_CrossRefTable->trailer()->Clone())
838              : RetainPtr<CPDF_Dictionary>();
839 }
840 
GetInfoObjNum() const841 uint32_t CPDF_Parser::GetInfoObjNum() const {
842   const CPDF_Reference* pRef =
843       ToReference(m_CrossRefTable->trailer()
844                       ? m_CrossRefTable->trailer()->GetObjectFor("Info")
845                       : nullptr);
846   return pRef ? pRef->GetRefObjNum() : CPDF_Object::kInvalidObjNum;
847 }
848 
GetRootObjNum() const849 uint32_t CPDF_Parser::GetRootObjNum() const {
850   const CPDF_Reference* pRef =
851       ToReference(m_CrossRefTable->trailer()
852                       ? m_CrossRefTable->trailer()->GetObjectFor("Root")
853                       : nullptr);
854   return pRef ? pRef->GetRefObjNum() : CPDF_Object::kInvalidObjNum;
855 }
856 
ParseIndirectObject(uint32_t objnum)857 RetainPtr<CPDF_Object> CPDF_Parser::ParseIndirectObject(uint32_t objnum) {
858   if (!IsValidObjectNumber(objnum))
859     return nullptr;
860 
861   // Prevent circular parsing the same object.
862   if (pdfium::ContainsKey(m_ParsingObjNums, objnum))
863     return nullptr;
864 
865   pdfium::ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums, objnum);
866   if (GetObjectType(objnum) == ObjectType::kNotCompressed) {
867     FX_FILESIZE pos = GetObjectPositionOrZero(objnum);
868     if (pos <= 0)
869       return nullptr;
870     return ParseIndirectObjectAt(pos, objnum);
871   }
872   if (GetObjectType(objnum) != ObjectType::kCompressed)
873     return nullptr;
874 
875   const CPDF_ObjectStream* pObjStream =
876       GetObjectStream(m_CrossRefTable->GetObjectInfo(objnum)->archive_obj_num);
877   if (!pObjStream)
878     return nullptr;
879 
880   return pObjStream->ParseObject(m_pObjectsHolder.Get(), objnum);
881 }
882 
GetObjectStream(uint32_t object_number)883 const CPDF_ObjectStream* CPDF_Parser::GetObjectStream(uint32_t object_number) {
884   // Prevent circular parsing the same object.
885   if (pdfium::ContainsKey(m_ParsingObjNums, object_number))
886     return nullptr;
887 
888   pdfium::ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums,
889                                                     object_number);
890 
891   auto it = m_ObjectStreamMap.find(object_number);
892   if (it != m_ObjectStreamMap.end())
893     return it->second.get();
894 
895   const auto* info = m_CrossRefTable->GetObjectInfo(object_number);
896   if (!info || info->type != ObjectType::kObjStream)
897     return nullptr;
898 
899   const FX_FILESIZE object_pos = info->pos;
900   if (object_pos <= 0)
901     return nullptr;
902 
903   RetainPtr<CPDF_Object> object =
904       ParseIndirectObjectAt(object_pos, object_number);
905   if (!object)
906     return nullptr;
907 
908   std::unique_ptr<CPDF_ObjectStream> objs_stream =
909       CPDF_ObjectStream::Create(ToStream(object.Get()));
910   const CPDF_ObjectStream* result = objs_stream.get();
911   m_ObjectStreamMap[object_number] = std::move(objs_stream);
912 
913   return result;
914 }
915 
ParseIndirectObjectAt(FX_FILESIZE pos,uint32_t objnum)916 RetainPtr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAt(FX_FILESIZE pos,
917                                                           uint32_t objnum) {
918   const FX_FILESIZE saved_pos = m_pSyntax->GetPos();
919   m_pSyntax->SetPos(pos);
920 
921   auto result = m_pSyntax->GetIndirectObject(
922       m_pObjectsHolder.Get(), CPDF_SyntaxParser::ParseType::kLoose);
923   m_pSyntax->SetPos(saved_pos);
924   if (result && objnum && result->GetObjNum() != objnum)
925     return nullptr;
926 
927   const bool should_decrypt = m_pSecurityHandler &&
928                               m_pSecurityHandler->GetCryptoHandler() &&
929                               objnum != m_MetadataObjnum;
930   if (should_decrypt &&
931       !m_pSecurityHandler->GetCryptoHandler()->DecryptObjectTree(result)) {
932     return nullptr;
933   }
934   return result;
935 }
936 
GetFirstPageNo() const937 uint32_t CPDF_Parser::GetFirstPageNo() const {
938   return m_pLinearized ? m_pLinearized->GetFirstPageNo() : 0;
939 }
940 
SetLinearizedHeader(std::unique_ptr<CPDF_LinearizedHeader> pLinearized)941 void CPDF_Parser::SetLinearizedHeader(
942     std::unique_ptr<CPDF_LinearizedHeader> pLinearized) {
943   m_pLinearized = std::move(pLinearized);
944 }
945 
LoadTrailerV4()946 RetainPtr<CPDF_Dictionary> CPDF_Parser::LoadTrailerV4() {
947   if (m_pSyntax->GetKeyword() != "trailer")
948     return nullptr;
949 
950   return ToDictionary(m_pSyntax->GetObjectBody(m_pObjectsHolder.Get()));
951 }
952 
GetPermissions() const953 uint32_t CPDF_Parser::GetPermissions() const {
954   return m_pSecurityHandler ? m_pSecurityHandler->GetPermissions() : 0xFFFFFFFF;
955 }
956 
ParseLinearizedHeader()957 std::unique_ptr<CPDF_LinearizedHeader> CPDF_Parser::ParseLinearizedHeader() {
958   return CPDF_LinearizedHeader::Parse(m_pSyntax.get());
959 }
960 
StartLinearizedParse(const RetainPtr<CPDF_ReadValidator> & validator,const char * password)961 CPDF_Parser::Error CPDF_Parser::StartLinearizedParse(
962     const RetainPtr<CPDF_ReadValidator>& validator,
963     const char* password) {
964   ASSERT(!m_bHasParsed);
965   ASSERT(!m_bXRefTableRebuilt);
966   SetPassword(password);
967   m_bXRefStream = false;
968   m_LastXRefOffset = 0;
969 
970   if (!InitSyntaxParser(validator))
971     return FORMAT_ERROR;
972 
973   m_pLinearized = ParseLinearizedHeader();
974   if (!m_pLinearized)
975     return StartParseInternal();
976 
977   m_bHasParsed = true;
978 
979   m_LastXRefOffset = m_pLinearized->GetLastXRefOffset();
980   FX_FILESIZE dwFirstXRefOffset = m_LastXRefOffset;
981   bool bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, false);
982   if (!bLoadV4 && !LoadCrossRefV5(&dwFirstXRefOffset, true)) {
983     if (!RebuildCrossRef())
984       return FORMAT_ERROR;
985 
986     m_bXRefTableRebuilt = true;
987     m_LastXRefOffset = 0;
988   }
989   if (bLoadV4) {
990     RetainPtr<CPDF_Dictionary> trailer = LoadTrailerV4();
991     if (!trailer)
992       return SUCCESS;
993 
994     m_CrossRefTable->SetTrailer(std::move(trailer));
995     int32_t xrefsize = GetDirectInteger(GetTrailer(), "Size");
996     if (xrefsize > 0)
997       ShrinkObjectMap(xrefsize);
998   }
999 
1000   Error eRet = SetEncryptHandler();
1001   if (eRet != SUCCESS)
1002     return eRet;
1003 
1004   if (!GetRoot() || !m_pObjectsHolder->TryInit()) {
1005     if (m_bXRefTableRebuilt)
1006       return FORMAT_ERROR;
1007 
1008     ReleaseEncryptHandler();
1009     if (!RebuildCrossRef())
1010       return FORMAT_ERROR;
1011 
1012     eRet = SetEncryptHandler();
1013     if (eRet != SUCCESS)
1014       return eRet;
1015 
1016     m_pObjectsHolder->TryInit();
1017     if (!GetRoot())
1018       return FORMAT_ERROR;
1019   }
1020 
1021   if (GetRootObjNum() == CPDF_Object::kInvalidObjNum) {
1022     ReleaseEncryptHandler();
1023     if (!RebuildCrossRef() || GetRootObjNum() == CPDF_Object::kInvalidObjNum)
1024       return FORMAT_ERROR;
1025 
1026     eRet = SetEncryptHandler();
1027     if (eRet != SUCCESS)
1028       return eRet;
1029   }
1030 
1031   if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
1032     if (CPDF_Reference* pMetadata =
1033             ToReference(GetRoot()->GetObjectFor("Metadata")))
1034       m_MetadataObjnum = pMetadata->GetRefObjNum();
1035   }
1036   return SUCCESS;
1037 }
1038 
LoadLinearizedAllCrossRefV5(FX_FILESIZE main_xref_offset)1039 bool CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE main_xref_offset) {
1040   FX_FILESIZE xref_offset = main_xref_offset;
1041   if (!LoadCrossRefV5(&xref_offset, false))
1042     return false;
1043 
1044   std::set<FX_FILESIZE> seen_xref_offset;
1045   while (xref_offset) {
1046     seen_xref_offset.insert(xref_offset);
1047     if (!LoadCrossRefV5(&xref_offset, false))
1048       return false;
1049 
1050     // Check for circular references.
1051     if (pdfium::ContainsKey(seen_xref_offset, xref_offset))
1052       return false;
1053   }
1054   m_ObjectStreamMap.clear();
1055   m_bXRefStream = true;
1056   return true;
1057 }
1058 
LoadLinearizedMainXRefTable()1059 CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
1060   const FX_SAFE_FILESIZE prev = GetTrailer()->GetIntegerFor("Prev");
1061   const FX_FILESIZE main_xref_offset = prev.ValueOrDefault(-1);
1062   if (main_xref_offset < 0)
1063     return FORMAT_ERROR;
1064 
1065   if (main_xref_offset == 0)
1066     return SUCCESS;
1067 
1068   const AutoRestorer<uint32_t> save_metadata_objnum(&m_MetadataObjnum);
1069   m_MetadataObjnum = 0;
1070   m_ObjectStreamMap.clear();
1071 
1072   if (!LoadLinearizedAllCrossRefV4(main_xref_offset) &&
1073       !LoadLinearizedAllCrossRefV5(main_xref_offset)) {
1074     m_LastXRefOffset = 0;
1075     return FORMAT_ERROR;
1076   }
1077 
1078   return SUCCESS;
1079 }
1080 
GetObjectTypeFromCrossRefStreamType(uint32_t cross_ref_stream_type) const1081 CPDF_Parser::ObjectType CPDF_Parser::GetObjectTypeFromCrossRefStreamType(
1082     uint32_t cross_ref_stream_type) const {
1083   switch (cross_ref_stream_type) {
1084     case 0:
1085       return CPDF_Parser::ObjectType::kFree;
1086     case 1:
1087       return CPDF_Parser::ObjectType::kNotCompressed;
1088     case 2:
1089       return CPDF_Parser::ObjectType::kCompressed;
1090     default:
1091       return CPDF_Parser::ObjectType::kNull;
1092   }
1093 }
1094