1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/include/fpdfapi/fpdf_parser.h"
8
9 #include <algorithm>
10 #include <memory>
11 #include <set>
12 #include <utility>
13 #include <vector>
14
15 #include "core/include/fpdfapi/fpdf_module.h"
16 #include "core/include/fpdfapi/fpdf_page.h"
17 #include "core/include/fxcrt/fx_ext.h"
18 #include "core/include/fxcrt/fx_safe_types.h"
19 #include "core/src/fpdfapi/fpdf_page/pageint.h"
20 #include "core/src/fpdfapi/fpdf_parser/parser_int.h"
21 #include "third_party/base/stl_util.h"
22
23 namespace {
24
// A limit on the size of the xref table. Theoretical limits are higher, but
// this may be large enough in practice. LoadAllCrossRefV4() rejects a trailer
// whose /Size exceeds this value.
const int32_t kMaxXRefSize = 1048576;

// A limit on the maximum object number in the xref table. Theoretical limits
// are higher, but this may be large enough in practice. LoadCrossRefV4()
// rejects a subsection whose starting object number is at or above this value.
const FX_DWORD kMaxObjectNumber = 1048576;
32
// Bookkeeping record for a tag search.
// NOTE(review): no user of this struct is visible in this part of the file;
// the field descriptions below are inferred from the names -- confirm against
// the code that fills them in.
struct SearchTagRecord {
  const char* m_pTag;  // Presumably the tag text being searched for.
  FX_DWORD m_Len;      // Presumably the length of |m_pTag|.
  FX_DWORD m_Offset;   // Presumably the current match offset within the tag.
};
38
// Scope guard that inserts |elem| into |org_set| on construction and erases it
// again on destruction. |org_set| must outlive the guard.
//
// Note that the destructor erases by value, so an element that was already in
// the set before the guard was created is removed as well.
template <typename T>
class ScopedSetInsertion {
 public:
  ScopedSetInsertion(std::set<T>* org_set, T elem)
      : m_Set(org_set), m_Entry(elem) {
    m_Set->insert(m_Entry);
  }
  ~ScopedSetInsertion() { m_Set->erase(m_Entry); }

  // Not copyable: a copy would erase the entry from the set when it is
  // destroyed, even though the original guard is still alive.
  ScopedSetInsertion(const ScopedSetInsertion&) = delete;
  ScopedSetInsertion& operator=(const ScopedSetInsertion&) = delete;

 private:
  std::set<T>* const m_Set;
  const T m_Entry;
};
52
CompareFileSize(const void * p1,const void * p2)53 int CompareFileSize(const void* p1, const void* p2) {
54 return *(FX_FILESIZE*)p1 - *(FX_FILESIZE*)p2;
55 }
56
GetHeaderOffset(IFX_FileRead * pFile)57 int32_t GetHeaderOffset(IFX_FileRead* pFile) {
58 const FX_DWORD tag = FXDWORD_FROM_LSBFIRST(0x46445025);
59 const size_t kBufSize = 4;
60 uint8_t buf[kBufSize];
61 int32_t offset = 0;
62 while (offset <= 1024) {
63 if (!pFile->ReadBlock(buf, offset, kBufSize))
64 return -1;
65
66 if (*(FX_DWORD*)buf == tag)
67 return offset;
68
69 ++offset;
70 }
71 return -1;
72 }
73
GetDirectInteger(CPDF_Dictionary * pDict,const CFX_ByteStringC & key)74 int32_t GetDirectInteger(CPDF_Dictionary* pDict, const CFX_ByteStringC& key) {
75 CPDF_Number* pObj = ToNumber(pDict->GetElement(key));
76 return pObj ? pObj->GetInteger() : 0;
77 }
78
GetVarInt(const uint8_t * p,int32_t n)79 FX_DWORD GetVarInt(const uint8_t* p, int32_t n) {
80 FX_DWORD result = 0;
81 for (int32_t i = 0; i < n; ++i)
82 result = result * 256 + p[i];
83 return result;
84 }
85
GetStreamNCount(CPDF_StreamAcc * pObjStream)86 int32_t GetStreamNCount(CPDF_StreamAcc* pObjStream) {
87 return pObjStream->GetDict()->GetInteger("N");
88 }
89
GetStreamFirst(CPDF_StreamAcc * pObjStream)90 int32_t GetStreamFirst(CPDF_StreamAcc* pObjStream) {
91 return pObjStream->GetDict()->GetInteger("First");
92 }
93
CanReadFromBitStream(const CFX_BitStream * hStream,const FX_SAFE_DWORD & num_bits)94 bool CanReadFromBitStream(const CFX_BitStream* hStream,
95 const FX_SAFE_DWORD& num_bits) {
96 return (num_bits.IsValid() &&
97 hStream->BitsRemaining() >= num_bits.ValueOrDie());
98 }
99
100 } // namespace
101
// TODO(thestig): Using unique_ptr with ReleaseDeleter is still not ideal.
// Come up with, or wait for, something better.
104 using ScopedFileStream =
105 std::unique_ptr<IFX_FileStream, ReleaseDeleter<IFX_FileStream>>;
106
IsSignatureDict(const CPDF_Dictionary * pDict)107 bool IsSignatureDict(const CPDF_Dictionary* pDict) {
108 CPDF_Object* pType = pDict->GetElementValue("Type");
109 if (!pType)
110 pType = pDict->GetElementValue("FT");
111 return pType && pType->GetString() == "Sig";
112 }
113
CPDF_Parser()114 CPDF_Parser::CPDF_Parser() {
115 m_pDocument = NULL;
116 m_pTrailer = NULL;
117 m_pEncryptDict = NULL;
118 m_pLinearized = NULL;
119 m_dwFirstPageNo = 0;
120 m_dwXrefStartObjNum = 0;
121 m_bOwnFileRead = TRUE;
122 m_FileVersion = 0;
123 m_bForceUseSecurityHandler = FALSE;
124 }
~CPDF_Parser()125 CPDF_Parser::~CPDF_Parser() {
126 CloseParser(FALSE);
127 }
128
GetLastObjNum() const129 FX_DWORD CPDF_Parser::GetLastObjNum() const {
130 return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first;
131 }
132
IsValidObjectNumber(FX_DWORD objnum) const133 bool CPDF_Parser::IsValidObjectNumber(FX_DWORD objnum) const {
134 return !m_ObjectInfo.empty() && objnum <= m_ObjectInfo.rbegin()->first;
135 }
136
SetEncryptDictionary(CPDF_Dictionary * pDict)137 void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) {
138 m_pEncryptDict = pDict;
139 }
140
GetObjectPositionOrZero(FX_DWORD objnum) const141 FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(FX_DWORD objnum) const {
142 auto it = m_ObjectInfo.find(objnum);
143 return it != m_ObjectInfo.end() ? it->second.pos : 0;
144 }
145
ShrinkObjectMap(FX_DWORD objnum)146 void CPDF_Parser::ShrinkObjectMap(FX_DWORD objnum) {
147 if (objnum == 0) {
148 m_ObjectInfo.clear();
149 return;
150 }
151
152 auto it = m_ObjectInfo.lower_bound(objnum);
153 while (it != m_ObjectInfo.end()) {
154 auto saved_it = it++;
155 m_ObjectInfo.erase(saved_it);
156 }
157
158 if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1))
159 m_ObjectInfo[objnum - 1].pos = 0;
160 }
161
CloseParser(FX_BOOL bReParse)162 void CPDF_Parser::CloseParser(FX_BOOL bReParse) {
163 m_bVersionUpdated = FALSE;
164 if (!bReParse) {
165 delete m_pDocument;
166 m_pDocument = NULL;
167 }
168 if (m_pTrailer) {
169 m_pTrailer->Release();
170 m_pTrailer = NULL;
171 }
172 ReleaseEncryptHandler();
173 SetEncryptDictionary(NULL);
174 if (m_bOwnFileRead && m_Syntax.m_pFileAccess) {
175 m_Syntax.m_pFileAccess->Release();
176 m_Syntax.m_pFileAccess = NULL;
177 }
178 FX_POSITION pos = m_ObjectStreamMap.GetStartPosition();
179 while (pos) {
180 void* objnum;
181 CPDF_StreamAcc* pStream;
182 m_ObjectStreamMap.GetNextAssoc(pos, objnum, (void*&)pStream);
183 delete pStream;
184 }
185 m_ObjectStreamMap.RemoveAll();
186 m_ObjCache.clear();
187
188 m_SortedOffset.RemoveAll();
189 m_ObjectInfo.clear();
190 m_V5Type.RemoveAll();
191 m_ObjVersion.RemoveAll();
192 int32_t iLen = m_Trailers.GetSize();
193 for (int32_t i = 0; i < iLen; ++i) {
194 if (CPDF_Dictionary* trailer = m_Trailers.GetAt(i))
195 trailer->Release();
196 }
197 m_Trailers.RemoveAll();
198 if (m_pLinearized) {
199 m_pLinearized->Release();
200 m_pLinearized = NULL;
201 }
202 }
203 CPDF_SecurityHandler* FPDF_CreateStandardSecurityHandler();
204 CPDF_SecurityHandler* FPDF_CreatePubKeyHandler(void*);
// Parses the PDF file exposed by |pFileAccess|.
//
// The sequence is: reset previous state, locate the header, read the version
// digits, load the cross-reference data referenced by "startxref" (falling
// back to RebuildCrossRef() when that fails), set up encryption and load the
// document. When the loaded document has no root or no pages, the
// cross-reference data is rebuilt once and loading is retried.
//
// |bReParse|     - when true the existing |m_pDocument| is kept and reused.
// |bOwnFileRead| - when true the parser takes ownership of |pFileAccess|.
//
// Returns PDFPARSE_ERROR_SUCCESS, PDFPARSE_ERROR_FORMAT, or whatever error
// SetEncryptHandler() reports.
FX_DWORD CPDF_Parser::StartParse(IFX_FileRead* pFileAccess,
                                 FX_BOOL bReParse,
                                 FX_BOOL bOwnFileRead) {
  CloseParser(bReParse);
  m_bXRefStream = FALSE;
  m_LastXRefOffset = 0;
  m_bOwnFileRead = bOwnFileRead;

  int32_t offset = GetHeaderOffset(pFileAccess);
  if (offset == -1) {
    // The file has not been handed to |m_Syntax| yet, so release it here when
    // the parser was meant to own it.
    if (bOwnFileRead && pFileAccess)
      pFileAccess->Release();
    return PDFPARSE_ERROR_FORMAT;
  }
  m_Syntax.InitParser(pFileAccess, offset);

  // Fold the characters at positions 5 and 7 into |m_FileVersion| as
  // first * 10 + second -- presumably the major and minor digits of a
  // "%PDF-M.m" header.
  uint8_t ch;
  if (!m_Syntax.GetCharAt(5, ch))
    return PDFPARSE_ERROR_FORMAT;
  if (std::isdigit(ch))
    m_FileVersion = FXSYS_toDecimalDigit(ch) * 10;

  if (!m_Syntax.GetCharAt(7, ch))
    return PDFPARSE_ERROR_FORMAT;
  if (std::isdigit(ch))
    m_FileVersion += FXSYS_toDecimalDigit(ch);

  if (m_Syntax.m_FileLen < m_Syntax.m_HeaderOffset + 9)
    return PDFPARSE_ERROR_FORMAT;

  // Position 9 bytes before the end of the data and look for "startxref".
  // NOTE(review): the TRUE/FALSE/4096 arguments are presumably "whole word",
  // "search backwards" and a byte limit -- confirm against SearchWord().
  m_Syntax.RestorePos(m_Syntax.m_FileLen - m_Syntax.m_HeaderOffset - 9);
  if (!bReParse)
    m_pDocument = new CPDF_Document(this);

  FX_BOOL bXRefRebuilt = FALSE;
  if (m_Syntax.SearchWord("startxref", TRUE, FALSE, 4096)) {
    FX_FILESIZE startxref_offset = m_Syntax.SavePos();
    void* pResult = FXSYS_bsearch(&startxref_offset, m_SortedOffset.GetData(),
                                  m_SortedOffset.GetSize(), sizeof(FX_FILESIZE),
                                  CompareFileSize);
    if (!pResult)
      m_SortedOffset.Add(startxref_offset);

    // Skip the keyword itself; the next word must be the numeric offset.
    m_Syntax.GetKeyword();
    bool bNumber;
    CFX_ByteString xrefpos_str = m_Syntax.GetNextWord(&bNumber);
    if (!bNumber)
      return PDFPARSE_ERROR_FORMAT;

    // Try a classic table first, then a cross-reference stream, and only then
    // rebuild by scanning the file.
    m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str);
    if (!LoadAllCrossRefV4(m_LastXRefOffset) &&
        !LoadAllCrossRefV5(m_LastXRefOffset)) {
      if (!RebuildCrossRef())
        return PDFPARSE_ERROR_FORMAT;

      bXRefRebuilt = TRUE;
      m_LastXRefOffset = 0;
    }
  } else {
    if (!RebuildCrossRef())
      return PDFPARSE_ERROR_FORMAT;

    bXRefRebuilt = TRUE;
  }
  FX_DWORD dwRet = SetEncryptHandler();
  if (dwRet != PDFPARSE_ERROR_SUCCESS)
    return dwRet;

  m_pDocument->LoadDoc();
  if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
    // A rebuild has already been tried; nothing more can be done.
    if (bXRefRebuilt)
      return PDFPARSE_ERROR_FORMAT;

    ReleaseEncryptHandler();
    if (!RebuildCrossRef())
      return PDFPARSE_ERROR_FORMAT;

    dwRet = SetEncryptHandler();
    if (dwRet != PDFPARSE_ERROR_SUCCESS)
      return dwRet;

    m_pDocument->LoadDoc();
    if (!m_pDocument->GetRoot())
      return PDFPARSE_ERROR_FORMAT;
  }
  // Offsets were appended in discovery order; sort them now.
  FXSYS_qsort(m_SortedOffset.GetData(), m_SortedOffset.GetSize(),
              sizeof(FX_FILESIZE), CompareFileSize);
  if (GetRootObjNum() == 0) {
    ReleaseEncryptHandler();
    if (!RebuildCrossRef() || GetRootObjNum() == 0)
      return PDFPARSE_ERROR_FORMAT;

    dwRet = SetEncryptHandler();
    if (dwRet != PDFPARSE_ERROR_SUCCESS)
      return dwRet;
  }
  // When the security handler reports that metadata is not encrypted, record
  // the metadata object number on the syntax parser.
  if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
    CPDF_Reference* pMetadata =
        ToReference(m_pDocument->GetRoot()->GetElement("Metadata"));
    if (pMetadata)
      m_Syntax.m_MetadataObjnum = pMetadata->GetRefObjNum();
  }
  return PDFPARSE_ERROR_SUCCESS;
}
SetEncryptHandler()309 FX_DWORD CPDF_Parser::SetEncryptHandler() {
310 ReleaseEncryptHandler();
311 SetEncryptDictionary(NULL);
312 if (!m_pTrailer) {
313 return PDFPARSE_ERROR_FORMAT;
314 }
315 CPDF_Object* pEncryptObj = m_pTrailer->GetElement("Encrypt");
316 if (pEncryptObj) {
317 if (CPDF_Dictionary* pEncryptDict = pEncryptObj->AsDictionary()) {
318 SetEncryptDictionary(pEncryptDict);
319 } else if (CPDF_Reference* pRef = pEncryptObj->AsReference()) {
320 pEncryptObj =
321 m_pDocument->GetIndirectObject(pRef->GetRefObjNum(), nullptr);
322 if (pEncryptObj)
323 SetEncryptDictionary(pEncryptObj->GetDict());
324 }
325 }
326 if (m_bForceUseSecurityHandler) {
327 FX_DWORD err = PDFPARSE_ERROR_HANDLER;
328 if (!m_pSecurityHandler) {
329 return PDFPARSE_ERROR_HANDLER;
330 }
331 if (!m_pSecurityHandler->OnInit(this, m_pEncryptDict)) {
332 return err;
333 }
334 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler(
335 m_pSecurityHandler->CreateCryptoHandler());
336 if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get())) {
337 return PDFPARSE_ERROR_HANDLER;
338 }
339 m_Syntax.SetEncrypt(pCryptoHandler.release());
340 } else if (m_pEncryptDict) {
341 CFX_ByteString filter = m_pEncryptDict->GetString("Filter");
342 std::unique_ptr<CPDF_SecurityHandler> pSecurityHandler;
343 FX_DWORD err = PDFPARSE_ERROR_HANDLER;
344 if (filter == "Standard") {
345 pSecurityHandler.reset(FPDF_CreateStandardSecurityHandler());
346 err = PDFPARSE_ERROR_PASSWORD;
347 }
348 if (!pSecurityHandler) {
349 return PDFPARSE_ERROR_HANDLER;
350 }
351 if (!pSecurityHandler->OnInit(this, m_pEncryptDict)) {
352 return err;
353 }
354 m_pSecurityHandler = std::move(pSecurityHandler);
355 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler(
356 m_pSecurityHandler->CreateCryptoHandler());
357 if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get())) {
358 return PDFPARSE_ERROR_HANDLER;
359 }
360 m_Syntax.SetEncrypt(pCryptoHandler.release());
361 }
362 return PDFPARSE_ERROR_SUCCESS;
363 }
ReleaseEncryptHandler()364 void CPDF_Parser::ReleaseEncryptHandler() {
365 m_Syntax.m_pCryptoHandler.reset();
366 if (!m_bForceUseSecurityHandler) {
367 m_pSecurityHandler.reset();
368 }
369 }
370
GetObjectOffset(FX_DWORD objnum) const371 FX_FILESIZE CPDF_Parser::GetObjectOffset(FX_DWORD objnum) const {
372 if (!IsValidObjectNumber(objnum))
373 return 0;
374
375 if (m_V5Type[objnum] == 1)
376 return GetObjectPositionOrZero(objnum);
377
378 if (m_V5Type[objnum] == 2) {
379 FX_FILESIZE pos = GetObjectPositionOrZero(objnum);
380 return GetObjectPositionOrZero(pos);
381 }
382 return 0;
383 }
384
LoadAllCrossRefV4(FX_FILESIZE xrefpos)385 FX_BOOL CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) {
386 if (!LoadCrossRefV4(xrefpos, 0, TRUE)) {
387 return FALSE;
388 }
389 m_pTrailer = LoadTrailerV4();
390 if (!m_pTrailer) {
391 return FALSE;
392 }
393
394 int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size");
395 if (xrefsize <= 0 || xrefsize > kMaxXRefSize) {
396 return FALSE;
397 }
398 ShrinkObjectMap(xrefsize);
399 m_V5Type.SetSize(xrefsize);
400 CFX_FileSizeArray CrossRefList;
401 CFX_FileSizeArray XRefStreamList;
402 CrossRefList.Add(xrefpos);
403 XRefStreamList.Add(GetDirectInteger(m_pTrailer, "XRefStm"));
404
405 std::set<FX_FILESIZE> seen_xrefpos;
406 seen_xrefpos.insert(xrefpos);
407 // When |m_pTrailer| doesn't have Prev entry or Prev entry value is not
408 // numerical, GetDirectInteger() returns 0. Loading will end.
409 xrefpos = GetDirectInteger(m_pTrailer, "Prev");
410 while (xrefpos) {
411 // Check for circular references.
412 if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
413 return FALSE;
414 seen_xrefpos.insert(xrefpos);
415 CrossRefList.InsertAt(0, xrefpos);
416 LoadCrossRefV4(xrefpos, 0, TRUE);
417 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
418 LoadTrailerV4());
419 if (!pDict)
420 return FALSE;
421 xrefpos = GetDirectInteger(pDict.get(), "Prev");
422
423 XRefStreamList.InsertAt(0, pDict->GetInteger("XRefStm"));
424 m_Trailers.Add(pDict.release());
425 }
426 for (int32_t i = 0; i < CrossRefList.GetSize(); i++) {
427 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE))
428 return FALSE;
429 }
430 return TRUE;
431 }
LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos,FX_DWORD dwObjCount)432 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos,
433 FX_DWORD dwObjCount) {
434 if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount)) {
435 return FALSE;
436 }
437 m_pTrailer = LoadTrailerV4();
438 if (!m_pTrailer) {
439 return FALSE;
440 }
441 int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size");
442 if (xrefsize == 0) {
443 return FALSE;
444 }
445 CFX_FileSizeArray CrossRefList, XRefStreamList;
446 CrossRefList.Add(xrefpos);
447 XRefStreamList.Add(GetDirectInteger(m_pTrailer, "XRefStm"));
448
449 std::set<FX_FILESIZE> seen_xrefpos;
450 seen_xrefpos.insert(xrefpos);
451 xrefpos = GetDirectInteger(m_pTrailer, "Prev");
452 while (xrefpos) {
453 // Check for circular references.
454 if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
455 return FALSE;
456 seen_xrefpos.insert(xrefpos);
457 CrossRefList.InsertAt(0, xrefpos);
458 LoadCrossRefV4(xrefpos, 0, TRUE);
459 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
460 LoadTrailerV4());
461 if (!pDict) {
462 return FALSE;
463 }
464 xrefpos = GetDirectInteger(pDict.get(), "Prev");
465
466 XRefStreamList.InsertAt(0, pDict->GetInteger("XRefStm"));
467 m_Trailers.Add(pDict.release());
468 }
469 for (int32_t i = 1; i < CrossRefList.GetSize(); i++)
470 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE)) {
471 return FALSE;
472 }
473 return TRUE;
474 }
LoadLinearizedCrossRefV4(FX_FILESIZE pos,FX_DWORD dwObjCount)475 FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos,
476 FX_DWORD dwObjCount) {
477 FX_FILESIZE dwStartPos = pos - m_Syntax.m_HeaderOffset;
478 m_Syntax.RestorePos(dwStartPos);
479 void* pResult =
480 FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(),
481 sizeof(FX_FILESIZE), CompareFileSize);
482 if (!pResult) {
483 m_SortedOffset.Add(pos);
484 }
485 FX_DWORD start_objnum = 0;
486 FX_DWORD count = dwObjCount;
487 FX_FILESIZE SavedPos = m_Syntax.SavePos();
488 const int32_t recordsize = 20;
489 std::vector<char> buf(1024 * recordsize + 1);
490 buf[1024 * recordsize] = '\0';
491 int32_t nBlocks = count / 1024 + 1;
492 for (int32_t block = 0; block < nBlocks; block++) {
493 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
494 FX_DWORD dwReadSize = block_size * recordsize;
495 if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_Syntax.m_FileLen) {
496 return FALSE;
497 }
498 if (!m_Syntax.ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
499 dwReadSize)) {
500 return FALSE;
501 }
502 for (int32_t i = 0; i < block_size; i++) {
503 FX_DWORD objnum = start_objnum + block * 1024 + i;
504 char* pEntry = &buf[i * recordsize];
505 if (pEntry[17] == 'f') {
506 m_ObjectInfo[objnum].pos = 0;
507 m_V5Type.SetAtGrow(objnum, 0);
508 } else {
509 int32_t offset = FXSYS_atoi(pEntry);
510 if (offset == 0) {
511 for (int32_t c = 0; c < 10; c++) {
512 if (!std::isdigit(pEntry[c]))
513 return FALSE;
514 }
515 }
516 m_ObjectInfo[objnum].pos = offset;
517 int32_t version = FXSYS_atoi(pEntry + 11);
518 if (version >= 1) {
519 m_bVersionUpdated = TRUE;
520 }
521 m_ObjVersion.SetAtGrow(objnum, version);
522 if (m_ObjectInfo[objnum].pos < m_Syntax.m_FileLen) {
523 void* pResult = FXSYS_bsearch(
524 &m_ObjectInfo[objnum].pos, m_SortedOffset.GetData(),
525 m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), CompareFileSize);
526 if (!pResult) {
527 m_SortedOffset.Add(m_ObjectInfo[objnum].pos);
528 }
529 }
530 m_V5Type.SetAtGrow(objnum, 1);
531 }
532 }
533 }
534 m_Syntax.RestorePos(SavedPos + count * recordsize);
535 return TRUE;
536 }
537
FindPosInOffsets(FX_FILESIZE pos) const538 bool CPDF_Parser::FindPosInOffsets(FX_FILESIZE pos) const {
539 return FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(),
540 sizeof(FX_FILESIZE), CompareFileSize);
541 }
542
// Parses the classic cross-reference table located at |pos|.
//
// The table must start with the keyword "xref" and is followed by any number
// of subsections, each introduced by "<first object number> <count>" and
// holding |count| fixed-width (20 byte) records. Parsing stops at the first
// word that is not a number; the parser is repositioned at that word.
//
// |streampos| - when non-zero, the cross-reference stream at that position is
//               loaded afterwards through LoadCrossRefV5().
// |bSkip|     - when set, the records are stepped over without being read.
//
// Returns false when the keyword is missing, input ends prematurely, a
// subsection starts at or beyond kMaxObjectNumber, an in-use record has a
// malformed zero offset, or the trailing stream fails to load.
bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,
                                 FX_FILESIZE streampos,
                                 FX_BOOL bSkip) {
  m_Syntax.RestorePos(pos);
  if (m_Syntax.GetKeyword() != "xref")
    return false;

  if (!FindPosInOffsets(pos))
    m_SortedOffset.Add(pos);

  if (streampos && !FindPosInOffsets(streampos))
    m_SortedOffset.Add(streampos);

  while (1) {
    FX_FILESIZE SavedPos = m_Syntax.SavePos();
    bool bIsNumber;
    CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber);
    if (word.IsEmpty())
      return false;

    // A non-numeric word ends the table; rewind so the caller sees that word.
    if (!bIsNumber) {
      m_Syntax.RestorePos(SavedPos);
      break;
    }
    FX_DWORD start_objnum = FXSYS_atoi(word);
    if (start_objnum >= kMaxObjectNumber)
      return false;

    // NOTE(review): |count| comes straight from the file and is not bounded
    // here; |count * recordsize| below is computed in FX_DWORD arithmetic --
    // confirm that wrap-around is acceptable.
    FX_DWORD count = m_Syntax.GetDirectNum();
    m_Syntax.ToNextWord();
    SavedPos = m_Syntax.SavePos();
    const int32_t recordsize = 20;
    m_dwXrefStartObjNum = start_objnum;
    if (!bSkip) {
      // Records are read in blocks of up to 1024; the extra byte keeps the
      // buffer NUL-terminated for the string-to-number conversions.
      std::vector<char> buf(1024 * recordsize + 1);
      buf[1024 * recordsize] = '\0';
      int32_t nBlocks = count / 1024 + 1;
      for (int32_t block = 0; block < nBlocks; block++) {
        int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
        // NOTE(review): the result of ReadBlock() is ignored here, unlike in
        // LoadLinearizedCrossRefV4().
        m_Syntax.ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
                           block_size * recordsize);
        for (int32_t i = 0; i < block_size; i++) {
          FX_DWORD objnum = start_objnum + block * 1024 + i;
          char* pEntry = &buf[i * recordsize];
          // Byte 17 of a record is its type flag; 'f' marks a free entry.
          if (pEntry[17] == 'f') {
            m_ObjectInfo[objnum].pos = 0;
            m_V5Type.SetAtGrow(objnum, 0);
          } else {
            FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);
            // A zero offset is only accepted when it is spelled as ten
            // digits; otherwise the record is treated as corrupt.
            if (offset == 0) {
              for (int32_t c = 0; c < 10; c++) {
                if (!std::isdigit(pEntry[c]))
                  return false;
              }
            }
            m_ObjectInfo[objnum].pos = offset;
            // The generation number starts at byte 11 of the record.
            int32_t version = FXSYS_atoi(pEntry + 11);
            if (version >= 1) {
              m_bVersionUpdated = TRUE;
            }
            m_ObjVersion.SetAtGrow(objnum, version);
            if (m_ObjectInfo[objnum].pos < m_Syntax.m_FileLen &&
                !FindPosInOffsets(m_ObjectInfo[objnum].pos)) {
              m_SortedOffset.Add(m_ObjectInfo[objnum].pos);
            }
            m_V5Type.SetAtGrow(objnum, 1);
          }
        }
      }
    }
    // Step over the whole subsection, whether or not it was read.
    m_Syntax.RestorePos(SavedPos + count * recordsize);
  }
  return !streampos || LoadCrossRefV5(&streampos, FALSE);
}
617
LoadAllCrossRefV5(FX_FILESIZE xrefpos)618 FX_BOOL CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) {
619 if (!LoadCrossRefV5(&xrefpos, TRUE)) {
620 return FALSE;
621 }
622 std::set<FX_FILESIZE> seen_xrefpos;
623 while (xrefpos) {
624 seen_xrefpos.insert(xrefpos);
625 if (!LoadCrossRefV5(&xrefpos, FALSE)) {
626 return FALSE;
627 }
628 // Check for circular references.
629 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) {
630 return FALSE;
631 }
632 }
633 m_ObjectStreamMap.InitHashTable(101, FALSE);
634 m_bXRefStream = TRUE;
635 return TRUE;
636 }
637
// Rebuilds the cross-reference data by scanning the whole file.
//
// All existing cross-reference bookkeeping and the current trailer are
// discarded. The file is then read in 4096-byte chunks and fed through a
// byte-level state machine that recognises "<objnum> <gennum> obj" headers,
// the "trailer" keyword and the "xref" keyword.
//
// State machine (|status|):
//    0  idle; classifies the current byte and picks the next state.
//    1  skipping whitespace before a token.
//    2  accumulating the digits of an object number.
//    3  expecting the first digit of a generation number.
//    4  accumulating the digits of a generation number.
//    5  expecting the 'o' of "obj".
//    6  matching "bj" and the delimiter that ends "obj".
//    7  matching "trailer".
//    8  matching "xref".
//    9  inside a '%' comment, until end of line.
//   10  inside a '(' string, tracking parenthesis depth.
//   11  just after '<'.
//   13  after a backslash, until a delimiter or whitespace.
//   14  after a number that ran into a non-whitespace byte; also matches
//       "endobj".
// |inside_index| is the number of keyword characters matched so far in the
// keyword states. "--i" re-processes the current byte in the new state; |i|
// is unsigned, so when it is 0 it wraps and the loop increment restores it.
//
// Returns true when a trailer was found and at least one object recorded.
FX_BOOL CPDF_Parser::RebuildCrossRef() {
  m_ObjectInfo.clear();
  m_V5Type.RemoveAll();
  m_SortedOffset.RemoveAll();
  m_ObjVersion.RemoveAll();
  if (m_pTrailer) {
    m_pTrailer->Release();
    m_pTrailer = NULL;
  }
  int32_t status = 0;
  int32_t inside_index = 0;
  FX_DWORD objnum = 0;
  FX_DWORD gennum = 0;
  int32_t depth = 0;
  const FX_DWORD kBufferSize = 4096;
  std::vector<uint8_t> buffer(kBufferSize);
  FX_FILESIZE pos = m_Syntax.m_HeaderOffset;
  // File positions of the first digit of the candidate object number and of
  // the candidate generation number.
  FX_FILESIZE start_pos = 0;
  FX_FILESIZE start_pos1 = 0;
  // File positions of the last object header, "xref" keyword and "trailer"
  // keyword seen; -1 means none seen yet.
  FX_FILESIZE last_obj = -1;
  FX_FILESIZE last_xref = -1;
  FX_FILESIZE last_trailer = -1;
  while (pos < m_Syntax.m_FileLen) {
    const FX_FILESIZE saved_pos = pos;
    // Set when a parsed object extends past the current chunk; scanning then
    // resumes from the end of that object.
    bool bOverFlow = false;
    FX_DWORD size = std::min((FX_DWORD)(m_Syntax.m_FileLen - pos), kBufferSize);
    if (!m_Syntax.m_pFileAccess->ReadBlock(buffer.data(), pos, size))
      break;

    for (FX_DWORD i = 0; i < size; i++) {
      uint8_t byte = buffer[i];
      switch (status) {
        case 0:
          // The checks below are independent ifs; at most one matches a byte.
          if (PDFCharIsWhitespace(byte))
            status = 1;

          if (std::isdigit(byte)) {
            --i;
            status = 1;
          }

          if (byte == '%') {
            inside_index = 0;
            status = 9;
          }

          if (byte == '(') {
            status = 10;
            depth = 1;
          }

          if (byte == '<') {
            inside_index = 1;
            status = 11;
          }

          if (byte == '\\')
            status = 13;

          if (byte == 't') {
            status = 7;
            inside_index = 1;
          }
          break;
        case 1:
          if (PDFCharIsWhitespace(byte)) {
            break;
          } else if (std::isdigit(byte)) {
            start_pos = pos + i;
            status = 2;
            objnum = FXSYS_toDecimalDigit(byte);
          } else if (byte == 't') {
            status = 7;
            inside_index = 1;
          } else if (byte == 'x') {
            status = 8;
            inside_index = 1;
          } else {
            --i;
            status = 0;
          }
          break;
        case 2:
          if (std::isdigit(byte)) {
            objnum = objnum * 10 + FXSYS_toDecimalDigit(byte);
            break;
          } else if (PDFCharIsWhitespace(byte)) {
            status = 3;
          } else {
            --i;
            status = 14;
            inside_index = 0;
          }
          break;
        case 3:
          if (std::isdigit(byte)) {
            start_pos1 = pos + i;
            status = 4;
            gennum = FXSYS_toDecimalDigit(byte);
          } else if (PDFCharIsWhitespace(byte)) {
            break;
          } else if (byte == 't') {
            status = 7;
            inside_index = 1;
          } else {
            --i;
            status = 0;
          }
          break;
        case 4:
          if (std::isdigit(byte)) {
            gennum = gennum * 10 + FXSYS_toDecimalDigit(byte);
            break;
          } else if (PDFCharIsWhitespace(byte)) {
            status = 5;
          } else {
            --i;
            status = 0;
          }
          break;
        case 5:
          if (byte == 'o') {
            status = 6;
            inside_index = 1;
          } else if (PDFCharIsWhitespace(byte)) {
            break;
          } else if (std::isdigit(byte)) {
            // A third number follows: shift the window so the previous
            // generation candidate becomes the object number candidate.
            objnum = gennum;
            gennum = FXSYS_toDecimalDigit(byte);
            start_pos = start_pos1;
            start_pos1 = pos + i;
            status = 4;
          } else if (byte == 't') {
            status = 7;
            inside_index = 1;
          } else {
            --i;
            status = 0;
          }
          break;
        case 6:
          switch (inside_index) {
            case 1:
              if (byte != 'b') {
                --i;
                status = 0;
              } else {
                inside_index++;
              }
              break;
            case 2:
              if (byte != 'j') {
                --i;
                status = 0;
              } else {
                inside_index++;
              }
              break;
            case 3:
              // "obj" is complete once it is followed by whitespace or a
              // delimiter.
              if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
                // Implausibly large object numbers are ignored.
                if (objnum > 0x1000000) {
                  status = 0;
                  break;
                }
                FX_FILESIZE obj_pos = start_pos - m_Syntax.m_HeaderOffset;
                last_obj = start_pos;
                void* pResult =
                    FXSYS_bsearch(&obj_pos, m_SortedOffset.GetData(),
                                  m_SortedOffset.GetSize(), sizeof(FX_FILESIZE),
                                  CompareFileSize);
                if (!pResult) {
                  m_SortedOffset.Add(obj_pos);
                }
                FX_FILESIZE obj_end = 0;
                CPDF_Object* pObject = ParseIndirectObjectAtByStrict(
                    m_pDocument, obj_pos, objnum, NULL, &obj_end);
                // A cross-reference stream whose dictionary has a "Root" with
                // a "Pages" entry doubles as the trailer.
                if (CPDF_Stream* pStream = ToStream(pObject)) {
                  if (CPDF_Dictionary* pDict = pStream->GetDict()) {
                    if ((pDict->KeyExist("Type")) &&
                        (pDict->GetString("Type") == "XRef" &&
                         pDict->KeyExist("Size"))) {
                      CPDF_Object* pRoot = pDict->GetElement("Root");
                      if (pRoot && pRoot->GetDict() &&
                          pRoot->GetDict()->GetElement("Pages")) {
                        if (m_pTrailer)
                          m_pTrailer->Release();
                        m_pTrailer = ToDictionary(pDict->Clone());
                      }
                    }
                  }
                }
                FX_FILESIZE offset = 0;
                m_Syntax.RestorePos(obj_pos);
                offset = m_Syntax.FindTag("obj", 0);
                if (offset == -1) {
                  offset = 0;
                } else {
                  offset += 3;
                }
                // Skip the body of the object that was just parsed: either
                // advance |i| within this chunk or restart reading at the
                // object's end.
                FX_FILESIZE nLen = obj_end - obj_pos - offset;
                if ((FX_DWORD)nLen > size - i) {
                  pos = obj_end + m_Syntax.m_HeaderOffset;
                  bOverFlow = true;
                } else {
                  i += (FX_DWORD)nLen;
                }
                // An object number that already has a position is only
                // overwritten when this occurrence parsed successfully.
                if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) &&
                    m_ObjectInfo[objnum].pos) {
                  if (pObject) {
                    FX_DWORD oldgen = m_ObjVersion.GetAt(objnum);
                    m_ObjectInfo[objnum].pos = obj_pos;
                    m_ObjVersion.SetAt(objnum, (int16_t)gennum);
                    if (oldgen != gennum) {
                      m_bVersionUpdated = TRUE;
                    }
                  }
                } else {
                  m_ObjectInfo[objnum].pos = obj_pos;
                  m_V5Type.SetAtGrow(objnum, 1);
                  m_ObjVersion.SetAtGrow(objnum, (int16_t)gennum);
                }
                if (pObject) {
                  pObject->Release();
                }
              }
              --i;
              status = 0;
              break;
          }
          break;
        case 7:
          if (inside_index == 7) {
            // All seven characters of "trailer" matched; it counts only when
            // followed by whitespace or a delimiter.
            if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
              last_trailer = pos + i - 7;
              m_Syntax.RestorePos(pos + i - m_Syntax.m_HeaderOffset);
              CPDF_Object* pObj =
                  m_Syntax.GetObject(m_pDocument, 0, 0, nullptr, true);
              if (pObj) {
                if (!pObj->IsDictionary() && !pObj->AsStream()) {
                  pObj->Release();
                } else {
                  CPDF_Stream* pStream = pObj->AsStream();
                  if (CPDF_Dictionary* pTrailer =
                          pStream ? pStream->GetDict() : pObj->AsDictionary()) {
                    if (m_pTrailer) {
                      // A trailer already exists: merge this one's entries
                      // into it, but only when it has no "Root" or its "Root"
                      // refers to an object with a recorded position.
                      CPDF_Object* pRoot = pTrailer->GetElement("Root");
                      CPDF_Reference* pRef = ToReference(pRoot);
                      if (!pRoot ||
                          (pRef && IsValidObjectNumber(pRef->GetRefObjNum()) &&
                           m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) {
                        auto it = pTrailer->begin();
                        while (it != pTrailer->end()) {
                          const CFX_ByteString& key = it->first;
                          CPDF_Object* pElement = it->second;
                          ++it;
                          FX_DWORD dwObjNum =
                              pElement ? pElement->GetObjNum() : 0;
                          if (dwObjNum) {
                            m_pTrailer->SetAtReference(key, m_pDocument,
                                                       dwObjNum);
                          } else {
                            // NOTE(review): |pElement| is dereferenced here
                            // although the line above allows it to be null.
                            m_pTrailer->SetAt(key, pElement->Clone());
                          }
                        }
                        pObj->Release();
                      } else {
                        pObj->Release();
                      }
                    } else {
                      // First trailer found: adopt it. A stream's dictionary
                      // is cloned; a plain dictionary is kept as is.
                      if (pObj->IsStream()) {
                        m_pTrailer = ToDictionary(pTrailer->Clone());
                        pObj->Release();
                      } else {
                        m_pTrailer = pTrailer;
                      }
                      FX_FILESIZE dwSavePos = m_Syntax.SavePos();
                      CFX_ByteString strWord = m_Syntax.GetKeyword();
                      if (!strWord.Compare("startxref")) {
                        bool bNumber;
                        CFX_ByteString bsOffset =
                            m_Syntax.GetNextWord(&bNumber);
                        if (bNumber) {
                          m_LastXRefOffset = FXSYS_atoi(bsOffset);
                        }
                      }
                      m_Syntax.RestorePos(dwSavePos);
                    }
                  } else {
                    pObj->Release();
                  }
                }
              }
            }
            --i;
            status = 0;
          } else if (byte == "trailer"[inside_index]) {
            inside_index++;
          } else {
            --i;
            status = 0;
          }
          break;
        case 8:
          if (inside_index == 4) {
            last_xref = pos + i - 4;
            status = 1;
          } else if (byte == "xref"[inside_index]) {
            inside_index++;
          } else {
            --i;
            status = 0;
          }
          break;
        case 9:
          if (byte == '\r' || byte == '\n') {
            status = 0;
          }
          break;
        case 10:
          if (byte == ')') {
            if (depth > 0) {
              depth--;
            }
          } else if (byte == '(') {
            depth++;
          }
          if (!depth) {
            status = 0;
          }
          break;
        case 11:
          if (byte == '>' || (byte == '<' && inside_index == 1))
            status = 0;
          inside_index = 0;
          break;
        case 13:
          if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) {
            --i;
            status = 0;
          }
          break;
        case 14:
          if (PDFCharIsWhitespace(byte)) {
            status = 0;
          } else if (byte == '%' || byte == '(' || byte == '<' ||
                     byte == '\\') {
            status = 0;
            --i;
          } else if (inside_index == 6) {
            status = 0;
            --i;
          } else if (byte == "endobj"[inside_index]) {
            inside_index++;
          }
          break;
      }
      if (bOverFlow) {
        // |pos| was already moved to the end of the parsed object; do not
        // advance it again below.
        size = 0;
        break;
      }
    }
    pos += size;

    // If the position has not changed at all in a loop iteration, then break
    // out to prevent infinite looping.
    if (pos == saved_pos)
      break;
  }
  // Pick the position that closes the last object -- the trailing "xref",
  // the last "trailer", or the end of the file -- and record it as an offset.
  if (last_xref != -1 && last_xref > last_obj) {
    last_trailer = last_xref;
  } else if (last_trailer == -1 || last_xref < last_obj) {
    last_trailer = m_Syntax.m_FileLen;
  }
  FX_FILESIZE offset = last_trailer - m_Syntax.m_HeaderOffset;
  void* pResult =
      FXSYS_bsearch(&offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(),
                    sizeof(FX_FILESIZE), CompareFileSize);
  if (!pResult) {
    m_SortedOffset.Add(offset);
  }
  return m_pTrailer && !m_ObjectInfo.empty();
}
1020
LoadCrossRefV5(FX_FILESIZE * pos,FX_BOOL bMainXRef)1021 FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef) {
1022 CPDF_Object* pObject = ParseIndirectObjectAt(m_pDocument, *pos, 0, nullptr);
1023 if (!pObject)
1024 return FALSE;
1025 if (m_pDocument) {
1026 FX_BOOL bInserted = FALSE;
1027 CPDF_Dictionary* pDict = m_pDocument->GetRoot();
1028 if (!pDict || pDict->GetObjNum() != pObject->m_ObjNum) {
1029 bInserted = m_pDocument->InsertIndirectObject(pObject->m_ObjNum, pObject);
1030 } else {
1031 if (pObject->IsStream())
1032 pObject->Release();
1033 }
1034 if (!bInserted)
1035 return FALSE;
1036 }
1037
1038 CPDF_Stream* pStream = pObject->AsStream();
1039 if (!pStream)
1040 return FALSE;
1041
1042 *pos = pStream->GetDict()->GetInteger("Prev");
1043 int32_t size = pStream->GetDict()->GetInteger("Size");
1044 if (size < 0) {
1045 pStream->Release();
1046 return FALSE;
1047 }
1048 if (bMainXRef) {
1049 m_pTrailer = ToDictionary(pStream->GetDict()->Clone());
1050 ShrinkObjectMap(size);
1051 if (m_V5Type.SetSize(size)) {
1052 FXSYS_memset(m_V5Type.GetData(), 0, size);
1053 }
1054 } else {
1055 m_Trailers.Add(ToDictionary(pStream->GetDict()->Clone()));
1056 }
1057 std::vector<std::pair<int32_t, int32_t> > arrIndex;
1058 CPDF_Array* pArray = pStream->GetDict()->GetArray("Index");
1059 if (pArray) {
1060 FX_DWORD nPairSize = pArray->GetCount() / 2;
1061 for (FX_DWORD i = 0; i < nPairSize; i++) {
1062 CPDF_Object* pStartNumObj = pArray->GetElement(i * 2);
1063 CPDF_Object* pCountObj = pArray->GetElement(i * 2 + 1);
1064 if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) {
1065 int nStartNum = pStartNumObj->GetInteger();
1066 int nCount = pCountObj->GetInteger();
1067 if (nStartNum >= 0 && nCount > 0) {
1068 arrIndex.push_back(std::make_pair(nStartNum, nCount));
1069 }
1070 }
1071 }
1072 }
1073 if (arrIndex.size() == 0) {
1074 arrIndex.push_back(std::make_pair(0, size));
1075 }
1076 pArray = pStream->GetDict()->GetArray("W");
1077 if (!pArray) {
1078 pStream->Release();
1079 return FALSE;
1080 }
1081 CFX_DWordArray WidthArray;
1082 FX_SAFE_DWORD dwAccWidth = 0;
1083 for (FX_DWORD i = 0; i < pArray->GetCount(); i++) {
1084 WidthArray.Add(pArray->GetInteger(i));
1085 dwAccWidth += WidthArray[i];
1086 }
1087 if (!dwAccWidth.IsValid() || WidthArray.GetSize() < 3) {
1088 pStream->Release();
1089 return FALSE;
1090 }
1091 FX_DWORD totalWidth = dwAccWidth.ValueOrDie();
1092 CPDF_StreamAcc acc;
1093 acc.LoadAllData(pStream);
1094 const uint8_t* pData = acc.GetData();
1095 FX_DWORD dwTotalSize = acc.GetSize();
1096 FX_DWORD segindex = 0;
1097 for (FX_DWORD i = 0; i < arrIndex.size(); i++) {
1098 int32_t startnum = arrIndex[i].first;
1099 if (startnum < 0) {
1100 continue;
1101 }
1102 m_dwXrefStartObjNum =
1103 pdfium::base::checked_cast<FX_DWORD, int32_t>(startnum);
1104 FX_DWORD count =
1105 pdfium::base::checked_cast<FX_DWORD, int32_t>(arrIndex[i].second);
1106 FX_SAFE_DWORD dwCaculatedSize = segindex;
1107 dwCaculatedSize += count;
1108 dwCaculatedSize *= totalWidth;
1109 if (!dwCaculatedSize.IsValid() ||
1110 dwCaculatedSize.ValueOrDie() > dwTotalSize) {
1111 continue;
1112 }
1113 const uint8_t* segstart = pData + segindex * totalWidth;
1114 FX_SAFE_DWORD dwMaxObjNum = startnum;
1115 dwMaxObjNum += count;
1116 FX_DWORD dwV5Size =
1117 pdfium::base::checked_cast<FX_DWORD, int32_t>(m_V5Type.GetSize());
1118 if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size) {
1119 continue;
1120 }
1121 for (FX_DWORD j = 0; j < count; j++) {
1122 int32_t type = 1;
1123 const uint8_t* entrystart = segstart + j * totalWidth;
1124 if (WidthArray[0]) {
1125 type = GetVarInt(entrystart, WidthArray[0]);
1126 }
1127 if (m_V5Type[startnum + j] == 255) {
1128 FX_FILESIZE offset =
1129 GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
1130 m_ObjectInfo[startnum + j].pos = offset;
1131 void* pResult = FXSYS_bsearch(&offset, m_SortedOffset.GetData(),
1132 m_SortedOffset.GetSize(),
1133 sizeof(FX_FILESIZE), CompareFileSize);
1134 if (!pResult) {
1135 m_SortedOffset.Add(offset);
1136 }
1137 continue;
1138 }
1139 if (m_V5Type[startnum + j]) {
1140 continue;
1141 }
1142 m_V5Type[startnum + j] = type;
1143 if (type == 0) {
1144 m_ObjectInfo[startnum + j].pos = 0;
1145 } else {
1146 FX_FILESIZE offset =
1147 GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
1148 m_ObjectInfo[startnum + j].pos = offset;
1149 if (type == 1) {
1150 void* pResult = FXSYS_bsearch(&offset, m_SortedOffset.GetData(),
1151 m_SortedOffset.GetSize(),
1152 sizeof(FX_FILESIZE), CompareFileSize);
1153 if (!pResult) {
1154 m_SortedOffset.Add(offset);
1155 }
1156 } else {
1157 if (offset < 0 || offset >= m_V5Type.GetSize()) {
1158 pStream->Release();
1159 return FALSE;
1160 }
1161 m_V5Type[offset] = 255;
1162 }
1163 }
1164 }
1165 segindex += count;
1166 }
1167 pStream->Release();
1168 return TRUE;
1169 }
GetIDArray()1170 CPDF_Array* CPDF_Parser::GetIDArray() {
1171 CPDF_Object* pID = m_pTrailer ? m_pTrailer->GetElement("ID") : NULL;
1172 if (!pID)
1173 return nullptr;
1174
1175 if (CPDF_Reference* pRef = pID->AsReference()) {
1176 pID = ParseIndirectObject(nullptr, pRef->GetRefObjNum());
1177 m_pTrailer->SetAt("ID", pID);
1178 }
1179 return ToArray(pID);
1180 }
GetRootObjNum()1181 FX_DWORD CPDF_Parser::GetRootObjNum() {
1182 CPDF_Reference* pRef =
1183 ToReference(m_pTrailer ? m_pTrailer->GetElement("Root") : nullptr);
1184 return pRef ? pRef->GetRefObjNum() : 0;
1185 }
GetInfoObjNum()1186 FX_DWORD CPDF_Parser::GetInfoObjNum() {
1187 CPDF_Reference* pRef =
1188 ToReference(m_pTrailer ? m_pTrailer->GetElement("Info") : nullptr);
1189 return pRef ? pRef->GetRefObjNum() : 0;
1190 }
IsFormStream(FX_DWORD objnum,FX_BOOL & bForm)1191 FX_BOOL CPDF_Parser::IsFormStream(FX_DWORD objnum, FX_BOOL& bForm) {
1192 bForm = FALSE;
1193 if (!IsValidObjectNumber(objnum))
1194 return TRUE;
1195 if (m_V5Type[objnum] == 0)
1196 return TRUE;
1197 if (m_V5Type[objnum] == 2)
1198 return TRUE;
1199 FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
1200 void* pResult =
1201 FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(),
1202 sizeof(FX_FILESIZE), CompareFileSize);
1203 if (!pResult) {
1204 return TRUE;
1205 }
1206 if ((FX_FILESIZE*)pResult - (FX_FILESIZE*)m_SortedOffset.GetData() ==
1207 m_SortedOffset.GetSize() - 1) {
1208 return FALSE;
1209 }
1210 FX_FILESIZE size = ((FX_FILESIZE*)pResult)[1] - pos;
1211 FX_FILESIZE SavedPos = m_Syntax.SavePos();
1212 m_Syntax.RestorePos(pos);
1213 const char kFormStream[] = "/Form\0stream";
1214 const CFX_ByteStringC kFormStreamStr(kFormStream, sizeof(kFormStream) - 1);
1215 bForm = m_Syntax.SearchMultiWord(kFormStreamStr, TRUE, size) == 0;
1216 m_Syntax.RestorePos(SavedPos);
1217 return TRUE;
1218 }
1219
ParseIndirectObject(CPDF_IndirectObjectHolder * pObjList,FX_DWORD objnum,PARSE_CONTEXT * pContext)1220 CPDF_Object* CPDF_Parser::ParseIndirectObject(
1221 CPDF_IndirectObjectHolder* pObjList,
1222 FX_DWORD objnum,
1223 PARSE_CONTEXT* pContext) {
1224 if (!IsValidObjectNumber(objnum))
1225 return nullptr;
1226
1227 // Prevent circular parsing the same object.
1228 if (pdfium::ContainsKey(m_ParsingObjNums, objnum))
1229 return nullptr;
1230 ScopedSetInsertion<FX_DWORD> local_insert(&m_ParsingObjNums, objnum);
1231
1232 if (m_V5Type[objnum] == 1 || m_V5Type[objnum] == 255) {
1233 FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
1234 if (pos <= 0)
1235 return nullptr;
1236 return ParseIndirectObjectAt(pObjList, pos, objnum, pContext);
1237 }
1238 if (m_V5Type[objnum] != 2)
1239 return nullptr;
1240
1241 CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos);
1242 if (!pObjStream)
1243 return nullptr;
1244
1245 ScopedFileStream file(FX_CreateMemoryStream(
1246 (uint8_t*)pObjStream->GetData(), (size_t)pObjStream->GetSize(), FALSE));
1247 CPDF_SyntaxParser syntax;
1248 syntax.InitParser(file.get(), 0);
1249 const int32_t offset = GetStreamFirst(pObjStream);
1250
1251 // Read object numbers from |pObjStream| into a cache.
1252 if (!pdfium::ContainsKey(m_ObjCache, pObjStream)) {
1253 for (int32_t i = GetStreamNCount(pObjStream); i > 0; --i) {
1254 FX_DWORD thisnum = syntax.GetDirectNum();
1255 FX_DWORD thisoff = syntax.GetDirectNum();
1256 m_ObjCache[pObjStream][thisnum] = thisoff;
1257 }
1258 }
1259
1260 const auto it = m_ObjCache[pObjStream].find(objnum);
1261 if (it == m_ObjCache[pObjStream].end())
1262 return nullptr;
1263
1264 syntax.RestorePos(offset + it->second);
1265 return syntax.GetObject(pObjList, 0, 0, pContext, true);
1266 }
1267
GetObjectStream(FX_DWORD objnum)1268 CPDF_StreamAcc* CPDF_Parser::GetObjectStream(FX_DWORD objnum) {
1269 CPDF_StreamAcc* pStreamAcc = nullptr;
1270 if (m_ObjectStreamMap.Lookup((void*)(uintptr_t)objnum, (void*&)pStreamAcc))
1271 return pStreamAcc;
1272
1273 const CPDF_Stream* pStream = ToStream(
1274 m_pDocument ? m_pDocument->GetIndirectObject(objnum, nullptr) : nullptr);
1275 if (!pStream)
1276 return nullptr;
1277
1278 pStreamAcc = new CPDF_StreamAcc;
1279 pStreamAcc->LoadAllData(pStream);
1280 m_ObjectStreamMap.SetAt((void*)(uintptr_t)objnum, pStreamAcc);
1281 return pStreamAcc;
1282 }
1283
GetObjectSize(FX_DWORD objnum) const1284 FX_FILESIZE CPDF_Parser::GetObjectSize(FX_DWORD objnum) const {
1285 if (!IsValidObjectNumber(objnum))
1286 return 0;
1287
1288 if (m_V5Type[objnum] == 2)
1289 objnum = GetObjectPositionOrZero(objnum);
1290
1291 if (m_V5Type[objnum] == 1 || m_V5Type[objnum] == 255) {
1292 FX_FILESIZE offset = GetObjectPositionOrZero(objnum);
1293 if (offset == 0)
1294 return 0;
1295
1296 FX_FILESIZE* pResult = static_cast<FX_FILESIZE*>(FXSYS_bsearch(
1297 &offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(),
1298 sizeof(FX_FILESIZE), CompareFileSize));
1299 if (!pResult)
1300 return 0;
1301
1302 if (pResult - m_SortedOffset.GetData() == m_SortedOffset.GetSize() - 1)
1303 return 0;
1304
1305 return pResult[1] - offset;
1306 }
1307 return 0;
1308 }
1309
GetIndirectBinary(FX_DWORD objnum,uint8_t * & pBuffer,FX_DWORD & size)1310 void CPDF_Parser::GetIndirectBinary(FX_DWORD objnum,
1311 uint8_t*& pBuffer,
1312 FX_DWORD& size) {
1313 pBuffer = NULL;
1314 size = 0;
1315 if (!IsValidObjectNumber(objnum))
1316 return;
1317
1318 if (m_V5Type[objnum] == 2) {
1319 CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos);
1320 if (!pObjStream)
1321 return;
1322
1323 int32_t offset = GetStreamFirst(pObjStream);
1324 const uint8_t* pData = pObjStream->GetData();
1325 FX_DWORD totalsize = pObjStream->GetSize();
1326 ScopedFileStream file(
1327 FX_CreateMemoryStream((uint8_t*)pData, (size_t)totalsize, FALSE));
1328 CPDF_SyntaxParser syntax;
1329 syntax.InitParser(file.get(), 0);
1330 for (int i = GetStreamNCount(pObjStream); i > 0; --i) {
1331 FX_DWORD thisnum = syntax.GetDirectNum();
1332 FX_DWORD thisoff = syntax.GetDirectNum();
1333 if (thisnum != objnum)
1334 continue;
1335
1336 if (i == 1) {
1337 size = totalsize - (thisoff + offset);
1338 } else {
1339 syntax.GetDirectNum(); // Skip nextnum.
1340 FX_DWORD nextoff = syntax.GetDirectNum();
1341 size = nextoff - thisoff;
1342 }
1343 pBuffer = FX_Alloc(uint8_t, size);
1344 FXSYS_memcpy(pBuffer, pData + thisoff + offset, size);
1345 return;
1346 }
1347 return;
1348 }
1349
1350 if (m_V5Type[objnum] != 1)
1351 return;
1352
1353 FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
1354 if (pos == 0) {
1355 return;
1356 }
1357 FX_FILESIZE SavedPos = m_Syntax.SavePos();
1358 m_Syntax.RestorePos(pos);
1359 bool bIsNumber;
1360 CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber);
1361 if (!bIsNumber) {
1362 m_Syntax.RestorePos(SavedPos);
1363 return;
1364 }
1365 FX_DWORD parser_objnum = FXSYS_atoi(word);
1366 if (parser_objnum && parser_objnum != objnum) {
1367 m_Syntax.RestorePos(SavedPos);
1368 return;
1369 }
1370 word = m_Syntax.GetNextWord(&bIsNumber);
1371 if (!bIsNumber) {
1372 m_Syntax.RestorePos(SavedPos);
1373 return;
1374 }
1375 if (m_Syntax.GetKeyword() != "obj") {
1376 m_Syntax.RestorePos(SavedPos);
1377 return;
1378 }
1379 void* pResult =
1380 FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(),
1381 sizeof(FX_FILESIZE), CompareFileSize);
1382 if (!pResult) {
1383 m_Syntax.RestorePos(SavedPos);
1384 return;
1385 }
1386 FX_FILESIZE nextoff = ((FX_FILESIZE*)pResult)[1];
1387 FX_BOOL bNextOffValid = FALSE;
1388 if (nextoff != pos) {
1389 m_Syntax.RestorePos(nextoff);
1390 word = m_Syntax.GetNextWord(&bIsNumber);
1391 if (word == "xref") {
1392 bNextOffValid = TRUE;
1393 } else if (bIsNumber) {
1394 word = m_Syntax.GetNextWord(&bIsNumber);
1395 if (bIsNumber && m_Syntax.GetKeyword() == "obj") {
1396 bNextOffValid = TRUE;
1397 }
1398 }
1399 }
1400 if (!bNextOffValid) {
1401 m_Syntax.RestorePos(pos);
1402 while (1) {
1403 if (m_Syntax.GetKeyword() == "endobj") {
1404 break;
1405 }
1406 if (m_Syntax.SavePos() == m_Syntax.m_FileLen) {
1407 break;
1408 }
1409 }
1410 nextoff = m_Syntax.SavePos();
1411 }
1412 size = (FX_DWORD)(nextoff - pos);
1413 pBuffer = FX_Alloc(uint8_t, size);
1414 m_Syntax.RestorePos(pos);
1415 m_Syntax.ReadBlock(pBuffer, size);
1416 m_Syntax.RestorePos(SavedPos);
1417 }
1418
ParseIndirectObjectAt(CPDF_IndirectObjectHolder * pObjList,FX_FILESIZE pos,FX_DWORD objnum,PARSE_CONTEXT * pContext)1419 CPDF_Object* CPDF_Parser::ParseIndirectObjectAt(
1420 CPDF_IndirectObjectHolder* pObjList,
1421 FX_FILESIZE pos,
1422 FX_DWORD objnum,
1423 PARSE_CONTEXT* pContext) {
1424 FX_FILESIZE SavedPos = m_Syntax.SavePos();
1425 m_Syntax.RestorePos(pos);
1426 bool bIsNumber;
1427 CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber);
1428 if (!bIsNumber) {
1429 m_Syntax.RestorePos(SavedPos);
1430 return NULL;
1431 }
1432 FX_FILESIZE objOffset = m_Syntax.SavePos();
1433 objOffset -= word.GetLength();
1434 FX_DWORD parser_objnum = FXSYS_atoi(word);
1435 if (objnum && parser_objnum != objnum) {
1436 m_Syntax.RestorePos(SavedPos);
1437 return NULL;
1438 }
1439 word = m_Syntax.GetNextWord(&bIsNumber);
1440 if (!bIsNumber) {
1441 m_Syntax.RestorePos(SavedPos);
1442 return NULL;
1443 }
1444 FX_DWORD parser_gennum = FXSYS_atoi(word);
1445 if (m_Syntax.GetKeyword() != "obj") {
1446 m_Syntax.RestorePos(SavedPos);
1447 return NULL;
1448 }
1449 CPDF_Object* pObj =
1450 m_Syntax.GetObject(pObjList, objnum, parser_gennum, pContext, true);
1451 m_Syntax.SavePos();
1452 CFX_ByteString bsWord = m_Syntax.GetKeyword();
1453 if (bsWord == "endobj") {
1454 m_Syntax.SavePos();
1455 }
1456 m_Syntax.RestorePos(SavedPos);
1457 if (pObj) {
1458 if (!objnum)
1459 pObj->m_ObjNum = parser_objnum;
1460 pObj->m_GenNum = parser_gennum;
1461 }
1462 return pObj;
1463 }
ParseIndirectObjectAtByStrict(CPDF_IndirectObjectHolder * pObjList,FX_FILESIZE pos,FX_DWORD objnum,PARSE_CONTEXT * pContext,FX_FILESIZE * pResultPos)1464 CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict(
1465 CPDF_IndirectObjectHolder* pObjList,
1466 FX_FILESIZE pos,
1467 FX_DWORD objnum,
1468 PARSE_CONTEXT* pContext,
1469 FX_FILESIZE* pResultPos) {
1470 FX_FILESIZE SavedPos = m_Syntax.SavePos();
1471 m_Syntax.RestorePos(pos);
1472 bool bIsNumber;
1473 CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber);
1474 if (!bIsNumber) {
1475 m_Syntax.RestorePos(SavedPos);
1476 return NULL;
1477 }
1478 FX_DWORD parser_objnum = FXSYS_atoi(word);
1479 if (objnum && parser_objnum != objnum) {
1480 m_Syntax.RestorePos(SavedPos);
1481 return NULL;
1482 }
1483 word = m_Syntax.GetNextWord(&bIsNumber);
1484 if (!bIsNumber) {
1485 m_Syntax.RestorePos(SavedPos);
1486 return NULL;
1487 }
1488 FX_DWORD gennum = FXSYS_atoi(word);
1489 if (m_Syntax.GetKeyword() != "obj") {
1490 m_Syntax.RestorePos(SavedPos);
1491 return NULL;
1492 }
1493 CPDF_Object* pObj =
1494 m_Syntax.GetObjectByStrict(pObjList, objnum, gennum, pContext);
1495 if (pResultPos) {
1496 *pResultPos = m_Syntax.m_Pos;
1497 }
1498 m_Syntax.RestorePos(SavedPos);
1499 return pObj;
1500 }
1501
LoadTrailerV4()1502 CPDF_Dictionary* CPDF_Parser::LoadTrailerV4() {
1503 if (m_Syntax.GetKeyword() != "trailer")
1504 return nullptr;
1505
1506 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pObj(
1507 m_Syntax.GetObject(m_pDocument, 0, 0, nullptr, true));
1508 if (!ToDictionary(pObj.get()))
1509 return nullptr;
1510 return pObj.release()->AsDictionary();
1511 }
1512
GetPermissions(FX_BOOL bCheckRevision)1513 FX_DWORD CPDF_Parser::GetPermissions(FX_BOOL bCheckRevision) {
1514 if (!m_pSecurityHandler) {
1515 return (FX_DWORD)-1;
1516 }
1517 FX_DWORD dwPermission = m_pSecurityHandler->GetPermissions();
1518 if (m_pEncryptDict && m_pEncryptDict->GetString("Filter") == "Standard") {
1519 dwPermission &= 0xFFFFFFFC;
1520 dwPermission |= 0xFFFFF0C0;
1521 if (bCheckRevision && m_pEncryptDict->GetInteger("R") == 2) {
1522 dwPermission &= 0xFFFFF0FF;
1523 }
1524 }
1525 return dwPermission;
1526 }
IsOwner()1527 FX_BOOL CPDF_Parser::IsOwner() {
1528 return !m_pSecurityHandler || m_pSecurityHandler->IsOwner();
1529 }
SetSecurityHandler(CPDF_SecurityHandler * pSecurityHandler,FX_BOOL bForced)1530 void CPDF_Parser::SetSecurityHandler(CPDF_SecurityHandler* pSecurityHandler,
1531 FX_BOOL bForced) {
1532 m_bForceUseSecurityHandler = bForced;
1533 m_pSecurityHandler.reset(pSecurityHandler);
1534 if (m_bForceUseSecurityHandler) {
1535 return;
1536 }
1537 m_Syntax.m_pCryptoHandler.reset(pSecurityHandler->CreateCryptoHandler());
1538 m_Syntax.m_pCryptoHandler->Init(NULL, pSecurityHandler);
1539 }
IsLinearizedFile(IFX_FileRead * pFileAccess,FX_DWORD offset)1540 FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess,
1541 FX_DWORD offset) {
1542 m_Syntax.InitParser(pFileAccess, offset);
1543 m_Syntax.RestorePos(m_Syntax.m_HeaderOffset + 9);
1544 FX_FILESIZE SavedPos = m_Syntax.SavePos();
1545 bool bIsNumber;
1546 CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber);
1547 if (!bIsNumber) {
1548 return FALSE;
1549 }
1550 FX_DWORD objnum = FXSYS_atoi(word);
1551 word = m_Syntax.GetNextWord(&bIsNumber);
1552 if (!bIsNumber) {
1553 return FALSE;
1554 }
1555 FX_DWORD gennum = FXSYS_atoi(word);
1556 if (m_Syntax.GetKeyword() != "obj") {
1557 m_Syntax.RestorePos(SavedPos);
1558 return FALSE;
1559 }
1560 m_pLinearized = m_Syntax.GetObject(nullptr, objnum, gennum, nullptr, true);
1561 if (!m_pLinearized) {
1562 return FALSE;
1563 }
1564
1565 CPDF_Dictionary* pDict = m_pLinearized->GetDict();
1566 if (pDict && pDict->GetElement("Linearized")) {
1567 m_Syntax.GetNextWord(nullptr);
1568
1569 CPDF_Object* pLen = pDict->GetElement("L");
1570 if (!pLen) {
1571 m_pLinearized->Release();
1572 m_pLinearized = NULL;
1573 return FALSE;
1574 }
1575 if (pLen->GetInteger() != (int)pFileAccess->GetSize()) {
1576 return FALSE;
1577 }
1578
1579 if (CPDF_Number* pNo = ToNumber(pDict->GetElement("P")))
1580 m_dwFirstPageNo = pNo->GetInteger();
1581
1582 if (CPDF_Number* pTable = ToNumber(pDict->GetElement("T")))
1583 m_LastXRefOffset = pTable->GetInteger();
1584
1585 return TRUE;
1586 }
1587 m_pLinearized->Release();
1588 m_pLinearized = NULL;
1589 return FALSE;
1590 }
StartAsynParse(IFX_FileRead * pFileAccess,FX_BOOL bReParse,FX_BOOL bOwnFileRead)1591 FX_DWORD CPDF_Parser::StartAsynParse(IFX_FileRead* pFileAccess,
1592 FX_BOOL bReParse,
1593 FX_BOOL bOwnFileRead) {
1594 CloseParser(bReParse);
1595 m_bXRefStream = FALSE;
1596 m_LastXRefOffset = 0;
1597 m_bOwnFileRead = bOwnFileRead;
1598 int32_t offset = GetHeaderOffset(pFileAccess);
1599 if (offset == -1) {
1600 return PDFPARSE_ERROR_FORMAT;
1601 }
1602 if (!IsLinearizedFile(pFileAccess, offset)) {
1603 m_Syntax.m_pFileAccess = NULL;
1604 return StartParse(pFileAccess, bReParse, bOwnFileRead);
1605 }
1606 if (!bReParse) {
1607 m_pDocument = new CPDF_Document(this);
1608 }
1609 FX_FILESIZE dwFirstXRefOffset = m_Syntax.SavePos();
1610 FX_BOOL bXRefRebuilt = FALSE;
1611 FX_BOOL bLoadV4 = FALSE;
1612 if (!(bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, FALSE)) &&
1613 !LoadCrossRefV5(&dwFirstXRefOffset, TRUE)) {
1614 if (!RebuildCrossRef()) {
1615 return PDFPARSE_ERROR_FORMAT;
1616 }
1617 bXRefRebuilt = TRUE;
1618 m_LastXRefOffset = 0;
1619 }
1620 if (bLoadV4) {
1621 m_pTrailer = LoadTrailerV4();
1622 if (!m_pTrailer) {
1623 return PDFPARSE_ERROR_SUCCESS;
1624 }
1625
1626 int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size");
1627 if (xrefsize > 0) {
1628 ShrinkObjectMap(xrefsize);
1629 m_V5Type.SetSize(xrefsize);
1630 }
1631 }
1632 FX_DWORD dwRet = SetEncryptHandler();
1633 if (dwRet != PDFPARSE_ERROR_SUCCESS) {
1634 return dwRet;
1635 }
1636 m_pDocument->LoadAsynDoc(m_pLinearized->GetDict());
1637 if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
1638 if (bXRefRebuilt) {
1639 return PDFPARSE_ERROR_FORMAT;
1640 }
1641 ReleaseEncryptHandler();
1642 if (!RebuildCrossRef()) {
1643 return PDFPARSE_ERROR_FORMAT;
1644 }
1645 dwRet = SetEncryptHandler();
1646 if (dwRet != PDFPARSE_ERROR_SUCCESS) {
1647 return dwRet;
1648 }
1649 m_pDocument->LoadAsynDoc(m_pLinearized->GetDict());
1650 if (!m_pDocument->GetRoot()) {
1651 return PDFPARSE_ERROR_FORMAT;
1652 }
1653 }
1654 FXSYS_qsort(m_SortedOffset.GetData(), m_SortedOffset.GetSize(),
1655 sizeof(FX_FILESIZE), CompareFileSize);
1656 if (GetRootObjNum() == 0) {
1657 ReleaseEncryptHandler();
1658 if (!RebuildCrossRef() || GetRootObjNum() == 0)
1659 return PDFPARSE_ERROR_FORMAT;
1660
1661 dwRet = SetEncryptHandler();
1662 if (dwRet != PDFPARSE_ERROR_SUCCESS) {
1663 return dwRet;
1664 }
1665 }
1666 if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
1667 if (CPDF_Reference* pMetadata =
1668 ToReference(m_pDocument->GetRoot()->GetElement("Metadata")))
1669 m_Syntax.m_MetadataObjnum = pMetadata->GetRefObjNum();
1670 }
1671 return PDFPARSE_ERROR_SUCCESS;
1672 }
LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos)1673 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) {
1674 if (!LoadCrossRefV5(&xrefpos, FALSE)) {
1675 return FALSE;
1676 }
1677 std::set<FX_FILESIZE> seen_xrefpos;
1678 while (xrefpos) {
1679 seen_xrefpos.insert(xrefpos);
1680 if (!LoadCrossRefV5(&xrefpos, FALSE)) {
1681 return FALSE;
1682 }
1683 // Check for circular references.
1684 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) {
1685 return FALSE;
1686 }
1687 }
1688 m_ObjectStreamMap.InitHashTable(101, FALSE);
1689 m_bXRefStream = TRUE;
1690 return TRUE;
1691 }
LoadLinearizedMainXRefTable()1692 FX_DWORD CPDF_Parser::LoadLinearizedMainXRefTable() {
1693 FX_DWORD dwSaveMetadataObjnum = m_Syntax.m_MetadataObjnum;
1694 m_Syntax.m_MetadataObjnum = 0;
1695 if (m_pTrailer) {
1696 m_pTrailer->Release();
1697 m_pTrailer = NULL;
1698 }
1699 m_Syntax.RestorePos(m_LastXRefOffset - m_Syntax.m_HeaderOffset);
1700 uint8_t ch = 0;
1701 FX_DWORD dwCount = 0;
1702 m_Syntax.GetNextChar(ch);
1703 while (PDFCharIsWhitespace(ch)) {
1704 ++dwCount;
1705 if (m_Syntax.m_FileLen >=
1706 (FX_FILESIZE)(m_Syntax.SavePos() + m_Syntax.m_HeaderOffset)) {
1707 break;
1708 }
1709 m_Syntax.GetNextChar(ch);
1710 }
1711 m_LastXRefOffset += dwCount;
1712 FX_POSITION pos = m_ObjectStreamMap.GetStartPosition();
1713 while (pos) {
1714 void* objnum;
1715 CPDF_StreamAcc* pStream;
1716 m_ObjectStreamMap.GetNextAssoc(pos, objnum, (void*&)pStream);
1717 delete pStream;
1718 }
1719 m_ObjectStreamMap.RemoveAll();
1720 m_ObjCache.clear();
1721
1722 if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) &&
1723 !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) {
1724 m_LastXRefOffset = 0;
1725 m_Syntax.m_MetadataObjnum = dwSaveMetadataObjnum;
1726 return PDFPARSE_ERROR_FORMAT;
1727 }
1728 FXSYS_qsort(m_SortedOffset.GetData(), m_SortedOffset.GetSize(),
1729 sizeof(FX_FILESIZE), CompareFileSize);
1730 m_Syntax.m_MetadataObjnum = dwSaveMetadataObjnum;
1731 return PDFPARSE_ERROR_SUCCESS;
1732 }
1733
// static
// Current nesting depth of CPDF_SyntaxParser::GetObject() calls. GetObject()
// increments it on entry, restores it on exit, and gives up once it exceeds
// kParserMaxRecursionDepth.
int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;
1736
CPDF_SyntaxParser()1737 CPDF_SyntaxParser::CPDF_SyntaxParser() {
1738 m_pFileAccess = NULL;
1739 m_pFileBuf = NULL;
1740 m_BufSize = CPDF_ModuleMgr::kFileBufSize;
1741 m_pFileBuf = NULL;
1742 m_MetadataObjnum = 0;
1743 m_dwWordPos = 0;
1744 m_bFileStream = FALSE;
1745 }
1746
~CPDF_SyntaxParser()1747 CPDF_SyntaxParser::~CPDF_SyntaxParser() {
1748 FX_Free(m_pFileBuf);
1749 }
1750
GetCharAt(FX_FILESIZE pos,uint8_t & ch)1751 FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {
1752 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
1753 m_Pos = pos;
1754 return GetNextChar(ch);
1755 }
1756
GetNextChar(uint8_t & ch)1757 FX_BOOL CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {
1758 FX_FILESIZE pos = m_Pos + m_HeaderOffset;
1759 if (pos >= m_FileLen) {
1760 return FALSE;
1761 }
1762 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
1763 FX_FILESIZE read_pos = pos;
1764 FX_DWORD read_size = m_BufSize;
1765 if ((FX_FILESIZE)read_size > m_FileLen) {
1766 read_size = (FX_DWORD)m_FileLen;
1767 }
1768 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
1769 if (m_FileLen < (FX_FILESIZE)read_size) {
1770 read_pos = 0;
1771 read_size = (FX_DWORD)m_FileLen;
1772 } else {
1773 read_pos = m_FileLen - read_size;
1774 }
1775 }
1776 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) {
1777 return FALSE;
1778 }
1779 m_BufOffset = read_pos;
1780 }
1781 ch = m_pFileBuf[pos - m_BufOffset];
1782 m_Pos++;
1783 return TRUE;
1784 }
GetCharAtBackward(FX_FILESIZE pos,uint8_t & ch)1785 FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) {
1786 pos += m_HeaderOffset;
1787 if (pos >= m_FileLen) {
1788 return FALSE;
1789 }
1790 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
1791 FX_FILESIZE read_pos;
1792 if (pos < (FX_FILESIZE)m_BufSize) {
1793 read_pos = 0;
1794 } else {
1795 read_pos = pos - m_BufSize + 1;
1796 }
1797 FX_DWORD read_size = m_BufSize;
1798 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
1799 if (m_FileLen < (FX_FILESIZE)read_size) {
1800 read_pos = 0;
1801 read_size = (FX_DWORD)m_FileLen;
1802 } else {
1803 read_pos = m_FileLen - read_size;
1804 }
1805 }
1806 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) {
1807 return FALSE;
1808 }
1809 m_BufOffset = read_pos;
1810 }
1811 ch = m_pFileBuf[pos - m_BufOffset];
1812 return TRUE;
1813 }
ReadBlock(uint8_t * pBuf,FX_DWORD size)1814 FX_BOOL CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, FX_DWORD size) {
1815 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size)) {
1816 return FALSE;
1817 }
1818 m_Pos += size;
1819 return TRUE;
1820 }
1821
GetNextWordInternal(bool * bIsNumber)1822 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {
1823 m_WordSize = 0;
1824 if (bIsNumber)
1825 *bIsNumber = true;
1826 uint8_t ch;
1827 if (!GetNextChar(ch)) {
1828 return;
1829 }
1830 while (1) {
1831 while (PDFCharIsWhitespace(ch)) {
1832 if (!GetNextChar(ch))
1833 return;
1834 }
1835 if (ch != '%')
1836 break;
1837
1838 while (1) {
1839 if (!GetNextChar(ch))
1840 return;
1841 if (PDFCharIsLineEnding(ch))
1842 break;
1843 }
1844 }
1845
1846 if (PDFCharIsDelimiter(ch)) {
1847 if (bIsNumber)
1848 *bIsNumber = false;
1849 m_WordBuffer[m_WordSize++] = ch;
1850 if (ch == '/') {
1851 while (1) {
1852 if (!GetNextChar(ch))
1853 return;
1854
1855 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
1856 m_Pos--;
1857 return;
1858 }
1859
1860 if (m_WordSize < sizeof(m_WordBuffer) - 1)
1861 m_WordBuffer[m_WordSize++] = ch;
1862 }
1863 } else if (ch == '<') {
1864 if (!GetNextChar(ch))
1865 return;
1866 if (ch == '<')
1867 m_WordBuffer[m_WordSize++] = ch;
1868 else
1869 m_Pos--;
1870 } else if (ch == '>') {
1871 if (!GetNextChar(ch))
1872 return;
1873 if (ch == '>')
1874 m_WordBuffer[m_WordSize++] = ch;
1875 else
1876 m_Pos--;
1877 }
1878 return;
1879 }
1880
1881 while (1) {
1882 if (m_WordSize < sizeof(m_WordBuffer) - 1)
1883 m_WordBuffer[m_WordSize++] = ch;
1884
1885 if (!PDFCharIsNumeric(ch))
1886 if (bIsNumber)
1887 *bIsNumber = false;
1888 if (!GetNextChar(ch))
1889 return;
1890
1891 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
1892 m_Pos--;
1893 break;
1894 }
1895 }
1896 }
1897
ReadString()1898 CFX_ByteString CPDF_SyntaxParser::ReadString() {
1899 uint8_t ch;
1900 if (!GetNextChar(ch)) {
1901 return CFX_ByteString();
1902 }
1903 CFX_ByteTextBuf buf;
1904 int32_t parlevel = 0;
1905 int32_t status = 0, iEscCode = 0;
1906 while (1) {
1907 switch (status) {
1908 case 0:
1909 if (ch == ')') {
1910 if (parlevel == 0) {
1911 return buf.GetByteString();
1912 }
1913 parlevel--;
1914 buf.AppendChar(')');
1915 } else if (ch == '(') {
1916 parlevel++;
1917 buf.AppendChar('(');
1918 } else if (ch == '\\') {
1919 status = 1;
1920 } else {
1921 buf.AppendChar(ch);
1922 }
1923 break;
1924 case 1:
1925 if (ch >= '0' && ch <= '7') {
1926 iEscCode = FXSYS_toDecimalDigit(ch);
1927 status = 2;
1928 break;
1929 }
1930 if (ch == 'n') {
1931 buf.AppendChar('\n');
1932 } else if (ch == 'r') {
1933 buf.AppendChar('\r');
1934 } else if (ch == 't') {
1935 buf.AppendChar('\t');
1936 } else if (ch == 'b') {
1937 buf.AppendChar('\b');
1938 } else if (ch == 'f') {
1939 buf.AppendChar('\f');
1940 } else if (ch == '\r') {
1941 status = 4;
1942 break;
1943 } else if (ch == '\n') {
1944 } else {
1945 buf.AppendChar(ch);
1946 }
1947 status = 0;
1948 break;
1949 case 2:
1950 if (ch >= '0' && ch <= '7') {
1951 iEscCode = iEscCode * 8 + FXSYS_toDecimalDigit(ch);
1952 status = 3;
1953 } else {
1954 buf.AppendChar(iEscCode);
1955 status = 0;
1956 continue;
1957 }
1958 break;
1959 case 3:
1960 if (ch >= '0' && ch <= '7') {
1961 iEscCode = iEscCode * 8 + FXSYS_toDecimalDigit(ch);
1962 buf.AppendChar(iEscCode);
1963 status = 0;
1964 } else {
1965 buf.AppendChar(iEscCode);
1966 status = 0;
1967 continue;
1968 }
1969 break;
1970 case 4:
1971 status = 0;
1972 if (ch != '\n') {
1973 continue;
1974 }
1975 break;
1976 }
1977 if (!GetNextChar(ch)) {
1978 break;
1979 }
1980 }
1981 GetNextChar(ch);
1982 return buf.GetByteString();
1983 }
ReadHexString()1984 CFX_ByteString CPDF_SyntaxParser::ReadHexString() {
1985 uint8_t ch;
1986 if (!GetNextChar(ch))
1987 return CFX_ByteString();
1988
1989 CFX_BinaryBuf buf;
1990 bool bFirst = true;
1991 uint8_t code = 0;
1992 while (1) {
1993 if (ch == '>')
1994 break;
1995
1996 if (std::isxdigit(ch)) {
1997 int val = FXSYS_toHexDigit(ch);
1998 if (bFirst) {
1999 code = val * 16;
2000 } else {
2001 code += val;
2002 buf.AppendByte((uint8_t)code);
2003 }
2004 bFirst = !bFirst;
2005 }
2006
2007 if (!GetNextChar(ch))
2008 break;
2009 }
2010 if (!bFirst)
2011 buf.AppendByte((uint8_t)code);
2012
2013 return buf.GetByteString();
2014 }
ToNextLine()2015 void CPDF_SyntaxParser::ToNextLine() {
2016 uint8_t ch;
2017 while (GetNextChar(ch)) {
2018 if (ch == '\n') {
2019 break;
2020 }
2021 if (ch == '\r') {
2022 GetNextChar(ch);
2023 if (ch != '\n') {
2024 --m_Pos;
2025 }
2026 break;
2027 }
2028 }
2029 }
ToNextWord()2030 void CPDF_SyntaxParser::ToNextWord() {
2031 uint8_t ch;
2032 if (!GetNextChar(ch))
2033 return;
2034
2035 while (1) {
2036 while (PDFCharIsWhitespace(ch)) {
2037 m_dwWordPos = m_Pos;
2038 if (!GetNextChar(ch))
2039 return;
2040 }
2041
2042 if (ch != '%')
2043 break;
2044
2045 while (1) {
2046 if (!GetNextChar(ch))
2047 return;
2048 if (PDFCharIsLineEnding(ch))
2049 break;
2050 }
2051 }
2052 m_Pos--;
2053 }
2054
GetNextWord(bool * bIsNumber)2055 CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {
2056 GetNextWordInternal(bIsNumber);
2057 return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize);
2058 }
2059
GetKeyword()2060 CFX_ByteString CPDF_SyntaxParser::GetKeyword() {
2061 return GetNextWord(nullptr);
2062 }
2063
GetObject(CPDF_IndirectObjectHolder * pObjList,FX_DWORD objnum,FX_DWORD gennum,PARSE_CONTEXT * pContext,FX_BOOL bDecrypt)2064 CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList,
2065 FX_DWORD objnum,
2066 FX_DWORD gennum,
2067 PARSE_CONTEXT* pContext,
2068 FX_BOOL bDecrypt) {
2069 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
2070 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) {
2071 return NULL;
2072 }
2073 FX_FILESIZE SavedPos = m_Pos;
2074 FX_BOOL bTypeOnly = pContext && (pContext->m_Flags & PDFPARSE_TYPEONLY);
2075 bool bIsNumber;
2076 CFX_ByteString word = GetNextWord(&bIsNumber);
2077 if (word.GetLength() == 0) {
2078 if (bTypeOnly)
2079 return (CPDF_Object*)PDFOBJ_INVALID;
2080 return NULL;
2081 }
2082 if (bIsNumber) {
2083 FX_FILESIZE SavedPos = m_Pos;
2084 CFX_ByteString nextword = GetNextWord(&bIsNumber);
2085 if (bIsNumber) {
2086 CFX_ByteString nextword2 = GetNextWord(nullptr);
2087 if (nextword2 == "R") {
2088 FX_DWORD objnum = FXSYS_atoi(word);
2089 if (bTypeOnly)
2090 return (CPDF_Object*)PDFOBJ_REFERENCE;
2091 return new CPDF_Reference(pObjList, objnum);
2092 }
2093 }
2094 m_Pos = SavedPos;
2095 if (bTypeOnly)
2096 return (CPDF_Object*)PDFOBJ_NUMBER;
2097 return new CPDF_Number(word);
2098 }
2099 if (word == "true" || word == "false") {
2100 if (bTypeOnly)
2101 return (CPDF_Object*)PDFOBJ_BOOLEAN;
2102 return new CPDF_Boolean(word == "true");
2103 }
2104 if (word == "null") {
2105 if (bTypeOnly)
2106 return (CPDF_Object*)PDFOBJ_NULL;
2107 return new CPDF_Null;
2108 }
2109 if (word == "(") {
2110 if (bTypeOnly)
2111 return (CPDF_Object*)PDFOBJ_STRING;
2112 CFX_ByteString str = ReadString();
2113 if (m_pCryptoHandler && bDecrypt) {
2114 m_pCryptoHandler->Decrypt(objnum, gennum, str);
2115 }
2116 return new CPDF_String(str, FALSE);
2117 }
2118 if (word == "<") {
2119 if (bTypeOnly)
2120 return (CPDF_Object*)PDFOBJ_STRING;
2121 CFX_ByteString str = ReadHexString();
2122 if (m_pCryptoHandler && bDecrypt) {
2123 m_pCryptoHandler->Decrypt(objnum, gennum, str);
2124 }
2125 return new CPDF_String(str, TRUE);
2126 }
2127 if (word == "[") {
2128 if (bTypeOnly)
2129 return (CPDF_Object*)PDFOBJ_ARRAY;
2130 CPDF_Array* pArray = new CPDF_Array;
2131 while (CPDF_Object* pObj =
2132 GetObject(pObjList, objnum, gennum, nullptr, true)) {
2133 pArray->Add(pObj);
2134 }
2135 return pArray;
2136 }
2137 if (word[0] == '/') {
2138 if (bTypeOnly)
2139 return (CPDF_Object*)PDFOBJ_NAME;
2140 return new CPDF_Name(
2141 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
2142 }
2143 if (word == "<<") {
2144 if (bTypeOnly)
2145 return (CPDF_Object*)PDFOBJ_DICTIONARY;
2146
2147 if (pContext)
2148 pContext->m_DictStart = SavedPos;
2149
2150 int32_t nKeys = 0;
2151 FX_FILESIZE dwSignValuePos = 0;
2152 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
2153 new CPDF_Dictionary);
2154 while (1) {
2155 CFX_ByteString key = GetNextWord(nullptr);
2156 if (key.IsEmpty())
2157 return nullptr;
2158
2159 FX_FILESIZE SavedPos = m_Pos - key.GetLength();
2160 if (key == ">>")
2161 break;
2162
2163 if (key == "endobj") {
2164 m_Pos = SavedPos;
2165 break;
2166 }
2167 if (key[0] != '/')
2168 continue;
2169
2170 ++nKeys;
2171 key = PDF_NameDecode(key);
2172 if (key.IsEmpty())
2173 continue;
2174
2175 if (key == "/Contents")
2176 dwSignValuePos = m_Pos;
2177
2178 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, nullptr, true);
2179 if (!pObj)
2180 continue;
2181
2182 CFX_ByteStringC keyNoSlash(key.c_str() + 1, key.GetLength() - 1);
2183 pDict->SetAt(keyNoSlash, pObj);
2184 }
2185
2186 // Only when this is a signature dictionary and has contents, we reset the
2187 // contents to the un-decrypted form.
2188 if (IsSignatureDict(pDict.get()) && dwSignValuePos) {
2189 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
2190 m_Pos = dwSignValuePos;
2191 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, nullptr, FALSE);
2192 pDict->SetAt("Contents", pObj);
2193 }
2194 if (pContext) {
2195 pContext->m_DictEnd = m_Pos;
2196 if (pContext->m_Flags & PDFPARSE_NOSTREAM) {
2197 return pDict.release();
2198 }
2199 }
2200 FX_FILESIZE SavedPos = m_Pos;
2201 CFX_ByteString nextword = GetNextWord(nullptr);
2202 if (nextword != "stream") {
2203 m_Pos = SavedPos;
2204 return pDict.release();
2205 }
2206
2207 return ReadStream(pDict.release(), pContext, objnum, gennum);
2208 }
2209 if (word == ">>") {
2210 m_Pos = SavedPos;
2211 return nullptr;
2212 }
2213 if (bTypeOnly)
2214 return (CPDF_Object*)PDFOBJ_INVALID;
2215
2216 return nullptr;
2217 }
2218
// Parses one PDF object starting at the current read position (m_Pos),
// rejecting malformed arrays and dictionaries instead of returning a partial
// result.
//
// |pObjList| is handed to any CPDF_Reference created here. |objnum| and
// |gennum| are passed to the crypto handler (when one is set) to decrypt
// strings, and are forwarded to the nested GetObject() / ReadStream() calls.
// |pContext| may be null. When its PDFPARSE_TYPEONLY flag is set, no object is
// built: the PDFOBJ_* type code is returned cast to a pointer, so the result
// must not be dereferenced. When PDFPARSE_NOSTREAM is set, a dictionary is
// returned without checking for a following "stream" keyword. For
// dictionaries, m_DictStart and m_DictEnd of |pContext| are filled in.
//
// Returns a newly allocated object, or nullptr when nothing could be parsed
// (empty input, a stray ">>", an unrecognised token, an array that is not
// closed by "]", a dictionary value that fails to parse, or the recursion
// limit being exceeded).
CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict(
    CPDF_IndirectObjectHolder* pObjList,
    FX_DWORD objnum,
    FX_DWORD gennum,
    PARSE_CONTEXT* pContext) {
  // Bound the nesting depth. The restorer is presumably what undoes the
  // increment below when this frame exits (confirm against CFX_AutoRestorer).
  CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
  if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) {
    return NULL;
  }
  FX_FILESIZE SavedPos = m_Pos;
  FX_BOOL bTypeOnly = pContext && (pContext->m_Flags & PDFPARSE_TYPEONLY);
  bool bIsNumber;
  CFX_ByteString word = GetNextWord(&bIsNumber);
  if (word.GetLength() == 0) {
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_INVALID;
    return nullptr;
  }
  if (bIsNumber) {
    // Look ahead for "<num> <num> R". This SavedPos shadows the outer one and
    // marks the position right after the first number.
    FX_FILESIZE SavedPos = m_Pos;
    CFX_ByteString nextword = GetNextWord(&bIsNumber);
    if (bIsNumber) {
      CFX_ByteString nextword2 = GetNextWord(nullptr);
      if (nextword2 == "R") {
        if (bTypeOnly)
          return (CPDF_Object*)PDFOBJ_REFERENCE;
        // Shadows the |objnum| parameter: this is the referenced object's
        // number, taken from the first word. The generation word is not used.
        FX_DWORD objnum = FXSYS_atoi(word);
        return new CPDF_Reference(pObjList, objnum);
      }
    }
    // Not a reference: rewind to just after the first number.
    m_Pos = SavedPos;
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_NUMBER;
    return new CPDF_Number(word);
  }
  if (word == "true" || word == "false") {
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_BOOLEAN;
    return new CPDF_Boolean(word == "true");
  }
  if (word == "null") {
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_NULL;
    return new CPDF_Null;
  }
  if (word == "(") {
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_STRING;
    CFX_ByteString str = ReadString();
    // Strings are always decrypted here whenever a crypto handler is set.
    if (m_pCryptoHandler)
      m_pCryptoHandler->Decrypt(objnum, gennum, str);
    return new CPDF_String(str, FALSE);
  }
  if (word == "<") {
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_STRING;
    CFX_ByteString str = ReadHexString();
    if (m_pCryptoHandler)
      m_pCryptoHandler->Decrypt(objnum, gennum, str);
    return new CPDF_String(str, TRUE);
  }
  if (word == "[") {
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_ARRAY;
    std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>> pArray(
        new CPDF_Array);
    // Elements are parsed with GetObject(), not with this function.
    while (CPDF_Object* pObj =
               GetObject(pObjList, objnum, gennum, nullptr, true)) {
      pArray->Add(pObj);
    }
    // The array is accepted only if the word left in m_WordBuffer when
    // element parsing stopped starts with ']'. Otherwise pArray goes out of
    // scope and its deleter disposes of the partial array.
    return m_WordBuffer[0] == ']' ? pArray.release() : nullptr;
  }
  if (word[0] == '/') {
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_NAME;
    // Drop the leading '/' and decode the rest of the name.
    return new CPDF_Name(
        PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
  }
  if (word == "<<") {
    if (bTypeOnly)
      return (CPDF_Object*)PDFOBJ_DICTIONARY;
    if (pContext)
      pContext->m_DictStart = SavedPos;

    std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict(
        new CPDF_Dictionary);
    while (1) {
      // Shadows the outer SavedPos: position just before the current key.
      FX_FILESIZE SavedPos = m_Pos;
      CFX_ByteString key = GetNextWord(nullptr);
      // No more words before the dictionary was closed: reject it.
      if (key.IsEmpty())
        return nullptr;

      if (key == ">>")
        break;

      // A dictionary cut short by "endobj" is accepted as it stands; the
      // keyword is pushed back so it can be read again.
      if (key == "endobj") {
        m_Pos = SavedPos;
        break;
      }
      // Skip anything in key position that is not a name.
      if (key[0] != '/')
        continue;

      key = PDF_NameDecode(key);
      std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> obj(
          GetObject(pObjList, objnum, gennum, nullptr, true));
      if (!obj) {
        // A value that fails to parse invalidates the whole dictionary.
        // Consume the rest of the current line before giving up.
        uint8_t ch;
        while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) {
        }
        return nullptr;
      }
      // A key consisting only of "/" is parsed but not stored; |obj| then
      // keeps ownership and disposes of the value when it goes out of scope.
      if (key.GetLength() > 1) {
        pDict->SetAt(CFX_ByteStringC(key.c_str() + 1, key.GetLength() - 1),
                     obj.release());
      }
    }
    if (pContext) {
      pContext->m_DictEnd = m_Pos;
      if (pContext->m_Flags & PDFPARSE_NOSTREAM) {
        return pDict.release();
      }
    }
    // A dictionary followed by the "stream" keyword is a stream object;
    // otherwise push the peeked word back and return the dictionary.
    FX_FILESIZE SavedPos = m_Pos;
    CFX_ByteString nextword = GetNextWord(nullptr);
    if (nextword != "stream") {
      m_Pos = SavedPos;
      return pDict.release();
    }

    return ReadStream(pDict.release(), pContext, objnum, gennum);
  }
  if (word == ">>") {
    // A dictionary terminator is left unread so that whoever is parsing the
    // enclosing dictionary sees it.
    m_Pos = SavedPos;
    return nullptr;
  }
  if (bTypeOnly)
    return (CPDF_Object*)PDFOBJ_INVALID;
  return nullptr;
}
2358
ReadEOLMarkers(FX_FILESIZE pos)2359 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {
2360 unsigned char byte1 = 0;
2361 unsigned char byte2 = 0;
2362 GetCharAt(pos, byte1);
2363 GetCharAt(pos + 1, byte2);
2364 unsigned int markers = 0;
2365 if (byte1 == '\r' && byte2 == '\n') {
2366 markers = 2;
2367 } else if (byte1 == '\r' || byte1 == '\n') {
2368 markers = 1;
2369 }
2370 return markers;
2371 }
// Reads the data of a stream object whose dictionary |pDict| has just been
// parsed and whose "stream" keyword has been consumed.
//
// |objnum| / |gennum| identify the object for decryption. |pContext| may be
// null; when present its m_DataStart and m_DataEnd are filled in.
//
// Takes over |pDict|: on success it is handed to the new CPDF_Stream, on
// failure it is released. Returns the new stream, or nullptr when no usable
// length can be determined.
CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict,
                                           PARSE_CONTEXT* pContext,
                                           FX_DWORD objnum,
                                           FX_DWORD gennum) {
  CPDF_Object* pLenObj = pDict->GetElement("Length");
  FX_FILESIZE len = -1;
  CPDF_Reference* pLenObjRef = ToReference(pLenObj);

  // Trust /Length only if it is a direct object, or a reference that has an
  // object list and does not point back at this very object.
  bool differingObjNum = !pLenObjRef || (pLenObjRef->GetObjList() &&
                                         pLenObjRef->GetRefObjNum() != objnum);
  if (pLenObj && differingObjNum)
    len = pLenObj->GetInteger();

  // Locate the start of stream.
  ToNextLine();
  FX_FILESIZE streamStartPos = m_Pos;
  if (pContext) {
    pContext->m_DataStart = streamStartPos;
  }

  const CFX_ByteStringC kEndStreamStr("endstream");
  const CFX_ByteStringC kEndObjStr("endobj");
  // The object whose number equals m_MetadataObjnum is read without the
  // crypto handler.
  CPDF_CryptoHandler* pCryptoHandler =
      objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get();
  if (!pCryptoHandler) {
    // Unencrypted data: verify the declared length, and fall back to
    // searching for the terminating keyword when it does not check out.
    FX_BOOL bSearchForKeyword = TRUE;
    if (len >= 0) {
      // Jump to where the data should end, guarding against overflow and
      // against positions at or beyond the end of the file.
      pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
      pos += len;
      if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) {
        m_Pos = pos.ValueOrDie();
      }
      m_Pos += ReadEOLMarkers(m_Pos);
      // Clear enough of the word buffer that a shorter word cannot be
      // mistaken for "endstream" by the memcmp below.
      FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);
      GetNextWordInternal(nullptr);
      // Earlier version of PDF specification doesn't require EOL marker before
      // 'endstream' keyword. If keyword 'endstream' follows the bytes in
      // specified length, it signals the end of stream.
      if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.GetPtr(),
                       kEndStreamStr.GetLength()) == 0) {
        bSearchForKeyword = FALSE;
      }
    }
    if (bSearchForKeyword) {
      // If len is not available, len needs to be calculated
      // by searching the keywords "endstream" or "endobj".
      m_Pos = streamStartPos;
      FX_FILESIZE endStreamOffset = 0;
      while (endStreamOffset >= 0) {
        endStreamOffset = FindTag(kEndStreamStr, 0);
        if (endStreamOffset < 0) {
          // Can't find any "endstream".
          break;
        }
        if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen,
                        kEndStreamStr, TRUE)) {
          // Stop searching when the keyword "endstream" is found.
          // The offset is recomputed relative to the start of the data.
          endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength();
          break;
        }
      }
      // Same search again, this time for "endobj".
      m_Pos = streamStartPos;
      FX_FILESIZE endObjOffset = 0;
      while (endObjOffset >= 0) {
        endObjOffset = FindTag(kEndObjStr, 0);
        if (endObjOffset < 0) {
          // Can't find any "endobj".
          break;
        }
        if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr,
                        TRUE)) {
          // Stop searching when the keyword "endobj" is found.
          endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength();
          break;
        }
      }
      if (endStreamOffset < 0 && endObjOffset < 0) {
        // Can't find "endstream" or "endobj".
        pDict->Release();
        return nullptr;
      }
      // Use whichever keyword comes first; if only one was found, use it.
      if (endStreamOffset < 0 && endObjOffset >= 0) {
        // Correct the position of end stream.
        endStreamOffset = endObjOffset;
      } else if (endStreamOffset >= 0 && endObjOffset < 0) {
        // Correct the position of end obj.
        endObjOffset = endStreamOffset;
      } else if (endStreamOffset > endObjOffset) {
        endStreamOffset = endObjOffset;
      }
      len = endStreamOffset;
      // Do not count an end-of-line sequence directly before the keyword as
      // stream data: first try a two-byte "\r\n", then a single marker byte.
      int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
      if (numMarkers == 2) {
        len -= 2;
      } else {
        numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);
        if (numMarkers == 1) {
          len -= 1;
        }
      }
      if (len < 0) {
        pDict->Release();
        return nullptr;
      }
      // Record the length that was actually found in the dictionary.
      pDict->SetAtInteger("Length", len);
    }
    m_Pos = streamStartPos;
  }
  // On the encrypted path the declared length is used as-is, so it can still
  // be negative (missing or self-referencing /Length).
  // NOTE(review): on that path |len| is not checked against m_FileLen before
  // it is used as the allocation size below - confirm this is intended.
  if (len < 0) {
    pDict->Release();
    return nullptr;
  }
  uint8_t* pData = nullptr;
  if (len > 0) {
    pData = FX_Alloc(uint8_t, len);
    // NOTE(review): the result of ReadBlock() is not checked here.
    ReadBlock(pData, len);
    if (pCryptoHandler) {
      // Decrypt into a separate buffer, then take over that buffer as the
      // stream data and adopt its size as the stream length.
      CFX_BinaryBuf dest_buf;
      dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len));
      void* context = pCryptoHandler->DecryptStart(objnum, gennum);
      pCryptoHandler->DecryptStream(context, pData, len, dest_buf);
      pCryptoHandler->DecryptFinish(context, dest_buf);
      FX_Free(pData);
      pData = dest_buf.GetBuffer();
      len = dest_buf.GetSize();
      dest_buf.DetachBuffer();
    }
  }
  CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict);
  if (pContext) {
    pContext->m_DataEnd = pContext->m_DataStart + len;
  }
  // Peek at the word following the data. If it is exactly "endobj" and is
  // followed by an end-of-line marker, rewind so that the keyword is left
  // unread; otherwise the peeked word stays consumed.
  streamStartPos = m_Pos;
  FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);
  GetNextWordInternal(nullptr);
  int numMarkers = ReadEOLMarkers(m_Pos);
  if (m_WordSize == kEndObjStr.GetLength() && numMarkers != 0 &&
      FXSYS_memcmp(m_WordBuffer, kEndObjStr.GetPtr(), kEndObjStr.GetLength()) ==
          0) {
    m_Pos = streamStartPos;
  }
  return pStream;
}
InitParser(IFX_FileRead * pFileAccess,FX_DWORD HeaderOffset)2515 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess,
2516 FX_DWORD HeaderOffset) {
2517 FX_Free(m_pFileBuf);
2518 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize);
2519 m_HeaderOffset = HeaderOffset;
2520 m_FileLen = pFileAccess->GetSize();
2521 m_Pos = 0;
2522 m_pFileAccess = pFileAccess;
2523 m_BufOffset = 0;
2524 pFileAccess->ReadBlock(
2525 m_pFileBuf, 0,
2526 (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize));
2527 }
GetDirectNum()2528 int32_t CPDF_SyntaxParser::GetDirectNum() {
2529 bool bIsNumber;
2530 GetNextWordInternal(&bIsNumber);
2531 if (!bIsNumber)
2532 return 0;
2533
2534 m_WordBuffer[m_WordSize] = 0;
2535 return FXSYS_atoi(reinterpret_cast<const FX_CHAR*>(m_WordBuffer));
2536 }
2537
IsWholeWord(FX_FILESIZE startpos,FX_FILESIZE limit,const CFX_ByteStringC & tag,FX_BOOL checkKeyword)2538 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
2539 FX_FILESIZE limit,
2540 const CFX_ByteStringC& tag,
2541 FX_BOOL checkKeyword) {
2542 const FX_DWORD taglen = tag.GetLength();
2543 bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);
2544 bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&
2545 !PDFCharIsWhitespace(tag[taglen - 1]);
2546 uint8_t ch;
2547 if (bCheckRight && startpos + (int32_t)taglen <= limit &&
2548 GetCharAt(startpos + (int32_t)taglen, ch)) {
2549 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
2550 (checkKeyword && PDFCharIsDelimiter(ch))) {
2551 return false;
2552 }
2553 }
2554
2555 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {
2556 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
2557 (checkKeyword && PDFCharIsDelimiter(ch))) {
2558 return false;
2559 }
2560 }
2561 return true;
2562 }
2563
SearchWord(const CFX_ByteStringC & tag,FX_BOOL bWholeWord,FX_BOOL bForward,FX_FILESIZE limit)2564 FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,
2565 FX_BOOL bWholeWord,
2566 FX_BOOL bForward,
2567 FX_FILESIZE limit) {
2568 int32_t taglen = tag.GetLength();
2569 if (taglen == 0) {
2570 return FALSE;
2571 }
2572 FX_FILESIZE pos = m_Pos;
2573 int32_t offset = 0;
2574 if (!bForward) {
2575 offset = taglen - 1;
2576 }
2577 const uint8_t* tag_data = tag.GetPtr();
2578 uint8_t byte;
2579 while (1) {
2580 if (bForward) {
2581 if (limit) {
2582 if (pos >= m_Pos + limit) {
2583 return FALSE;
2584 }
2585 }
2586 if (!GetCharAt(pos, byte)) {
2587 return FALSE;
2588 }
2589 } else {
2590 if (limit) {
2591 if (pos <= m_Pos - limit) {
2592 return FALSE;
2593 }
2594 }
2595 if (!GetCharAtBackward(pos, byte)) {
2596 return FALSE;
2597 }
2598 }
2599 if (byte == tag_data[offset]) {
2600 if (bForward) {
2601 offset++;
2602 if (offset < taglen) {
2603 pos++;
2604 continue;
2605 }
2606 } else {
2607 offset--;
2608 if (offset >= 0) {
2609 pos--;
2610 continue;
2611 }
2612 }
2613 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;
2614 if (!bWholeWord || IsWholeWord(startpos, limit, tag, FALSE)) {
2615 m_Pos = startpos;
2616 return TRUE;
2617 }
2618 }
2619 if (bForward) {
2620 offset = byte == tag_data[0] ? 1 : 0;
2621 pos++;
2622 } else {
2623 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1;
2624 pos--;
2625 }
2626 if (pos < 0) {
2627 return FALSE;
2628 }
2629 }
2630 return FALSE;
2631 }
2632
SearchMultiWord(const CFX_ByteStringC & tags,FX_BOOL bWholeWord,FX_FILESIZE limit)2633 int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags,
2634 FX_BOOL bWholeWord,
2635 FX_FILESIZE limit) {
2636 int32_t ntags = 1;
2637 for (int i = 0; i < tags.GetLength(); ++i) {
2638 if (tags[i] == 0) {
2639 ++ntags;
2640 }
2641 }
2642
2643 std::vector<SearchTagRecord> patterns(ntags);
2644 FX_DWORD start = 0;
2645 FX_DWORD itag = 0;
2646 FX_DWORD max_len = 0;
2647 for (int i = 0; i <= tags.GetLength(); ++i) {
2648 if (tags[i] == 0) {
2649 FX_DWORD len = i - start;
2650 max_len = std::max(len, max_len);
2651 patterns[itag].m_pTag = tags.GetCStr() + start;
2652 patterns[itag].m_Len = len;
2653 patterns[itag].m_Offset = 0;
2654 start = i + 1;
2655 ++itag;
2656 }
2657 }
2658
2659 const FX_FILESIZE pos_limit = m_Pos + limit;
2660 for (FX_FILESIZE pos = m_Pos; !limit || pos < pos_limit; ++pos) {
2661 uint8_t byte;
2662 if (!GetCharAt(pos, byte))
2663 break;
2664
2665 for (int i = 0; i < ntags; ++i) {
2666 SearchTagRecord& pat = patterns[i];
2667 if (pat.m_pTag[pat.m_Offset] != byte) {
2668 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;
2669 continue;
2670 }
2671
2672 ++pat.m_Offset;
2673 if (pat.m_Offset != pat.m_Len)
2674 continue;
2675
2676 if (!bWholeWord ||
2677 IsWholeWord(pos - pat.m_Len, limit,
2678 CFX_ByteStringC(pat.m_pTag, pat.m_Len), FALSE)) {
2679 return i;
2680 }
2681
2682 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0;
2683 }
2684 }
2685 return -1;
2686 }
2687
FindTag(const CFX_ByteStringC & tag,FX_FILESIZE limit)2688 FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag,
2689 FX_FILESIZE limit) {
2690 int32_t taglen = tag.GetLength();
2691 int32_t match = 0;
2692 limit += m_Pos;
2693 FX_FILESIZE startpos = m_Pos;
2694 while (1) {
2695 uint8_t ch;
2696 if (!GetNextChar(ch)) {
2697 return -1;
2698 }
2699 if (ch == tag[match]) {
2700 match++;
2701 if (match == taglen) {
2702 return m_Pos - startpos - taglen;
2703 }
2704 } else {
2705 match = ch == tag[0] ? 1 : 0;
2706 }
2707 if (limit && m_Pos == limit) {
2708 return -1;
2709 }
2710 }
2711 return -1;
2712 }
GetBinary(uint8_t * buffer,FX_DWORD size)2713 void CPDF_SyntaxParser::GetBinary(uint8_t* buffer, FX_DWORD size) {
2714 FX_DWORD offset = 0;
2715 uint8_t ch;
2716 while (1) {
2717 if (!GetNextChar(ch)) {
2718 return;
2719 }
2720 buffer[offset++] = ch;
2721 if (offset == size) {
2722 break;
2723 }
2724 }
2725 }
2726
// Implementation of IPDF_DataAvail. It advances a status value (m_docStatus)
// through a series of Check*/Load* steps; each step either makes progress or
// reports that data is missing, in which case ranges are requested through
// the IFX_DownloadHints passed in by the caller.
class CPDF_DataAvail final : public IPDF_DataAvail {
 public:
  CPDF_DataAvail(IFX_FileAvail* pFileAvail,
                 IFX_FileRead* pFileRead,
                 FX_BOOL bSupportHintTable);
  ~CPDF_DataAvail() override;

  // IPDF_DataAvail:
  DocAvailStatus IsDocAvail(IFX_DownloadHints* pHints) override;
  void SetDocument(CPDF_Document* pDoc) override;
  DocAvailStatus IsPageAvail(int iPage, IFX_DownloadHints* pHints) override;
  DocFormStatus IsFormAvail(IFX_DownloadHints* pHints) override;
  DocLinearizationStatus IsLinearizedPDF() override;
  FX_BOOL IsLinearized() override { return m_bLinearized; }
  void GetLinearizedMainXRefInfo(FX_FILESIZE* pPos, FX_DWORD* pSize) override;

  int GetPageCount() const;
  CPDF_Dictionary* GetPage(int index);

  friend class CPDF_HintTables;

 protected:
  // Recursion limits and the shared depth counter for data-availability
  // checks.
  static const int kMaxDataAvailRecursionDepth = 64;
  static int s_CurrentDataAvailRecursionDepth;
  static const int kMaxPageRecursionDepth = 1024;

  // Returns the byte size of object |objnum| and stores its file offset in
  // |offset|; returns 0 when the size cannot be determined.
  FX_DWORD GetObjectSize(FX_DWORD objnum, FX_FILESIZE& offset);
  // Walks everything reachable from |obj_array|. Returns TRUE when all of it
  // is available; otherwise returns FALSE and collects the objects that still
  // need checking in |ret_array|.
  FX_BOOL IsObjectsAvail(CFX_ArrayTemplate<CPDF_Object*>& obj_array,
                         FX_BOOL bParsePage,
                         IFX_DownloadHints* pHints,
                         CFX_ArrayTemplate<CPDF_Object*>& ret_array);
  // Runs the step that corresponds to the current m_docStatus.
  FX_BOOL CheckDocStatus(IFX_DownloadHints* pHints);
  FX_BOOL CheckHeader(IFX_DownloadHints* pHints);
  FX_BOOL CheckFirstPage(IFX_DownloadHints* pHints);
  FX_BOOL CheckHintTables(IFX_DownloadHints* pHints);
  FX_BOOL CheckEnd(IFX_DownloadHints* pHints);
  FX_BOOL CheckCrossRef(IFX_DownloadHints* pHints);
  FX_BOOL CheckCrossRefItem(IFX_DownloadHints* pHints);
  FX_BOOL CheckTrailer(IFX_DownloadHints* pHints);
  FX_BOOL CheckRoot(IFX_DownloadHints* pHints);
  FX_BOOL CheckInfo(IFX_DownloadHints* pHints);
  FX_BOOL CheckPages(IFX_DownloadHints* pHints);
  FX_BOOL CheckPage(IFX_DownloadHints* pHints);
  FX_BOOL CheckResources(IFX_DownloadHints* pHints);
  FX_BOOL CheckAnnots(IFX_DownloadHints* pHints);
  FX_BOOL CheckAcroForm(IFX_DownloadHints* pHints);
  FX_BOOL CheckAcroFormSubObject(IFX_DownloadHints* pHints);
  FX_BOOL CheckTrailerAppend(IFX_DownloadHints* pHints);
  // Page-level counterpart of CheckDocStatus().
  FX_BOOL CheckPageStatus(IFX_DownloadHints* pHints);
  FX_BOOL CheckAllCrossRefStream(IFX_DownloadHints* pHints);

  int32_t CheckCrossRefStream(IFX_DownloadHints* pHints,
                              FX_FILESIZE& xref_offset);
  FX_BOOL IsLinearizedFile(uint8_t* pData, FX_DWORD dwLen);
  void SetStartOffset(FX_FILESIZE dwOffset);
  FX_BOOL GetNextToken(CFX_ByteString& token);
  FX_BOOL GetNextChar(uint8_t& ch);
  CPDF_Object* ParseIndirectObjectAt(
      FX_FILESIZE pos,
      FX_DWORD objnum,
      CPDF_IndirectObjectHolder* pObjList = NULL);
  CPDF_Object* GetObject(FX_DWORD objnum,
                         IFX_DownloadHints* pHints,
                         FX_BOOL* pExistInFile);
  FX_BOOL GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages);
  FX_BOOL PreparePageItem();
  FX_BOOL LoadPages(IFX_DownloadHints* pHints);
  FX_BOOL LoadAllXref(IFX_DownloadHints* pHints);
  // Requires the whole file to be available; requests it when it is not.
  FX_BOOL LoadAllFile(IFX_DownloadHints* pHints);
  DocAvailStatus CheckLinearizedData(IFX_DownloadHints* pHints);
  FX_BOOL CheckPageAnnots(int iPage, IFX_DownloadHints* pHints);

  DocAvailStatus CheckLinearizedFirstPage(int iPage, IFX_DownloadHints* pHints);
  FX_BOOL HaveResourceAncestor(CPDF_Dictionary* pDict);
  FX_BOOL CheckPage(int32_t iPage, IFX_DownloadHints* pHints);
  FX_BOOL LoadDocPages(IFX_DownloadHints* pHints);
  FX_BOOL LoadDocPage(int32_t iPage, IFX_DownloadHints* pHints);
  FX_BOOL CheckPageNode(CPDF_PageNode& pageNodes,
                        int32_t iPage,
                        int32_t& iCount,
                        IFX_DownloadHints* pHints,
                        int level);
  FX_BOOL CheckUnkownPageNode(FX_DWORD dwPageNo,
                              CPDF_PageNode* pPageNode,
                              IFX_DownloadHints* pHints);
  FX_BOOL CheckArrayPageNode(FX_DWORD dwPageNo,
                             CPDF_PageNode* pPageNode,
                             IFX_DownloadHints* pHints);
  FX_BOOL CheckPageCount(IFX_DownloadHints* pHints);
  bool IsFirstCheck(int iPage);
  void ResetFirstCheck(int iPage);
  FX_BOOL IsDataAvail(FX_FILESIZE offset,
                      FX_DWORD size,
                      IFX_DownloadHints* pHints);

  CPDF_Parser m_parser;

  CPDF_SyntaxParser m_syntaxParser;

  // Released in the destructor when set.
  CPDF_Object* m_pRoot;

  FX_DWORD m_dwRootObjNum;

  FX_DWORD m_dwInfoObjNum;

  // Released in the destructor when set.
  CPDF_Object* m_pLinearized;

  // Released in the destructor when set.
  CPDF_Object* m_pTrailer;

  // Set once CheckDocStatus() has no further step to run; ends the loop in
  // IsDocAvail().
  FX_BOOL m_bDocAvail;

  FX_FILESIZE m_dwHeaderOffset;

  FX_FILESIZE m_dwLastXRefOffset;

  FX_FILESIZE m_dwXRefOffset;

  FX_FILESIZE m_dwTrailerOffset;

  FX_FILESIZE m_dwCurrentOffset;

  // Current step; CheckDocStatus() and CheckPageStatus() dispatch on it.
  PDF_DATAAVAIL_STATUS m_docStatus;

  // File size as reported by m_pFileRead->GetSize().
  FX_FILESIZE m_dwFileLen;

  // Set through SetDocument(); not released by this class's destructor.
  CPDF_Document* m_pDocument;

  // Numbers of the indirect objects that IsObjectsAvail() has already loaded
  // and queued, so that each is expanded only once.
  std::set<FX_DWORD> m_ObjectSet;

  // Objects still waiting for an availability check (used by
  // CheckAcroFormSubObject()).
  CFX_ArrayTemplate<CPDF_Object*> m_objs_array;

  FX_FILESIZE m_Pos;

  FX_FILESIZE m_bufferOffset;

  FX_DWORD m_bufferSize;

  CFX_ByteString m_WordBuf;

  uint8_t m_bufferData[512];

  CFX_FileSizeArray m_CrossOffset;

  CFX_DWordArray m_XRefStreamList;

  CFX_DWordArray m_PageObjList;

  FX_DWORD m_PagesObjNum;

  // Value returned by IsLinearized().
  FX_BOOL m_bLinearized;

  FX_DWORD m_dwFirstPageNo;

  FX_BOOL m_bLinearedDataOK;

  FX_BOOL m_bMainXRefLoadTried;

  FX_BOOL m_bMainXRefLoadedOK;

  FX_BOOL m_bPagesTreeLoad;

  FX_BOOL m_bPagesLoad;

  CPDF_Parser* m_pCurrentParser;

  FX_FILESIZE m_dwCurrentXRefSteam;

  FX_BOOL m_bAnnotsLoad;

  FX_BOOL m_bHaveAcroForm;

  // Object number passed to GetObject() by CheckAcroForm().
  FX_DWORD m_dwAcroFormObjNum;

  FX_BOOL m_bAcroFormLoad;

  // Result of the most recent lookup in CheckAcroForm().
  CPDF_Object* m_pAcroForm;

  // AcroForm objects collected by CheckAcroForm(). Each element is released
  // either by CheckAcroFormSubObject() or by the destructor.
  CFX_ArrayTemplate<CPDF_Object*> m_arrayAcroforms;

  CPDF_Dictionary* m_pPageDict;

  CPDF_Object* m_pPageResource;

  FX_BOOL m_bNeedDownLoadResource;

  FX_BOOL m_bPageLoadedOK;

  FX_BOOL m_bLinearizedFormParamLoad;

  CFX_ArrayTemplate<CPDF_Object*> m_PagesArray;

  FX_DWORD m_dwEncryptObjNum;

  FX_FILESIZE m_dwPrevXRefOffset;

  // Selects between CheckPages()/CheckPage() and LoadDocPages() in
  // CheckDocStatus().
  FX_BOOL m_bTotalLoadPageTree;

  FX_BOOL m_bCurPageDictLoadOK;

  CPDF_PageNode m_pageNodes;

  std::set<FX_DWORD> m_pageMapCheckState;
  std::set<FX_DWORD> m_pagesLoadState;

  std::unique_ptr<CPDF_HintTables> m_pHintTables;
  // Copy of the constructor's bSupportHintTable argument.
  FX_BOOL m_bSupportHintTable;
};
2934
// Stores the two file interfaces in m_pFileAvail and m_pFileRead. Nothing
// else is done with them here.
IPDF_DataAvail::IPDF_DataAvail(IFX_FileAvail* pFileAvail,
                               IFX_FileRead* pFileRead)
    : m_pFileAvail(pFileAvail), m_pFileRead(pFileRead) {}
2938
2939 // static
Create(IFX_FileAvail * pFileAvail,IFX_FileRead * pFileRead)2940 IPDF_DataAvail* IPDF_DataAvail::Create(IFX_FileAvail* pFileAvail,
2941 IFX_FileRead* pFileRead) {
2942 return new CPDF_DataAvail(pFileAvail, pFileRead, TRUE);
2943 }
2944
// static
// Out-of-class definition of the recursion depth counter declared in
// CPDF_DataAvail; it starts at zero.
int CPDF_DataAvail::s_CurrentDataAvailRecursionDepth = 0;
2947
CPDF_DataAvail(IFX_FileAvail * pFileAvail,IFX_FileRead * pFileRead,FX_BOOL bSupportHintTable)2948 CPDF_DataAvail::CPDF_DataAvail(IFX_FileAvail* pFileAvail,
2949 IFX_FileRead* pFileRead,
2950 FX_BOOL bSupportHintTable)
2951 : IPDF_DataAvail(pFileAvail, pFileRead) {
2952 m_Pos = 0;
2953 m_dwFileLen = 0;
2954 if (m_pFileRead) {
2955 m_dwFileLen = (FX_DWORD)m_pFileRead->GetSize();
2956 }
2957 m_dwCurrentOffset = 0;
2958 m_dwXRefOffset = 0;
2959 m_bufferOffset = 0;
2960 m_dwFirstPageNo = 0;
2961 m_bufferSize = 0;
2962 m_PagesObjNum = 0;
2963 m_dwCurrentXRefSteam = 0;
2964 m_dwAcroFormObjNum = 0;
2965 m_dwInfoObjNum = 0;
2966 m_pDocument = 0;
2967 m_dwEncryptObjNum = 0;
2968 m_dwPrevXRefOffset = 0;
2969 m_dwLastXRefOffset = 0;
2970 m_bDocAvail = FALSE;
2971 m_bMainXRefLoadTried = FALSE;
2972 m_bDocAvail = FALSE;
2973 m_bLinearized = FALSE;
2974 m_bPagesLoad = FALSE;
2975 m_bPagesTreeLoad = FALSE;
2976 m_bMainXRefLoadedOK = FALSE;
2977 m_bAnnotsLoad = FALSE;
2978 m_bHaveAcroForm = FALSE;
2979 m_bAcroFormLoad = FALSE;
2980 m_bPageLoadedOK = FALSE;
2981 m_bNeedDownLoadResource = FALSE;
2982 m_bLinearizedFormParamLoad = FALSE;
2983 m_pLinearized = NULL;
2984 m_pRoot = NULL;
2985 m_pTrailer = NULL;
2986 m_pCurrentParser = NULL;
2987 m_pAcroForm = NULL;
2988 m_pPageDict = NULL;
2989 m_pPageResource = NULL;
2990 m_docStatus = PDF_DATAAVAIL_HEADER;
2991 m_parser.m_bOwnFileRead = FALSE;
2992 m_bTotalLoadPageTree = FALSE;
2993 m_bCurPageDictLoadOK = FALSE;
2994 m_bLinearedDataOK = FALSE;
2995 m_bSupportHintTable = bSupportHintTable;
2996 }
~CPDF_DataAvail()2997 CPDF_DataAvail::~CPDF_DataAvail() {
2998 if (m_pLinearized) {
2999 m_pLinearized->Release();
3000 }
3001 if (m_pRoot) {
3002 m_pRoot->Release();
3003 }
3004 if (m_pTrailer) {
3005 m_pTrailer->Release();
3006 }
3007
3008 int iSize = m_arrayAcroforms.GetSize();
3009 for (int i = 0; i < iSize; ++i) {
3010 m_arrayAcroforms.GetAt(i)->Release();
3011 }
3012 }
// Records |pDoc| in m_pDocument, which GetObjectSize() and IsObjectsAvail()
// later dereference. The pointer is stored as-is, without any check.
void CPDF_DataAvail::SetDocument(CPDF_Document* pDoc) {
  m_pDocument = pDoc;
}
GetObjectSize(FX_DWORD objnum,FX_FILESIZE & offset)3016 FX_DWORD CPDF_DataAvail::GetObjectSize(FX_DWORD objnum, FX_FILESIZE& offset) {
3017 CPDF_Parser* pParser = (CPDF_Parser*)(m_pDocument->GetParser());
3018 if (!pParser || !pParser->IsValidObjectNumber(objnum))
3019 return 0;
3020
3021 if (pParser->m_V5Type[objnum] == 2)
3022 objnum = pParser->m_ObjectInfo[objnum].pos;
3023
3024 if (pParser->m_V5Type[objnum] == 1 || pParser->m_V5Type[objnum] == 255) {
3025 offset = pParser->m_ObjectInfo[objnum].pos;
3026 if (offset == 0) {
3027 return 0;
3028 }
3029 void* pResult = FXSYS_bsearch(&offset, pParser->m_SortedOffset.GetData(),
3030 pParser->m_SortedOffset.GetSize(),
3031 sizeof(FX_FILESIZE), CompareFileSize);
3032 if (!pResult) {
3033 return 0;
3034 }
3035 if ((FX_FILESIZE*)pResult -
3036 (FX_FILESIZE*)pParser->m_SortedOffset.GetData() ==
3037 pParser->m_SortedOffset.GetSize() - 1) {
3038 return 0;
3039 }
3040 return (FX_DWORD)(((FX_FILESIZE*)pResult)[1] - offset);
3041 }
3042 return 0;
3043 }
IsObjectsAvail(CFX_ArrayTemplate<CPDF_Object * > & obj_array,FX_BOOL bParsePage,IFX_DownloadHints * pHints,CFX_ArrayTemplate<CPDF_Object * > & ret_array)3044 FX_BOOL CPDF_DataAvail::IsObjectsAvail(
3045 CFX_ArrayTemplate<CPDF_Object*>& obj_array,
3046 FX_BOOL bParsePage,
3047 IFX_DownloadHints* pHints,
3048 CFX_ArrayTemplate<CPDF_Object*>& ret_array) {
3049 if (!obj_array.GetSize()) {
3050 return TRUE;
3051 }
3052 FX_DWORD count = 0;
3053 CFX_ArrayTemplate<CPDF_Object*> new_obj_array;
3054 int32_t i = 0;
3055 for (i = 0; i < obj_array.GetSize(); i++) {
3056 CPDF_Object* pObj = obj_array[i];
3057 if (!pObj)
3058 continue;
3059
3060 int32_t type = pObj->GetType();
3061 switch (type) {
3062 case PDFOBJ_ARRAY: {
3063 CPDF_Array* pArray = pObj->GetArray();
3064 for (FX_DWORD k = 0; k < pArray->GetCount(); k++) {
3065 new_obj_array.Add(pArray->GetElement(k));
3066 }
3067 } break;
3068 case PDFOBJ_STREAM:
3069 pObj = pObj->GetDict();
3070 case PDFOBJ_DICTIONARY: {
3071 CPDF_Dictionary* pDict = pObj->GetDict();
3072 if (pDict && pDict->GetString("Type") == "Page" && !bParsePage) {
3073 continue;
3074 }
3075 for (const auto& it : *pDict) {
3076 const CFX_ByteString& key = it.first;
3077 CPDF_Object* value = it.second;
3078 if (key != "Parent") {
3079 new_obj_array.Add(value);
3080 }
3081 }
3082 } break;
3083 case PDFOBJ_REFERENCE: {
3084 CPDF_Reference* pRef = pObj->AsReference();
3085 FX_DWORD dwNum = pRef->GetRefObjNum();
3086 FX_FILESIZE offset;
3087 FX_DWORD size = GetObjectSize(dwNum, offset);
3088 if (size == 0 || offset < 0 || offset >= m_dwFileLen) {
3089 break;
3090 }
3091 if (!IsDataAvail(offset, size, pHints)) {
3092 ret_array.Add(pObj);
3093 count++;
3094 } else if (!pdfium::ContainsKey(m_ObjectSet, dwNum)) {
3095 m_ObjectSet.insert(dwNum);
3096 CPDF_Object* pReferred =
3097 m_pDocument->GetIndirectObject(pRef->GetRefObjNum(), nullptr);
3098 if (pReferred) {
3099 new_obj_array.Add(pReferred);
3100 }
3101 }
3102 } break;
3103 }
3104 }
3105 if (count > 0) {
3106 int32_t iSize = new_obj_array.GetSize();
3107 for (i = 0; i < iSize; ++i) {
3108 CPDF_Object* pObj = new_obj_array[i];
3109 if (CPDF_Reference* pRef = pObj->AsReference()) {
3110 FX_DWORD dwNum = pRef->GetRefObjNum();
3111 if (!pdfium::ContainsKey(m_ObjectSet, dwNum))
3112 ret_array.Add(pObj);
3113 } else {
3114 ret_array.Add(pObj);
3115 }
3116 }
3117 return FALSE;
3118 }
3119 obj_array.RemoveAll();
3120 obj_array.Append(new_obj_array);
3121 return IsObjectsAvail(obj_array, FALSE, pHints, ret_array);
3122 }
3123
IsDocAvail(IFX_DownloadHints * pHints)3124 IPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail(
3125 IFX_DownloadHints* pHints) {
3126 if (!m_dwFileLen && m_pFileRead) {
3127 m_dwFileLen = (FX_DWORD)m_pFileRead->GetSize();
3128 if (!m_dwFileLen) {
3129 return DataError;
3130 }
3131 }
3132 while (!m_bDocAvail) {
3133 if (!CheckDocStatus(pHints)) {
3134 return DataNotAvailable;
3135 }
3136 }
3137 return DataAvailable;
3138 }
3139
CheckAcroFormSubObject(IFX_DownloadHints * pHints)3140 FX_BOOL CPDF_DataAvail::CheckAcroFormSubObject(IFX_DownloadHints* pHints) {
3141 if (!m_objs_array.GetSize()) {
3142 m_objs_array.RemoveAll();
3143 m_ObjectSet.clear();
3144 CFX_ArrayTemplate<CPDF_Object*> obj_array;
3145 obj_array.Append(m_arrayAcroforms);
3146 FX_BOOL bRet = IsObjectsAvail(obj_array, FALSE, pHints, m_objs_array);
3147 if (bRet) {
3148 m_objs_array.RemoveAll();
3149 }
3150 return bRet;
3151 }
3152 CFX_ArrayTemplate<CPDF_Object*> new_objs_array;
3153 FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
3154 if (bRet) {
3155 int32_t iSize = m_arrayAcroforms.GetSize();
3156 for (int32_t i = 0; i < iSize; ++i) {
3157 m_arrayAcroforms.GetAt(i)->Release();
3158 }
3159 m_arrayAcroforms.RemoveAll();
3160 } else {
3161 m_objs_array.RemoveAll();
3162 m_objs_array.Append(new_objs_array);
3163 }
3164 return bRet;
3165 }
CheckAcroForm(IFX_DownloadHints * pHints)3166 FX_BOOL CPDF_DataAvail::CheckAcroForm(IFX_DownloadHints* pHints) {
3167 FX_BOOL bExist = FALSE;
3168 m_pAcroForm = GetObject(m_dwAcroFormObjNum, pHints, &bExist);
3169 if (!bExist) {
3170 m_docStatus = PDF_DATAAVAIL_PAGETREE;
3171 return TRUE;
3172 }
3173 if (!m_pAcroForm) {
3174 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3175 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3176 return TRUE;
3177 }
3178 return FALSE;
3179 }
3180 m_arrayAcroforms.Add(m_pAcroForm);
3181 m_docStatus = PDF_DATAAVAIL_PAGETREE;
3182 return TRUE;
3183 }
CheckDocStatus(IFX_DownloadHints * pHints)3184 FX_BOOL CPDF_DataAvail::CheckDocStatus(IFX_DownloadHints* pHints) {
3185 switch (m_docStatus) {
3186 case PDF_DATAAVAIL_HEADER:
3187 return CheckHeader(pHints);
3188 case PDF_DATAAVAIL_FIRSTPAGE:
3189 case PDF_DATAAVAIL_FIRSTPAGE_PREPARE:
3190 return CheckFirstPage(pHints);
3191 case PDF_DATAAVAIL_HINTTABLE:
3192 return CheckHintTables(pHints);
3193 case PDF_DATAAVAIL_END:
3194 return CheckEnd(pHints);
3195 case PDF_DATAAVAIL_CROSSREF:
3196 return CheckCrossRef(pHints);
3197 case PDF_DATAAVAIL_CROSSREF_ITEM:
3198 return CheckCrossRefItem(pHints);
3199 case PDF_DATAAVAIL_CROSSREF_STREAM:
3200 return CheckAllCrossRefStream(pHints);
3201 case PDF_DATAAVAIL_TRAILER:
3202 return CheckTrailer(pHints);
3203 case PDF_DATAAVAIL_TRAILER_APPEND:
3204 return CheckTrailerAppend(pHints);
3205 case PDF_DATAAVAIL_LOADALLCROSSREF:
3206 return LoadAllXref(pHints);
3207 case PDF_DATAAVAIL_LOADALLFILE:
3208 return LoadAllFile(pHints);
3209 case PDF_DATAAVAIL_ROOT:
3210 return CheckRoot(pHints);
3211 case PDF_DATAAVAIL_INFO:
3212 return CheckInfo(pHints);
3213 case PDF_DATAAVAIL_ACROFORM:
3214 return CheckAcroForm(pHints);
3215 case PDF_DATAAVAIL_PAGETREE:
3216 if (m_bTotalLoadPageTree) {
3217 return CheckPages(pHints);
3218 }
3219 return LoadDocPages(pHints);
3220 case PDF_DATAAVAIL_PAGE:
3221 if (m_bTotalLoadPageTree) {
3222 return CheckPage(pHints);
3223 }
3224 m_docStatus = PDF_DATAAVAIL_PAGE_LATERLOAD;
3225 return TRUE;
3226 case PDF_DATAAVAIL_ERROR:
3227 return LoadAllFile(pHints);
3228 case PDF_DATAAVAIL_PAGE_LATERLOAD:
3229 m_docStatus = PDF_DATAAVAIL_PAGE;
3230 default:
3231 m_bDocAvail = TRUE;
3232 return TRUE;
3233 }
3234 }
CheckPageStatus(IFX_DownloadHints * pHints)3235 FX_BOOL CPDF_DataAvail::CheckPageStatus(IFX_DownloadHints* pHints) {
3236 switch (m_docStatus) {
3237 case PDF_DATAAVAIL_PAGETREE:
3238 return CheckPages(pHints);
3239 case PDF_DATAAVAIL_PAGE:
3240 return CheckPage(pHints);
3241 case PDF_DATAAVAIL_ERROR:
3242 return LoadAllFile(pHints);
3243 default:
3244 m_bPagesTreeLoad = TRUE;
3245 m_bPagesLoad = TRUE;
3246 return TRUE;
3247 }
3248 }
LoadAllFile(IFX_DownloadHints * pHints)3249 FX_BOOL CPDF_DataAvail::LoadAllFile(IFX_DownloadHints* pHints) {
3250 if (m_pFileAvail->IsDataAvail(0, (FX_DWORD)m_dwFileLen)) {
3251 m_docStatus = PDF_DATAAVAIL_DONE;
3252 return TRUE;
3253 }
3254 pHints->AddSegment(0, (FX_DWORD)m_dwFileLen);
3255 return FALSE;
3256 }
LoadAllXref(IFX_DownloadHints * pHints)3257 FX_BOOL CPDF_DataAvail::LoadAllXref(IFX_DownloadHints* pHints) {
3258 m_parser.m_Syntax.InitParser(m_pFileRead, (FX_DWORD)m_dwHeaderOffset);
3259 m_parser.m_bOwnFileRead = FALSE;
3260 if (!m_parser.LoadAllCrossRefV4(m_dwLastXRefOffset) &&
3261 !m_parser.LoadAllCrossRefV5(m_dwLastXRefOffset)) {
3262 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3263 return FALSE;
3264 }
3265 FXSYS_qsort(m_parser.m_SortedOffset.GetData(),
3266 m_parser.m_SortedOffset.GetSize(), sizeof(FX_FILESIZE),
3267 CompareFileSize);
3268 m_dwRootObjNum = m_parser.GetRootObjNum();
3269 m_dwInfoObjNum = m_parser.GetInfoObjNum();
3270 m_pCurrentParser = &m_parser;
3271 m_docStatus = PDF_DATAAVAIL_ROOT;
3272 return TRUE;
3273 }
GetObject(FX_DWORD objnum,IFX_DownloadHints * pHints,FX_BOOL * pExistInFile)3274 CPDF_Object* CPDF_DataAvail::GetObject(FX_DWORD objnum,
3275 IFX_DownloadHints* pHints,
3276 FX_BOOL* pExistInFile) {
3277 CPDF_Object* pRet = nullptr;
3278 FX_DWORD size = 0;
3279 FX_FILESIZE offset = 0;
3280 CPDF_Parser* pParser = nullptr;
3281 if (pExistInFile)
3282 *pExistInFile = TRUE;
3283
3284 if (m_pDocument) {
3285 size = GetObjectSize(objnum, offset);
3286 pParser = (CPDF_Parser*)(m_pDocument->GetParser());
3287 } else {
3288 size = (FX_DWORD)m_parser.GetObjectSize(objnum);
3289 offset = m_parser.GetObjectOffset(objnum);
3290 pParser = &m_parser;
3291 }
3292 if (!IsDataAvail(offset, size, pHints)) {
3293 return nullptr;
3294 }
3295 if (pParser) {
3296 pRet = pParser->ParseIndirectObject(NULL, objnum, NULL);
3297 }
3298
3299 if (!pRet && pExistInFile) {
3300 *pExistInFile = FALSE;
3301 }
3302
3303 return pRet;
3304 }
3305
CheckInfo(IFX_DownloadHints * pHints)3306 FX_BOOL CPDF_DataAvail::CheckInfo(IFX_DownloadHints* pHints) {
3307 FX_BOOL bExist = FALSE;
3308 CPDF_Object* pInfo = GetObject(m_dwInfoObjNum, pHints, &bExist);
3309 if (!bExist) {
3310 if (m_bHaveAcroForm) {
3311 m_docStatus = PDF_DATAAVAIL_ACROFORM;
3312 } else {
3313 m_docStatus = PDF_DATAAVAIL_PAGETREE;
3314 }
3315 return TRUE;
3316 }
3317 if (!pInfo) {
3318 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3319 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3320 return TRUE;
3321 }
3322 if (m_Pos == m_dwFileLen) {
3323 m_docStatus = PDF_DATAAVAIL_ERROR;
3324 }
3325 return FALSE;
3326 }
3327 if (pInfo) {
3328 pInfo->Release();
3329 }
3330 if (m_bHaveAcroForm) {
3331 m_docStatus = PDF_DATAAVAIL_ACROFORM;
3332 } else {
3333 m_docStatus = PDF_DATAAVAIL_PAGETREE;
3334 }
3335 return TRUE;
3336 }
CheckRoot(IFX_DownloadHints * pHints)3337 FX_BOOL CPDF_DataAvail::CheckRoot(IFX_DownloadHints* pHints) {
3338 FX_BOOL bExist = FALSE;
3339 m_pRoot = GetObject(m_dwRootObjNum, pHints, &bExist);
3340 if (!bExist) {
3341 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3342 return TRUE;
3343 }
3344 if (!m_pRoot) {
3345 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3346 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3347 return TRUE;
3348 }
3349 return FALSE;
3350 }
3351 CPDF_Dictionary* pDict = m_pRoot->GetDict();
3352 if (!pDict) {
3353 m_docStatus = PDF_DATAAVAIL_ERROR;
3354 return FALSE;
3355 }
3356 CPDF_Reference* pRef = ToReference(pDict->GetElement("Pages"));
3357 if (!pRef) {
3358 m_docStatus = PDF_DATAAVAIL_ERROR;
3359 return FALSE;
3360 }
3361
3362 m_PagesObjNum = pRef->GetRefObjNum();
3363 CPDF_Reference* pAcroFormRef =
3364 ToReference(m_pRoot->GetDict()->GetElement("AcroForm"));
3365 if (pAcroFormRef) {
3366 m_bHaveAcroForm = TRUE;
3367 m_dwAcroFormObjNum = pAcroFormRef->GetRefObjNum();
3368 }
3369
3370 if (m_dwInfoObjNum) {
3371 m_docStatus = PDF_DATAAVAIL_INFO;
3372 } else {
3373 m_docStatus =
3374 m_bHaveAcroForm ? PDF_DATAAVAIL_ACROFORM : PDF_DATAAVAIL_PAGETREE;
3375 }
3376 return TRUE;
3377 }
PreparePageItem()3378 FX_BOOL CPDF_DataAvail::PreparePageItem() {
3379 CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
3380 CPDF_Reference* pRef =
3381 ToReference(pRoot ? pRoot->GetElement("Pages") : nullptr);
3382 if (!pRef) {
3383 m_docStatus = PDF_DATAAVAIL_ERROR;
3384 return FALSE;
3385 }
3386
3387 m_PagesObjNum = pRef->GetRefObjNum();
3388 m_pCurrentParser = (CPDF_Parser*)m_pDocument->GetParser();
3389 m_docStatus = PDF_DATAAVAIL_PAGETREE;
3390 return TRUE;
3391 }
IsFirstCheck(int iPage)3392 bool CPDF_DataAvail::IsFirstCheck(int iPage) {
3393 return m_pageMapCheckState.insert(iPage).second;
3394 }
ResetFirstCheck(int iPage)3395 void CPDF_DataAvail::ResetFirstCheck(int iPage) {
3396 m_pageMapCheckState.erase(iPage);
3397 }
CheckPage(IFX_DownloadHints * pHints)3398 FX_BOOL CPDF_DataAvail::CheckPage(IFX_DownloadHints* pHints) {
3399 FX_DWORD iPageObjs = m_PageObjList.GetSize();
3400 CFX_DWordArray UnavailObjList;
3401 for (FX_DWORD i = 0; i < iPageObjs; ++i) {
3402 FX_DWORD dwPageObjNum = m_PageObjList.GetAt(i);
3403 FX_BOOL bExist = FALSE;
3404 CPDF_Object* pObj = GetObject(dwPageObjNum, pHints, &bExist);
3405 if (!pObj) {
3406 if (bExist) {
3407 UnavailObjList.Add(dwPageObjNum);
3408 }
3409 continue;
3410 }
3411 if (pObj->IsArray()) {
3412 CPDF_Array* pArray = pObj->GetArray();
3413 if (pArray) {
3414 int32_t iSize = pArray->GetCount();
3415 for (int32_t j = 0; j < iSize; ++j) {
3416 if (CPDF_Reference* pRef = ToReference(pArray->GetElement(j)))
3417 UnavailObjList.Add(pRef->GetRefObjNum());
3418 }
3419 }
3420 }
3421 if (!pObj->IsDictionary()) {
3422 pObj->Release();
3423 continue;
3424 }
3425 CFX_ByteString type = pObj->GetDict()->GetString("Type");
3426 if (type == "Pages") {
3427 m_PagesArray.Add(pObj);
3428 continue;
3429 }
3430 pObj->Release();
3431 }
3432 m_PageObjList.RemoveAll();
3433 if (UnavailObjList.GetSize()) {
3434 m_PageObjList.Append(UnavailObjList);
3435 return FALSE;
3436 }
3437 FX_DWORD iPages = m_PagesArray.GetSize();
3438 for (FX_DWORD i = 0; i < iPages; i++) {
3439 CPDF_Object* pPages = m_PagesArray.GetAt(i);
3440 if (!pPages)
3441 continue;
3442
3443 if (!GetPageKids(m_pCurrentParser, pPages)) {
3444 pPages->Release();
3445 while (++i < iPages) {
3446 pPages = m_PagesArray.GetAt(i);
3447 pPages->Release();
3448 }
3449 m_PagesArray.RemoveAll();
3450 m_docStatus = PDF_DATAAVAIL_ERROR;
3451 return FALSE;
3452 }
3453 pPages->Release();
3454 }
3455 m_PagesArray.RemoveAll();
3456 if (!m_PageObjList.GetSize()) {
3457 m_docStatus = PDF_DATAAVAIL_DONE;
3458 }
3459 return TRUE;
3460 }
GetPageKids(CPDF_Parser * pParser,CPDF_Object * pPages)3461 FX_BOOL CPDF_DataAvail::GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages) {
3462 if (!pParser) {
3463 m_docStatus = PDF_DATAAVAIL_ERROR;
3464 return FALSE;
3465 }
3466 CPDF_Dictionary* pDict = pPages->GetDict();
3467 CPDF_Object* pKids = pDict ? pDict->GetElement("Kids") : NULL;
3468 if (!pKids) {
3469 return TRUE;
3470 }
3471 switch (pKids->GetType()) {
3472 case PDFOBJ_REFERENCE:
3473 m_PageObjList.Add(pKids->AsReference()->GetRefObjNum());
3474 break;
3475 case PDFOBJ_ARRAY: {
3476 CPDF_Array* pKidsArray = pKids->AsArray();
3477 for (FX_DWORD i = 0; i < pKidsArray->GetCount(); ++i) {
3478 if (CPDF_Reference* pRef = ToReference(pKidsArray->GetElement(i)))
3479 m_PageObjList.Add(pRef->GetRefObjNum());
3480 }
3481 } break;
3482 default:
3483 m_docStatus = PDF_DATAAVAIL_ERROR;
3484 return FALSE;
3485 }
3486 return TRUE;
3487 }
CheckPages(IFX_DownloadHints * pHints)3488 FX_BOOL CPDF_DataAvail::CheckPages(IFX_DownloadHints* pHints) {
3489 FX_BOOL bExist = FALSE;
3490 CPDF_Object* pPages = GetObject(m_PagesObjNum, pHints, &bExist);
3491 if (!bExist) {
3492 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3493 return TRUE;
3494 }
3495 if (!pPages) {
3496 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3497 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3498 return TRUE;
3499 }
3500 return FALSE;
3501 }
3502 if (!GetPageKids(m_pCurrentParser, pPages)) {
3503 pPages->Release();
3504 m_docStatus = PDF_DATAAVAIL_ERROR;
3505 return FALSE;
3506 }
3507 pPages->Release();
3508 m_docStatus = PDF_DATAAVAIL_PAGE;
3509 return TRUE;
3510 }
CheckHeader(IFX_DownloadHints * pHints)3511 FX_BOOL CPDF_DataAvail::CheckHeader(IFX_DownloadHints* pHints) {
3512 FX_DWORD req_size = 1024;
3513 if ((FX_FILESIZE)req_size > m_dwFileLen) {
3514 req_size = (FX_DWORD)m_dwFileLen;
3515 }
3516 if (m_pFileAvail->IsDataAvail(0, req_size)) {
3517 uint8_t buffer[1024];
3518 m_pFileRead->ReadBlock(buffer, 0, req_size);
3519 if (IsLinearizedFile(buffer, req_size)) {
3520 m_docStatus = PDF_DATAAVAIL_FIRSTPAGE;
3521 } else {
3522 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3523 return FALSE;
3524 }
3525 m_docStatus = PDF_DATAAVAIL_END;
3526 }
3527 return TRUE;
3528 }
3529 pHints->AddSegment(0, req_size);
3530 return FALSE;
3531 }
CheckFirstPage(IFX_DownloadHints * pHints)3532 FX_BOOL CPDF_DataAvail::CheckFirstPage(IFX_DownloadHints* pHints) {
3533 CPDF_Dictionary* pDict = m_pLinearized->GetDict();
3534 CPDF_Object* pEndOffSet = pDict ? pDict->GetElement("E") : NULL;
3535 if (!pEndOffSet) {
3536 m_docStatus = PDF_DATAAVAIL_ERROR;
3537 return FALSE;
3538 }
3539 CPDF_Object* pXRefOffset = pDict ? pDict->GetElement("T") : NULL;
3540 if (!pXRefOffset) {
3541 m_docStatus = PDF_DATAAVAIL_ERROR;
3542 return FALSE;
3543 }
3544 CPDF_Object* pFileLen = pDict ? pDict->GetElement("L") : NULL;
3545 if (!pFileLen) {
3546 m_docStatus = PDF_DATAAVAIL_ERROR;
3547 return FALSE;
3548 }
3549 FX_BOOL bNeedDownLoad = FALSE;
3550 if (pEndOffSet->IsNumber()) {
3551 FX_DWORD dwEnd = pEndOffSet->GetInteger();
3552 dwEnd += 512;
3553 if ((FX_FILESIZE)dwEnd > m_dwFileLen) {
3554 dwEnd = (FX_DWORD)m_dwFileLen;
3555 }
3556 int32_t iStartPos = (int32_t)(m_dwFileLen > 1024 ? 1024 : m_dwFileLen);
3557 int32_t iSize = dwEnd > 1024 ? dwEnd - 1024 : 0;
3558 if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) {
3559 pHints->AddSegment(iStartPos, iSize);
3560 bNeedDownLoad = TRUE;
3561 }
3562 }
3563 m_dwLastXRefOffset = 0;
3564 FX_FILESIZE dwFileLen = 0;
3565 if (pXRefOffset->IsNumber())
3566 m_dwLastXRefOffset = pXRefOffset->GetInteger();
3567
3568 if (pFileLen->IsNumber())
3569 dwFileLen = pFileLen->GetInteger();
3570
3571 if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset,
3572 (FX_DWORD)(dwFileLen - m_dwLastXRefOffset))) {
3573 if (m_docStatus == PDF_DATAAVAIL_FIRSTPAGE) {
3574 FX_DWORD dwSize = (FX_DWORD)(dwFileLen - m_dwLastXRefOffset);
3575 FX_FILESIZE offset = m_dwLastXRefOffset;
3576 if (dwSize < 512 && dwFileLen > 512) {
3577 dwSize = 512;
3578 offset = dwFileLen - 512;
3579 }
3580 pHints->AddSegment(offset, dwSize);
3581 }
3582 } else {
3583 m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE;
3584 }
3585 if (bNeedDownLoad || m_docStatus != PDF_DATAAVAIL_FIRSTPAGE_PREPARE) {
3586 m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE;
3587 return FALSE;
3588 }
3589 m_docStatus =
3590 m_bSupportHintTable ? PDF_DATAAVAIL_HINTTABLE : PDF_DATAAVAIL_DONE;
3591 return TRUE;
3592 }
IsDataAvail(FX_FILESIZE offset,FX_DWORD size,IFX_DownloadHints * pHints)3593 FX_BOOL CPDF_DataAvail::IsDataAvail(FX_FILESIZE offset,
3594 FX_DWORD size,
3595 IFX_DownloadHints* pHints) {
3596 if (offset > m_dwFileLen)
3597 return TRUE;
3598 FX_SAFE_DWORD safeSize = pdfium::base::checked_cast<FX_DWORD>(offset);
3599 safeSize += size;
3600 safeSize += 512;
3601 if (!safeSize.IsValid() || safeSize.ValueOrDie() > m_dwFileLen)
3602 size = m_dwFileLen - offset;
3603 else
3604 size += 512;
3605 if (!m_pFileAvail->IsDataAvail(offset, size)) {
3606 pHints->AddSegment(offset, size);
3607 return FALSE;
3608 }
3609 return TRUE;
3610 }
CheckHintTables(IFX_DownloadHints * pHints)3611 FX_BOOL CPDF_DataAvail::CheckHintTables(IFX_DownloadHints* pHints) {
3612 CPDF_Dictionary* pDict = m_pLinearized->GetDict();
3613 if (!pDict) {
3614 m_docStatus = PDF_DATAAVAIL_ERROR;
3615 return FALSE;
3616 }
3617 if (!pDict->KeyExist("H") || !pDict->KeyExist("O") || !pDict->KeyExist("N")) {
3618 m_docStatus = PDF_DATAAVAIL_ERROR;
3619 return FALSE;
3620 }
3621 int nPageCount = pDict->GetElementValue("N")->GetInteger();
3622 if (nPageCount <= 1) {
3623 m_docStatus = PDF_DATAAVAIL_DONE;
3624 return TRUE;
3625 }
3626 CPDF_Array* pHintStreamRange = pDict->GetArray("H");
3627 FX_FILESIZE szHSStart =
3628 pHintStreamRange->GetElementValue(0)
3629 ? pHintStreamRange->GetElementValue(0)->GetInteger()
3630 : 0;
3631 FX_FILESIZE szHSLength =
3632 pHintStreamRange->GetElementValue(1)
3633 ? pHintStreamRange->GetElementValue(1)->GetInteger()
3634 : 0;
3635 if (szHSStart < 0 || szHSLength <= 0) {
3636 m_docStatus = PDF_DATAAVAIL_ERROR;
3637 return FALSE;
3638 }
3639 if (!IsDataAvail(szHSStart, szHSLength, pHints)) {
3640 return FALSE;
3641 }
3642 m_syntaxParser.InitParser(m_pFileRead, m_dwHeaderOffset);
3643 std::unique_ptr<CPDF_HintTables> pHintTables(
3644 new CPDF_HintTables(this, pDict));
3645 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pHintStream(
3646 ParseIndirectObjectAt(szHSStart, 0));
3647 CPDF_Stream* pStream = ToStream(pHintStream.get());
3648 if (pStream && pHintTables->LoadHintStream(pStream))
3649 m_pHintTables = std::move(pHintTables);
3650
3651 m_docStatus = PDF_DATAAVAIL_DONE;
3652 return TRUE;
3653 }
ParseIndirectObjectAt(FX_FILESIZE pos,FX_DWORD objnum,CPDF_IndirectObjectHolder * pObjList)3654 CPDF_Object* CPDF_DataAvail::ParseIndirectObjectAt(
3655 FX_FILESIZE pos,
3656 FX_DWORD objnum,
3657 CPDF_IndirectObjectHolder* pObjList) {
3658 FX_FILESIZE SavedPos = m_syntaxParser.SavePos();
3659 m_syntaxParser.RestorePos(pos);
3660 bool bIsNumber;
3661 CFX_ByteString word = m_syntaxParser.GetNextWord(&bIsNumber);
3662 if (!bIsNumber)
3663 return nullptr;
3664
3665 FX_DWORD parser_objnum = FXSYS_atoi(word);
3666 if (objnum && parser_objnum != objnum)
3667 return nullptr;
3668
3669 word = m_syntaxParser.GetNextWord(&bIsNumber);
3670 if (!bIsNumber)
3671 return nullptr;
3672
3673 FX_DWORD gennum = FXSYS_atoi(word);
3674 if (m_syntaxParser.GetKeyword() != "obj") {
3675 m_syntaxParser.RestorePos(SavedPos);
3676 return nullptr;
3677 }
3678 CPDF_Object* pObj =
3679 m_syntaxParser.GetObject(pObjList, parser_objnum, gennum, nullptr, true);
3680 m_syntaxParser.RestorePos(SavedPos);
3681 return pObj;
3682 }
IsLinearizedPDF()3683 IPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() {
3684 FX_DWORD req_size = 1024;
3685 if (!m_pFileAvail->IsDataAvail(0, req_size)) {
3686 return LinearizationUnknown;
3687 }
3688 if (!m_pFileRead) {
3689 return NotLinearized;
3690 }
3691 FX_FILESIZE dwSize = m_pFileRead->GetSize();
3692 if (dwSize < (FX_FILESIZE)req_size) {
3693 return LinearizationUnknown;
3694 }
3695 uint8_t buffer[1024];
3696 m_pFileRead->ReadBlock(buffer, 0, req_size);
3697 if (IsLinearizedFile(buffer, req_size)) {
3698 return Linearized;
3699 }
3700 return NotLinearized;
3701 }
IsLinearizedFile(uint8_t * pData,FX_DWORD dwLen)3702 FX_BOOL CPDF_DataAvail::IsLinearizedFile(uint8_t* pData, FX_DWORD dwLen) {
3703 ScopedFileStream file(FX_CreateMemoryStream(pData, (size_t)dwLen, FALSE));
3704 int32_t offset = GetHeaderOffset(file.get());
3705 if (offset == -1) {
3706 m_docStatus = PDF_DATAAVAIL_ERROR;
3707 return FALSE;
3708 }
3709 m_dwHeaderOffset = offset;
3710 m_syntaxParser.InitParser(file.get(), offset);
3711 m_syntaxParser.RestorePos(m_syntaxParser.m_HeaderOffset + 9);
3712 bool bNumber;
3713 CFX_ByteString wordObjNum = m_syntaxParser.GetNextWord(&bNumber);
3714 if (!bNumber)
3715 return FALSE;
3716
3717 FX_DWORD objnum = FXSYS_atoi(wordObjNum);
3718 if (m_pLinearized) {
3719 m_pLinearized->Release();
3720 m_pLinearized = NULL;
3721 }
3722 m_pLinearized =
3723 ParseIndirectObjectAt(m_syntaxParser.m_HeaderOffset + 9, objnum);
3724 if (!m_pLinearized) {
3725 return FALSE;
3726 }
3727
3728 CPDF_Dictionary* pDict = m_pLinearized->GetDict();
3729 if (pDict && pDict->GetElement("Linearized")) {
3730 CPDF_Object* pLen = pDict->GetElement("L");
3731 if (!pLen) {
3732 return FALSE;
3733 }
3734 if ((FX_FILESIZE)pLen->GetInteger() != m_pFileRead->GetSize()) {
3735 return FALSE;
3736 }
3737 m_bLinearized = TRUE;
3738
3739 if (CPDF_Number* pNo = ToNumber(pDict->GetElement("P")))
3740 m_dwFirstPageNo = pNo->GetInteger();
3741
3742 return TRUE;
3743 }
3744 return FALSE;
3745 }
CheckEnd(IFX_DownloadHints * pHints)3746 FX_BOOL CPDF_DataAvail::CheckEnd(IFX_DownloadHints* pHints) {
3747 FX_DWORD req_pos = (FX_DWORD)(m_dwFileLen > 1024 ? m_dwFileLen - 1024 : 0);
3748 FX_DWORD dwSize = (FX_DWORD)(m_dwFileLen - req_pos);
3749 if (m_pFileAvail->IsDataAvail(req_pos, dwSize)) {
3750 uint8_t buffer[1024];
3751 m_pFileRead->ReadBlock(buffer, req_pos, dwSize);
3752 ScopedFileStream file(FX_CreateMemoryStream(buffer, (size_t)dwSize, FALSE));
3753 m_syntaxParser.InitParser(file.get(), 0);
3754 m_syntaxParser.RestorePos(dwSize - 1);
3755 if (m_syntaxParser.SearchWord("startxref", TRUE, FALSE, dwSize)) {
3756 m_syntaxParser.GetNextWord(nullptr);
3757 bool bNumber;
3758 CFX_ByteString xrefpos_str = m_syntaxParser.GetNextWord(&bNumber);
3759 if (!bNumber) {
3760 m_docStatus = PDF_DATAAVAIL_ERROR;
3761 return FALSE;
3762 }
3763 m_dwXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str);
3764 if (!m_dwXRefOffset || m_dwXRefOffset > m_dwFileLen) {
3765 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3766 return TRUE;
3767 }
3768 m_dwLastXRefOffset = m_dwXRefOffset;
3769 SetStartOffset(m_dwXRefOffset);
3770 m_docStatus = PDF_DATAAVAIL_CROSSREF;
3771 return TRUE;
3772 }
3773 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3774 return TRUE;
3775 }
3776 pHints->AddSegment(req_pos, dwSize);
3777 return FALSE;
3778 }
CheckCrossRefStream(IFX_DownloadHints * pHints,FX_FILESIZE & xref_offset)3779 int32_t CPDF_DataAvail::CheckCrossRefStream(IFX_DownloadHints* pHints,
3780 FX_FILESIZE& xref_offset) {
3781 xref_offset = 0;
3782 FX_DWORD req_size =
3783 (FX_DWORD)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
3784 if (m_pFileAvail->IsDataAvail(m_Pos, req_size)) {
3785 int32_t iSize = (int32_t)(m_Pos + req_size - m_dwCurrentXRefSteam);
3786 CFX_BinaryBuf buf(iSize);
3787 uint8_t* pBuf = buf.GetBuffer();
3788 m_pFileRead->ReadBlock(pBuf, m_dwCurrentXRefSteam, iSize);
3789 ScopedFileStream file(FX_CreateMemoryStream(pBuf, (size_t)iSize, FALSE));
3790 m_parser.m_Syntax.InitParser(file.get(), 0);
3791 bool bNumber;
3792 CFX_ByteString objnum = m_parser.m_Syntax.GetNextWord(&bNumber);
3793 if (!bNumber)
3794 return -1;
3795
3796 FX_DWORD objNum = FXSYS_atoi(objnum);
3797 CPDF_Object* pObj = m_parser.ParseIndirectObjectAt(NULL, 0, objNum, NULL);
3798 if (!pObj) {
3799 m_Pos += m_parser.m_Syntax.SavePos();
3800 return 0;
3801 }
3802 CPDF_Dictionary* pDict = pObj->GetDict();
3803 CPDF_Name* pName = ToName(pDict ? pDict->GetElement("Type") : nullptr);
3804 if (pName) {
3805 if (pName->GetString() == "XRef") {
3806 m_Pos += m_parser.m_Syntax.SavePos();
3807 xref_offset = pObj->GetDict()->GetInteger("Prev");
3808 pObj->Release();
3809 return 1;
3810 }
3811 }
3812 pObj->Release();
3813 return -1;
3814 }
3815 pHints->AddSegment(m_Pos, req_size);
3816 return 0;
3817 }
SetStartOffset(FX_FILESIZE dwOffset)3818 inline void CPDF_DataAvail::SetStartOffset(FX_FILESIZE dwOffset) {
3819 m_Pos = dwOffset;
3820 }
3821
GetNextToken(CFX_ByteString & token)3822 FX_BOOL CPDF_DataAvail::GetNextToken(CFX_ByteString& token) {
3823 uint8_t ch;
3824 if (!GetNextChar(ch))
3825 return FALSE;
3826
3827 while (1) {
3828 while (PDFCharIsWhitespace(ch)) {
3829 if (!GetNextChar(ch))
3830 return FALSE;
3831 }
3832
3833 if (ch != '%')
3834 break;
3835
3836 while (1) {
3837 if (!GetNextChar(ch))
3838 return FALSE;
3839 if (PDFCharIsLineEnding(ch))
3840 break;
3841 }
3842 }
3843
3844 uint8_t buffer[256];
3845 FX_DWORD index = 0;
3846 if (PDFCharIsDelimiter(ch)) {
3847 buffer[index++] = ch;
3848 if (ch == '/') {
3849 while (1) {
3850 if (!GetNextChar(ch))
3851 return FALSE;
3852
3853 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
3854 m_Pos--;
3855 CFX_ByteString ret(buffer, index);
3856 token = ret;
3857 return TRUE;
3858 }
3859
3860 if (index < sizeof(buffer))
3861 buffer[index++] = ch;
3862 }
3863 } else if (ch == '<') {
3864 if (!GetNextChar(ch))
3865 return FALSE;
3866
3867 if (ch == '<')
3868 buffer[index++] = ch;
3869 else
3870 m_Pos--;
3871 } else if (ch == '>') {
3872 if (!GetNextChar(ch))
3873 return FALSE;
3874
3875 if (ch == '>')
3876 buffer[index++] = ch;
3877 else
3878 m_Pos--;
3879 }
3880
3881 CFX_ByteString ret(buffer, index);
3882 token = ret;
3883 return TRUE;
3884 }
3885
3886 while (1) {
3887 if (index < sizeof(buffer))
3888 buffer[index++] = ch;
3889
3890 if (!GetNextChar(ch))
3891 return FALSE;
3892
3893 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
3894 m_Pos--;
3895 break;
3896 }
3897 }
3898
3899 token = CFX_ByteString(buffer, index);
3900 return TRUE;
3901 }
3902
GetNextChar(uint8_t & ch)3903 FX_BOOL CPDF_DataAvail::GetNextChar(uint8_t& ch) {
3904 FX_FILESIZE pos = m_Pos;
3905 if (pos >= m_dwFileLen) {
3906 return FALSE;
3907 }
3908 if (m_bufferOffset >= pos ||
3909 (FX_FILESIZE)(m_bufferOffset + m_bufferSize) <= pos) {
3910 FX_FILESIZE read_pos = pos;
3911 FX_DWORD read_size = 512;
3912 if ((FX_FILESIZE)read_size > m_dwFileLen) {
3913 read_size = (FX_DWORD)m_dwFileLen;
3914 }
3915 if ((FX_FILESIZE)(read_pos + read_size) > m_dwFileLen) {
3916 read_pos = m_dwFileLen - read_size;
3917 }
3918 if (!m_pFileRead->ReadBlock(m_bufferData, read_pos, read_size)) {
3919 return FALSE;
3920 }
3921 m_bufferOffset = read_pos;
3922 m_bufferSize = read_size;
3923 }
3924 ch = m_bufferData[pos - m_bufferOffset];
3925 m_Pos++;
3926 return TRUE;
3927 }
CheckCrossRefItem(IFX_DownloadHints * pHints)3928 FX_BOOL CPDF_DataAvail::CheckCrossRefItem(IFX_DownloadHints* pHints) {
3929 int32_t iSize = 0;
3930 CFX_ByteString token;
3931 while (1) {
3932 if (!GetNextToken(token)) {
3933 iSize = (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
3934 pHints->AddSegment(m_Pos, iSize);
3935 return FALSE;
3936 }
3937 if (token == "trailer") {
3938 m_dwTrailerOffset = m_Pos;
3939 m_docStatus = PDF_DATAAVAIL_TRAILER;
3940 return TRUE;
3941 }
3942 }
3943 }
CheckAllCrossRefStream(IFX_DownloadHints * pHints)3944 FX_BOOL CPDF_DataAvail::CheckAllCrossRefStream(IFX_DownloadHints* pHints) {
3945 FX_FILESIZE xref_offset = 0;
3946 int32_t nRet = CheckCrossRefStream(pHints, xref_offset);
3947 if (nRet == 1) {
3948 if (!xref_offset) {
3949 m_docStatus = PDF_DATAAVAIL_LOADALLCROSSREF;
3950 } else {
3951 m_dwCurrentXRefSteam = xref_offset;
3952 m_Pos = xref_offset;
3953 }
3954 return TRUE;
3955 }
3956 if (nRet == -1) {
3957 m_docStatus = PDF_DATAAVAIL_ERROR;
3958 }
3959 return FALSE;
3960 }
CheckCrossRef(IFX_DownloadHints * pHints)3961 FX_BOOL CPDF_DataAvail::CheckCrossRef(IFX_DownloadHints* pHints) {
3962 int32_t iSize = 0;
3963 CFX_ByteString token;
3964 if (!GetNextToken(token)) {
3965 iSize = (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
3966 pHints->AddSegment(m_Pos, iSize);
3967 return FALSE;
3968 }
3969 if (token == "xref") {
3970 m_CrossOffset.InsertAt(0, m_dwXRefOffset);
3971 while (1) {
3972 if (!GetNextToken(token)) {
3973 iSize =
3974 (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
3975 pHints->AddSegment(m_Pos, iSize);
3976 m_docStatus = PDF_DATAAVAIL_CROSSREF_ITEM;
3977 return FALSE;
3978 }
3979 if (token == "trailer") {
3980 m_dwTrailerOffset = m_Pos;
3981 m_docStatus = PDF_DATAAVAIL_TRAILER;
3982 return TRUE;
3983 }
3984 }
3985 } else {
3986 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3987 return TRUE;
3988 }
3989 return FALSE;
3990 }
CheckTrailerAppend(IFX_DownloadHints * pHints)3991 FX_BOOL CPDF_DataAvail::CheckTrailerAppend(IFX_DownloadHints* pHints) {
3992 if (m_Pos < m_dwFileLen) {
3993 FX_FILESIZE dwAppendPos = m_Pos + m_syntaxParser.SavePos();
3994 int32_t iSize = (int32_t)(
3995 dwAppendPos + 512 > m_dwFileLen ? m_dwFileLen - dwAppendPos : 512);
3996 if (!m_pFileAvail->IsDataAvail(dwAppendPos, iSize)) {
3997 pHints->AddSegment(dwAppendPos, iSize);
3998 return FALSE;
3999 }
4000 }
4001 if (m_dwPrevXRefOffset) {
4002 SetStartOffset(m_dwPrevXRefOffset);
4003 m_docStatus = PDF_DATAAVAIL_CROSSREF;
4004 } else {
4005 m_docStatus = PDF_DATAAVAIL_LOADALLCROSSREF;
4006 }
4007 return TRUE;
4008 }
4009
CheckTrailer(IFX_DownloadHints * pHints)4010 FX_BOOL CPDF_DataAvail::CheckTrailer(IFX_DownloadHints* pHints) {
4011 int32_t iTrailerSize =
4012 (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
4013 if (m_pFileAvail->IsDataAvail(m_Pos, iTrailerSize)) {
4014 int32_t iSize = (int32_t)(m_Pos + iTrailerSize - m_dwTrailerOffset);
4015 CFX_BinaryBuf buf(iSize);
4016 uint8_t* pBuf = buf.GetBuffer();
4017 if (!pBuf) {
4018 m_docStatus = PDF_DATAAVAIL_ERROR;
4019 return FALSE;
4020 }
4021 if (!m_pFileRead->ReadBlock(pBuf, m_dwTrailerOffset, iSize)) {
4022 return FALSE;
4023 }
4024 ScopedFileStream file(FX_CreateMemoryStream(pBuf, (size_t)iSize, FALSE));
4025 m_syntaxParser.InitParser(file.get(), 0);
4026 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pTrailer(
4027 m_syntaxParser.GetObject(nullptr, 0, 0, nullptr, true));
4028 if (!pTrailer) {
4029 m_Pos += m_syntaxParser.SavePos();
4030 pHints->AddSegment(m_Pos, iTrailerSize);
4031 return FALSE;
4032 }
4033 if (!pTrailer->IsDictionary())
4034 return FALSE;
4035
4036 CPDF_Dictionary* pTrailerDict = pTrailer->GetDict();
4037 CPDF_Object* pEncrypt = pTrailerDict->GetElement("Encrypt");
4038 if (ToReference(pEncrypt)) {
4039 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
4040 return TRUE;
4041 }
4042
4043 FX_DWORD xrefpos = GetDirectInteger(pTrailerDict, "Prev");
4044 if (xrefpos) {
4045 m_dwPrevXRefOffset = GetDirectInteger(pTrailerDict, "XRefStm");
4046 if (m_dwPrevXRefOffset) {
4047 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
4048 } else {
4049 m_dwPrevXRefOffset = xrefpos;
4050 if (m_dwPrevXRefOffset >= m_dwFileLen) {
4051 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
4052 } else {
4053 SetStartOffset(m_dwPrevXRefOffset);
4054 m_docStatus = PDF_DATAAVAIL_TRAILER_APPEND;
4055 }
4056 }
4057 return TRUE;
4058 }
4059 m_dwPrevXRefOffset = 0;
4060 m_docStatus = PDF_DATAAVAIL_TRAILER_APPEND;
4061 return TRUE;
4062 }
4063 pHints->AddSegment(m_Pos, iTrailerSize);
4064 return FALSE;
4065 }
4066
CheckPage(int32_t iPage,IFX_DownloadHints * pHints)4067 FX_BOOL CPDF_DataAvail::CheckPage(int32_t iPage, IFX_DownloadHints* pHints) {
4068 while (TRUE) {
4069 switch (m_docStatus) {
4070 case PDF_DATAAVAIL_PAGETREE:
4071 if (!LoadDocPages(pHints)) {
4072 return FALSE;
4073 }
4074 break;
4075 case PDF_DATAAVAIL_PAGE:
4076 if (!LoadDocPage(iPage, pHints)) {
4077 return FALSE;
4078 }
4079 break;
4080 case PDF_DATAAVAIL_ERROR:
4081 return LoadAllFile(pHints);
4082 default:
4083 m_bPagesTreeLoad = TRUE;
4084 m_bPagesLoad = TRUE;
4085 m_bCurPageDictLoadOK = TRUE;
4086 m_docStatus = PDF_DATAAVAIL_PAGE;
4087 return TRUE;
4088 }
4089 }
4090 }
CheckArrayPageNode(FX_DWORD dwPageNo,CPDF_PageNode * pPageNode,IFX_DownloadHints * pHints)4091 FX_BOOL CPDF_DataAvail::CheckArrayPageNode(FX_DWORD dwPageNo,
4092 CPDF_PageNode* pPageNode,
4093 IFX_DownloadHints* pHints) {
4094 FX_BOOL bExist = FALSE;
4095 CPDF_Object* pPages = GetObject(dwPageNo, pHints, &bExist);
4096 if (!bExist) {
4097 m_docStatus = PDF_DATAAVAIL_ERROR;
4098 return FALSE;
4099 }
4100 if (!pPages) {
4101 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
4102 m_docStatus = PDF_DATAAVAIL_ERROR;
4103 return FALSE;
4104 }
4105 return FALSE;
4106 }
4107
4108 CPDF_Array* pArray = pPages->AsArray();
4109 if (!pArray) {
4110 pPages->Release();
4111 m_docStatus = PDF_DATAAVAIL_ERROR;
4112 return FALSE;
4113 }
4114
4115 pPageNode->m_type = PDF_PAGENODE_PAGES;
4116 for (FX_DWORD i = 0; i < pArray->GetCount(); ++i) {
4117 CPDF_Reference* pKid = ToReference(pArray->GetElement(i));
4118 if (!pKid)
4119 continue;
4120
4121 CPDF_PageNode* pNode = new CPDF_PageNode();
4122 pPageNode->m_childNode.Add(pNode);
4123 pNode->m_dwPageNo = pKid->GetRefObjNum();
4124 }
4125 pPages->Release();
4126 return TRUE;
4127 }
CheckUnkownPageNode(FX_DWORD dwPageNo,CPDF_PageNode * pPageNode,IFX_DownloadHints * pHints)4128 FX_BOOL CPDF_DataAvail::CheckUnkownPageNode(FX_DWORD dwPageNo,
4129 CPDF_PageNode* pPageNode,
4130 IFX_DownloadHints* pHints) {
4131 FX_BOOL bExist = FALSE;
4132 CPDF_Object* pPage = GetObject(dwPageNo, pHints, &bExist);
4133 if (!bExist) {
4134 m_docStatus = PDF_DATAAVAIL_ERROR;
4135 return FALSE;
4136 }
4137 if (!pPage) {
4138 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
4139 m_docStatus = PDF_DATAAVAIL_ERROR;
4140 return FALSE;
4141 }
4142 return FALSE;
4143 }
4144 if (pPage->IsArray()) {
4145 pPageNode->m_dwPageNo = dwPageNo;
4146 pPageNode->m_type = PDF_PAGENODE_ARRAY;
4147 pPage->Release();
4148 return TRUE;
4149 }
4150 if (!pPage->IsDictionary()) {
4151 pPage->Release();
4152 m_docStatus = PDF_DATAAVAIL_ERROR;
4153 return FALSE;
4154 }
4155 pPageNode->m_dwPageNo = dwPageNo;
4156 CPDF_Dictionary* pDict = pPage->GetDict();
4157 CFX_ByteString type = pDict->GetString("Type");
4158 if (type == "Pages") {
4159 pPageNode->m_type = PDF_PAGENODE_PAGES;
4160 CPDF_Object* pKids = pDict->GetElement("Kids");
4161 if (!pKids) {
4162 m_docStatus = PDF_DATAAVAIL_PAGE;
4163 return TRUE;
4164 }
4165 switch (pKids->GetType()) {
4166 case PDFOBJ_REFERENCE: {
4167 CPDF_Reference* pKid = pKids->AsReference();
4168 CPDF_PageNode* pNode = new CPDF_PageNode();
4169 pPageNode->m_childNode.Add(pNode);
4170 pNode->m_dwPageNo = pKid->GetRefObjNum();
4171 } break;
4172 case PDFOBJ_ARRAY: {
4173 CPDF_Array* pKidsArray = pKids->AsArray();
4174 for (FX_DWORD i = 0; i < pKidsArray->GetCount(); ++i) {
4175 CPDF_Reference* pKid = ToReference(pKidsArray->GetElement(i));
4176 if (!pKid)
4177 continue;
4178
4179 CPDF_PageNode* pNode = new CPDF_PageNode();
4180 pPageNode->m_childNode.Add(pNode);
4181 pNode->m_dwPageNo = pKid->GetRefObjNum();
4182 }
4183 } break;
4184 default:
4185 break;
4186 }
4187 } else if (type == "Page") {
4188 pPageNode->m_type = PDF_PAGENODE_PAGE;
4189 } else {
4190 pPage->Release();
4191 m_docStatus = PDF_DATAAVAIL_ERROR;
4192 return FALSE;
4193 }
4194 pPage->Release();
4195 return TRUE;
4196 }
CheckPageNode(CPDF_PageNode & pageNodes,int32_t iPage,int32_t & iCount,IFX_DownloadHints * pHints,int level)4197 FX_BOOL CPDF_DataAvail::CheckPageNode(CPDF_PageNode& pageNodes,
4198 int32_t iPage,
4199 int32_t& iCount,
4200 IFX_DownloadHints* pHints,
4201 int level) {
4202 if (level >= kMaxPageRecursionDepth) {
4203 return FALSE;
4204 }
4205 int32_t iSize = pageNodes.m_childNode.GetSize();
4206 if (iSize <= 0 || iPage >= iSize) {
4207 m_docStatus = PDF_DATAAVAIL_ERROR;
4208 return FALSE;
4209 }
4210 for (int32_t i = 0; i < iSize; ++i) {
4211 CPDF_PageNode* pNode = pageNodes.m_childNode.GetAt(i);
4212 if (!pNode) {
4213 continue;
4214 }
4215 switch (pNode->m_type) {
4216 case PDF_PAGENODE_UNKOWN:
4217 if (!CheckUnkownPageNode(pNode->m_dwPageNo, pNode, pHints)) {
4218 return FALSE;
4219 }
4220 --i;
4221 break;
4222 case PDF_PAGENODE_PAGE:
4223 iCount++;
4224 if (iPage == iCount && m_pDocument) {
4225 m_pDocument->m_PageList.SetAt(iPage, pNode->m_dwPageNo);
4226 }
4227 break;
4228 case PDF_PAGENODE_PAGES:
4229 if (!CheckPageNode(*pNode, iPage, iCount, pHints, level + 1)) {
4230 return FALSE;
4231 }
4232 break;
4233 case PDF_PAGENODE_ARRAY:
4234 if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode, pHints)) {
4235 return FALSE;
4236 }
4237 --i;
4238 break;
4239 }
4240 if (iPage == iCount) {
4241 m_docStatus = PDF_DATAAVAIL_DONE;
4242 return TRUE;
4243 }
4244 }
4245 return TRUE;
4246 }
LoadDocPage(int32_t iPage,IFX_DownloadHints * pHints)4247 FX_BOOL CPDF_DataAvail::LoadDocPage(int32_t iPage, IFX_DownloadHints* pHints) {
4248 if (m_pDocument->GetPageCount() <= iPage ||
4249 m_pDocument->m_PageList.GetAt(iPage)) {
4250 m_docStatus = PDF_DATAAVAIL_DONE;
4251 return TRUE;
4252 }
4253 if (m_pageNodes.m_type == PDF_PAGENODE_PAGE) {
4254 if (iPage == 0) {
4255 m_docStatus = PDF_DATAAVAIL_DONE;
4256 return TRUE;
4257 }
4258 m_docStatus = PDF_DATAAVAIL_ERROR;
4259 return TRUE;
4260 }
4261 int32_t iCount = -1;
4262 return CheckPageNode(m_pageNodes, iPage, iCount, pHints, 0);
4263 }
CheckPageCount(IFX_DownloadHints * pHints)4264 FX_BOOL CPDF_DataAvail::CheckPageCount(IFX_DownloadHints* pHints) {
4265 FX_BOOL bExist = FALSE;
4266 CPDF_Object* pPages = GetObject(m_PagesObjNum, pHints, &bExist);
4267 if (!bExist) {
4268 m_docStatus = PDF_DATAAVAIL_ERROR;
4269 return FALSE;
4270 }
4271 if (!pPages) {
4272 return FALSE;
4273 }
4274 CPDF_Dictionary* pPagesDict = pPages->GetDict();
4275 if (!pPagesDict) {
4276 pPages->Release();
4277 m_docStatus = PDF_DATAAVAIL_ERROR;
4278 return FALSE;
4279 }
4280 if (!pPagesDict->KeyExist("Kids")) {
4281 pPages->Release();
4282 return TRUE;
4283 }
4284 int count = pPagesDict->GetInteger("Count");
4285 if (count > 0) {
4286 pPages->Release();
4287 return TRUE;
4288 }
4289 pPages->Release();
4290 return FALSE;
4291 }
LoadDocPages(IFX_DownloadHints * pHints)4292 FX_BOOL CPDF_DataAvail::LoadDocPages(IFX_DownloadHints* pHints) {
4293 if (!CheckUnkownPageNode(m_PagesObjNum, &m_pageNodes, pHints)) {
4294 return FALSE;
4295 }
4296 if (CheckPageCount(pHints)) {
4297 m_docStatus = PDF_DATAAVAIL_PAGE;
4298 return TRUE;
4299 }
4300 m_bTotalLoadPageTree = TRUE;
4301 return FALSE;
4302 }
LoadPages(IFX_DownloadHints * pHints)4303 FX_BOOL CPDF_DataAvail::LoadPages(IFX_DownloadHints* pHints) {
4304 while (!m_bPagesTreeLoad) {
4305 if (!CheckPageStatus(pHints)) {
4306 return FALSE;
4307 }
4308 }
4309 if (m_bPagesLoad) {
4310 return TRUE;
4311 }
4312 m_pDocument->LoadPages();
4313 return FALSE;
4314 }
CheckLinearizedData(IFX_DownloadHints * pHints)4315 IPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData(
4316 IFX_DownloadHints* pHints) {
4317 if (m_bLinearedDataOK) {
4318 return DataAvailable;
4319 }
4320
4321 if (!m_bMainXRefLoadTried) {
4322 FX_SAFE_DWORD data_size = m_dwFileLen;
4323 data_size -= m_dwLastXRefOffset;
4324 if (!data_size.IsValid()) {
4325 return DataError;
4326 }
4327 if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset,
4328 data_size.ValueOrDie())) {
4329 pHints->AddSegment(m_dwLastXRefOffset, data_size.ValueOrDie());
4330 return DataNotAvailable;
4331 }
4332 FX_DWORD dwRet = m_pDocument->GetParser()->LoadLinearizedMainXRefTable();
4333 m_bMainXRefLoadTried = TRUE;
4334 if (dwRet != PDFPARSE_ERROR_SUCCESS) {
4335 return DataError;
4336 }
4337 if (!PreparePageItem()) {
4338 return DataNotAvailable;
4339 }
4340 m_bMainXRefLoadedOK = TRUE;
4341 m_bLinearedDataOK = TRUE;
4342 }
4343
4344 return m_bLinearedDataOK ? DataAvailable : DataNotAvailable;
4345 }
CheckPageAnnots(int32_t iPage,IFX_DownloadHints * pHints)4346 FX_BOOL CPDF_DataAvail::CheckPageAnnots(int32_t iPage,
4347 IFX_DownloadHints* pHints) {
4348 if (!m_objs_array.GetSize()) {
4349 m_objs_array.RemoveAll();
4350 m_ObjectSet.clear();
4351 CPDF_Dictionary* pPageDict = m_pDocument->GetPage(iPage);
4352 if (!pPageDict) {
4353 return TRUE;
4354 }
4355 CPDF_Object* pAnnots = pPageDict->GetElement("Annots");
4356 if (!pAnnots) {
4357 return TRUE;
4358 }
4359 CFX_ArrayTemplate<CPDF_Object*> obj_array;
4360 obj_array.Add(pAnnots);
4361 FX_BOOL bRet = IsObjectsAvail(obj_array, FALSE, pHints, m_objs_array);
4362 if (bRet) {
4363 m_objs_array.RemoveAll();
4364 }
4365 return bRet;
4366 }
4367 CFX_ArrayTemplate<CPDF_Object*> new_objs_array;
4368 FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
4369 m_objs_array.RemoveAll();
4370 if (!bRet) {
4371 m_objs_array.Append(new_objs_array);
4372 }
4373 return bRet;
4374 }
CheckLinearizedFirstPage(int32_t iPage,IFX_DownloadHints * pHints)4375 IPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedFirstPage(
4376 int32_t iPage,
4377 IFX_DownloadHints* pHints) {
4378 if (!m_bAnnotsLoad) {
4379 if (!CheckPageAnnots(iPage, pHints)) {
4380 return DataNotAvailable;
4381 }
4382 m_bAnnotsLoad = TRUE;
4383 }
4384
4385 DocAvailStatus nRet = CheckLinearizedData(pHints);
4386 if (nRet == DataAvailable)
4387 m_bPageLoadedOK = FALSE;
4388 return nRet;
4389 }
HaveResourceAncestor(CPDF_Dictionary * pDict)4390 FX_BOOL CPDF_DataAvail::HaveResourceAncestor(CPDF_Dictionary* pDict) {
4391 CFX_AutoRestorer<int> restorer(&s_CurrentDataAvailRecursionDepth);
4392 if (++s_CurrentDataAvailRecursionDepth > kMaxDataAvailRecursionDepth) {
4393 return FALSE;
4394 }
4395 CPDF_Object* pParent = pDict->GetElement("Parent");
4396 if (!pParent) {
4397 return FALSE;
4398 }
4399 CPDF_Dictionary* pParentDict = pParent->GetDict();
4400 if (!pParentDict) {
4401 return FALSE;
4402 }
4403 CPDF_Object* pRet = pParentDict->GetElement("Resources");
4404 if (pRet) {
4405 m_pPageResource = pRet;
4406 return TRUE;
4407 }
4408 return HaveResourceAncestor(pParentDict);
4409 }
IsPageAvail(int32_t iPage,IFX_DownloadHints * pHints)4410 IPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail(
4411 int32_t iPage,
4412 IFX_DownloadHints* pHints) {
4413 if (!m_pDocument) {
4414 return DataError;
4415 }
4416 if (IsFirstCheck(iPage)) {
4417 m_bCurPageDictLoadOK = FALSE;
4418 m_bPageLoadedOK = FALSE;
4419 m_bAnnotsLoad = FALSE;
4420 m_bNeedDownLoadResource = FALSE;
4421 m_objs_array.RemoveAll();
4422 m_ObjectSet.clear();
4423 }
4424 if (pdfium::ContainsKey(m_pagesLoadState, iPage))
4425 return DataAvailable;
4426
4427 if (m_bLinearized) {
4428 if ((FX_DWORD)iPage == m_dwFirstPageNo) {
4429 DocAvailStatus nRet = CheckLinearizedFirstPage(iPage, pHints);
4430 if (nRet == DataAvailable)
4431 m_pagesLoadState.insert(iPage);
4432 return nRet;
4433 }
4434 DocAvailStatus nResult = CheckLinearizedData(pHints);
4435 if (nResult != DataAvailable) {
4436 return nResult;
4437 }
4438 if (m_pHintTables) {
4439 nResult = m_pHintTables->CheckPage(iPage, pHints);
4440 if (nResult != DataAvailable)
4441 return nResult;
4442 m_pagesLoadState.insert(iPage);
4443 return DataAvailable;
4444 }
4445 if (m_bMainXRefLoadedOK) {
4446 if (m_bTotalLoadPageTree) {
4447 if (!LoadPages(pHints)) {
4448 return DataNotAvailable;
4449 }
4450 } else {
4451 if (!m_bCurPageDictLoadOK && !CheckPage(iPage, pHints)) {
4452 return DataNotAvailable;
4453 }
4454 }
4455 } else {
4456 if (!LoadAllFile(pHints)) {
4457 return DataNotAvailable;
4458 }
4459 ((CPDF_Parser*)m_pDocument->GetParser())->RebuildCrossRef();
4460 ResetFirstCheck(iPage);
4461 return DataAvailable;
4462 }
4463 } else {
4464 if (!m_bTotalLoadPageTree) {
4465 if (!m_bCurPageDictLoadOK && !CheckPage(iPage, pHints)) {
4466 return DataNotAvailable;
4467 }
4468 }
4469 }
4470 if (m_bHaveAcroForm && !m_bAcroFormLoad) {
4471 if (!CheckAcroFormSubObject(pHints)) {
4472 return DataNotAvailable;
4473 }
4474 m_bAcroFormLoad = TRUE;
4475 }
4476 if (!m_bPageLoadedOK) {
4477 if (!m_objs_array.GetSize()) {
4478 m_objs_array.RemoveAll();
4479 m_ObjectSet.clear();
4480 m_pPageDict = m_pDocument->GetPage(iPage);
4481 if (!m_pPageDict) {
4482 ResetFirstCheck(iPage);
4483 return DataAvailable;
4484 }
4485 CFX_ArrayTemplate<CPDF_Object*> obj_array;
4486 obj_array.Add(m_pPageDict);
4487 FX_BOOL bRet = IsObjectsAvail(obj_array, TRUE, pHints, m_objs_array);
4488 if (!bRet)
4489 return DataNotAvailable;
4490
4491 m_objs_array.RemoveAll();
4492 } else {
4493 CFX_ArrayTemplate<CPDF_Object*> new_objs_array;
4494 FX_BOOL bRet =
4495 IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
4496 m_objs_array.RemoveAll();
4497 if (!bRet) {
4498 m_objs_array.Append(new_objs_array);
4499 return DataNotAvailable;
4500 }
4501 }
4502 m_bPageLoadedOK = TRUE;
4503 }
4504
4505 if (!m_bAnnotsLoad) {
4506 if (!CheckPageAnnots(iPage, pHints)) {
4507 return DataNotAvailable;
4508 }
4509 m_bAnnotsLoad = TRUE;
4510 }
4511
4512 if (m_pPageDict && !m_bNeedDownLoadResource) {
4513 m_pPageResource = m_pPageDict->GetElement("Resources");
4514 if (!m_pPageResource) {
4515 m_bNeedDownLoadResource = HaveResourceAncestor(m_pPageDict);
4516 } else {
4517 m_bNeedDownLoadResource = TRUE;
4518 }
4519 }
4520 if (m_bNeedDownLoadResource) {
4521 FX_BOOL bRet = CheckResources(pHints);
4522 if (!bRet) {
4523 return DataNotAvailable;
4524 }
4525 m_bNeedDownLoadResource = FALSE;
4526 }
4527 m_bPageLoadedOK = FALSE;
4528 m_bAnnotsLoad = FALSE;
4529 m_bCurPageDictLoadOK = FALSE;
4530 ResetFirstCheck(iPage);
4531 m_pagesLoadState.insert(iPage);
4532 return DataAvailable;
4533 }
CheckResources(IFX_DownloadHints * pHints)4534 FX_BOOL CPDF_DataAvail::CheckResources(IFX_DownloadHints* pHints) {
4535 if (!m_objs_array.GetSize()) {
4536 m_objs_array.RemoveAll();
4537 CFX_ArrayTemplate<CPDF_Object*> obj_array;
4538 obj_array.Add(m_pPageResource);
4539 FX_BOOL bRet = IsObjectsAvail(obj_array, TRUE, pHints, m_objs_array);
4540 if (bRet) {
4541 m_objs_array.RemoveAll();
4542 }
4543 return bRet;
4544 }
4545 CFX_ArrayTemplate<CPDF_Object*> new_objs_array;
4546 FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
4547 m_objs_array.RemoveAll();
4548 if (!bRet) {
4549 m_objs_array.Append(new_objs_array);
4550 }
4551 return bRet;
4552 }
GetLinearizedMainXRefInfo(FX_FILESIZE * pPos,FX_DWORD * pSize)4553 void CPDF_DataAvail::GetLinearizedMainXRefInfo(FX_FILESIZE* pPos,
4554 FX_DWORD* pSize) {
4555 if (pPos) {
4556 *pPos = m_dwLastXRefOffset;
4557 }
4558 if (pSize) {
4559 *pSize = (FX_DWORD)(m_dwFileLen - m_dwLastXRefOffset);
4560 }
4561 }
GetPageCount() const4562 int CPDF_DataAvail::GetPageCount() const {
4563 if (m_pLinearized) {
4564 CPDF_Dictionary* pDict = m_pLinearized->GetDict();
4565 CPDF_Object* pObj = pDict ? pDict->GetElementValue("N") : nullptr;
4566 return pObj ? pObj->GetInteger() : 0;
4567 }
4568 return m_pDocument ? m_pDocument->GetPageCount() : 0;
4569 }
GetPage(int index)4570 CPDF_Dictionary* CPDF_DataAvail::GetPage(int index) {
4571 if (!m_pDocument || index < 0 || index >= this->GetPageCount()) {
4572 return nullptr;
4573 }
4574 if (m_pLinearized) {
4575 CPDF_Dictionary* pDict = m_pLinearized->GetDict();
4576 CPDF_Object* pObj = pDict ? pDict->GetElementValue("P") : nullptr;
4577 int pageNum = pObj ? pObj->GetInteger() : 0;
4578 if (m_pHintTables && index != pageNum) {
4579 FX_FILESIZE szPageStartPos = 0;
4580 FX_FILESIZE szPageLength = 0;
4581 FX_DWORD dwObjNum = 0;
4582 FX_BOOL bPagePosGot = m_pHintTables->GetPagePos(index, szPageStartPos,
4583 szPageLength, dwObjNum);
4584 if (!bPagePosGot) {
4585 return nullptr;
4586 }
4587 m_syntaxParser.InitParser(m_pFileRead, (FX_DWORD)szPageStartPos);
4588 CPDF_Object* pPageDict = ParseIndirectObjectAt(0, dwObjNum, m_pDocument);
4589 if (!pPageDict) {
4590 return nullptr;
4591 }
4592 if (!m_pDocument->InsertIndirectObject(dwObjNum, pPageDict))
4593 return nullptr;
4594 return pPageDict->GetDict();
4595 }
4596 }
4597 return m_pDocument->GetPage(index);
4598 }
IsFormAvail(IFX_DownloadHints * pHints)4599 IPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
4600 IFX_DownloadHints* pHints) {
4601 if (!m_pDocument) {
4602 return FormAvailable;
4603 }
4604 if (!m_bLinearizedFormParamLoad) {
4605 CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
4606 if (!pRoot) {
4607 return FormAvailable;
4608 }
4609 CPDF_Object* pAcroForm = pRoot->GetElement("AcroForm");
4610 if (!pAcroForm) {
4611 return FormNotExist;
4612 }
4613 DocAvailStatus nDocStatus = CheckLinearizedData(pHints);
4614 if (nDocStatus == DataError)
4615 return FormError;
4616 if (nDocStatus == DataNotAvailable)
4617 return FormNotAvailable;
4618
4619 if (!m_objs_array.GetSize()) {
4620 m_objs_array.Add(pAcroForm->GetDict());
4621 }
4622 m_bLinearizedFormParamLoad = TRUE;
4623 }
4624 CFX_ArrayTemplate<CPDF_Object*> new_objs_array;
4625 FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
4626 m_objs_array.RemoveAll();
4627 if (!bRet) {
4628 m_objs_array.Append(new_objs_array);
4629 return FormNotAvailable;
4630 }
4631 return FormAvailable;
4632 }
4633
~CPDF_PageNode()4634 CPDF_PageNode::~CPDF_PageNode() {
4635 for (int32_t i = 0; i < m_childNode.GetSize(); ++i) {
4636 delete m_childNode[i];
4637 }
4638 m_childNode.RemoveAll();
4639 }
~CPDF_HintTables()4640 CPDF_HintTables::~CPDF_HintTables() {
4641 m_dwDeltaNObjsArray.RemoveAll();
4642 m_dwNSharedObjsArray.RemoveAll();
4643 m_dwSharedObjNumArray.RemoveAll();
4644 m_dwIdentifierArray.RemoveAll();
4645 m_szPageOffsetArray.RemoveAll();
4646 m_szSharedObjOffsetArray.RemoveAll();
4647 }
GetItemLength(int index,const CFX_FileSizeArray & szArray)4648 FX_DWORD CPDF_HintTables::GetItemLength(int index,
4649 const CFX_FileSizeArray& szArray) {
4650 if (index < 0 || szArray.GetSize() < 2 || index > szArray.GetSize() - 2 ||
4651 szArray[index] > szArray[index + 1])
4652 return 0;
4653 return szArray[index + 1] - szArray[index];
4654 }
ReadPageHintTable(CFX_BitStream * hStream)4655 FX_BOOL CPDF_HintTables::ReadPageHintTable(CFX_BitStream* hStream) {
4656 if (!hStream || hStream->IsEOF())
4657 return FALSE;
4658 int nStreamOffset = ReadPrimaryHintStreamOffset();
4659 int nStreamLen = ReadPrimaryHintStreamLength();
4660 if (nStreamOffset < 0 || nStreamLen < 1)
4661 return FALSE;
4662
4663 const FX_DWORD kHeaderSize = 288;
4664 if (hStream->BitsRemaining() < kHeaderSize)
4665 return FALSE;
4666 // Item 1: The least number of objects in a page.
4667 FX_DWORD dwObjLeastNum = hStream->GetBits(32);
4668 // Item 2: The location of the first page's page object.
4669 FX_DWORD dwFirstObjLoc = hStream->GetBits(32);
4670 if (dwFirstObjLoc > nStreamOffset) {
4671 FX_SAFE_DWORD safeLoc = pdfium::base::checked_cast<FX_DWORD>(nStreamLen);
4672 safeLoc += dwFirstObjLoc;
4673 if (!safeLoc.IsValid())
4674 return FALSE;
4675 m_szFirstPageObjOffset =
4676 pdfium::base::checked_cast<FX_FILESIZE>(safeLoc.ValueOrDie());
4677 } else {
4678 m_szFirstPageObjOffset =
4679 pdfium::base::checked_cast<FX_FILESIZE>(dwFirstObjLoc);
4680 }
4681 // Item 3: The number of bits needed to represent the difference
4682 // between the greatest and least number of objects in a page.
4683 FX_DWORD dwDeltaObjectsBits = hStream->GetBits(16);
4684 // Item 4: The least length of a page in bytes.
4685 FX_DWORD dwPageLeastLen = hStream->GetBits(32);
4686 // Item 5: The number of bits needed to represent the difference
4687 // between the greatest and least length of a page, in bytes.
4688 FX_DWORD dwDeltaPageLenBits = hStream->GetBits(16);
4689 // Skip Item 6, 7, 8, 9 total 96 bits.
4690 hStream->SkipBits(96);
4691 // Item 10: The number of bits needed to represent the greatest
4692 // number of shared object references.
4693 FX_DWORD dwSharedObjBits = hStream->GetBits(16);
4694 // Item 11: The number of bits needed to represent the numerically
4695 // greatest shared object identifier used by the pages.
4696 FX_DWORD dwSharedIdBits = hStream->GetBits(16);
4697 // Item 12: The number of bits needed to represent the numerator of
4698 // the fractional position for each shared object reference. For each
4699 // shared object referenced from a page, there is an indication of
4700 // where in the page's content stream the object is first referenced.
4701 FX_DWORD dwSharedNumeratorBits = hStream->GetBits(16);
4702 // Item 13: Skip Item 13 which has 16 bits.
4703 hStream->SkipBits(16);
4704 CPDF_Object* pPageNum = m_pLinearizedDict->GetElementValue("N");
4705 int nPages = pPageNum ? pPageNum->GetInteger() : 0;
4706 if (nPages < 1)
4707 return FALSE;
4708
4709 FX_SAFE_DWORD required_bits = dwDeltaObjectsBits;
4710 required_bits *= pdfium::base::checked_cast<FX_DWORD>(nPages);
4711 if (!CanReadFromBitStream(hStream, required_bits))
4712 return FALSE;
4713 for (int i = 0; i < nPages; ++i) {
4714 FX_SAFE_DWORD safeDeltaObj = hStream->GetBits(dwDeltaObjectsBits);
4715 safeDeltaObj += dwObjLeastNum;
4716 if (!safeDeltaObj.IsValid())
4717 return FALSE;
4718 m_dwDeltaNObjsArray.Add(safeDeltaObj.ValueOrDie());
4719 }
4720 hStream->ByteAlign();
4721
4722 required_bits = dwDeltaPageLenBits;
4723 required_bits *= pdfium::base::checked_cast<FX_DWORD>(nPages);
4724 if (!CanReadFromBitStream(hStream, required_bits))
4725 return FALSE;
4726 CFX_DWordArray dwPageLenArray;
4727 for (int i = 0; i < nPages; ++i) {
4728 FX_SAFE_DWORD safePageLen = hStream->GetBits(dwDeltaPageLenBits);
4729 safePageLen += dwPageLeastLen;
4730 if (!safePageLen.IsValid())
4731 return FALSE;
4732 dwPageLenArray.Add(safePageLen.ValueOrDie());
4733 }
4734 CPDF_Object* pOffsetE = m_pLinearizedDict->GetElementValue("E");
4735 int nOffsetE = pOffsetE ? pOffsetE->GetInteger() : -1;
4736 if (nOffsetE < 0)
4737 return FALSE;
4738 CPDF_Object* pFirstPageNum = m_pLinearizedDict->GetElementValue("P");
4739 int nFirstPageNum = pFirstPageNum ? pFirstPageNum->GetInteger() : 0;
4740 for (int i = 0; i < nPages; ++i) {
4741 if (i == nFirstPageNum) {
4742 m_szPageOffsetArray.Add(m_szFirstPageObjOffset);
4743 } else if (i == nFirstPageNum + 1) {
4744 if (i == 1) {
4745 m_szPageOffsetArray.Add(nOffsetE);
4746 } else {
4747 m_szPageOffsetArray.Add(m_szPageOffsetArray[i - 2] +
4748 dwPageLenArray[i - 2]);
4749 }
4750 } else {
4751 if (i == 0) {
4752 m_szPageOffsetArray.Add(nOffsetE);
4753 } else {
4754 m_szPageOffsetArray.Add(m_szPageOffsetArray[i - 1] +
4755 dwPageLenArray[i - 1]);
4756 }
4757 }
4758 }
4759 if (nPages > 0) {
4760 m_szPageOffsetArray.Add(m_szPageOffsetArray[nPages - 1] +
4761 dwPageLenArray[nPages - 1]);
4762 }
4763 hStream->ByteAlign();
4764
4765 // number of shared objects
4766 required_bits = dwSharedObjBits;
4767 required_bits *= pdfium::base::checked_cast<FX_DWORD>(nPages);
4768 if (!CanReadFromBitStream(hStream, required_bits))
4769 return FALSE;
4770 for (int i = 0; i < nPages; i++) {
4771 m_dwNSharedObjsArray.Add(hStream->GetBits(dwSharedObjBits));
4772 }
4773 hStream->ByteAlign();
4774
4775 // array of identifier, sizes = nshared_objects
4776 for (int i = 0; i < nPages; i++) {
4777 required_bits = dwSharedIdBits;
4778 required_bits *= m_dwNSharedObjsArray[i];
4779 if (!CanReadFromBitStream(hStream, required_bits))
4780 return FALSE;
4781 for (int j = 0; j < m_dwNSharedObjsArray[i]; j++) {
4782 m_dwIdentifierArray.Add(hStream->GetBits(dwSharedIdBits));
4783 }
4784 }
4785 hStream->ByteAlign();
4786
4787 for (int i = 0; i < nPages; i++) {
4788 FX_SAFE_DWORD safeSize = m_dwNSharedObjsArray[i];
4789 safeSize *= dwSharedNumeratorBits;
4790 if (!CanReadFromBitStream(hStream, safeSize))
4791 return FALSE;
4792 hStream->SkipBits(safeSize.ValueOrDie());
4793 }
4794 hStream->ByteAlign();
4795
4796 FX_SAFE_DWORD safeTotalPageLen = pdfium::base::checked_cast<FX_DWORD>(nPages);
4797 safeTotalPageLen *= dwDeltaPageLenBits;
4798 if (!CanReadFromBitStream(hStream, safeTotalPageLen))
4799 return FALSE;
4800 hStream->SkipBits(safeTotalPageLen.ValueOrDie());
4801 hStream->ByteAlign();
4802 return TRUE;
4803 }
ReadSharedObjHintTable(CFX_BitStream * hStream,FX_DWORD offset)4804 FX_BOOL CPDF_HintTables::ReadSharedObjHintTable(CFX_BitStream* hStream,
4805 FX_DWORD offset) {
4806 if (!hStream || hStream->IsEOF())
4807 return FALSE;
4808 int nStreamOffset = ReadPrimaryHintStreamOffset();
4809 int nStreamLen = ReadPrimaryHintStreamLength();
4810 if (nStreamOffset < 0 || nStreamLen < 1)
4811 return FALSE;
4812
4813 FX_SAFE_DWORD bit_offset = offset;
4814 bit_offset *= 8;
4815 if (!bit_offset.IsValid() || hStream->GetPos() > bit_offset.ValueOrDie())
4816 return FALSE;
4817 hStream->SkipBits(bit_offset.ValueOrDie() - hStream->GetPos());
4818
4819 const FX_DWORD kHeaderSize = 192;
4820 if (hStream->BitsRemaining() < kHeaderSize)
4821 return FALSE;
4822 // Item 1: The object number of the first object in the shared objects
4823 // section.
4824 FX_DWORD dwFirstSharedObjNum = hStream->GetBits(32);
4825 // Item 2: The location of the first object in the shared objects section.
4826 FX_DWORD dwFirstSharedObjLoc = hStream->GetBits(32);
4827 if (dwFirstSharedObjLoc > nStreamOffset)
4828 dwFirstSharedObjLoc += nStreamLen;
4829 // Item 3: The number of shared object entries for the first page.
4830 m_nFirstPageSharedObjs = hStream->GetBits(32);
4831 // Item 4: The number of shared object entries for the shared objects
4832 // section, including the number of shared object entries for the first page.
4833 FX_DWORD dwSharedObjTotal = hStream->GetBits(32);
4834 // Item 5: The number of bits needed to represent the greatest number of
4835 // objects in a shared object group. Skipped.
4836 hStream->SkipBits(16);
4837 // Item 6: The least length of a shared object group in bytes.
4838 FX_DWORD dwGroupLeastLen = hStream->GetBits(32);
4839 // Item 7: The number of bits needed to represent the difference between the
4840 // greatest and least length of a shared object group, in bytes.
4841 FX_DWORD dwDeltaGroupLen = hStream->GetBits(16);
4842 CPDF_Object* pFirstPageObj = m_pLinearizedDict->GetElementValue("O");
4843 int nFirstPageObjNum = pFirstPageObj ? pFirstPageObj->GetInteger() : -1;
4844 if (nFirstPageObjNum < 0)
4845 return FALSE;
4846 FX_DWORD dwPrevObjLen = 0;
4847 FX_DWORD dwCurObjLen = 0;
4848 FX_SAFE_DWORD required_bits = dwSharedObjTotal;
4849 required_bits *= dwDeltaGroupLen;
4850 if (!CanReadFromBitStream(hStream, required_bits))
4851 return FALSE;
4852
4853 for (int i = 0; i < dwSharedObjTotal; ++i) {
4854 dwPrevObjLen = dwCurObjLen;
4855 FX_SAFE_DWORD safeObjLen = hStream->GetBits(dwDeltaGroupLen);
4856 safeObjLen += dwGroupLeastLen;
4857 if (!safeObjLen.IsValid())
4858 return FALSE;
4859 dwCurObjLen = safeObjLen.ValueOrDie();
4860 if (i < m_nFirstPageSharedObjs) {
4861 m_dwSharedObjNumArray.Add(nFirstPageObjNum + i);
4862 if (i == 0)
4863 m_szSharedObjOffsetArray.Add(m_szFirstPageObjOffset);
4864 } else {
4865 FX_SAFE_DWORD safeObjNum = dwFirstSharedObjNum;
4866 safeObjNum += i - m_nFirstPageSharedObjs;
4867 if (!safeObjNum.IsValid())
4868 return FALSE;
4869 m_dwSharedObjNumArray.Add(safeObjNum.ValueOrDie());
4870 if (i == m_nFirstPageSharedObjs)
4871 m_szSharedObjOffsetArray.Add(
4872 pdfium::base::checked_cast<int32_t>(dwFirstSharedObjLoc));
4873 }
4874 if (i != 0 && i != m_nFirstPageSharedObjs) {
4875 FX_SAFE_INT32 safeLoc = pdfium::base::checked_cast<int32_t>(dwPrevObjLen);
4876 safeLoc += m_szSharedObjOffsetArray[i - 1];
4877 if (!safeLoc.IsValid())
4878 return FALSE;
4879 m_szSharedObjOffsetArray.Add(safeLoc.ValueOrDie());
4880 }
4881 }
4882 if (dwSharedObjTotal > 0) {
4883 FX_SAFE_INT32 safeLoc = pdfium::base::checked_cast<int32_t>(dwCurObjLen);
4884 safeLoc += m_szSharedObjOffsetArray[dwSharedObjTotal - 1];
4885 if (!safeLoc.IsValid())
4886 return FALSE;
4887 m_szSharedObjOffsetArray.Add(safeLoc.ValueOrDie());
4888 }
4889 hStream->ByteAlign();
4890 if (hStream->BitsRemaining() < dwSharedObjTotal)
4891 return FALSE;
4892 hStream->SkipBits(dwSharedObjTotal);
4893 hStream->ByteAlign();
4894 return TRUE;
4895 }
GetPagePos(int index,FX_FILESIZE & szPageStartPos,FX_FILESIZE & szPageLength,FX_DWORD & dwObjNum)4896 FX_BOOL CPDF_HintTables::GetPagePos(int index,
4897 FX_FILESIZE& szPageStartPos,
4898 FX_FILESIZE& szPageLength,
4899 FX_DWORD& dwObjNum) {
4900 if (!m_pLinearizedDict)
4901 return FALSE;
4902 szPageStartPos = m_szPageOffsetArray[index];
4903 szPageLength = GetItemLength(index, m_szPageOffsetArray);
4904 CPDF_Object* pFirstPageNum = m_pLinearizedDict->GetElementValue("P");
4905 int nFirstPageNum = pFirstPageNum ? pFirstPageNum->GetInteger() : 0;
4906 CPDF_Object* pFirstPageObjNum = m_pLinearizedDict->GetElementValue("O");
4907 if (!pFirstPageObjNum)
4908 return FALSE;
4909 int nFirstPageObjNum = pFirstPageObjNum->GetInteger();
4910 if (index == nFirstPageNum) {
4911 dwObjNum = nFirstPageObjNum;
4912 return TRUE;
4913 }
4914 // The object number of remaining pages starts from 1.
4915 dwObjNum = 1;
4916 for (int i = 0; i < index; ++i) {
4917 if (i == nFirstPageNum)
4918 continue;
4919 dwObjNum += m_dwDeltaNObjsArray[i];
4920 }
4921 return TRUE;
4922 }
CheckPage(int index,IFX_DownloadHints * pHints)4923 IPDF_DataAvail::DocAvailStatus CPDF_HintTables::CheckPage(
4924 int index,
4925 IFX_DownloadHints* pHints) {
4926 if (!m_pLinearizedDict || !pHints)
4927 return IPDF_DataAvail::DataError;
4928 CPDF_Object* pFirstAvailPage = m_pLinearizedDict->GetElementValue("P");
4929 int nFirstAvailPage = pFirstAvailPage ? pFirstAvailPage->GetInteger() : 0;
4930 if (index == nFirstAvailPage)
4931 return IPDF_DataAvail::DataAvailable;
4932 FX_DWORD dwLength = GetItemLength(index, m_szPageOffsetArray);
4933 // If two pages have the same offset, it should be treated as an error.
4934 if (!dwLength)
4935 return IPDF_DataAvail::DataError;
4936 if (!m_pDataAvail->IsDataAvail(m_szPageOffsetArray[index], dwLength, pHints))
4937 return IPDF_DataAvail::DataNotAvailable;
4938 // Download data of shared objects in the page.
4939 FX_DWORD offset = 0;
4940 for (int i = 0; i < index; ++i) {
4941 offset += m_dwNSharedObjsArray[i];
4942 }
4943 CPDF_Object* pFirstPageObj = m_pLinearizedDict->GetElementValue("O");
4944 int nFirstPageObjNum = pFirstPageObj ? pFirstPageObj->GetInteger() : -1;
4945 if (nFirstPageObjNum < 0)
4946 return IPDF_DataAvail::DataError;
4947 FX_DWORD dwIndex = 0;
4948 FX_DWORD dwObjNum = 0;
4949 for (int j = 0; j < m_dwNSharedObjsArray[index]; ++j) {
4950 dwIndex = m_dwIdentifierArray[offset + j];
4951 if (dwIndex >= m_dwSharedObjNumArray.GetSize())
4952 return IPDF_DataAvail::DataNotAvailable;
4953 dwObjNum = m_dwSharedObjNumArray[dwIndex];
4954 if (dwObjNum >= nFirstPageObjNum &&
4955 dwObjNum < nFirstPageObjNum + m_nFirstPageSharedObjs) {
4956 continue;
4957 }
4958 dwLength = GetItemLength(dwIndex, m_szSharedObjOffsetArray);
4959 // If two objects have the same offset, it should be treated as an error.
4960 if (!dwLength)
4961 return IPDF_DataAvail::DataError;
4962 if (!m_pDataAvail->IsDataAvail(m_szSharedObjOffsetArray[dwIndex], dwLength,
4963 pHints)) {
4964 return IPDF_DataAvail::DataNotAvailable;
4965 }
4966 }
4967 return IPDF_DataAvail::DataAvailable;
4968 }
4969
LoadHintStream(CPDF_Stream * pHintStream)4970 FX_BOOL CPDF_HintTables::LoadHintStream(CPDF_Stream* pHintStream) {
4971 if (!pHintStream || !m_pLinearizedDict)
4972 return FALSE;
4973 CPDF_Dictionary* pDict = pHintStream->GetDict();
4974 CPDF_Object* pOffset = pDict ? pDict->GetElement("S") : nullptr;
4975 if (!pOffset || pOffset->GetType() != PDFOBJ_NUMBER)
4976 return FALSE;
4977 int shared_hint_table_offset = pOffset->GetInteger();
4978 CPDF_StreamAcc acc;
4979 acc.LoadAllData(pHintStream);
4980 FX_DWORD size = acc.GetSize();
4981 // The header section of page offset hint table is 36 bytes.
4982 // The header section of shared object hint table is 24 bytes.
4983 // Hint table has at least 60 bytes.
4984 const FX_DWORD MIN_STREAM_LEN = 60;
4985 if (size < MIN_STREAM_LEN || shared_hint_table_offset <= 0 ||
4986 size < shared_hint_table_offset) {
4987 return FALSE;
4988 }
4989 CFX_BitStream bs;
4990 bs.Init(acc.GetData(), size);
4991 return ReadPageHintTable(&bs) &&
4992 ReadSharedObjHintTable(&bs, pdfium::base::checked_cast<FX_DWORD>(
4993 shared_hint_table_offset));
4994 }
4995
ReadPrimaryHintStreamOffset() const4996 int CPDF_HintTables::ReadPrimaryHintStreamOffset() const {
4997 if (!m_pLinearizedDict)
4998 return -1;
4999 CPDF_Array* pRange = m_pLinearizedDict->GetArray("H");
5000 if (!pRange)
5001 return -1;
5002 CPDF_Object* pStreamOffset = pRange->GetElementValue(0);
5003 if (!pStreamOffset)
5004 return -1;
5005 return pStreamOffset->GetInteger();
5006 }
ReadPrimaryHintStreamLength() const5007 int CPDF_HintTables::ReadPrimaryHintStreamLength() const {
5008 if (!m_pLinearizedDict)
5009 return -1;
5010 CPDF_Array* pRange = m_pLinearizedDict->GetArray("H");
5011 if (!pRange)
5012 return -1;
5013 CPDF_Object* pStreamLen = pRange->GetElementValue(1);
5014 if (!pStreamLen)
5015 return -1;
5016 return pStreamLen->GetInteger();
5017 }
5018