// Copyright 2014 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "../../../include/fpdfapi/fpdf_parser.h"
8 #include "../../../include/fpdfapi/fpdf_module.h"
9 #include "../../../include/fpdfapi/fpdf_page.h"
10 #include "../../../src/fxcrt/fx_safe_types.h"
11 #include "../fpdf_page/pageint.h"
12 #include <utility>
13 #include <vector>
14
IsSignatureDict(const CPDF_Dictionary * pDict)15 FX_BOOL IsSignatureDict(const CPDF_Dictionary* pDict)
16 {
17 CPDF_Object* pType = pDict->GetElementValue(FX_BSTRC("Type"));
18 if (!pType) {
19 pType = pDict->GetElementValue(FX_BSTRC("FT"));
20 if (!pType) {
21 return FALSE;
22 }
23 }
24 if (pType->GetString() == FX_BSTRC("Sig")) {
25 return TRUE;
26 }
27 return FALSE;
28 }
_CompareFileSize(const void * p1,const void * p2)29 static int _CompareFileSize(const void* p1, const void* p2)
30 {
31 FX_FILESIZE ret = (*(FX_FILESIZE*)p1) - (*(FX_FILESIZE*)p2);
32 if (ret > 0) {
33 return 1;
34 }
35 if (ret < 0) {
36 return -1;
37 }
38 return 0;
39 }
40
CPDF_Parser()41 CPDF_Parser::CPDF_Parser()
42 {
43 m_pDocument = NULL;
44 m_pTrailer = NULL;
45 m_pEncryptDict = NULL;
46 m_pSecurityHandler = NULL;
47 m_pLinearized = NULL;
48 m_dwFirstPageNo = 0;
49 m_dwXrefStartObjNum = 0;
50 m_bOwnFileRead = TRUE;
51 m_FileVersion = 0;
52 m_bForceUseSecurityHandler = FALSE;
53 }
~CPDF_Parser()54 CPDF_Parser::~CPDF_Parser()
55 {
56 CloseParser(FALSE);
57 }
GetLastObjNum()58 FX_DWORD CPDF_Parser::GetLastObjNum()
59 {
60 FX_DWORD dwSize = m_CrossRef.GetSize();
61 return dwSize ? dwSize - 1 : 0;
62 }
SetEncryptDictionary(CPDF_Dictionary * pDict)63 void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict)
64 {
65 m_pEncryptDict = pDict;
66 }
CloseParser(FX_BOOL bReParse)67 void CPDF_Parser::CloseParser(FX_BOOL bReParse)
68 {
69 m_bVersionUpdated = FALSE;
70 if (m_pDocument && !bReParse) {
71 delete m_pDocument;
72 m_pDocument = NULL;
73 }
74 if (m_pTrailer) {
75 m_pTrailer->Release();
76 m_pTrailer = NULL;
77 }
78 ReleaseEncryptHandler();
79 SetEncryptDictionary(NULL);
80 if (m_bOwnFileRead && m_Syntax.m_pFileAccess) {
81 m_Syntax.m_pFileAccess->Release();
82 m_Syntax.m_pFileAccess = NULL;
83 }
84 FX_POSITION pos = m_ObjectStreamMap.GetStartPosition();
85 while (pos) {
86 FX_LPVOID objnum;
87 CPDF_StreamAcc* pStream;
88 m_ObjectStreamMap.GetNextAssoc(pos, objnum, (void*&)pStream);
89 delete pStream;
90 }
91 m_ObjectStreamMap.RemoveAll();
92 m_SortedOffset.RemoveAll();
93 m_CrossRef.RemoveAll();
94 m_V5Type.RemoveAll();
95 m_ObjVersion.RemoveAll();
96 FX_INT32 iLen = m_Trailers.GetSize();
97 for (FX_INT32 i = 0; i < iLen; ++i) {
98 if (CPDF_Dictionary* trailer = m_Trailers.GetAt(i))
99 trailer->Release();
100 }
101 m_Trailers.RemoveAll();
102 if (m_pLinearized) {
103 m_pLinearized->Release();
104 m_pLinearized = NULL;
105 }
106 }
GetHeaderOffset(IFX_FileRead * pFile)107 static FX_INT32 GetHeaderOffset(IFX_FileRead* pFile)
108 {
109 FX_DWORD tag = FXDWORD_FROM_LSBFIRST(0x46445025);
110 FX_BYTE buf[4];
111 FX_INT32 offset = 0;
112 while (1) {
113 if (!pFile->ReadBlock(buf, offset, 4)) {
114 return -1;
115 }
116 if (*(FX_DWORD*)buf == tag) {
117 return offset;
118 }
119 offset ++;
120 if (offset > 1024) {
121 return -1;
122 }
123 }
124 return -1;
125 }
StartParse(FX_LPCSTR filename,FX_BOOL bReParse)126 FX_DWORD CPDF_Parser::StartParse(FX_LPCSTR filename, FX_BOOL bReParse)
127 {
128 IFX_FileRead* pFileAccess = FX_CreateFileRead(filename);
129 if (!pFileAccess) {
130 return PDFPARSE_ERROR_FILE;
131 }
132 return StartParse(pFileAccess, bReParse);
133 }
StartParse(FX_LPCWSTR filename,FX_BOOL bReParse)134 FX_DWORD CPDF_Parser::StartParse(FX_LPCWSTR filename, FX_BOOL bReParse)
135 {
136 IFX_FileRead* pFileAccess = FX_CreateFileRead(filename);
137 if (!pFileAccess) {
138 return PDFPARSE_ERROR_FILE;
139 }
140 return StartParse(pFileAccess, bReParse);
141 }
// Forward declarations of the security-handler factory functions.
// SetEncryptHandler() calls FPDF_CreateStandardSecurityHandler() when the
// encryption dictionary's "Filter" is "Standard".
// NOTE(review): FPDF_CreatePubKeyHandler() is not called in the part of the
// file reviewed here -- confirm whether the declaration is still needed.
142 CPDF_SecurityHandler* FPDF_CreateStandardSecurityHandler();
143 CPDF_SecurityHandler* FPDF_CreatePubKeyHandler(void*);
StartParse(IFX_FileRead * pFileAccess,FX_BOOL bReParse,FX_BOOL bOwnFileRead)144 FX_DWORD CPDF_Parser::StartParse(IFX_FileRead* pFileAccess, FX_BOOL bReParse, FX_BOOL bOwnFileRead)
145 {
146 CloseParser(bReParse);
147 m_bXRefStream = FALSE;
148 m_LastXRefOffset = 0;
149 m_bOwnFileRead = bOwnFileRead;
150 FX_INT32 offset = GetHeaderOffset(pFileAccess);
151 if (offset == -1) {
152 if (bOwnFileRead && pFileAccess) {
153 pFileAccess->Release();
154 }
155 return PDFPARSE_ERROR_FORMAT;
156 }
157 m_Syntax.InitParser(pFileAccess, offset);
158 FX_BYTE ch;
159 if (!m_Syntax.GetCharAt(5, ch)) {
160 return PDFPARSE_ERROR_FORMAT;
161 }
162 if (ch >= '0' && ch <= '9') {
163 m_FileVersion = (ch - '0') * 10;
164 }
165 if (!m_Syntax.GetCharAt(7, ch)) {
166 return PDFPARSE_ERROR_FORMAT;
167 }
168 if (ch >= '0' && ch <= '9') {
169 m_FileVersion += ch - '0';
170 }
171 if (m_Syntax.m_FileLen < m_Syntax.m_HeaderOffset + 9) {
172 return PDFPARSE_ERROR_FORMAT;
173 }
174 m_Syntax.RestorePos(m_Syntax.m_FileLen - m_Syntax.m_HeaderOffset - 9);
175 if (!bReParse) {
176 m_pDocument = new CPDF_Document(this);
177 }
178 FX_BOOL bXRefRebuilt = FALSE;
179 if (m_Syntax.SearchWord(FX_BSTRC("startxref"), TRUE, FALSE, 4096)) {
180 FX_FILESIZE startxref_offset = m_Syntax.SavePos();
181 FX_LPVOID pResult = FXSYS_bsearch(&startxref_offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
182 if (pResult == NULL) {
183 m_SortedOffset.Add(startxref_offset);
184 }
185 m_Syntax.GetKeyword();
186 FX_BOOL bNumber;
187 CFX_ByteString xrefpos_str = m_Syntax.GetNextWord(bNumber);
188 if (!bNumber) {
189 return PDFPARSE_ERROR_FORMAT;
190 }
191 m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str);
192 if (!LoadAllCrossRefV4(m_LastXRefOffset) && !LoadAllCrossRefV5(m_LastXRefOffset)) {
193 if (!RebuildCrossRef()) {
194 return PDFPARSE_ERROR_FORMAT;
195 }
196 bXRefRebuilt = TRUE;
197 m_LastXRefOffset = 0;
198 }
199 } else {
200 if (!RebuildCrossRef()) {
201 return PDFPARSE_ERROR_FORMAT;
202 }
203 bXRefRebuilt = TRUE;
204 }
205 FX_DWORD dwRet = SetEncryptHandler();
206 if (dwRet != PDFPARSE_ERROR_SUCCESS) {
207 return dwRet;
208 }
209 m_pDocument->LoadDoc();
210 if (m_pDocument->GetRoot() == NULL || m_pDocument->GetPageCount() == 0) {
211 if (bXRefRebuilt) {
212 return PDFPARSE_ERROR_FORMAT;
213 }
214 ReleaseEncryptHandler();
215 if (!RebuildCrossRef()) {
216 return PDFPARSE_ERROR_FORMAT;
217 }
218 dwRet = SetEncryptHandler();
219 if (dwRet != PDFPARSE_ERROR_SUCCESS) {
220 return dwRet;
221 }
222 m_pDocument->LoadDoc();
223 if (m_pDocument->GetRoot() == NULL) {
224 return PDFPARSE_ERROR_FORMAT;
225 }
226 }
227 FXSYS_qsort(m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
228 FX_DWORD RootObjNum = GetRootObjNum();
229 if (RootObjNum == 0) {
230 ReleaseEncryptHandler();
231 RebuildCrossRef();
232 RootObjNum = GetRootObjNum();
233 if (RootObjNum == 0) {
234 return PDFPARSE_ERROR_FORMAT;
235 }
236 dwRet = SetEncryptHandler();
237 if (dwRet != PDFPARSE_ERROR_SUCCESS) {
238 return dwRet;
239 }
240 }
241 if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
242 CPDF_Reference* pMetadata = (CPDF_Reference*)m_pDocument->GetRoot()->GetElement(FX_BSTRC("Metadata"));
243 if (pMetadata && pMetadata->GetType() == PDFOBJ_REFERENCE) {
244 m_Syntax.m_MetadataObjnum = pMetadata->GetRefObjNum();
245 }
246 }
247 return PDFPARSE_ERROR_SUCCESS;
248 }
SetEncryptHandler()249 FX_DWORD CPDF_Parser::SetEncryptHandler()
250 {
251 ReleaseEncryptHandler();
252 SetEncryptDictionary(NULL);
253 if (m_pTrailer == NULL) {
254 return PDFPARSE_ERROR_FORMAT;
255 }
256 CPDF_Object* pEncryptObj = m_pTrailer->GetElement(FX_BSTRC("Encrypt"));
257 if (pEncryptObj) {
258 if (pEncryptObj->GetType() == PDFOBJ_DICTIONARY) {
259 SetEncryptDictionary((CPDF_Dictionary*)pEncryptObj);
260 } else if (pEncryptObj->GetType() == PDFOBJ_REFERENCE) {
261 pEncryptObj = m_pDocument->GetIndirectObject(((CPDF_Reference*)pEncryptObj)->GetRefObjNum());
262 if (pEncryptObj) {
263 SetEncryptDictionary(pEncryptObj->GetDict());
264 }
265 }
266 }
267 if (m_bForceUseSecurityHandler) {
268 FX_DWORD err = PDFPARSE_ERROR_HANDLER;
269 if (m_pSecurityHandler == NULL) {
270 return PDFPARSE_ERROR_HANDLER;
271 }
272 if (!m_pSecurityHandler->OnInit(this, m_pEncryptDict)) {
273 return err;
274 }
275 CPDF_CryptoHandler* pCryptoHandler = m_pSecurityHandler->CreateCryptoHandler();
276 if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler)) {
277 delete pCryptoHandler;
278 pCryptoHandler = NULL;
279 return PDFPARSE_ERROR_HANDLER;
280 }
281 m_Syntax.SetEncrypt(pCryptoHandler);
282 } else if (m_pEncryptDict) {
283 CFX_ByteString filter = m_pEncryptDict->GetString(FX_BSTRC("Filter"));
284 CPDF_SecurityHandler* pSecurityHandler = NULL;
285 FX_DWORD err = PDFPARSE_ERROR_HANDLER;
286 if (filter == FX_BSTRC("Standard")) {
287 pSecurityHandler = FPDF_CreateStandardSecurityHandler();
288 err = PDFPARSE_ERROR_PASSWORD;
289 }
290 if (pSecurityHandler == NULL) {
291 return PDFPARSE_ERROR_HANDLER;
292 }
293 if (!pSecurityHandler->OnInit(this, m_pEncryptDict)) {
294 delete pSecurityHandler;
295 pSecurityHandler = NULL;
296 return err;
297 }
298 m_pSecurityHandler = pSecurityHandler;
299 CPDF_CryptoHandler* pCryptoHandler = pSecurityHandler->CreateCryptoHandler();
300 if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler)) {
301 delete pCryptoHandler;
302 pCryptoHandler = NULL;
303 return PDFPARSE_ERROR_HANDLER;
304 }
305 m_Syntax.SetEncrypt(pCryptoHandler);
306 }
307 return PDFPARSE_ERROR_SUCCESS;
308 }
ReleaseEncryptHandler()309 void CPDF_Parser::ReleaseEncryptHandler()
310 {
311 if (m_Syntax.m_pCryptoHandler) {
312 delete m_Syntax.m_pCryptoHandler;
313 m_Syntax.m_pCryptoHandler = NULL;
314 }
315 if (m_pSecurityHandler && !m_bForceUseSecurityHandler) {
316 delete m_pSecurityHandler;
317 m_pSecurityHandler = NULL;
318 }
319 }
GetObjectOffset(FX_DWORD objnum)320 FX_FILESIZE CPDF_Parser::GetObjectOffset(FX_DWORD objnum)
321 {
322 if (objnum >= (FX_DWORD)m_CrossRef.GetSize()) {
323 return 0;
324 }
325 if (m_V5Type[objnum] == 1) {
326 return m_CrossRef[objnum];
327 }
328 if (m_V5Type[objnum] == 2) {
329 return m_CrossRef[(FX_INT32)m_CrossRef[objnum]];
330 }
331 return 0;
332 }
GetDirectInteger(CPDF_Dictionary * pDict,FX_BSTR key)333 static FX_INT32 GetDirectInteger(CPDF_Dictionary* pDict, FX_BSTR key)
334 {
335 CPDF_Object* pObj = pDict->GetElement(key);
336 if (pObj == NULL) {
337 return 0;
338 }
339 if (pObj->GetType() == PDFOBJ_NUMBER) {
340 return ((CPDF_Number*)pObj)->GetInteger();
341 }
342 return 0;
343 }
CheckDirectType(CPDF_Dictionary * pDict,FX_BSTR key,FX_INT32 iType)344 static FX_BOOL CheckDirectType(CPDF_Dictionary* pDict, FX_BSTR key, FX_INT32 iType)
345 {
346 CPDF_Object* pObj = pDict->GetElement(key);
347 if (!pObj) {
348 return TRUE;
349 }
350 return pObj->GetType() == iType;
351 }
LoadAllCrossRefV4(FX_FILESIZE xrefpos)352 FX_BOOL CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos)
353 {
354 if (!LoadCrossRefV4(xrefpos, 0, TRUE, FALSE)) {
355 return FALSE;
356 }
357 m_pTrailer = LoadTrailerV4();
358 if (m_pTrailer == NULL) {
359 return FALSE;
360 }
361 FX_INT32 xrefsize = GetDirectInteger(m_pTrailer, FX_BSTRC("Size"));
362 if (xrefsize <= 0 || xrefsize > (1 << 20)) {
363 return FALSE;
364 }
365 m_CrossRef.SetSize(xrefsize);
366 m_V5Type.SetSize(xrefsize);
367 CFX_FileSizeArray CrossRefList, XRefStreamList;
368 CrossRefList.Add(xrefpos);
369 XRefStreamList.Add(GetDirectInteger(m_pTrailer, FX_BSTRC("XRefStm")));
370 if (!CheckDirectType(m_pTrailer, FX_BSTRC("Prev"), PDFOBJ_NUMBER)) {
371 return FALSE;
372 }
373 FX_FILESIZE newxrefpos = GetDirectInteger(m_pTrailer, FX_BSTRC("Prev"));
374 if (newxrefpos == xrefpos) {
375 return FALSE;
376 }
377 xrefpos = newxrefpos;
378 while (xrefpos) {
379 CrossRefList.InsertAt(0, xrefpos);
380 LoadCrossRefV4(xrefpos, 0, TRUE, FALSE);
381 CPDF_Dictionary* pDict = LoadTrailerV4();
382 if (pDict == NULL) {
383 return FALSE;
384 }
385 if (!CheckDirectType(pDict, FX_BSTRC("Prev"), PDFOBJ_NUMBER)) {
386 pDict->Release();
387 return FALSE;
388 }
389 newxrefpos = GetDirectInteger(pDict, FX_BSTRC("Prev"));
390 if (newxrefpos == xrefpos) {
391 pDict->Release();
392 return FALSE;
393 }
394 xrefpos = newxrefpos;
395 XRefStreamList.InsertAt(0, pDict->GetInteger(FX_BSTRC("XRefStm")));
396 m_Trailers.Add(pDict);
397 }
398 for (FX_INT32 i = 0; i < CrossRefList.GetSize(); i ++)
399 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE, i == 0)) {
400 return FALSE;
401 }
402 return TRUE;
403 }
LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos,FX_DWORD dwObjCount)404 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos, FX_DWORD dwObjCount)
405 {
406 if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount)) {
407 return FALSE;
408 }
409 m_pTrailer = LoadTrailerV4();
410 if (m_pTrailer == NULL) {
411 return FALSE;
412 }
413 FX_INT32 xrefsize = GetDirectInteger(m_pTrailer, FX_BSTRC("Size"));
414 if (xrefsize == 0) {
415 return FALSE;
416 }
417 CFX_FileSizeArray CrossRefList, XRefStreamList;
418 CrossRefList.Add(xrefpos);
419 XRefStreamList.Add(GetDirectInteger(m_pTrailer, FX_BSTRC("XRefStm")));
420 xrefpos = GetDirectInteger(m_pTrailer, FX_BSTRC("Prev"));
421 while (xrefpos) {
422 CrossRefList.InsertAt(0, xrefpos);
423 LoadCrossRefV4(xrefpos, 0, TRUE, FALSE);
424 CPDF_Dictionary* pDict = LoadTrailerV4();
425 if (pDict == NULL) {
426 return FALSE;
427 }
428 xrefpos = GetDirectInteger(pDict, FX_BSTRC("Prev"));
429 XRefStreamList.InsertAt(0, pDict->GetInteger(FX_BSTRC("XRefStm")));
430 m_Trailers.Add(pDict);
431 }
432 for (FX_INT32 i = 1; i < CrossRefList.GetSize(); i ++)
433 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE, i == 0)) {
434 return FALSE;
435 }
436 return TRUE;
437 }
LoadLinearizedCrossRefV4(FX_FILESIZE pos,FX_DWORD dwObjCount)438 FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos, FX_DWORD dwObjCount)
439 {
440 FX_FILESIZE dwStartPos = pos - m_Syntax.m_HeaderOffset;
441 m_Syntax.RestorePos(dwStartPos);
442 FX_LPVOID pResult = FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
443 if (pResult == NULL) {
444 m_SortedOffset.Add(pos);
445 }
446 FX_DWORD start_objnum = 0;
447 FX_DWORD count = dwObjCount;
448 FX_FILESIZE SavedPos = m_Syntax.SavePos();
449 FX_INT32 recordsize = 20;
450 char* pBuf = FX_Alloc(char, 1024 * recordsize + 1);
451 pBuf[1024 * recordsize] = '\0';
452 FX_INT32 nBlocks = count / 1024 + 1;
453 for (FX_INT32 block = 0; block < nBlocks; block ++) {
454 FX_INT32 block_size = block == nBlocks - 1 ? count % 1024 : 1024;
455 FX_DWORD dwReadSize = block_size * recordsize;
456 if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_Syntax.m_FileLen) {
457 FX_Free(pBuf);
458 return FALSE;
459 }
460 if (!m_Syntax.ReadBlock((FX_LPBYTE)pBuf, dwReadSize)) {
461 FX_Free(pBuf);
462 return FALSE;
463 }
464 for (FX_INT32 i = 0; i < block_size; i ++) {
465 FX_DWORD objnum = start_objnum + block * 1024 + i;
466 char* pEntry = pBuf + i * recordsize;
467 if (pEntry[17] == 'f') {
468 m_CrossRef.SetAtGrow(objnum, 0);
469 m_V5Type.SetAtGrow(objnum, 0);
470 } else {
471 FX_INT32 offset = FXSYS_atoi(pEntry);
472 if (offset == 0) {
473 for (FX_INT32 c = 0; c < 10; c ++) {
474 if (pEntry[c] < '0' || pEntry[c] > '9') {
475 FX_Free(pBuf);
476 return FALSE;
477 }
478 }
479 }
480 m_CrossRef.SetAtGrow(objnum, offset);
481 FX_INT32 version = FXSYS_atoi(pEntry + 11);
482 if (version >= 1) {
483 m_bVersionUpdated = TRUE;
484 }
485 m_ObjVersion.SetAtGrow(objnum, version);
486 if (m_CrossRef[objnum] < m_Syntax.m_FileLen) {
487 FX_LPVOID pResult = FXSYS_bsearch(&m_CrossRef[objnum], m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
488 if (pResult == NULL) {
489 m_SortedOffset.Add(m_CrossRef[objnum]);
490 }
491 }
492 m_V5Type.SetAtGrow(objnum, 1);
493 }
494 }
495 }
496 FX_Free(pBuf);
497 m_Syntax.RestorePos(SavedPos + count * recordsize);
498 return TRUE;
499 }
LoadCrossRefV4(FX_FILESIZE pos,FX_FILESIZE streampos,FX_BOOL bSkip,FX_BOOL bFirst)500 FX_BOOL CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos, FX_FILESIZE streampos, FX_BOOL bSkip, FX_BOOL bFirst)
501 {
502 m_Syntax.RestorePos(pos);
503 if (m_Syntax.GetKeyword() != FX_BSTRC("xref")) {
504 return FALSE;
505 }
506 FX_LPVOID pResult = FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
507 if (pResult == NULL) {
508 m_SortedOffset.Add(pos);
509 }
510 if (streampos) {
511 FX_LPVOID pResult = FXSYS_bsearch(&streampos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
512 if (pResult == NULL) {
513 m_SortedOffset.Add(streampos);
514 }
515 }
516 while (1) {
517 FX_FILESIZE SavedPos = m_Syntax.SavePos();
518 FX_BOOL bIsNumber;
519 CFX_ByteString word = m_Syntax.GetNextWord(bIsNumber);
520 if (word.IsEmpty()) {
521 return FALSE;
522 }
523 if (!bIsNumber) {
524 m_Syntax.RestorePos(SavedPos);
525 break;
526 }
527 FX_DWORD start_objnum = FXSYS_atoi(word);
528 if (start_objnum >= (1 << 20)) {
529 return FALSE;
530 }
531 FX_DWORD count = m_Syntax.GetDirectNum();
532 m_Syntax.ToNextWord();
533 SavedPos = m_Syntax.SavePos();
534 FX_BOOL bFirstItem = FALSE;
535 FX_INT32 recordsize = 20;
536 if (bFirst) {
537 bFirstItem = TRUE;
538 }
539 m_dwXrefStartObjNum = start_objnum;
540 if (!bSkip) {
541 char* pBuf = FX_Alloc(char, 1024 * recordsize + 1);
542 pBuf[1024 * recordsize] = '\0';
543 FX_INT32 nBlocks = count / 1024 + 1;
544 FX_BOOL bFirstBlock = TRUE;
545 for (FX_INT32 block = 0; block < nBlocks; block ++) {
546 FX_INT32 block_size = block == nBlocks - 1 ? count % 1024 : 1024;
547 m_Syntax.ReadBlock((FX_LPBYTE)pBuf, block_size * recordsize);
548 for (FX_INT32 i = 0; i < block_size; i ++) {
549 FX_DWORD objnum = start_objnum + block * 1024 + i;
550 char* pEntry = pBuf + i * recordsize;
551 if (pEntry[17] == 'f') {
552 if (bFirstItem) {
553 objnum = 0;
554 bFirstItem = FALSE;
555 }
556 if (bFirstBlock) {
557 FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);
558 FX_INT32 version = FXSYS_atoi(pEntry + 11);
559 if (offset == 0 && version == 65535 && start_objnum != 0) {
560 start_objnum--;
561 objnum = 0;
562 }
563 }
564 m_CrossRef.SetAtGrow(objnum, 0);
565 m_V5Type.SetAtGrow(objnum, 0);
566 } else {
567 FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);
568 if (offset == 0) {
569 for (FX_INT32 c = 0; c < 10; c ++) {
570 if (pEntry[c] < '0' || pEntry[c] > '9') {
571 FX_Free(pBuf);
572 return FALSE;
573 }
574 }
575 }
576 m_CrossRef.SetAtGrow(objnum, offset);
577 FX_INT32 version = FXSYS_atoi(pEntry + 11);
578 if (version >= 1) {
579 m_bVersionUpdated = TRUE;
580 }
581 m_ObjVersion.SetAtGrow(objnum, version);
582 if (m_CrossRef[objnum] < m_Syntax.m_FileLen) {
583 FX_LPVOID pResult = FXSYS_bsearch(&m_CrossRef[objnum], m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
584 if (pResult == NULL) {
585 m_SortedOffset.Add(m_CrossRef[objnum]);
586 }
587 }
588 m_V5Type.SetAtGrow(objnum, 1);
589 }
590 if (bFirstBlock) {
591 bFirstBlock = FALSE;
592 }
593 }
594 }
595 FX_Free(pBuf);
596 }
597 m_Syntax.RestorePos(SavedPos + count * recordsize);
598 }
599 if (streampos)
600 if (!LoadCrossRefV5(streampos, streampos, FALSE)) {
601 return FALSE;
602 }
603 return TRUE;
604 }
LoadAllCrossRefV5(FX_FILESIZE xrefpos)605 FX_BOOL CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos)
606 {
607 if (!LoadCrossRefV5(xrefpos, xrefpos, TRUE)) {
608 return FALSE;
609 }
610 while (xrefpos)
611 if (!LoadCrossRefV5(xrefpos, xrefpos, FALSE)) {
612 return FALSE;
613 }
614 m_ObjectStreamMap.InitHashTable(101, FALSE);
615 m_bXRefStream = TRUE;
616 return TRUE;
617 }
RebuildCrossRef()618 FX_BOOL CPDF_Parser::RebuildCrossRef()
619 {
620 m_CrossRef.RemoveAll();
621 m_V5Type.RemoveAll();
622 m_SortedOffset.RemoveAll();
623 m_ObjVersion.RemoveAll();
624 if (m_pTrailer) {
625 m_pTrailer->Release();
626 m_pTrailer = NULL;
627 }
628 FX_INT32 status = 0;
629 FX_INT32 inside_index = 0;
630 FX_DWORD objnum = 0, gennum = 0;
631 FX_INT32 depth = 0;
632 FX_LPBYTE buffer = FX_Alloc(FX_BYTE, 4096);
633 FX_FILESIZE pos = m_Syntax.m_HeaderOffset;
634 FX_FILESIZE start_pos = 0, start_pos1 = 0;
635 FX_FILESIZE last_obj = -1, last_xref = -1, last_trailer = -1;
636 while (pos < m_Syntax.m_FileLen) {
637 FX_BOOL bOverFlow = FALSE;
638 FX_DWORD size = (FX_DWORD)(m_Syntax.m_FileLen - pos);
639 if (size > 4096) {
640 size = 4096;
641 }
642 if (!m_Syntax.m_pFileAccess->ReadBlock(buffer, pos, size)) {
643 break;
644 }
645 for (FX_DWORD i = 0; i < size; i ++) {
646 FX_BYTE byte = buffer[i];
647 switch (status) {
648 case 0:
649 if (PDF_CharType[byte] == 'W') {
650 status = 1;
651 }
652 if (byte <= '9' && byte >= '0') {
653 --i;
654 status = 1;
655 }
656 if (byte == '%') {
657 inside_index = 0;
658 status = 9;
659 }
660 if (byte == '(') {
661 status = 10;
662 depth = 1;
663 }
664 if (byte == '<') {
665 inside_index = 1;
666 status = 11;
667 }
668 if (byte == '\\') {
669 status = 13;
670 }
671 if (byte == 't') {
672 status = 7;
673 inside_index = 1;
674 }
675 break;
676 case 1:
677 if (PDF_CharType[byte] == 'W') {
678 break;
679 } else if (byte <= '9' && byte >= '0') {
680 start_pos = pos + i;
681 status = 2;
682 objnum = byte - '0';
683 } else if (byte == 't') {
684 status = 7;
685 inside_index = 1;
686 } else if (byte == 'x') {
687 status = 8;
688 inside_index = 1;
689 } else {
690 --i;
691 status = 0;
692 }
693 break;
694 case 2:
695 if (byte <= '9' && byte >= '0') {
696 objnum = objnum * 10 + byte - '0';
697 break;
698 } else if (PDF_CharType[byte] == 'W') {
699 status = 3;
700 } else {
701 --i;
702 status = 14;
703 inside_index = 0;
704 }
705 break;
706 case 3:
707 if (byte <= '9' && byte >= '0') {
708 start_pos1 = pos + i;
709 status = 4;
710 gennum = byte - '0';
711 } else if (PDF_CharType[byte] == 'W') {
712 break;
713 } else if (byte == 't') {
714 status = 7;
715 inside_index = 1;
716 } else {
717 --i;
718 status = 0;
719 }
720 break;
721 case 4:
722 if (byte <= '9' && byte >= '0') {
723 gennum = gennum * 10 + byte - '0';
724 break;
725 } else if (PDF_CharType[byte] == 'W') {
726 status = 5;
727 } else {
728 --i;
729 status = 0;
730 }
731 break;
732 case 5:
733 if (byte == 'o') {
734 status = 6;
735 inside_index = 1;
736 } else if (PDF_CharType[byte] == 'W') {
737 break;
738 } else if (byte <= '9' && byte >= '0') {
739 objnum = gennum;
740 gennum = byte - '0';
741 start_pos = start_pos1;
742 start_pos1 = pos + i;
743 status = 4;
744 } else if (byte == 't') {
745 status = 7;
746 inside_index = 1;
747 } else {
748 --i;
749 status = 0;
750 }
751 break;
752 case 6:
753 switch (inside_index) {
754 case 1:
755 if (byte != 'b') {
756 --i;
757 status = 0;
758 } else {
759 inside_index ++;
760 }
761 break;
762 case 2:
763 if (byte != 'j') {
764 --i;
765 status = 0;
766 } else {
767 inside_index ++;
768 }
769 break;
770 case 3:
771 if (PDF_CharType[byte] == 'W' || PDF_CharType[byte] == 'D') {
772 if (objnum > 0x1000000) {
773 status = 0;
774 break;
775 }
776 FX_FILESIZE obj_pos = start_pos - m_Syntax.m_HeaderOffset;
777 last_obj = start_pos;
778 FX_LPVOID pResult = FXSYS_bsearch(&obj_pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
779 if (pResult == NULL) {
780 m_SortedOffset.Add(obj_pos);
781 }
782 FX_FILESIZE obj_end = 0;
783 CPDF_Object *pObject = ParseIndirectObjectAtByStrict(m_pDocument, obj_pos, objnum, NULL, &obj_end);
784 if (pObject) {
785 int iType = pObject->GetType();
786 if (iType == PDFOBJ_STREAM) {
787 CPDF_Stream* pStream = (CPDF_Stream*)pObject;
788 CPDF_Dictionary* pDict = pStream->GetDict();
789 if (pDict) {
790 if (pDict->KeyExist(FX_BSTRC("Type"))) {
791 CFX_ByteString bsValue = pDict->GetString(FX_BSTRC("Type"));
792 if (bsValue == FX_BSTRC("XRef") && pDict->KeyExist(FX_BSTRC("Size"))) {
793 CPDF_Object* pRoot = pDict->GetElement(FX_BSTRC("Root"));
794 if (pRoot && pRoot->GetDict() && pRoot->GetDict()->GetElement(FX_BSTRC("Pages"))) {
795 if (m_pTrailer) {
796 m_pTrailer->Release();
797 }
798 m_pTrailer = (CPDF_Dictionary*)pDict->Clone();
799 }
800 }
801 }
802 }
803 }
804 }
805 FX_FILESIZE offset = 0;
806 m_Syntax.RestorePos(obj_pos);
807 offset = m_Syntax.FindTag(FX_BSTRC("obj"), 0);
808 if (offset == -1) {
809 offset = 0;
810 } else {
811 offset += 3;
812 }
813 FX_FILESIZE nLen = obj_end - obj_pos - offset;
814 if ((FX_DWORD)nLen > size - i) {
815 pos = obj_end + m_Syntax.m_HeaderOffset;
816 bOverFlow = TRUE;
817 } else {
818 i += (FX_DWORD)nLen;
819 }
820 if (m_CrossRef.GetSize() > (FX_INT32)objnum && m_CrossRef[objnum]) {
821 if (pObject) {
822 FX_DWORD oldgen = m_ObjVersion.GetAt(objnum);
823 m_CrossRef[objnum] = obj_pos;
824 m_ObjVersion.SetAt(objnum, (FX_SHORT)gennum);
825 if (oldgen != gennum) {
826 m_bVersionUpdated = TRUE;
827 }
828 }
829 } else {
830 m_CrossRef.SetAtGrow(objnum, obj_pos);
831 m_V5Type.SetAtGrow(objnum, 1);
832 m_ObjVersion.SetAtGrow(objnum, (FX_SHORT)gennum);
833 }
834 if (pObject) {
835 pObject->Release();
836 }
837 }
838 --i;
839 status = 0;
840 break;
841 }
842 break;
843 case 7:
844 if (inside_index == 7) {
845 if (PDF_CharType[byte] == 'W' || PDF_CharType[byte] == 'D') {
846 last_trailer = pos + i - 7;
847 m_Syntax.RestorePos(pos + i - m_Syntax.m_HeaderOffset);
848 CPDF_Object* pObj = m_Syntax.GetObject(m_pDocument, 0, 0, 0);
849 if (pObj) {
850 if (pObj->GetType() != PDFOBJ_DICTIONARY && pObj->GetType() != PDFOBJ_STREAM) {
851 pObj->Release();
852 } else {
853 CPDF_Dictionary* pTrailer = NULL;
854 if (pObj->GetType() == PDFOBJ_STREAM) {
855 pTrailer = ((CPDF_Stream*)pObj)->GetDict();
856 } else {
857 pTrailer = (CPDF_Dictionary*)pObj;
858 }
859 if (pTrailer) {
860 if (m_pTrailer) {
861 CPDF_Object* pRoot = pTrailer->GetElement(FX_BSTRC("Root"));
862 if (pRoot == NULL || (pRoot->GetType() == PDFOBJ_REFERENCE &&
863 (FX_DWORD)m_CrossRef.GetSize() > ((CPDF_Reference*)pRoot)->GetRefObjNum() &&
864 m_CrossRef.GetAt(((CPDF_Reference*)pRoot)->GetRefObjNum()) != 0)) {
865 FX_POSITION pos = pTrailer->GetStartPos();
866 while (pos) {
867 CFX_ByteString key;
868 CPDF_Object* pObj = pTrailer->GetNextElement(pos, key);
869 m_pTrailer->SetAt(key, pObj->Clone(), m_pDocument);
870 }
871 pObj->Release();
872 } else {
873 pObj->Release();
874 }
875 } else {
876 if (pObj->GetType() == PDFOBJ_STREAM) {
877 m_pTrailer = (CPDF_Dictionary*)pTrailer->Clone();
878 pObj->Release();
879 } else {
880 m_pTrailer = pTrailer;
881 }
882 FX_FILESIZE dwSavePos = m_Syntax.SavePos();
883 CFX_ByteString strWord = m_Syntax.GetKeyword();
884 if (!strWord.Compare(FX_BSTRC("startxref"))) {
885 FX_BOOL bNumber = FALSE;
886 CFX_ByteString bsOffset = m_Syntax.GetNextWord(bNumber);
887 if (bNumber) {
888 m_LastXRefOffset = FXSYS_atoi(bsOffset);
889 }
890 }
891 m_Syntax.RestorePos(dwSavePos);
892 }
893 } else {
894 pObj->Release();
895 }
896 }
897 }
898 }
899 --i;
900 status = 0;
901 } else if (byte == "trailer"[inside_index]) {
902 inside_index ++;
903 } else {
904 --i;
905 status = 0;
906 }
907 break;
908 case 8:
909 if (inside_index == 4) {
910 last_xref = pos + i - 4;
911 status = 1;
912 } else if (byte == "xref"[inside_index]) {
913 inside_index ++;
914 } else {
915 --i;
916 status = 0;
917 }
918 break;
919 case 9:
920 if (byte == '\r' || byte == '\n') {
921 status = 0;
922 }
923 break;
924 case 10:
925 if (byte == ')') {
926 if (depth > 0) {
927 depth--;
928 }
929 } else if (byte == '(') {
930 depth++;
931 }
932 if (!depth) {
933 status = 0;
934 }
935 break;
936 case 11:
937 if (byte == '<' && inside_index == 1) {
938 status = 12;
939 } else if (byte == '>') {
940 status = 0;
941 }
942 inside_index = 0;
943 break;
944 case 12:
945 --i;
946 status = 0;
947 break;
948 case 13:
949 if (PDF_CharType[byte] == 'D' || PDF_CharType[byte] == 'W') {
950 --i;
951 status = 0;
952 }
953 break;
954 case 14:
955 if (PDF_CharType[byte] == 'W') {
956 status = 0;
957 } else if (byte == '%' || byte == '(' || byte == '<' || byte == '\\') {
958 status = 0;
959 --i;
960 } else if (inside_index == 6) {
961 status = 0;
962 --i;
963 } else if (byte == "endobj"[inside_index]) {
964 inside_index++;
965 }
966 break;
967 }
968 if (bOverFlow) {
969 size = 0;
970 break;
971 }
972 }
973 pos += size;
974 }
975 if (last_xref != -1 && last_xref > last_obj) {
976 last_trailer = last_xref;
977 } else if (last_trailer == -1 || last_xref < last_obj) {
978 last_trailer = m_Syntax.m_FileLen;
979 }
980 FX_FILESIZE offset = last_trailer - m_Syntax.m_HeaderOffset;
981 FX_LPVOID pResult = FXSYS_bsearch(&offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
982 if (pResult == NULL) {
983 m_SortedOffset.Add(offset);
984 }
985 FX_Free(buffer);
986 return TRUE;
987 }
_GetVarInt(FX_LPCBYTE p,FX_INT32 n)988 static FX_DWORD _GetVarInt(FX_LPCBYTE p, FX_INT32 n)
989 {
990 FX_DWORD result = 0;
991 for (FX_INT32 i = 0; i < n; i ++) {
992 result = result * 256 + p[i];
993 }
994 return result;
995 }
LoadCrossRefV5(FX_FILESIZE pos,FX_FILESIZE & prev,FX_BOOL bMainXRef)996 FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE pos, FX_FILESIZE& prev, FX_BOOL bMainXRef)
997 {
998 CPDF_Stream* pStream = (CPDF_Stream*)ParseIndirectObjectAt(m_pDocument, pos, 0, NULL);
999 if (!pStream) {
1000 return FALSE;
1001 }
1002 if (m_pDocument) {
1003 CPDF_Dictionary * pDict = m_pDocument->GetRoot();
1004 if (!pDict || pDict->GetObjNum() != pStream->m_ObjNum) {
1005 m_pDocument->InsertIndirectObject(pStream->m_ObjNum, pStream);
1006 } else {
1007 if (pStream->GetType() == PDFOBJ_STREAM) {
1008 pStream->Release();
1009 }
1010 return FALSE;
1011 }
1012 }
1013 if (pStream->GetType() != PDFOBJ_STREAM) {
1014 return FALSE;
1015 }
1016 prev = pStream->GetDict()->GetInteger(FX_BSTRC("Prev"));
1017 FX_INT32 size = pStream->GetDict()->GetInteger(FX_BSTRC("Size"));
1018 if (size < 0) {
1019 pStream->Release();
1020 return FALSE;
1021 }
1022 if (bMainXRef) {
1023 m_pTrailer = (CPDF_Dictionary*)pStream->GetDict()->Clone();
1024 m_CrossRef.SetSize(size);
1025 if (m_V5Type.SetSize(size)) {
1026 FXSYS_memset32(m_V5Type.GetData(), 0, size);
1027 }
1028 } else {
1029 m_Trailers.Add((CPDF_Dictionary*)pStream->GetDict()->Clone());
1030 }
1031 std::vector<std::pair<FX_INT32, FX_INT32> > arrIndex;
1032 CPDF_Array* pArray = pStream->GetDict()->GetArray(FX_BSTRC("Index"));
1033 if (pArray) {
1034 FX_DWORD nPairSize = pArray->GetCount() / 2;
1035 for (FX_DWORD i = 0; i < nPairSize; i++) {
1036 CPDF_Object* pStartNumObj = pArray->GetElement(i * 2);
1037 CPDF_Object* pCountObj = pArray->GetElement(i * 2 + 1);
1038 if (pStartNumObj && pStartNumObj->GetType() == PDFOBJ_NUMBER
1039 && pCountObj && pCountObj->GetType() == PDFOBJ_NUMBER) {
1040 int nStartNum = pStartNumObj->GetInteger();
1041 int nCount = pCountObj->GetInteger();
1042 if (nStartNum >= 0 && nCount > 0) {
1043 arrIndex.push_back(std::make_pair(nStartNum, nCount));
1044 }
1045 }
1046 }
1047 }
1048 if (arrIndex.size() == 0) {
1049 arrIndex.push_back(std::make_pair(0, size));
1050 }
1051 pArray = pStream->GetDict()->GetArray(FX_BSTRC("W"));
1052 if (pArray == NULL) {
1053 pStream->Release();
1054 return FALSE;
1055 }
1056 CFX_DWordArray WidthArray;
1057 FX_SAFE_DWORD dwAccWidth = 0;
1058 for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
1059 WidthArray.Add(pArray->GetInteger(i));
1060 dwAccWidth += WidthArray[i];
1061 }
1062 if (!dwAccWidth.IsValid() || WidthArray.GetSize() < 3) {
1063 pStream->Release();
1064 return FALSE;
1065 }
1066 FX_DWORD totalWidth = dwAccWidth.ValueOrDie();
1067 CPDF_StreamAcc acc;
1068 acc.LoadAllData(pStream);
1069 FX_LPCBYTE pData = acc.GetData();
1070 FX_DWORD dwTotalSize = acc.GetSize();
1071 FX_DWORD segindex = 0;
1072 for (FX_DWORD i = 0; i < arrIndex.size(); i ++) {
1073 FX_INT32 startnum = arrIndex[i].first;
1074 if (startnum < 0) {
1075 continue;
1076 }
1077 m_dwXrefStartObjNum = pdfium::base::checked_cast<FX_DWORD, FX_INT32> (startnum);
1078 FX_DWORD count = pdfium::base::checked_cast<FX_DWORD, FX_INT32> (arrIndex[i].second);
1079 FX_SAFE_DWORD dwCaculatedSize = segindex;
1080 dwCaculatedSize += count;
1081 dwCaculatedSize *= totalWidth;
1082 if (!dwCaculatedSize.IsValid() || dwCaculatedSize.ValueOrDie() > dwTotalSize) {
1083 continue;
1084 }
1085 FX_LPCBYTE segstart = pData + segindex * totalWidth;
1086 FX_SAFE_DWORD dwMaxObjNum = startnum;
1087 dwMaxObjNum += count;
1088 FX_DWORD dwV5Size = pdfium::base::checked_cast<FX_DWORD, FX_INT32> (m_V5Type.GetSize());
1089 if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size) {
1090 continue;
1091 }
1092 for (FX_DWORD j = 0; j < count; j ++) {
1093 FX_INT32 type = 1;
1094 FX_LPCBYTE entrystart = segstart + j * totalWidth;
1095 if (WidthArray[0]) {
1096 type = _GetVarInt(entrystart, WidthArray[0]);
1097 }
1098 if (m_V5Type[startnum + j] == 255) {
1099 FX_FILESIZE offset = _GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
1100 m_CrossRef[startnum + j] = offset;
1101 FX_LPVOID pResult = FXSYS_bsearch(&offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
1102 if (pResult == NULL) {
1103 m_SortedOffset.Add(offset);
1104 }
1105 continue;
1106 }
1107 if (m_V5Type[startnum + j]) {
1108 continue;
1109 }
1110 m_V5Type[startnum + j] = type;
1111 if (type == 0) {
1112 m_CrossRef[startnum + j] = 0;
1113 } else {
1114 FX_FILESIZE offset = _GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
1115 m_CrossRef[startnum + j] = offset;
1116 if (type == 1) {
1117 FX_LPVOID pResult = FXSYS_bsearch(&offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
1118 if (pResult == NULL) {
1119 m_SortedOffset.Add(offset);
1120 }
1121 } else {
1122 if (offset < 0 || offset >= m_V5Type.GetSize()) {
1123 pStream->Release();
1124 return FALSE;
1125 }
1126 m_V5Type[offset] = 255;
1127 }
1128 }
1129 }
1130 segindex += count;
1131 }
1132 pStream->Release();
1133 return TRUE;
1134 }
GetIDArray()1135 CPDF_Array* CPDF_Parser::GetIDArray()
1136 {
1137 CPDF_Object* pID = m_pTrailer ? m_pTrailer->GetElement(FX_BSTRC("ID")) : NULL;
1138 if (pID == NULL) {
1139 return NULL;
1140 }
1141 if (pID->GetType() == PDFOBJ_REFERENCE) {
1142 pID = ParseIndirectObject(NULL, ((CPDF_Reference*)pID)->GetRefObjNum());
1143 m_pTrailer->SetAt(FX_BSTRC("ID"), pID);
1144 }
1145 if (pID == NULL || pID->GetType() != PDFOBJ_ARRAY) {
1146 return NULL;
1147 }
1148 return (CPDF_Array*)pID;
1149 }
GetRootObjNum()1150 FX_DWORD CPDF_Parser::GetRootObjNum()
1151 {
1152 CPDF_Object* pRef = m_pTrailer ? m_pTrailer->GetElement(FX_BSTRC("Root")) : NULL;
1153 if (pRef == NULL || pRef->GetType() != PDFOBJ_REFERENCE) {
1154 return 0;
1155 }
1156 return ((CPDF_Reference*) pRef)->GetRefObjNum();
1157 }
GetInfoObjNum()1158 FX_DWORD CPDF_Parser::GetInfoObjNum()
1159 {
1160 CPDF_Object* pRef = m_pTrailer ? m_pTrailer->GetElement(FX_BSTRC("Info")) : NULL;
1161 if (pRef == NULL || pRef->GetType() != PDFOBJ_REFERENCE) {
1162 return 0;
1163 }
1164 return ((CPDF_Reference*) pRef)->GetRefObjNum();
1165 }
IsFormStream(FX_DWORD objnum,FX_BOOL & bForm)1166 FX_BOOL CPDF_Parser::IsFormStream(FX_DWORD objnum, FX_BOOL& bForm)
1167 {
1168 bForm = FALSE;
1169 if (objnum >= (FX_DWORD)m_CrossRef.GetSize()) {
1170 return TRUE;
1171 }
1172 if (m_V5Type[objnum] == 0) {
1173 return TRUE;
1174 }
1175 if (m_V5Type[objnum] == 2) {
1176 return TRUE;
1177 }
1178 FX_FILESIZE pos = m_CrossRef[objnum];
1179 FX_LPVOID pResult = FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
1180 if (pResult == NULL) {
1181 return TRUE;
1182 }
1183 if ((FX_FILESIZE*)pResult - (FX_FILESIZE*)m_SortedOffset.GetData() == m_SortedOffset.GetSize() - 1) {
1184 return FALSE;
1185 }
1186 FX_FILESIZE size = ((FX_FILESIZE*)pResult)[1] - pos;
1187 FX_FILESIZE SavedPos = m_Syntax.SavePos();
1188 m_Syntax.RestorePos(pos);
1189 bForm = m_Syntax.SearchMultiWord(FX_BSTRC("/Form\0stream"), TRUE, size) == 0;
1190 m_Syntax.RestorePos(SavedPos);
1191 return TRUE;
1192 }
ParseIndirectObject(CPDF_IndirectObjects * pObjList,FX_DWORD objnum,PARSE_CONTEXT * pContext)1193 CPDF_Object* CPDF_Parser::ParseIndirectObject(CPDF_IndirectObjects* pObjList, FX_DWORD objnum, PARSE_CONTEXT* pContext)
1194 {
1195 if (objnum >= (FX_DWORD)m_CrossRef.GetSize()) {
1196 return NULL;
1197 }
1198 if (m_V5Type[objnum] == 1 || m_V5Type[objnum] == 255) {
1199 FX_FILESIZE pos = m_CrossRef[objnum];
1200 if (pos <= 0) {
1201 return NULL;
1202 }
1203 return ParseIndirectObjectAt(pObjList, pos, objnum, pContext);
1204 }
1205 if (m_V5Type[objnum] == 2) {
1206 CPDF_StreamAcc* pObjStream = GetObjectStream((FX_DWORD)m_CrossRef[objnum]);
1207 if (pObjStream == NULL) {
1208 return NULL;
1209 }
1210 FX_INT32 n = pObjStream->GetDict()->GetInteger(FX_BSTRC("N"));
1211 FX_INT32 offset = pObjStream->GetDict()->GetInteger(FX_BSTRC("First"));
1212 CPDF_SyntaxParser syntax;
1213 CFX_SmartPointer<IFX_FileStream> file(FX_CreateMemoryStream((FX_LPBYTE)pObjStream->GetData(), (size_t)pObjStream->GetSize(), FALSE));
1214 syntax.InitParser(file.Get(), 0);
1215 CPDF_Object* pRet = NULL;
1216 while (n) {
1217 FX_DWORD thisnum = syntax.GetDirectNum();
1218 FX_DWORD thisoff = syntax.GetDirectNum();
1219 if (thisnum == objnum) {
1220 syntax.RestorePos(offset + thisoff);
1221 pRet = syntax.GetObject(pObjList, 0, 0, pContext);
1222 break;
1223 }
1224 n --;
1225 }
1226 return pRet;
1227 }
1228 return NULL;
1229 }
GetObjectStream(FX_DWORD objnum)1230 CPDF_StreamAcc* CPDF_Parser::GetObjectStream(FX_DWORD objnum)
1231 {
1232 CPDF_StreamAcc* pStreamAcc = NULL;
1233 if (m_ObjectStreamMap.Lookup((void*)(FX_UINTPTR)objnum, (void*&)pStreamAcc)) {
1234 return pStreamAcc;
1235 }
1236 const CPDF_Stream* pStream = m_pDocument ? (CPDF_Stream*)m_pDocument->GetIndirectObject(objnum) : NULL;
1237 if (pStream == NULL || pStream->GetType() != PDFOBJ_STREAM) {
1238 return NULL;
1239 }
1240 pStreamAcc = new CPDF_StreamAcc;
1241 pStreamAcc->LoadAllData(pStream);
1242 m_ObjectStreamMap.SetAt((void*)(FX_UINTPTR)objnum, pStreamAcc);
1243 return pStreamAcc;
1244 }
GetObjectSize(FX_DWORD objnum)1245 FX_FILESIZE CPDF_Parser::GetObjectSize(FX_DWORD objnum)
1246 {
1247 if (objnum >= (FX_DWORD)m_CrossRef.GetSize()) {
1248 return 0;
1249 }
1250 if (m_V5Type[objnum] == 2) {
1251 objnum = (FX_DWORD)m_CrossRef[objnum];
1252 }
1253 if (m_V5Type[objnum] == 1 || m_V5Type[objnum] == 255) {
1254 FX_FILESIZE offset = m_CrossRef[objnum];
1255 if (offset == 0) {
1256 return 0;
1257 }
1258 FX_LPVOID pResult = FXSYS_bsearch(&offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
1259 if (pResult == NULL) {
1260 return 0;
1261 }
1262 if ((FX_FILESIZE*)pResult - (FX_FILESIZE*)m_SortedOffset.GetData() == m_SortedOffset.GetSize() - 1) {
1263 return 0;
1264 }
1265 return ((FX_FILESIZE*)pResult)[1] - offset;
1266 }
1267 return 0;
1268 }
GetIndirectBinary(FX_DWORD objnum,FX_LPBYTE & pBuffer,FX_DWORD & size)1269 void CPDF_Parser::GetIndirectBinary(FX_DWORD objnum, FX_LPBYTE& pBuffer, FX_DWORD& size)
1270 {
1271 pBuffer = NULL;
1272 size = 0;
1273 if (objnum >= (FX_DWORD)m_CrossRef.GetSize()) {
1274 return;
1275 }
1276 if (m_V5Type[objnum] == 2) {
1277 CPDF_StreamAcc* pObjStream = GetObjectStream((FX_DWORD)m_CrossRef[objnum]);
1278 if (pObjStream == NULL) {
1279 return;
1280 }
1281 FX_INT32 n = pObjStream->GetDict()->GetInteger(FX_BSTRC("N"));
1282 FX_INT32 offset = pObjStream->GetDict()->GetInteger(FX_BSTRC("First"));
1283 CPDF_SyntaxParser syntax;
1284 FX_LPCBYTE pData = pObjStream->GetData();
1285 FX_DWORD totalsize = pObjStream->GetSize();
1286 CFX_SmartPointer<IFX_FileStream> file(FX_CreateMemoryStream((FX_LPBYTE)pData, (size_t)totalsize, FALSE));
1287 syntax.InitParser(file.Get(), 0);
1288 while (n) {
1289 FX_DWORD thisnum = syntax.GetDirectNum();
1290 FX_DWORD thisoff = syntax.GetDirectNum();
1291 if (thisnum == objnum) {
1292 if (n == 1) {
1293 size = totalsize - (thisoff + offset);
1294 } else {
1295 syntax.GetDirectNum(); // Skip nextnum.
1296 FX_DWORD nextoff = syntax.GetDirectNum();
1297 size = nextoff - thisoff;
1298 }
1299 pBuffer = FX_Alloc(FX_BYTE, size);
1300 FXSYS_memcpy32(pBuffer, pData + thisoff + offset, size);
1301 return;
1302 }
1303 n --;
1304 }
1305 return;
1306 }
1307 if (m_V5Type[objnum] == 1) {
1308 FX_FILESIZE pos = m_CrossRef[objnum];
1309 if (pos == 0) {
1310 return;
1311 }
1312 FX_FILESIZE SavedPos = m_Syntax.SavePos();
1313 m_Syntax.RestorePos(pos);
1314 FX_BOOL bIsNumber;
1315 CFX_ByteString word = m_Syntax.GetNextWord(bIsNumber);
1316 if (!bIsNumber) {
1317 m_Syntax.RestorePos(SavedPos);
1318 return;
1319 }
1320 FX_DWORD parser_objnum = FXSYS_atoi(word);
1321 if (parser_objnum && parser_objnum != objnum) {
1322 m_Syntax.RestorePos(SavedPos);
1323 return;
1324 }
1325 word = m_Syntax.GetNextWord(bIsNumber);
1326 if (!bIsNumber) {
1327 m_Syntax.RestorePos(SavedPos);
1328 return;
1329 }
1330 if (m_Syntax.GetKeyword() != FX_BSTRC("obj")) {
1331 m_Syntax.RestorePos(SavedPos);
1332 return;
1333 }
1334 FX_LPVOID pResult = FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
1335 if (pResult == NULL) {
1336 m_Syntax.RestorePos(SavedPos);
1337 return;
1338 }
1339 FX_FILESIZE nextoff = ((FX_FILESIZE*)pResult)[1];
1340 FX_BOOL bNextOffValid = FALSE;
1341 if (nextoff != pos) {
1342 m_Syntax.RestorePos(nextoff);
1343 word = m_Syntax.GetNextWord(bIsNumber);
1344 if (word == FX_BSTRC("xref")) {
1345 bNextOffValid = TRUE;
1346 } else if (bIsNumber) {
1347 word = m_Syntax.GetNextWord(bIsNumber);
1348 if (bIsNumber && m_Syntax.GetKeyword() == FX_BSTRC("obj")) {
1349 bNextOffValid = TRUE;
1350 }
1351 }
1352 }
1353 if (!bNextOffValid) {
1354 m_Syntax.RestorePos(pos);
1355 while (1) {
1356 if (m_Syntax.GetKeyword() == FX_BSTRC("endobj")) {
1357 break;
1358 }
1359 if (m_Syntax.SavePos() == m_Syntax.m_FileLen) {
1360 break;
1361 }
1362 }
1363 nextoff = m_Syntax.SavePos();
1364 }
1365 size = (FX_DWORD)(nextoff - pos);
1366 pBuffer = FX_Alloc(FX_BYTE, size);
1367 m_Syntax.RestorePos(pos);
1368 m_Syntax.ReadBlock(pBuffer, size);
1369 m_Syntax.RestorePos(SavedPos);
1370 }
1371 }
ParseIndirectObjectAt(CPDF_IndirectObjects * pObjList,FX_FILESIZE pos,FX_DWORD objnum,PARSE_CONTEXT * pContext)1372 CPDF_Object* CPDF_Parser::ParseIndirectObjectAt(CPDF_IndirectObjects* pObjList, FX_FILESIZE pos, FX_DWORD objnum,
1373 PARSE_CONTEXT* pContext)
1374 {
1375 FX_FILESIZE SavedPos = m_Syntax.SavePos();
1376 m_Syntax.RestorePos(pos);
1377 FX_BOOL bIsNumber;
1378 CFX_ByteString word = m_Syntax.GetNextWord(bIsNumber);
1379 if (!bIsNumber) {
1380 m_Syntax.RestorePos(SavedPos);
1381 return NULL;
1382 }
1383 FX_FILESIZE objOffset = m_Syntax.SavePos();
1384 objOffset -= word.GetLength();
1385 FX_DWORD parser_objnum = FXSYS_atoi(word);
1386 if (objnum && parser_objnum != objnum) {
1387 m_Syntax.RestorePos(SavedPos);
1388 return NULL;
1389 }
1390 word = m_Syntax.GetNextWord(bIsNumber);
1391 if (!bIsNumber) {
1392 m_Syntax.RestorePos(SavedPos);
1393 return NULL;
1394 }
1395 FX_DWORD parser_gennum = FXSYS_atoi(word);
1396 if (m_Syntax.GetKeyword() != FX_BSTRC("obj")) {
1397 m_Syntax.RestorePos(SavedPos);
1398 return NULL;
1399 }
1400 CPDF_Object* pObj = m_Syntax.GetObject(pObjList, objnum, parser_gennum, pContext);
1401 m_Syntax.SavePos();
1402 CFX_ByteString bsWord = m_Syntax.GetKeyword();
1403 if (bsWord == FX_BSTRC("endobj")) {
1404 m_Syntax.SavePos();
1405 }
1406 m_Syntax.RestorePos(SavedPos);
1407 if (pObj) {
1408 if (!objnum) {
1409 pObj->m_ObjNum = parser_objnum;
1410 }
1411 pObj->m_GenNum = parser_gennum;
1412 }
1413 return pObj;
1414 }
ParseIndirectObjectAtByStrict(CPDF_IndirectObjects * pObjList,FX_FILESIZE pos,FX_DWORD objnum,struct PARSE_CONTEXT * pContext,FX_FILESIZE * pResultPos)1415 CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict(CPDF_IndirectObjects* pObjList, FX_FILESIZE pos, FX_DWORD objnum,
1416 struct PARSE_CONTEXT* pContext, FX_FILESIZE *pResultPos)
1417 {
1418 FX_FILESIZE SavedPos = m_Syntax.SavePos();
1419 m_Syntax.RestorePos(pos);
1420 FX_BOOL bIsNumber;
1421 CFX_ByteString word = m_Syntax.GetNextWord(bIsNumber);
1422 if (!bIsNumber) {
1423 m_Syntax.RestorePos(SavedPos);
1424 return NULL;
1425 }
1426 FX_DWORD parser_objnum = FXSYS_atoi(word);
1427 if (objnum && parser_objnum != objnum) {
1428 m_Syntax.RestorePos(SavedPos);
1429 return NULL;
1430 }
1431 word = m_Syntax.GetNextWord(bIsNumber);
1432 if (!bIsNumber) {
1433 m_Syntax.RestorePos(SavedPos);
1434 return NULL;
1435 }
1436 FX_DWORD gennum = FXSYS_atoi(word);
1437 if (m_Syntax.GetKeyword() != FX_BSTRC("obj")) {
1438 m_Syntax.RestorePos(SavedPos);
1439 return NULL;
1440 }
1441 CPDF_Object* pObj = m_Syntax.GetObjectByStrict(pObjList, objnum, gennum, pContext);
1442 if (pResultPos) {
1443 *pResultPos = m_Syntax.m_Pos;
1444 }
1445 m_Syntax.RestorePos(SavedPos);
1446 return pObj;
1447 }
LoadTrailerV4()1448 CPDF_Dictionary* CPDF_Parser::LoadTrailerV4()
1449 {
1450 if (m_Syntax.GetKeyword() != FX_BSTRC("trailer")) {
1451 return NULL;
1452 }
1453 CPDF_Object* pObj = m_Syntax.GetObject(m_pDocument, 0, 0, 0);
1454 if (pObj == NULL || pObj->GetType() != PDFOBJ_DICTIONARY) {
1455 if (pObj) {
1456 pObj->Release();
1457 }
1458 return NULL;
1459 }
1460 return (CPDF_Dictionary*)pObj;
1461 }
GetPermissions(FX_BOOL bCheckRevision)1462 FX_DWORD CPDF_Parser::GetPermissions(FX_BOOL bCheckRevision)
1463 {
1464 if (m_pSecurityHandler == NULL) {
1465 return (FX_DWORD) - 1;
1466 }
1467 FX_DWORD dwPermission = m_pSecurityHandler->GetPermissions();
1468 if (m_pEncryptDict && m_pEncryptDict->GetString(FX_BSTRC("Filter")) == FX_BSTRC("Standard")) {
1469 dwPermission &= 0xFFFFFFFC;
1470 dwPermission |= 0xFFFFF0C0;
1471 if(bCheckRevision && m_pEncryptDict->GetInteger(FX_BSTRC("R")) == 2) {
1472 dwPermission &= 0xFFFFF0FF;
1473 }
1474 }
1475 return dwPermission;
1476 }
IsOwner()1477 FX_BOOL CPDF_Parser::IsOwner()
1478 {
1479 return m_pSecurityHandler == NULL ? TRUE : m_pSecurityHandler->IsOwner();
1480 }
SetSecurityHandler(CPDF_SecurityHandler * pSecurityHandler,FX_BOOL bForced)1481 void CPDF_Parser::SetSecurityHandler(CPDF_SecurityHandler* pSecurityHandler, FX_BOOL bForced)
1482 {
1483 ASSERT(m_pSecurityHandler == NULL);
1484 if (m_pSecurityHandler && !m_bForceUseSecurityHandler) {
1485 delete m_pSecurityHandler;
1486 m_pSecurityHandler = NULL;
1487 }
1488 m_bForceUseSecurityHandler = bForced;
1489 m_pSecurityHandler = pSecurityHandler;
1490 if (m_bForceUseSecurityHandler) {
1491 return;
1492 }
1493 m_Syntax.m_pCryptoHandler = pSecurityHandler->CreateCryptoHandler();
1494 m_Syntax.m_pCryptoHandler->Init(NULL, pSecurityHandler);
1495 }
IsLinearizedFile(IFX_FileRead * pFileAccess,FX_DWORD offset)1496 FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess, FX_DWORD offset)
1497 {
1498 m_Syntax.InitParser(pFileAccess, offset);
1499 m_Syntax.RestorePos(m_Syntax.m_HeaderOffset + 9);
1500 FX_FILESIZE SavedPos = m_Syntax.SavePos();
1501 FX_BOOL bIsNumber;
1502 CFX_ByteString word = m_Syntax.GetNextWord(bIsNumber);
1503 if (!bIsNumber) {
1504 return FALSE;
1505 }
1506 FX_DWORD objnum = FXSYS_atoi(word);
1507 word = m_Syntax.GetNextWord(bIsNumber);
1508 if (!bIsNumber) {
1509 return FALSE;
1510 }
1511 FX_DWORD gennum = FXSYS_atoi(word);
1512 if (m_Syntax.GetKeyword() != FX_BSTRC("obj")) {
1513 m_Syntax.RestorePos(SavedPos);
1514 return FALSE;
1515 }
1516 m_pLinearized = m_Syntax.GetObject(NULL, objnum, gennum, 0);
1517 if (!m_pLinearized) {
1518 return FALSE;
1519 }
1520 if (m_pLinearized->GetDict() && m_pLinearized->GetDict()->GetElement(FX_BSTRC("Linearized"))) {
1521 m_Syntax.GetNextWord(bIsNumber);
1522 CPDF_Object *pLen = m_pLinearized->GetDict()->GetElement(FX_BSTRC("L"));
1523 if (!pLen) {
1524 m_pLinearized->Release();
1525 m_pLinearized = NULL;
1526 return FALSE;
1527 }
1528 if (pLen->GetInteger() != (int)pFileAccess->GetSize()) {
1529 return FALSE;
1530 }
1531 CPDF_Object *pNo = m_pLinearized->GetDict()->GetElement(FX_BSTRC("P"));
1532 if (pNo && pNo->GetType() == PDFOBJ_NUMBER) {
1533 m_dwFirstPageNo = pNo->GetInteger();
1534 }
1535 CPDF_Object *pTable = m_pLinearized->GetDict()->GetElement(FX_BSTRC("T"));
1536 if (pTable && pTable->GetType() == PDFOBJ_NUMBER) {
1537 m_LastXRefOffset = pTable->GetInteger();
1538 }
1539 return TRUE;
1540 }
1541 m_pLinearized->Release();
1542 m_pLinearized = NULL;
1543 return FALSE;
1544 }
StartAsynParse(IFX_FileRead * pFileAccess,FX_BOOL bReParse,FX_BOOL bOwnFileRead)1545 FX_DWORD CPDF_Parser::StartAsynParse(IFX_FileRead* pFileAccess, FX_BOOL bReParse, FX_BOOL bOwnFileRead)
1546 {
1547 CloseParser(bReParse);
1548 m_bXRefStream = FALSE;
1549 m_LastXRefOffset = 0;
1550 m_bOwnFileRead = bOwnFileRead;
1551 FX_INT32 offset = GetHeaderOffset(pFileAccess);
1552 if (offset == -1) {
1553 return PDFPARSE_ERROR_FORMAT;
1554 }
1555 if (!IsLinearizedFile(pFileAccess, offset)) {
1556 m_Syntax.m_pFileAccess = NULL;
1557 return StartParse(pFileAccess, bReParse, bOwnFileRead);
1558 }
1559 if (!bReParse) {
1560 m_pDocument = new CPDF_Document(this);
1561 }
1562 FX_FILESIZE dwFirstXRefOffset = m_Syntax.SavePos();
1563 FX_BOOL bXRefRebuilt = FALSE;
1564 FX_BOOL bLoadV4 = FALSE;
1565 if (!(bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, FALSE, FALSE)) && !LoadCrossRefV5(dwFirstXRefOffset, dwFirstXRefOffset, TRUE)) {
1566 if (!RebuildCrossRef()) {
1567 return PDFPARSE_ERROR_FORMAT;
1568 }
1569 bXRefRebuilt = TRUE;
1570 m_LastXRefOffset = 0;
1571 }
1572 if (bLoadV4) {
1573 m_pTrailer = LoadTrailerV4();
1574 if (m_pTrailer == NULL) {
1575 return FALSE;
1576 }
1577 FX_INT32 xrefsize = GetDirectInteger(m_pTrailer, FX_BSTRC("Size"));
1578 if (xrefsize > 0) {
1579 m_CrossRef.SetSize(xrefsize);
1580 m_V5Type.SetSize(xrefsize);
1581 }
1582 }
1583 FX_DWORD dwRet = SetEncryptHandler();
1584 if (dwRet != PDFPARSE_ERROR_SUCCESS) {
1585 return dwRet;
1586 }
1587 m_pDocument->LoadAsynDoc(m_pLinearized->GetDict());
1588 if (m_pDocument->GetRoot() == NULL || m_pDocument->GetPageCount() == 0) {
1589 if (bXRefRebuilt) {
1590 return PDFPARSE_ERROR_FORMAT;
1591 }
1592 ReleaseEncryptHandler();
1593 if (!RebuildCrossRef()) {
1594 return PDFPARSE_ERROR_FORMAT;
1595 }
1596 dwRet = SetEncryptHandler();
1597 if (dwRet != PDFPARSE_ERROR_SUCCESS) {
1598 return dwRet;
1599 }
1600 m_pDocument->LoadAsynDoc(m_pLinearized->GetDict());
1601 if (m_pDocument->GetRoot() == NULL) {
1602 return PDFPARSE_ERROR_FORMAT;
1603 }
1604 }
1605 FXSYS_qsort(m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
1606 FX_DWORD RootObjNum = GetRootObjNum();
1607 if (RootObjNum == 0) {
1608 ReleaseEncryptHandler();
1609 RebuildCrossRef();
1610 RootObjNum = GetRootObjNum();
1611 if (RootObjNum == 0) {
1612 return PDFPARSE_ERROR_FORMAT;
1613 }
1614 dwRet = SetEncryptHandler();
1615 if (dwRet != PDFPARSE_ERROR_SUCCESS) {
1616 return dwRet;
1617 }
1618 }
1619 if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
1620 CPDF_Object* pMetadata = m_pDocument->GetRoot()->GetElement(FX_BSTRC("Metadata"));
1621 if (pMetadata && pMetadata->GetType() == PDFOBJ_REFERENCE) {
1622 m_Syntax.m_MetadataObjnum = ((CPDF_Reference*) pMetadata)->GetRefObjNum();
1623 }
1624 }
1625 return PDFPARSE_ERROR_SUCCESS;
1626 }
LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos)1627 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos)
1628 {
1629 if (!LoadCrossRefV5(xrefpos, xrefpos, FALSE)) {
1630 return FALSE;
1631 }
1632 while (xrefpos)
1633 if (!LoadCrossRefV5(xrefpos, xrefpos, FALSE)) {
1634 return FALSE;
1635 }
1636 m_ObjectStreamMap.InitHashTable(101, FALSE);
1637 m_bXRefStream = TRUE;
1638 return TRUE;
1639 }
LoadLinearizedMainXRefTable()1640 FX_DWORD CPDF_Parser::LoadLinearizedMainXRefTable()
1641 {
1642 FX_DWORD dwSaveMetadataObjnum = m_Syntax.m_MetadataObjnum;
1643 m_Syntax.m_MetadataObjnum = 0;
1644 if (m_pTrailer) {
1645 m_pTrailer->Release();
1646 m_pTrailer = NULL;
1647 }
1648 m_Syntax.RestorePos(m_LastXRefOffset - m_Syntax.m_HeaderOffset);
1649 FX_BYTE ch = 0;
1650 FX_DWORD dwCount = 0;
1651 m_Syntax.GetNextChar(ch);
1652 FX_INT32 type = PDF_CharType[ch];
1653 while (type == 'W') {
1654 ++dwCount;
1655 if (m_Syntax.m_FileLen >= (FX_FILESIZE)(m_Syntax.SavePos() + m_Syntax.m_HeaderOffset)) {
1656 break;
1657 }
1658 m_Syntax.GetNextChar(ch);
1659 type = PDF_CharType[ch];
1660 }
1661 m_LastXRefOffset += dwCount;
1662 FX_POSITION pos = m_ObjectStreamMap.GetStartPosition();
1663 while (pos) {
1664 FX_LPVOID objnum;
1665 CPDF_StreamAcc* pStream;
1666 m_ObjectStreamMap.GetNextAssoc(pos, objnum, (void*&)pStream);
1667 delete pStream;
1668 }
1669 m_ObjectStreamMap.RemoveAll();
1670 if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) && !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) {
1671 m_LastXRefOffset = 0;
1672 m_Syntax.m_MetadataObjnum = dwSaveMetadataObjnum;
1673 return PDFPARSE_ERROR_FORMAT;
1674 }
1675 FXSYS_qsort(m_SortedOffset.GetData(), m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
1676 m_Syntax.m_MetadataObjnum = dwSaveMetadataObjnum;
1677 return PDFPARSE_ERROR_SUCCESS;
1678 }
1679
// static
// Current nesting depth of CPDF_SyntaxParser::GetObject() calls. GetObject()
// increments it on entry and returns NULL once it exceeds
// kParserMaxRecursionDepth.
int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;
1682
CPDF_SyntaxParser()1683 CPDF_SyntaxParser::CPDF_SyntaxParser()
1684 {
1685 m_pFileAccess = NULL;
1686 m_pCryptoHandler = NULL;
1687 m_pFileBuf = NULL;
1688 m_BufSize = CPDF_ModuleMgr::Get()->m_FileBufSize;
1689 m_pFileBuf = NULL;
1690 m_MetadataObjnum = 0;
1691 m_dwWordPos = 0;
1692 m_bFileStream = FALSE;
1693 }
~CPDF_SyntaxParser()1694 CPDF_SyntaxParser::~CPDF_SyntaxParser()
1695 {
1696 if (m_pFileBuf) {
1697 FX_Free(m_pFileBuf);
1698 }
1699 }
GetCharAt(FX_FILESIZE pos,FX_BYTE & ch)1700 FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, FX_BYTE& ch)
1701 {
1702 FX_FILESIZE save_pos = m_Pos;
1703 m_Pos = pos;
1704 FX_BOOL ret = GetNextChar(ch);
1705 m_Pos = save_pos;
1706 return ret;
1707 }
GetNextChar(FX_BYTE & ch)1708 FX_BOOL CPDF_SyntaxParser::GetNextChar(FX_BYTE& ch)
1709 {
1710 FX_FILESIZE pos = m_Pos + m_HeaderOffset;
1711 if (pos >= m_FileLen) {
1712 return FALSE;
1713 }
1714 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
1715 FX_FILESIZE read_pos = pos;
1716 FX_DWORD read_size = m_BufSize;
1717 if ((FX_FILESIZE)read_size > m_FileLen) {
1718 read_size = (FX_DWORD)m_FileLen;
1719 }
1720 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
1721 if (m_FileLen < (FX_FILESIZE)read_size) {
1722 read_pos = 0;
1723 read_size = (FX_DWORD)m_FileLen;
1724 } else {
1725 read_pos = m_FileLen - read_size;
1726 }
1727 }
1728 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) {
1729 return FALSE;
1730 }
1731 m_BufOffset = read_pos;
1732 }
1733 ch = m_pFileBuf[pos - m_BufOffset];
1734 m_Pos ++;
1735 return TRUE;
1736 }
GetCharAtBackward(FX_FILESIZE pos,FX_BYTE & ch)1737 FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, FX_BYTE& ch)
1738 {
1739 pos += m_HeaderOffset;
1740 if (pos >= m_FileLen) {
1741 return FALSE;
1742 }
1743 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) {
1744 FX_FILESIZE read_pos;
1745 if (pos < (FX_FILESIZE)m_BufSize) {
1746 read_pos = 0;
1747 } else {
1748 read_pos = pos - m_BufSize + 1;
1749 }
1750 FX_DWORD read_size = m_BufSize;
1751 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) {
1752 if (m_FileLen < (FX_FILESIZE)read_size) {
1753 read_pos = 0;
1754 read_size = (FX_DWORD)m_FileLen;
1755 } else {
1756 read_pos = m_FileLen - read_size;
1757 }
1758 }
1759 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) {
1760 return FALSE;
1761 }
1762 m_BufOffset = read_pos;
1763 }
1764 ch = m_pFileBuf[pos - m_BufOffset];
1765 return TRUE;
1766 }
ReadBlock(FX_LPBYTE pBuf,FX_DWORD size)1767 FX_BOOL CPDF_SyntaxParser::ReadBlock(FX_LPBYTE pBuf, FX_DWORD size)
1768 {
1769 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size)) {
1770 return FALSE;
1771 }
1772 m_Pos += size;
1773 return TRUE;
1774 }
1775 #define MAX_WORD_BUFFER 256
GetNextWord()1776 void CPDF_SyntaxParser::GetNextWord()
1777 {
1778 m_WordSize = 0;
1779 m_bIsNumber = TRUE;
1780 FX_BYTE ch;
1781 if (!GetNextChar(ch)) {
1782 return;
1783 }
1784 FX_BYTE type = PDF_CharType[ch];
1785 while (1) {
1786 while (type == 'W') {
1787 if (!GetNextChar(ch)) {
1788 return;
1789 }
1790 type = PDF_CharType[ch];
1791 }
1792 if (ch != '%') {
1793 break;
1794 }
1795 while (1) {
1796 if (!GetNextChar(ch)) {
1797 return;
1798 }
1799 if (ch == '\r' || ch == '\n') {
1800 break;
1801 }
1802 }
1803 type = PDF_CharType[ch];
1804 }
1805 if (type == 'D') {
1806 m_bIsNumber = FALSE;
1807 m_WordBuffer[m_WordSize++] = ch;
1808 if (ch == '/') {
1809 while (1) {
1810 if (!GetNextChar(ch)) {
1811 return;
1812 }
1813 type = PDF_CharType[ch];
1814 if (type != 'R' && type != 'N') {
1815 m_Pos --;
1816 return;
1817 }
1818 if (m_WordSize < MAX_WORD_BUFFER) {
1819 m_WordBuffer[m_WordSize++] = ch;
1820 }
1821 }
1822 } else if (ch == '<') {
1823 if (!GetNextChar(ch)) {
1824 return;
1825 }
1826 if (ch == '<') {
1827 m_WordBuffer[m_WordSize++] = ch;
1828 } else {
1829 m_Pos --;
1830 }
1831 } else if (ch == '>') {
1832 if (!GetNextChar(ch)) {
1833 return;
1834 }
1835 if (ch == '>') {
1836 m_WordBuffer[m_WordSize++] = ch;
1837 } else {
1838 m_Pos --;
1839 }
1840 }
1841 return;
1842 }
1843 while (1) {
1844 if (m_WordSize < MAX_WORD_BUFFER) {
1845 m_WordBuffer[m_WordSize++] = ch;
1846 }
1847 if (type != 'N') {
1848 m_bIsNumber = FALSE;
1849 }
1850 if (!GetNextChar(ch)) {
1851 return;
1852 }
1853 type = PDF_CharType[ch];
1854 if (type == 'D' || type == 'W') {
1855 m_Pos --;
1856 break;
1857 }
1858 }
1859 }
ReadString()1860 CFX_ByteString CPDF_SyntaxParser::ReadString()
1861 {
1862 FX_BYTE ch;
1863 if (!GetNextChar(ch)) {
1864 return CFX_ByteString();
1865 }
1866 CFX_ByteTextBuf buf;
1867 FX_INT32 parlevel = 0;
1868 FX_INT32 status = 0, iEscCode = 0;
1869 while (1) {
1870 switch (status) {
1871 case 0:
1872 if (ch == ')') {
1873 if (parlevel == 0) {
1874 return buf.GetByteString();
1875 }
1876 parlevel --;
1877 buf.AppendChar(')');
1878 } else if (ch == '(') {
1879 parlevel ++;
1880 buf.AppendChar('(');
1881 } else if (ch == '\\') {
1882 status = 1;
1883 } else {
1884 buf.AppendChar(ch);
1885 }
1886 break;
1887 case 1:
1888 if (ch >= '0' && ch <= '7') {
1889 iEscCode = ch - '0';
1890 status = 2;
1891 break;
1892 }
1893 if (ch == 'n') {
1894 buf.AppendChar('\n');
1895 } else if (ch == 'r') {
1896 buf.AppendChar('\r');
1897 } else if (ch == 't') {
1898 buf.AppendChar('\t');
1899 } else if (ch == 'b') {
1900 buf.AppendChar('\b');
1901 } else if (ch == 'f') {
1902 buf.AppendChar('\f');
1903 } else if (ch == '\r') {
1904 status = 4;
1905 break;
1906 } else if (ch == '\n') {
1907 } else {
1908 buf.AppendChar(ch);
1909 }
1910 status = 0;
1911 break;
1912 case 2:
1913 if (ch >= '0' && ch <= '7') {
1914 iEscCode = iEscCode * 8 + ch - '0';
1915 status = 3;
1916 } else {
1917 buf.AppendChar(iEscCode);
1918 status = 0;
1919 continue;
1920 }
1921 break;
1922 case 3:
1923 if (ch >= '0' && ch <= '7') {
1924 iEscCode = iEscCode * 8 + ch - '0';
1925 buf.AppendChar(iEscCode);
1926 status = 0;
1927 } else {
1928 buf.AppendChar(iEscCode);
1929 status = 0;
1930 continue;
1931 }
1932 break;
1933 case 4:
1934 status = 0;
1935 if (ch != '\n') {
1936 continue;
1937 }
1938 break;
1939 }
1940 if (!GetNextChar(ch)) {
1941 break;
1942 }
1943 }
1944 GetNextChar(ch);
1945 return buf.GetByteString();
1946 }
ReadHexString()1947 CFX_ByteString CPDF_SyntaxParser::ReadHexString()
1948 {
1949 FX_BYTE ch;
1950 if (!GetNextChar(ch)) {
1951 return CFX_ByteString();
1952 }
1953 CFX_BinaryBuf buf;
1954 FX_BOOL bFirst = TRUE;
1955 FX_BYTE code = 0;
1956 while (1) {
1957 if (ch == '>') {
1958 break;
1959 }
1960 if (ch >= '0' && ch <= '9') {
1961 if (bFirst) {
1962 code = (ch - '0') * 16;
1963 } else {
1964 code += ch - '0';
1965 buf.AppendByte((FX_BYTE)code);
1966 }
1967 bFirst = !bFirst;
1968 } else if (ch >= 'A' && ch <= 'F') {
1969 if (bFirst) {
1970 code = (ch - 'A' + 10) * 16;
1971 } else {
1972 code += ch - 'A' + 10;
1973 buf.AppendByte((FX_BYTE)code);
1974 }
1975 bFirst = !bFirst;
1976 } else if (ch >= 'a' && ch <= 'f') {
1977 if (bFirst) {
1978 code = (ch - 'a' + 10) * 16;
1979 } else {
1980 code += ch - 'a' + 10;
1981 buf.AppendByte((FX_BYTE)code);
1982 }
1983 bFirst = !bFirst;
1984 }
1985 if (!GetNextChar(ch)) {
1986 break;
1987 }
1988 }
1989 if (!bFirst) {
1990 buf.AppendByte((FX_BYTE)code);
1991 }
1992 return buf.GetByteString();
1993 }
ToNextLine()1994 void CPDF_SyntaxParser::ToNextLine()
1995 {
1996 FX_BYTE ch;
1997 while (1) {
1998 if (!GetNextChar(ch)) {
1999 return;
2000 }
2001 if (ch == '\n') {
2002 return;
2003 }
2004 if (ch == '\r') {
2005 GetNextChar(ch);
2006 if (ch == '\n') {
2007 return;
2008 } else {
2009 m_Pos --;
2010 return;
2011 }
2012 }
2013 }
2014 }
ToNextWord()2015 void CPDF_SyntaxParser::ToNextWord()
2016 {
2017 FX_BYTE ch;
2018 if (!GetNextChar(ch)) {
2019 return;
2020 }
2021 FX_BYTE type = PDF_CharType[ch];
2022 while (1) {
2023 while (type == 'W') {
2024 m_dwWordPos = m_Pos;
2025 if (!GetNextChar(ch)) {
2026 return;
2027 }
2028 type = PDF_CharType[ch];
2029 }
2030 if (ch != '%') {
2031 break;
2032 }
2033 while (1) {
2034 if (!GetNextChar(ch)) {
2035 return;
2036 }
2037 if (ch == '\r' || ch == '\n') {
2038 break;
2039 }
2040 }
2041 type = PDF_CharType[ch];
2042 }
2043 m_Pos --;
2044 }
GetNextWord(FX_BOOL & bIsNumber)2045 CFX_ByteString CPDF_SyntaxParser::GetNextWord(FX_BOOL& bIsNumber)
2046 {
2047 GetNextWord();
2048 bIsNumber = m_bIsNumber;
2049 return CFX_ByteString((FX_LPCSTR)m_WordBuffer, m_WordSize);
2050 }
GetKeyword()2051 CFX_ByteString CPDF_SyntaxParser::GetKeyword()
2052 {
2053 GetNextWord();
2054 return CFX_ByteString((FX_LPCSTR)m_WordBuffer, m_WordSize);
2055 }
GetObject(CPDF_IndirectObjects * pObjList,FX_DWORD objnum,FX_DWORD gennum,PARSE_CONTEXT * pContext,FX_BOOL bDecrypt)2056 CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjects* pObjList, FX_DWORD objnum, FX_DWORD gennum, PARSE_CONTEXT* pContext, FX_BOOL bDecrypt)
2057 {
2058 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
2059 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) {
2060 return NULL;
2061 }
2062 FX_FILESIZE SavedPos = m_Pos;
2063 FX_BOOL bTypeOnly = pContext && (pContext->m_Flags & PDFPARSE_TYPEONLY);
2064 FX_BOOL bIsNumber;
2065 CFX_ByteString word = GetNextWord(bIsNumber);
2066 CPDF_Object* pRet = NULL;
2067 if (word.GetLength() == 0) {
2068 if (bTypeOnly) {
2069 return (CPDF_Object*)PDFOBJ_INVALID;
2070 }
2071 return NULL;
2072 }
2073 if (bIsNumber) {
2074 FX_FILESIZE SavedPos = m_Pos;
2075 CFX_ByteString nextword = GetNextWord(bIsNumber);
2076 if (bIsNumber) {
2077 CFX_ByteString nextword2 = GetNextWord(bIsNumber);
2078 if (nextword2 == FX_BSTRC("R")) {
2079 FX_DWORD objnum = FXSYS_atoi(word);
2080 if (bTypeOnly) {
2081 return (CPDF_Object*)PDFOBJ_REFERENCE;
2082 }
2083 pRet = CPDF_Reference::Create(pObjList, objnum);
2084 return pRet;
2085 } else {
2086 m_Pos = SavedPos;
2087 if (bTypeOnly) {
2088 return (CPDF_Object*)PDFOBJ_NUMBER;
2089 }
2090 pRet = CPDF_Number::Create(word);
2091 return pRet;
2092 }
2093 } else {
2094 m_Pos = SavedPos;
2095 if (bTypeOnly) {
2096 return (CPDF_Object*)PDFOBJ_NUMBER;
2097 }
2098 pRet = CPDF_Number::Create(word);
2099 return pRet;
2100 }
2101 }
2102 if (word == FX_BSTRC("true") || word == FX_BSTRC("false")) {
2103 if (bTypeOnly) {
2104 return (CPDF_Object*)PDFOBJ_BOOLEAN;
2105 }
2106 pRet = CPDF_Boolean::Create(word == FX_BSTRC("true"));
2107 return pRet;
2108 }
2109 if (word == FX_BSTRC("null")) {
2110 if (bTypeOnly) {
2111 return (CPDF_Object*)PDFOBJ_NULL;
2112 }
2113 pRet = CPDF_Null::Create();
2114 return pRet;
2115 }
2116 if (word == FX_BSTRC("(")) {
2117 if (bTypeOnly) {
2118 return (CPDF_Object*)PDFOBJ_STRING;
2119 }
2120 CFX_ByteString str = ReadString();
2121 if (m_pCryptoHandler && bDecrypt) {
2122 m_pCryptoHandler->Decrypt(objnum, gennum, str);
2123 }
2124 pRet = CPDF_String::Create(str, FALSE);
2125 return pRet;
2126 }
2127 if (word == FX_BSTRC("<")) {
2128 if (bTypeOnly) {
2129 return (CPDF_Object*)PDFOBJ_STRING;
2130 }
2131 CFX_ByteString str = ReadHexString();
2132 if (m_pCryptoHandler && bDecrypt) {
2133 m_pCryptoHandler->Decrypt(objnum, gennum, str);
2134 }
2135 pRet = CPDF_String::Create(str, TRUE);
2136 return pRet;
2137 }
2138 if (word == FX_BSTRC("[")) {
2139 if (bTypeOnly) {
2140 return (CPDF_Object*)PDFOBJ_ARRAY;
2141 }
2142 CPDF_Array* pArray = CPDF_Array::Create();
2143 while (1) {
2144 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum);
2145 if (pObj == NULL) {
2146 return pArray;
2147 }
2148 pArray->Add(pObj);
2149 }
2150 }
2151 if (word[0] == '/') {
2152 if (bTypeOnly) {
2153 return (CPDF_Object*)PDFOBJ_NAME;
2154 }
2155 pRet = CPDF_Name::Create(PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
2156 return pRet;
2157 }
2158 if (word == FX_BSTRC("<<")) {
2159 if (bTypeOnly) {
2160 return (CPDF_Object*)PDFOBJ_DICTIONARY;
2161 }
2162 if (pContext) {
2163 pContext->m_DictStart = SavedPos;
2164 }
2165 CPDF_Dictionary* pDict = CPDF_Dictionary::Create();
2166 FX_INT32 nKeys = 0;
2167 FX_FILESIZE dwSignValuePos = 0;
2168 while (1) {
2169 FX_BOOL bIsNumber;
2170 CFX_ByteString key = GetNextWord(bIsNumber);
2171 if (key.IsEmpty()) {
2172 if (pDict)
2173 pDict->Release();
2174 return NULL;
2175 }
2176 FX_FILESIZE SavedPos = m_Pos - key.GetLength();
2177 if (key == FX_BSTRC(">>")) {
2178 break;
2179 }
2180 if (key == FX_BSTRC("endobj")) {
2181 m_Pos = SavedPos;
2182 break;
2183 }
2184 if (key[0] != '/') {
2185 continue;
2186 }
2187 nKeys ++;
2188 key = PDF_NameDecode(key);
2189 if (key == FX_BSTRC("/Contents")) {
2190 dwSignValuePos = m_Pos;
2191 }
2192 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum);
2193 if (pObj == NULL) {
2194 continue;
2195 }
2196 if (key.GetLength() >= 1) {
2197 if (nKeys < 32) {
2198 pDict->SetAt(CFX_ByteStringC(key.c_str() + 1, key.GetLength() - 1), pObj);
2199 } else {
2200 pDict->AddValue(CFX_ByteStringC(key.c_str() + 1, key.GetLength() - 1), pObj);
2201 }
2202 }
2203 }
2204 if (IsSignatureDict(pDict)) {
2205 FX_FILESIZE dwSavePos = m_Pos;
2206 m_Pos = dwSignValuePos;
2207 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, NULL, FALSE);
2208 pDict->SetAt(FX_BSTRC("Contents"), pObj);
2209 m_Pos = dwSavePos;
2210 }
2211 if (pContext) {
2212 pContext->m_DictEnd = m_Pos;
2213 if (pContext->m_Flags & PDFPARSE_NOSTREAM) {
2214 return pDict;
2215 }
2216 }
2217 FX_FILESIZE SavedPos = m_Pos;
2218 FX_BOOL bIsNumber;
2219 CFX_ByteString nextword = GetNextWord(bIsNumber);
2220 if (nextword == FX_BSTRC("stream")) {
2221 CPDF_Stream* pStream = ReadStream(pDict, pContext, objnum, gennum);
2222 if (pStream) {
2223 return pStream;
2224 }
2225 if (pDict)
2226 pDict->Release();
2227 return NULL;
2228 } else {
2229 m_Pos = SavedPos;
2230 return pDict;
2231 }
2232 }
2233 if (word == FX_BSTRC(">>")) {
2234 m_Pos = SavedPos;
2235 return NULL;
2236 }
2237 if (bTypeOnly) {
2238 return (CPDF_Object*)PDFOBJ_INVALID;
2239 }
2240 return NULL;
2241 }
GetObjectByStrict(CPDF_IndirectObjects * pObjList,FX_DWORD objnum,FX_DWORD gennum,struct PARSE_CONTEXT * pContext)2242 CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict(CPDF_IndirectObjects* pObjList, FX_DWORD objnum, FX_DWORD gennum, struct PARSE_CONTEXT* pContext)
2243 {
2244 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
2245 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) {
2246 return NULL;
2247 }
2248 FX_FILESIZE SavedPos = m_Pos;
2249 FX_BOOL bTypeOnly = pContext && (pContext->m_Flags & PDFPARSE_TYPEONLY);
2250 FX_BOOL bIsNumber;
2251 CFX_ByteString word = GetNextWord(bIsNumber);
2252 if (word.GetLength() == 0) {
2253 if (bTypeOnly) {
2254 return (CPDF_Object*)PDFOBJ_INVALID;
2255 }
2256 return NULL;
2257 }
2258 if (bIsNumber) {
2259 FX_FILESIZE SavedPos = m_Pos;
2260 CFX_ByteString nextword = GetNextWord(bIsNumber);
2261 if (bIsNumber) {
2262 CFX_ByteString nextword2 = GetNextWord(bIsNumber);
2263 if (nextword2 == FX_BSTRC("R")) {
2264 FX_DWORD objnum = FXSYS_atoi(word);
2265 if (bTypeOnly) {
2266 return (CPDF_Object*)PDFOBJ_REFERENCE;
2267 }
2268 return CPDF_Reference::Create(pObjList, objnum);
2269 } else {
2270 m_Pos = SavedPos;
2271 if (bTypeOnly) {
2272 return (CPDF_Object*)PDFOBJ_NUMBER;
2273 }
2274 return CPDF_Number::Create(word);
2275 }
2276 } else {
2277 m_Pos = SavedPos;
2278 if (bTypeOnly) {
2279 return (CPDF_Object*)PDFOBJ_NUMBER;
2280 }
2281 return CPDF_Number::Create(word);
2282 }
2283 }
2284 if (word == FX_BSTRC("true") || word == FX_BSTRC("false")) {
2285 if (bTypeOnly) {
2286 return (CPDF_Object*)PDFOBJ_BOOLEAN;
2287 }
2288 return CPDF_Boolean::Create(word == FX_BSTRC("true"));
2289 }
2290 if (word == FX_BSTRC("null")) {
2291 if (bTypeOnly) {
2292 return (CPDF_Object*)PDFOBJ_NULL;
2293 }
2294 return CPDF_Null::Create();
2295 }
2296 if (word == FX_BSTRC("(")) {
2297 if (bTypeOnly) {
2298 return (CPDF_Object*)PDFOBJ_STRING;
2299 }
2300 CFX_ByteString str = ReadString();
2301 if (m_pCryptoHandler) {
2302 m_pCryptoHandler->Decrypt(objnum, gennum, str);
2303 }
2304 return CPDF_String::Create(str, FALSE);
2305 }
2306 if (word == FX_BSTRC("<")) {
2307 if (bTypeOnly) {
2308 return (CPDF_Object*)PDFOBJ_STRING;
2309 }
2310 CFX_ByteString str = ReadHexString();
2311 if (m_pCryptoHandler) {
2312 m_pCryptoHandler->Decrypt(objnum, gennum, str);
2313 }
2314 return CPDF_String::Create(str, TRUE);
2315 }
2316 if (word == FX_BSTRC("[")) {
2317 if (bTypeOnly) {
2318 return (CPDF_Object*)PDFOBJ_ARRAY;
2319 }
2320 CPDF_Array* pArray = CPDF_Array::Create();
2321 while (1) {
2322 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum);
2323 if (pObj == NULL) {
2324 if (m_WordBuffer[0] == ']') {
2325 return pArray;
2326 }
2327 if (pArray) {
2328 pArray->Release();
2329 }
2330 return NULL;
2331 }
2332 pArray->Add(pObj);
2333 }
2334 }
2335 if (word[0] == '/') {
2336 if (bTypeOnly) {
2337 return (CPDF_Object*)PDFOBJ_NAME;
2338 }
2339 return CPDF_Name::Create(PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
2340 }
2341 if (word == FX_BSTRC("<<")) {
2342 if (bTypeOnly) {
2343 return (CPDF_Object*)PDFOBJ_DICTIONARY;
2344 }
2345 if (pContext) {
2346 pContext->m_DictStart = SavedPos;
2347 }
2348 CPDF_Dictionary* pDict = CPDF_Dictionary::Create();
2349 while (1) {
2350 FX_BOOL bIsNumber;
2351 FX_FILESIZE SavedPos = m_Pos;
2352 CFX_ByteString key = GetNextWord(bIsNumber);
2353 if (key.IsEmpty()) {
2354 if (pDict) {
2355 pDict->Release();
2356 }
2357 return NULL;
2358 }
2359 if (key == FX_BSTRC(">>")) {
2360 break;
2361 }
2362 if (key == FX_BSTRC("endobj")) {
2363 m_Pos = SavedPos;
2364 break;
2365 }
2366 if (key[0] != '/') {
2367 continue;
2368 }
2369 key = PDF_NameDecode(key);
2370 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum);
2371 if (pObj == NULL) {
2372 if (pDict) {
2373 pDict->Release();
2374 }
2375 FX_BYTE ch;
2376 while (1) {
2377 if (!GetNextChar(ch)) {
2378 break;
2379 }
2380 if (ch == 0x0A || ch == 0x0D) {
2381 break;
2382 }
2383 }
2384 return NULL;
2385 }
2386 if (key.GetLength() > 1) {
2387 pDict->AddValue(CFX_ByteStringC(key.c_str() + 1, key.GetLength() - 1), pObj);
2388 }
2389 }
2390 if (pContext) {
2391 pContext->m_DictEnd = m_Pos;
2392 if (pContext->m_Flags & PDFPARSE_NOSTREAM) {
2393 return pDict;
2394 }
2395 }
2396 FX_FILESIZE SavedPos = m_Pos;
2397 FX_BOOL bIsNumber;
2398 CFX_ByteString nextword = GetNextWord(bIsNumber);
2399 if (nextword == FX_BSTRC("stream")) {
2400 CPDF_Stream* pStream = ReadStream(pDict, pContext, objnum, gennum);
2401 if (pStream) {
2402 return pStream;
2403 }
2404 if (pDict) {
2405 pDict->Release();
2406 }
2407 return NULL;
2408 } else {
2409 m_Pos = SavedPos;
2410 return pDict;
2411 }
2412 }
2413 if (word == FX_BSTRC(">>")) {
2414 m_Pos = SavedPos;
2415 return NULL;
2416 }
2417 if (bTypeOnly) {
2418 return (CPDF_Object*)PDFOBJ_INVALID;
2419 }
2420 return NULL;
2421 }
ReadStream(CPDF_Dictionary * pDict,PARSE_CONTEXT * pContext,FX_DWORD objnum,FX_DWORD gennum)2422 CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, PARSE_CONTEXT* pContext,
2423 FX_DWORD objnum, FX_DWORD gennum)
2424 {
2425 CPDF_Object* pLenObj = pDict->GetElement(FX_BSTRC("Length"));
2426 FX_FILESIZE len = 0;
2427 if (pLenObj && ((pLenObj->GetType() != PDFOBJ_REFERENCE) ||
2428 ((((CPDF_Reference*)pLenObj)->GetObjList() != NULL) &&
2429 ((CPDF_Reference*)pLenObj)->GetRefObjNum() != objnum))) {
2430 len = pLenObj->GetInteger();
2431 }
2432
2433 ToNextLine();
2434 FX_FILESIZE StreamStartPos = m_Pos;
2435 if (pContext) {
2436 pContext->m_DataStart = m_Pos;
2437 }
2438
2439 CPDF_CryptoHandler* pCryptoHandler = objnum == (FX_DWORD)m_MetadataObjnum ? NULL : m_pCryptoHandler;
2440 if (pCryptoHandler == NULL) {
2441 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
2442 pos += len;
2443 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) {
2444 m_Pos = pos.ValueOrDie();
2445 }
2446 GetNextWord();
2447 if (m_WordSize < 9 || FXSYS_memcmp32(m_WordBuffer, "endstream", 9)) {
2448 m_Pos = StreamStartPos;
2449 FX_FILESIZE offset = FindTag(FX_BSTRC("endstream"), 0);
2450 if (offset >= 0) {
2451 FX_FILESIZE curPos = m_Pos;
2452 m_Pos = StreamStartPos;
2453 FX_FILESIZE endobjOffset = FindTag(FX_BSTRC("endobj"), 0);
2454 if (endobjOffset < offset && endobjOffset >= 0) {
2455 offset = endobjOffset;
2456 } else {
2457 m_Pos = curPos;
2458 }
2459 FX_BYTE byte1, byte2;
2460 GetCharAt(StreamStartPos + offset - 1, byte1);
2461 GetCharAt(StreamStartPos + offset - 2, byte2);
2462 if (byte1 == 0x0a && byte2 == 0x0d) {
2463 len -= 2;
2464 } else if (byte1 == 0x0a || byte1 == 0x0d) {
2465 len --;
2466 }
2467 len = (FX_DWORD)offset;
2468 pDict->SetAtInteger(FX_BSTRC("Length"), len);
2469 } else {
2470 m_Pos = StreamStartPos;
2471 if (FindTag(FX_BSTRC("endobj"), 0) < 0) {
2472 return NULL;
2473 }
2474 }
2475 }
2476 m_Pos = StreamStartPos;
2477 }
2478 CPDF_Stream* pStream;
2479 FX_LPBYTE pData = FX_Alloc(FX_BYTE, len);
2480 ReadBlock(pData, len);
2481 if (pCryptoHandler) {
2482 CFX_BinaryBuf dest_buf;
2483 dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len));
2484 FX_LPVOID context = pCryptoHandler->DecryptStart(objnum, gennum);
2485 pCryptoHandler->DecryptStream(context, pData, len, dest_buf);
2486 pCryptoHandler->DecryptFinish(context, dest_buf);
2487 FX_Free(pData);
2488 pData = dest_buf.GetBuffer();
2489 len = dest_buf.GetSize();
2490 dest_buf.DetachBuffer();
2491 }
2492 pStream = new CPDF_Stream(pData, len, pDict);
2493 if (pContext) {
2494 pContext->m_DataEnd = pContext->m_DataStart + len;
2495 }
2496 StreamStartPos = m_Pos;
2497 GetNextWord();
2498 if (m_WordSize == 6 && 0 == FXSYS_memcmp32(m_WordBuffer, "endobj", 6)) {
2499 m_Pos = StreamStartPos;
2500 }
2501 return pStream;
2502 }
InitParser(IFX_FileRead * pFileAccess,FX_DWORD HeaderOffset)2503 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess, FX_DWORD HeaderOffset)
2504 {
2505 if (m_pFileBuf) {
2506 FX_Free(m_pFileBuf);
2507 m_pFileBuf = NULL;
2508 }
2509 m_pFileBuf = FX_Alloc(FX_BYTE, m_BufSize);
2510 m_HeaderOffset = HeaderOffset;
2511 m_FileLen = pFileAccess->GetSize();
2512 m_Pos = 0;
2513 m_pFileAccess = pFileAccess;
2514 m_BufOffset = 0;
2515 pFileAccess->ReadBlock(m_pFileBuf, 0, (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize));
2516 }
GetDirectNum()2517 FX_INT32 CPDF_SyntaxParser::GetDirectNum()
2518 {
2519 GetNextWord();
2520 if (!m_bIsNumber) {
2521 return 0;
2522 }
2523 m_WordBuffer[m_WordSize] = 0;
2524 return FXSYS_atoi((FX_LPCSTR)m_WordBuffer);
2525 }
IsWholeWord(FX_FILESIZE startpos,FX_FILESIZE limit,FX_LPCBYTE tag,FX_DWORD taglen)2526 FX_BOOL CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, FX_FILESIZE limit, FX_LPCBYTE tag, FX_DWORD taglen)
2527 {
2528 FX_BYTE type = PDF_CharType[tag[0]];
2529 FX_BOOL bCheckLeft = type != 'D' && type != 'W';
2530 type = PDF_CharType[tag[taglen - 1]];
2531 FX_BOOL bCheckRight = type != 'D' && type != 'W';
2532 FX_BYTE ch;
2533 if (bCheckRight && startpos + (FX_INT32)taglen <= limit && GetCharAt(startpos + (FX_INT32)taglen, ch)) {
2534 FX_BYTE type = PDF_CharType[ch];
2535 if (type == 'N' || type == 'R') {
2536 return FALSE;
2537 }
2538 }
2539 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {
2540 FX_BYTE type = PDF_CharType[ch];
2541 if (type == 'N' || type == 'R') {
2542 return FALSE;
2543 }
2544 }
2545 return TRUE;
2546 }
// Searches for `tag` starting at the current position, forwards or backwards.
//
// tag        - bytes to look for; an empty tag yields FALSE.
// bWholeWord - when set, a hit must additionally pass IsWholeWord().
// bForward   - TRUE scans towards the end of the file using GetCharAt(),
//              FALSE scans towards the start using GetCharAtBackward().
// limit      - maximum distance from m_Pos to scan; 0 means no limit.
//
// On success m_Pos is moved to the first byte of the match and TRUE is
// returned. On failure m_Pos is left unchanged and FALSE is returned.
FX_BOOL CPDF_SyntaxParser::SearchWord(FX_BSTR tag, FX_BOOL bWholeWord, FX_BOOL bForward, FX_FILESIZE limit)
{
    FX_INT32 taglen = tag.GetLength();
    if (taglen == 0) {
        return FALSE;
    }
    FX_FILESIZE pos = m_Pos;
    // Index of the next tag byte to match: counts up from 0 when scanning
    // forward, down from the last byte when scanning backward.
    FX_INT32 offset = 0;
    if (!bForward) {
        offset = taglen - 1;
    }
    FX_LPCBYTE tag_data = tag.GetPtr();
    FX_BYTE byte;
    while (1) {
        if (bForward) {
            if (limit) {
                if (pos >= m_Pos + limit) {
                    return FALSE;
                }
            }
            if (!GetCharAt(pos, byte)) {
                return FALSE;
            }
        } else {
            if (limit) {
                if (pos <= m_Pos - limit) {
                    return FALSE;
                }
            }
            if (!GetCharAtBackward(pos, byte)) {
                return FALSE;
            }
        }
        if (byte == tag_data[offset]) {
            if (bForward) {
                offset ++;
                if (offset < taglen) {
                    pos ++;
                    continue;
                }
            } else {
                offset --;
                if (offset >= 0) {
                    pos --;
                    continue;
                }
            }
            // Every tag byte matched; compute where the match begins.
            FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;
            if (!bWholeWord || IsWholeWord(startpos, limit, tag.GetPtr(), taglen)) {
                m_Pos = startpos;
                return TRUE;
            }
        }
        // Mismatch (or rejected whole-word hit): restart the match, keeping
        // the current byte if it already equals the tag's first byte to try.
        if (bForward) {
            offset = byte == tag_data[0] ? 1 : 0;
            pos ++;
        } else {
            offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1;
            pos --;
        }
        if (pos < 0) {
            return FALSE;
        }
    }
    return FALSE;
}
// Per-tag matching state used by CPDF_SyntaxParser::SearchMultiWord().
struct _SearchTagRecord {
    FX_LPCBYTE m_pTag;    // First byte of the tag inside the caller's tag list.
    FX_DWORD m_Len;       // Number of bytes in the tag.
    FX_DWORD m_Offset;    // Count of tag bytes matched so far.
};
SearchMultiWord(FX_BSTR tags,FX_BOOL bWholeWord,FX_FILESIZE limit)2618 FX_INT32 CPDF_SyntaxParser::SearchMultiWord(FX_BSTR tags, FX_BOOL bWholeWord, FX_FILESIZE limit)
2619 {
2620 FX_INT32 ntags = 1, i;
2621 for (i = 0; i < tags.GetLength(); i ++)
2622 if (tags[i] == 0) {
2623 ntags ++;
2624 }
2625 _SearchTagRecord* pPatterns = FX_Alloc(_SearchTagRecord, ntags);
2626 FX_DWORD start = 0, itag = 0, max_len = 0;
2627 for (i = 0; i <= tags.GetLength(); i ++) {
2628 if (tags[i] == 0) {
2629 FX_DWORD len = i - start;
2630 if (len > max_len) {
2631 max_len = len;
2632 }
2633 pPatterns[itag].m_pTag = tags.GetPtr() + start;
2634 pPatterns[itag].m_Len = len;
2635 pPatterns[itag].m_Offset = 0;
2636 start = i + 1;
2637 itag ++;
2638 }
2639 }
2640 FX_FILESIZE pos = m_Pos;
2641 FX_BYTE byte;
2642 GetCharAt(pos++, byte);
2643 FX_INT32 found = -1;
2644 while (1) {
2645 for (i = 0; i < ntags; i ++) {
2646 if (pPatterns[i].m_pTag[pPatterns[i].m_Offset] == byte) {
2647 pPatterns[i].m_Offset ++;
2648 if (pPatterns[i].m_Offset == pPatterns[i].m_Len) {
2649 if (!bWholeWord || IsWholeWord(pos - pPatterns[i].m_Len, limit, pPatterns[i].m_pTag, pPatterns[i].m_Len)) {
2650 found = i;
2651 goto end;
2652 } else {
2653 if (pPatterns[i].m_pTag[0] == byte) {
2654 pPatterns[i].m_Offset = 1;
2655 } else {
2656 pPatterns[i].m_Offset = 0;
2657 }
2658 }
2659 }
2660 } else {
2661 if (pPatterns[i].m_pTag[0] == byte) {
2662 pPatterns[i].m_Offset = 1;
2663 } else {
2664 pPatterns[i].m_Offset = 0;
2665 }
2666 }
2667 }
2668 if (limit && pos >= m_Pos + limit) {
2669 goto end;
2670 }
2671 if (!GetCharAt(pos, byte)) {
2672 goto end;
2673 }
2674 pos ++;
2675 }
2676 end:
2677 FX_Free(pPatterns);
2678 return found;
2679 }
FindTag(FX_BSTR tag,FX_FILESIZE limit)2680 FX_FILESIZE CPDF_SyntaxParser::FindTag(FX_BSTR tag, FX_FILESIZE limit)
2681 {
2682 FX_INT32 taglen = tag.GetLength();
2683 FX_INT32 match = 0;
2684 limit += m_Pos;
2685 FX_FILESIZE startpos = m_Pos;
2686 while (1) {
2687 FX_BYTE ch;
2688 if (!GetNextChar(ch)) {
2689 return -1;
2690 }
2691 if (ch == tag[match]) {
2692 match ++;
2693 if (match == taglen) {
2694 return m_Pos - startpos - taglen;
2695 }
2696 } else {
2697 match = ch == tag[0] ? 1 : 0;
2698 }
2699 if (limit && m_Pos == limit) {
2700 return -1;
2701 }
2702 }
2703 return -1;
2704 }
GetBinary(FX_BYTE * buffer,FX_DWORD size)2705 void CPDF_SyntaxParser::GetBinary(FX_BYTE* buffer, FX_DWORD size)
2706 {
2707 FX_DWORD offset = 0;
2708 FX_BYTE ch;
2709 while (1) {
2710 if (!GetNextChar(ch)) {
2711 return;
2712 }
2713 buffer[offset++] = ch;
2714 if (offset == size) {
2715 break;
2716 }
2717 }
2718 }
2719
// Concrete IPDF_DataAvail implementation. IsDocAvail() repeatedly calls
// CheckDocStatus(), which dispatches on m_docStatus to one of the Check*/Load*
// steps below until m_bDocAvail is set or a step returns FALSE.
class CPDF_DataAvail FX_FINAL : public IPDF_DataAvail
{
public:
    CPDF_DataAvail(IFX_FileAvail* pFileAvail, IFX_FileRead* pFileRead);
    ~CPDF_DataAvail();

    // IPDF_DataAvail overrides.
    virtual FX_BOOL                     IsDocAvail(IFX_DownloadHints* pHints)  FX_OVERRIDE;

    virtual void                        SetDocument(CPDF_Document* pDoc)  FX_OVERRIDE;

    virtual FX_BOOL                     IsPageAvail(int iPage, IFX_DownloadHints* pHints)  FX_OVERRIDE;

    virtual FX_INT32                    IsFormAvail(IFX_DownloadHints *pHints)  FX_OVERRIDE;

    virtual FX_INT32                    IsLinearizedPDF()  FX_OVERRIDE;

    virtual FX_BOOL                     IsLinearized()  FX_OVERRIDE
    {
        return m_bLinearized;
    }

    virtual void                        GetLinearizedMainXRefInfo(FX_FILESIZE *pPos, FX_DWORD *pSize)  FX_OVERRIDE;

protected:
    // Recursion limit and shared depth counter; their use is outside this
    // declaration (presumably a guard for recursive availability checks).
    static const int kMaxDataAvailRecursionDepth = 64;
    static int s_CurrentDataAvailRecursionDepth;

    // Looks up an object's file offset and the distance to the next object.
    FX_DWORD                            GetObjectSize(FX_DWORD objnum, FX_FILESIZE& offset);
    // Walks the objects reachable from obj_array; unavailable ones are
    // reported through pHints and collected in ret_array.
    FX_BOOL                             IsObjectsAvail(CFX_PtrArray& obj_array, FX_BOOL bParsePage, IFX_DownloadHints* pHints, CFX_PtrArray &ret_array);
    // Runs the step selected by m_docStatus.
    FX_BOOL                             CheckDocStatus(IFX_DownloadHints *pHints);
    // Individual steps dispatched from CheckDocStatus() / CheckPageStatus().
    FX_BOOL                             CheckHeader(IFX_DownloadHints* pHints);
    FX_BOOL                             CheckFirstPage(IFX_DownloadHints *pHints);
    FX_BOOL                             CheckEnd(IFX_DownloadHints *pHints);
    FX_BOOL                             CheckCrossRef(IFX_DownloadHints* pHints);
    FX_BOOL                             CheckCrossRefItem(IFX_DownloadHints *pHints);
    FX_BOOL                             CheckTrailer(IFX_DownloadHints* pHints);
    FX_BOOL                             CheckRoot(IFX_DownloadHints* pHints);
    FX_BOOL                             CheckInfo(IFX_DownloadHints* pHints);
    FX_BOOL                             CheckPages(IFX_DownloadHints* pHints);
    FX_BOOL                             CheckPage(IFX_DownloadHints* pHints);
    FX_BOOL                             CheckResources(IFX_DownloadHints* pHints);
    FX_BOOL                             CheckAnnots(IFX_DownloadHints* pHints);
    FX_BOOL                             CheckAcroForm(IFX_DownloadHints* pHints);
    FX_BOOL                             CheckAcroFormSubObject(IFX_DownloadHints* pHints);
    FX_BOOL                             CheckTrailerAppend(IFX_DownloadHints* pHints);
    FX_BOOL                             CheckPageStatus(IFX_DownloadHints* pHints);
    FX_BOOL                             CheckAllCrossRefStream(IFX_DownloadHints *pHints);

    FX_INT32                            CheckCrossRefStream(IFX_DownloadHints *pHints, FX_FILESIZE &xref_offset);
    FX_BOOL                             IsLinearizedFile(FX_LPBYTE pData, FX_DWORD dwLen);
    void                                SetStartOffset(FX_FILESIZE dwOffset);
    FX_BOOL                             GetNextToken(CFX_ByteString &token);
    FX_BOOL                             GetNextChar(FX_BYTE &ch);
    CPDF_Object *                       ParseIndirectObjectAt(FX_FILESIZE pos, FX_DWORD objnum);
    CPDF_Object *                       GetObject(FX_DWORD objnum, IFX_DownloadHints* pHints, FX_BOOL *pExistInFile);
    FX_BOOL                             GetPageKids(CPDF_Parser *pParser, CPDF_Object *pPages);
    FX_BOOL                             PreparePageItem();
    FX_BOOL                             LoadPages(IFX_DownloadHints* pHints);
    FX_BOOL                             LoadAllXref(IFX_DownloadHints* pHints);
    // Requests the whole file [0, m_dwFileLen) unless it is already present.
    FX_BOOL                             LoadAllFile(IFX_DownloadHints* pHints);
    FX_BOOL                             CheckLinearizedData(IFX_DownloadHints* pHints);
    FX_BOOL                             CheckFileResources(IFX_DownloadHints* pHints);
    FX_BOOL                             CheckPageAnnots(int iPage, IFX_DownloadHints* pHints);

    FX_BOOL                             CheckLinearizedFirstPage(int iPage, IFX_DownloadHints* pHints);
    FX_BOOL                             HaveResourceAncestor(CPDF_Dictionary *pDict);
    FX_BOOL                             CheckPage(FX_INT32 iPage, IFX_DownloadHints* pHints);
    FX_BOOL                             LoadDocPages(IFX_DownloadHints* pHints);
    FX_BOOL                             LoadDocPage(FX_INT32 iPage, IFX_DownloadHints* pHints);
    FX_BOOL                             CheckPageNode(CPDF_PageNode &pageNodes, FX_INT32 iPage, FX_INT32 &iCount, IFX_DownloadHints* pHints);
    FX_BOOL                             CheckUnkownPageNode(FX_DWORD dwPageNo, CPDF_PageNode *pPageNode, IFX_DownloadHints* pHints);
    FX_BOOL                             CheckArrayPageNode(FX_DWORD dwPageNo, CPDF_PageNode *pPageNode, IFX_DownloadHints* pHints);
    FX_BOOL                             CheckPageCount(IFX_DownloadHints* pHints);
    FX_BOOL                             IsFirstCheck(int iPage);
    void                                ResetFirstCheck(int iPage);

    // Parser used by LoadAllXref(); it does not own the file reader
    // (m_bOwnFileRead is cleared in the constructor).
    CPDF_Parser                         m_parser;

    CPDF_SyntaxParser                   m_syntaxParser;

    // Released in the destructor when non-NULL.
    CPDF_Object                         *m_pRoot;

    FX_DWORD                            m_dwRootObjNum;

    FX_DWORD                            m_dwInfoObjNum;

    // Released in the destructor when non-NULL.
    CPDF_Object                         *m_pLinearized;

    // Released in the destructor when non-NULL.
    CPDF_Object                         *m_pTrailer;

    // Set by CheckDocStatus() once no further step is pending; ends the loop
    // in IsDocAvail().
    FX_BOOL                             m_bDocAvail;

    FX_FILESIZE                         m_dwHeaderOffset;

    FX_FILESIZE                         m_dwLastXRefOffset;

    FX_FILESIZE                         m_dwXRefOffset;

    FX_FILESIZE                         m_dwTrailerOffset;

    FX_FILESIZE                         m_dwCurrentOffset;

    // Current step of the availability state machine.
    PDF_DATAAVAIL_STATUS                m_docStatus;

    // File size as reported by m_pFileRead->GetSize().
    FX_FILESIZE                         m_dwFileLen;

    // Set through SetDocument(); not released by this class's destructor.
    CPDF_Document*                      m_pDocument;

    // Object numbers already followed by IsObjectsAvail().
    CPDF_SortObjNumArray                m_objnum_array;

    // Objects still pending between calls of CheckAcroFormSubObject().
    CFX_PtrArray                        m_objs_array;

    FX_FILESIZE                         m_Pos;

    FX_FILESIZE                         m_bufferOffset;

    FX_DWORD                            m_bufferSize;

    CFX_ByteString                      m_WordBuf;

    FX_BYTE                             m_WordBuffer[257];

    FX_DWORD                            m_WordSize;

    FX_BYTE                             m_bufferData[512];

    CFX_FileSizeArray                   m_CrossOffset;

    CFX_DWordArray                      m_XRefStreamList;

    CFX_DWordArray                      m_PageObjList;

    FX_DWORD                            m_PagesObjNum;

    // Value returned by IsLinearized().
    FX_BOOL                             m_bLinearized;

    FX_DWORD                            m_dwFirstPageNo;

    FX_BOOL                             m_bLinearedDataOK;

    FX_BOOL                             m_bMainXRefLoadTried;

    FX_BOOL                             m_bMainXRefLoadedOK;

    FX_BOOL                             m_bPagesTreeLoad;

    FX_BOOL                             m_bPagesLoad;

    CPDF_Parser *                       m_pCurrentParser;

    FX_FILESIZE                         m_dwCurrentXRefSteam;

    FX_BOOL                             m_bAnnotsLoad;

    FX_BOOL                             m_bHaveAcroForm;

    // Object number passed to GetObject() by CheckAcroForm().
    FX_DWORD                            m_dwAcroFormObjNum;

    FX_BOOL                             m_bAcroFormLoad;

    // Result of the last GetObject() call in CheckAcroForm(); when non-NULL it
    // is also stored in m_arrayAcroforms.
    CPDF_Object *                       m_pAcroForm;

    // CPDF_Object pointers; each entry is released by the destructor or by
    // CheckAcroFormSubObject() once everything is available.
    CFX_PtrArray                        m_arrayAcroforms;

    CPDF_Dictionary *                   m_pPageDict;

    CPDF_Object *                       m_pPageResource;

    FX_BOOL                             m_bNeedDownLoadResource;

    FX_BOOL                             m_bPageLoadedOK;

    FX_BOOL                             m_bLinearizedFormParamLoad;

    CFX_PtrArray                        m_PagesArray;

    FX_DWORD                            m_dwEncryptObjNum;

    FX_FILESIZE                         m_dwPrevXRefOffset;

    // Selects CheckPages()/CheckPage() over LoadDocPages() in CheckDocStatus().
    FX_BOOL                             m_bTotalLoadPageTree;

    FX_BOOL                             m_bCurPageDictLoadOK;

    CPDF_PageNode                       m_pageNodes;

    // Owned; deleted in the destructor.
    CFX_CMapDWordToDWord *              m_pageMapCheckState;

    // Owned; deleted in the destructor.
    CFX_CMapDWordToDWord *              m_pagesLoadState;
};
2910
// Stores the two file interfaces; neither pointer is validated here.
IPDF_DataAvail::IPDF_DataAvail(IFX_FileAvail* pFileAvail, IFX_FileRead* pFileRead) :
    m_pFileAvail(pFileAvail),
    m_pFileRead(pFileRead) {
}
2915
2916 // static
Create(IFX_FileAvail * pFileAvail,IFX_FileRead * pFileRead)2917 IPDF_DataAvail* IPDF_DataAvail::Create(IFX_FileAvail* pFileAvail, IFX_FileRead* pFileRead)
2918 {
2919 return new CPDF_DataAvail(pFileAvail, pFileRead);
2920 }
2921
// static
// Definition of the class-wide recursion depth counter; starts at zero.
int CPDF_DataAvail::s_CurrentDataAvailRecursionDepth = 0;
2924
CPDF_DataAvail(IFX_FileAvail * pFileAvail,IFX_FileRead * pFileRead)2925 CPDF_DataAvail::CPDF_DataAvail(IFX_FileAvail* pFileAvail, IFX_FileRead* pFileRead)
2926 : IPDF_DataAvail(pFileAvail, pFileRead)
2927 {
2928 m_Pos = 0;
2929 m_dwFileLen = 0;
2930 if (m_pFileRead) {
2931 m_dwFileLen = (FX_DWORD)m_pFileRead->GetSize();
2932 }
2933 m_dwCurrentOffset = 0;
2934 m_WordSize = 0;
2935 m_dwXRefOffset = 0;
2936 m_bufferOffset = 0;
2937 m_dwFirstPageNo = 0;
2938 m_bufferSize = 0;
2939 m_PagesObjNum = 0;
2940 m_dwCurrentXRefSteam = 0;
2941 m_dwAcroFormObjNum = 0;
2942 m_dwInfoObjNum = 0;
2943 m_pDocument = 0;
2944 m_dwEncryptObjNum = 0;
2945 m_dwPrevXRefOffset = 0;
2946 m_dwLastXRefOffset = 0;
2947 m_bDocAvail = FALSE;
2948 m_bMainXRefLoadTried = FALSE;
2949 m_bDocAvail = FALSE;
2950 m_bLinearized = FALSE;
2951 m_bPagesLoad = FALSE;
2952 m_bPagesTreeLoad = FALSE;
2953 m_bMainXRefLoadedOK = FALSE;
2954 m_bAnnotsLoad = FALSE;
2955 m_bHaveAcroForm = FALSE;
2956 m_bAcroFormLoad = FALSE;
2957 m_bPageLoadedOK = FALSE;
2958 m_bNeedDownLoadResource = FALSE;
2959 m_bLinearizedFormParamLoad = FALSE;
2960 m_pLinearized = NULL;
2961 m_pRoot = NULL;
2962 m_pTrailer = NULL;
2963 m_pCurrentParser = NULL;
2964 m_pAcroForm = NULL;
2965 m_pPageDict = NULL;
2966 m_pPageResource = NULL;
2967 m_pageMapCheckState = NULL;
2968 m_docStatus = PDF_DATAAVAIL_HEADER;
2969 m_parser.m_bOwnFileRead = FALSE;
2970 m_bTotalLoadPageTree = FALSE;
2971 m_bCurPageDictLoadOK = FALSE;
2972 m_bLinearedDataOK = FALSE;
2973 m_pagesLoadState = NULL;
2974 }
~CPDF_DataAvail()2975 CPDF_DataAvail::~CPDF_DataAvail()
2976 {
2977 if (m_pLinearized) {
2978 m_pLinearized->Release();
2979 }
2980 if (m_pRoot) {
2981 m_pRoot->Release();
2982 }
2983 if (m_pTrailer) {
2984 m_pTrailer->Release();
2985 }
2986 if (m_pageMapCheckState) {
2987 delete m_pageMapCheckState;
2988 }
2989 if (m_pagesLoadState) {
2990 delete m_pagesLoadState;
2991 }
2992 FX_INT32 i = 0;
2993 FX_INT32 iSize = m_arrayAcroforms.GetSize();
2994 for (i = 0; i < iSize; ++i) {
2995 ((CPDF_Object *)m_arrayAcroforms.GetAt(i))->Release();
2996 }
2997 }
// Records the document whose parser and indirect objects are used by
// GetObjectSize() and IsObjectsAvail(). The pointer is stored as-is.
void CPDF_DataAvail::SetDocument(CPDF_Document* pDoc)
{
    m_pDocument = pDoc;
}
GetObjectSize(FX_DWORD objnum,FX_FILESIZE & offset)3002 FX_DWORD CPDF_DataAvail::GetObjectSize(FX_DWORD objnum, FX_FILESIZE& offset)
3003 {
3004 CPDF_Parser *pParser = (CPDF_Parser *)(m_pDocument->GetParser());
3005 if (pParser == NULL) {
3006 return 0;
3007 }
3008 if (objnum >= (FX_DWORD)pParser->m_CrossRef.GetSize()) {
3009 return 0;
3010 }
3011 if (pParser->m_V5Type[objnum] == 2) {
3012 objnum = (FX_DWORD)pParser->m_CrossRef[objnum];
3013 }
3014 if (pParser->m_V5Type[objnum] == 1 || pParser->m_V5Type[objnum] == 255) {
3015 offset = pParser->m_CrossRef[objnum];
3016 if (offset == 0) {
3017 return 0;
3018 }
3019 FX_LPVOID pResult = FXSYS_bsearch(&offset, pParser->m_SortedOffset.GetData(), pParser->m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
3020 if (pResult == NULL) {
3021 return 0;
3022 }
3023 if ((FX_FILESIZE*)pResult - (FX_FILESIZE*)pParser->m_SortedOffset.GetData() == pParser->m_SortedOffset.GetSize() - 1) {
3024 return 0;
3025 }
3026 return (FX_DWORD)(((FX_FILESIZE*)pResult)[1] - offset);
3027 }
3028 return 0;
3029 }
IsObjectsAvail(CFX_PtrArray & obj_array,FX_BOOL bParsePage,IFX_DownloadHints * pHints,CFX_PtrArray & ret_array)3030 FX_BOOL CPDF_DataAvail::IsObjectsAvail(CFX_PtrArray& obj_array, FX_BOOL bParsePage, IFX_DownloadHints* pHints, CFX_PtrArray &ret_array)
3031 {
3032 if (!obj_array.GetSize()) {
3033 return TRUE;
3034 }
3035 FX_DWORD count = 0;
3036 CFX_PtrArray new_obj_array;
3037 FX_INT32 i = 0;
3038 for (i = 0; i < obj_array.GetSize(); i++) {
3039 CPDF_Object *pObj = (CPDF_Object *)obj_array[i];
3040 if (!pObj) {
3041 continue;
3042 }
3043 FX_INT32 type = pObj->GetType();
3044 switch (type) {
3045 case PDFOBJ_ARRAY: {
3046 CPDF_Array *pArray = pObj->GetArray();
3047 for (FX_DWORD k = 0; k < pArray->GetCount(); k++) {
3048 new_obj_array.Add(pArray->GetElement(k));
3049 }
3050 }
3051 break;
3052 case PDFOBJ_STREAM:
3053 pObj = pObj->GetDict();
3054 case PDFOBJ_DICTIONARY: {
3055 CPDF_Dictionary *pDict = pObj->GetDict();
3056 if (pDict && pDict->GetString("Type") == "Page" && !bParsePage) {
3057 continue;
3058 }
3059 FX_POSITION pos = pDict->GetStartPos();
3060 while (pos) {
3061 CPDF_Object *value;
3062 CFX_ByteString key;
3063 value = pDict->GetNextElement(pos, key);
3064 if (key != "Parent") {
3065 new_obj_array.Add(value);
3066 }
3067 }
3068 }
3069 break;
3070 case PDFOBJ_REFERENCE: {
3071 CPDF_Reference *pRef = (CPDF_Reference*)pObj;
3072 FX_DWORD dwNum = pRef->GetRefObjNum();
3073 FX_FILESIZE offset;
3074 FX_DWORD original_size = GetObjectSize(dwNum, offset);
3075 pdfium::base::CheckedNumeric<FX_DWORD> size = original_size;
3076 if (size.ValueOrDefault(0) == 0 || offset < 0 || offset >= m_dwFileLen) {
3077 break;
3078 }
3079
3080 size += offset;
3081 size += 512;
3082 if (!size.IsValid()) {
3083 break;
3084 }
3085 if (size.ValueOrDie() > m_dwFileLen) {
3086 size = m_dwFileLen - offset;
3087 } else {
3088 size = original_size + 512;
3089 }
3090 if (!size.IsValid()) {
3091 break;
3092 }
3093 if (!m_pFileAvail->IsDataAvail(offset, size.ValueOrDie())) {
3094 pHints->AddSegment(offset, size.ValueOrDie());
3095 ret_array.Add(pObj);
3096 count++;
3097 } else if (!m_objnum_array.Find(dwNum)) {
3098 m_objnum_array.AddObjNum(dwNum);
3099 CPDF_Object *pReferred = m_pDocument->GetIndirectObject(pRef->GetRefObjNum(), NULL);
3100 if (pReferred) {
3101 new_obj_array.Add(pReferred);
3102 }
3103 }
3104 }
3105 break;
3106 }
3107 }
3108 if (count > 0) {
3109 FX_INT32 iSize = new_obj_array.GetSize();
3110 for (i = 0; i < iSize; ++i) {
3111 CPDF_Object *pObj = (CPDF_Object *)new_obj_array[i];
3112 FX_INT32 type = pObj->GetType();
3113 if (type == PDFOBJ_REFERENCE) {
3114 CPDF_Reference *pRef = (CPDF_Reference *)pObj;
3115 FX_DWORD dwNum = pRef->GetRefObjNum();
3116 if (!m_objnum_array.Find(dwNum)) {
3117 ret_array.Add(pObj);
3118 }
3119 } else {
3120 ret_array.Add(pObj);
3121 }
3122 }
3123 return FALSE;
3124 }
3125 obj_array.RemoveAll();
3126 obj_array.Append(new_obj_array);
3127 return IsObjectsAvail(obj_array, FALSE, pHints, ret_array);
3128 }
IsDocAvail(IFX_DownloadHints * pHints)3129 FX_BOOL CPDF_DataAvail::IsDocAvail(IFX_DownloadHints* pHints)
3130 {
3131 if (!m_dwFileLen && m_pFileRead) {
3132 m_dwFileLen = (FX_DWORD)m_pFileRead->GetSize();
3133 if (!m_dwFileLen) {
3134 return TRUE;
3135 }
3136 }
3137 while (!m_bDocAvail) {
3138 if (!CheckDocStatus(pHints)) {
3139 return FALSE;
3140 }
3141 }
3142 return TRUE;
3143 }
CheckAcroFormSubObject(IFX_DownloadHints * pHints)3144 FX_BOOL CPDF_DataAvail::CheckAcroFormSubObject(IFX_DownloadHints* pHints)
3145 {
3146 if (!m_objs_array.GetSize()) {
3147 m_objs_array.RemoveAll();
3148 m_objnum_array.RemoveAll();
3149 CFX_PtrArray obj_array;
3150 obj_array.Append(m_arrayAcroforms);
3151 FX_BOOL bRet = IsObjectsAvail(obj_array, FALSE, pHints, m_objs_array);
3152 if (bRet) {
3153 m_objs_array.RemoveAll();
3154 }
3155 return bRet;
3156 } else {
3157 CFX_PtrArray new_objs_array;
3158 FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
3159 if (bRet) {
3160 FX_INT32 iSize = m_arrayAcroforms.GetSize();
3161 for (FX_INT32 i = 0; i < iSize; ++i) {
3162 ((CPDF_Object *)m_arrayAcroforms.GetAt(i))->Release();
3163 }
3164 m_arrayAcroforms.RemoveAll();
3165 } else {
3166 m_objs_array.RemoveAll();
3167 m_objs_array.Append(new_objs_array);
3168 }
3169 return bRet;
3170 }
3171 }
CheckAcroForm(IFX_DownloadHints * pHints)3172 FX_BOOL CPDF_DataAvail::CheckAcroForm(IFX_DownloadHints* pHints)
3173 {
3174 FX_BOOL bExist = FALSE;
3175 m_pAcroForm = GetObject(m_dwAcroFormObjNum, pHints, &bExist);
3176 if (!bExist) {
3177 m_docStatus = PDF_DATAAVAIL_PAGETREE;
3178 return TRUE;
3179 }
3180 if (!m_pAcroForm) {
3181 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3182 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3183 return TRUE;
3184 }
3185 return FALSE;
3186 }
3187 m_arrayAcroforms.Add(m_pAcroForm);
3188 m_docStatus = PDF_DATAAVAIL_PAGETREE;
3189 return TRUE;
3190 }
CheckDocStatus(IFX_DownloadHints * pHints)3191 FX_BOOL CPDF_DataAvail::CheckDocStatus(IFX_DownloadHints *pHints)
3192 {
3193 switch (m_docStatus) {
3194 case PDF_DATAAVAIL_HEADER:
3195 return CheckHeader(pHints);
3196 case PDF_DATAAVAIL_FIRSTPAGE:
3197 case PDF_DATAAVAIL_FIRSTPAGE_PREPARE:
3198 return CheckFirstPage(pHints);
3199 case PDF_DATAAVAIL_END:
3200 return CheckEnd(pHints);
3201 case PDF_DATAAVAIL_CROSSREF:
3202 return CheckCrossRef(pHints);
3203 case PDF_DATAAVAIL_CROSSREF_ITEM:
3204 return CheckCrossRefItem(pHints);
3205 case PDF_DATAAVAIL_CROSSREF_STREAM:
3206 return CheckAllCrossRefStream(pHints);
3207 case PDF_DATAAVAIL_TRAILER:
3208 return CheckTrailer(pHints);
3209 case PDF_DATAAVAIL_TRAILER_APPEND:
3210 return CheckTrailerAppend(pHints);
3211 case PDF_DATAAVAIL_LOADALLCRSOSSREF:
3212 return LoadAllXref(pHints);
3213 case PDF_DATAAVAIL_LOADALLFILE:
3214 return LoadAllFile(pHints);
3215 case PDF_DATAAVAIL_ROOT:
3216 return CheckRoot(pHints);
3217 case PDF_DATAAVAIL_INFO:
3218 return CheckInfo(pHints);
3219 case PDF_DATAAVAIL_ACROFORM:
3220 return CheckAcroForm(pHints);
3221 case PDF_DATAAVAIL_PAGETREE:
3222 if (m_bTotalLoadPageTree) {
3223 return CheckPages(pHints);
3224 } else {
3225 return LoadDocPages(pHints);
3226 }
3227 case PDF_DATAAVAIL_PAGE:
3228 if (m_bTotalLoadPageTree) {
3229 return CheckPage(pHints);
3230 } else {
3231 m_docStatus = PDF_DATAAVAIL_PAGE_LATERLOAD;
3232 return TRUE;
3233 }
3234 case PDF_DATAAVAIL_ERROR:
3235 return LoadAllFile(pHints);
3236 case PDF_DATAAVAIL_PAGE_LATERLOAD:
3237 m_docStatus = PDF_DATAAVAIL_PAGE;
3238 default:
3239 m_bDocAvail = TRUE;
3240 return TRUE;
3241 }
3242 }
CheckPageStatus(IFX_DownloadHints * pHints)3243 FX_BOOL CPDF_DataAvail::CheckPageStatus(IFX_DownloadHints* pHints)
3244 {
3245 switch (m_docStatus) {
3246 case PDF_DATAAVAIL_PAGETREE:
3247 return CheckPages(pHints);
3248 case PDF_DATAAVAIL_PAGE:
3249 return CheckPage(pHints);
3250 case PDF_DATAAVAIL_ERROR:
3251 return LoadAllFile(pHints);
3252 default:
3253 m_bPagesTreeLoad = TRUE;
3254 m_bPagesLoad = TRUE;
3255 return TRUE;
3256 }
3257 }
LoadAllFile(IFX_DownloadHints * pHints)3258 FX_BOOL CPDF_DataAvail::LoadAllFile(IFX_DownloadHints* pHints)
3259 {
3260 if (m_pFileAvail->IsDataAvail(0, (FX_DWORD)m_dwFileLen)) {
3261 m_docStatus = PDF_DATAAVAIL_DONE;
3262 return TRUE;
3263 }
3264 pHints->AddSegment(0, (FX_DWORD)m_dwFileLen);
3265 return FALSE;
3266 }
LoadAllXref(IFX_DownloadHints * pHints)3267 FX_BOOL CPDF_DataAvail::LoadAllXref(IFX_DownloadHints* pHints)
3268 {
3269 m_parser.m_Syntax.InitParser(m_pFileRead, (FX_DWORD)m_dwHeaderOffset);
3270 m_parser.m_bOwnFileRead = FALSE;
3271 if (!m_parser.LoadAllCrossRefV4(m_dwLastXRefOffset) && !m_parser.LoadAllCrossRefV5(m_dwLastXRefOffset)) {
3272 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3273 return FALSE;
3274 }
3275 FXSYS_qsort(m_parser.m_SortedOffset.GetData(), m_parser.m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), _CompareFileSize);
3276 m_dwRootObjNum = m_parser.GetRootObjNum();
3277 m_dwInfoObjNum = m_parser.GetInfoObjNum();
3278 m_pCurrentParser = &m_parser;
3279 m_docStatus = PDF_DATAAVAIL_ROOT;
3280 return TRUE;
3281 }
GetObject(FX_DWORD objnum,IFX_DownloadHints * pHints,FX_BOOL * pExistInFile)3282 CPDF_Object* CPDF_DataAvail::GetObject(FX_DWORD objnum, IFX_DownloadHints* pHints, FX_BOOL *pExistInFile)
3283 {
3284 CPDF_Object *pRet = NULL;
3285 FX_DWORD original_size = 0;
3286 FX_FILESIZE offset = 0;
3287 CPDF_Parser *pParser = NULL;
3288
3289 if (pExistInFile) {
3290 *pExistInFile = TRUE;
3291 }
3292
3293 if (m_pDocument == NULL) {
3294 original_size = (FX_DWORD)m_parser.GetObjectSize(objnum);
3295 offset = m_parser.GetObjectOffset(objnum);
3296 pParser = &m_parser;
3297 } else {
3298 original_size = GetObjectSize(objnum, offset);
3299 pParser = (CPDF_Parser *)(m_pDocument->GetParser());
3300 }
3301
3302 pdfium::base::CheckedNumeric<FX_DWORD> size = original_size;
3303 if (size.ValueOrDefault(0) == 0 || offset < 0 || offset >= m_dwFileLen) {
3304 if (pExistInFile)
3305 *pExistInFile = FALSE;
3306
3307 return NULL;
3308 }
3309
3310 size += offset;
3311 size += 512;
3312 if (!size.IsValid()) {
3313 return NULL;
3314 }
3315
3316 if (size.ValueOrDie() > m_dwFileLen) {
3317 size = m_dwFileLen - offset;
3318 } else {
3319 size = original_size + 512;
3320 }
3321
3322 if (!size.IsValid()) {
3323 return NULL;
3324 }
3325
3326 if (!m_pFileAvail->IsDataAvail(offset, size.ValueOrDie())) {
3327 pHints->AddSegment(offset, size.ValueOrDie());
3328 return NULL;
3329 }
3330
3331 if (pParser) {
3332 pRet = pParser->ParseIndirectObject(NULL, objnum, NULL);
3333 }
3334
3335 if (!pRet && pExistInFile) {
3336 *pExistInFile = FALSE;
3337 }
3338
3339 return pRet;
3340 }
3341
CheckInfo(IFX_DownloadHints * pHints)3342 FX_BOOL CPDF_DataAvail::CheckInfo(IFX_DownloadHints* pHints)
3343 {
3344 FX_BOOL bExist = FALSE;
3345 CPDF_Object *pInfo = GetObject(m_dwInfoObjNum, pHints, &bExist);
3346 if (!bExist) {
3347 if (m_bHaveAcroForm) {
3348 m_docStatus = PDF_DATAAVAIL_ACROFORM;
3349 } else {
3350 m_docStatus = PDF_DATAAVAIL_PAGETREE;
3351 }
3352 return TRUE;
3353 }
3354 if (!pInfo) {
3355 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3356 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3357 return TRUE;
3358 }
3359 if (m_Pos == m_dwFileLen) {
3360 m_docStatus = PDF_DATAAVAIL_ERROR;
3361 }
3362 return FALSE;
3363 }
3364 if (pInfo) {
3365 pInfo->Release();
3366 }
3367 if (m_bHaveAcroForm) {
3368 m_docStatus = PDF_DATAAVAIL_ACROFORM;
3369 } else {
3370 m_docStatus = PDF_DATAAVAIL_PAGETREE;
3371 }
3372 return TRUE;
3373 }
CheckRoot(IFX_DownloadHints * pHints)3374 FX_BOOL CPDF_DataAvail::CheckRoot(IFX_DownloadHints* pHints)
3375 {
3376 FX_BOOL bExist = FALSE;
3377 m_pRoot = GetObject(m_dwRootObjNum, pHints, &bExist);
3378 if (!bExist) {
3379 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3380 return TRUE;
3381 }
3382 if (!m_pRoot) {
3383 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3384 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3385 return TRUE;
3386 }
3387 return FALSE;
3388 }
3389 CPDF_Dictionary* pDict = m_pRoot->GetDict();
3390 if (!pDict) {
3391 m_docStatus = PDF_DATAAVAIL_ERROR;
3392 return FALSE;
3393 }
3394 CPDF_Reference* pRef = (CPDF_Reference*)pDict->GetElement(FX_BSTRC("Pages"));
3395 if (pRef == NULL || pRef->GetType() != PDFOBJ_REFERENCE) {
3396 m_docStatus = PDF_DATAAVAIL_ERROR;
3397 return FALSE;
3398 }
3399 m_PagesObjNum = pRef->GetRefObjNum();
3400 CPDF_Reference* pAcroFormRef = (CPDF_Reference*)m_pRoot->GetDict()->GetElement(FX_BSTRC("AcroForm"));
3401 if (pAcroFormRef && pAcroFormRef->GetType() == PDFOBJ_REFERENCE) {
3402 m_bHaveAcroForm = TRUE;
3403 m_dwAcroFormObjNum = pAcroFormRef->GetRefObjNum();
3404 }
3405 if (m_dwInfoObjNum) {
3406 m_docStatus = PDF_DATAAVAIL_INFO;
3407 } else {
3408 if (m_bHaveAcroForm) {
3409 m_docStatus = PDF_DATAAVAIL_ACROFORM;
3410 } else {
3411 m_docStatus = PDF_DATAAVAIL_PAGETREE;
3412 }
3413 }
3414 return TRUE;
3415 }
PreparePageItem()3416 FX_BOOL CPDF_DataAvail::PreparePageItem()
3417 {
3418 CPDF_Dictionary *pRoot = m_pDocument->GetRoot();
3419 CPDF_Reference* pRef = pRoot ? (CPDF_Reference*)pRoot->GetElement(FX_BSTRC("Pages")) : NULL;
3420 if (pRef == NULL || pRef->GetType() != PDFOBJ_REFERENCE) {
3421 m_docStatus = PDF_DATAAVAIL_ERROR;
3422 return FALSE;
3423 }
3424 m_PagesObjNum = pRef->GetRefObjNum();
3425 m_pCurrentParser = (CPDF_Parser *)m_pDocument->GetParser();
3426 m_docStatus = PDF_DATAAVAIL_PAGETREE;
3427 return TRUE;
3428 }
IsFirstCheck(int iPage)3429 FX_BOOL CPDF_DataAvail::IsFirstCheck(int iPage)
3430 {
3431 if (NULL == m_pageMapCheckState) {
3432 m_pageMapCheckState = new CFX_CMapDWordToDWord();
3433 }
3434 FX_DWORD dwValue = 0;
3435 if (!m_pageMapCheckState->Lookup(iPage, dwValue)) {
3436 m_pageMapCheckState->SetAt(iPage, 1);
3437 return TRUE;
3438 }
3439 if (dwValue != 0) {
3440 return FALSE;
3441 }
3442 m_pageMapCheckState->SetAt(iPage, 1);
3443 return TRUE;
3444 }
ResetFirstCheck(int iPage)3445 void CPDF_DataAvail::ResetFirstCheck(int iPage)
3446 {
3447 if (NULL == m_pageMapCheckState) {
3448 m_pageMapCheckState = new CFX_CMapDWordToDWord();
3449 }
3450 FX_DWORD dwValue = 1;
3451 if (!m_pageMapCheckState->Lookup(iPage, dwValue)) {
3452 return;
3453 }
3454 m_pageMapCheckState->SetAt(iPage, 0);
3455 }
CheckPage(IFX_DownloadHints * pHints)3456 FX_BOOL CPDF_DataAvail::CheckPage(IFX_DownloadHints* pHints)
3457 {
3458 FX_DWORD iPageObjs = m_PageObjList.GetSize();
3459 CFX_DWordArray UnavailObjList;
3460 for (FX_DWORD i = 0; i < iPageObjs; ++i) {
3461 FX_DWORD dwPageObjNum = m_PageObjList.GetAt(i);
3462 FX_BOOL bExist = FALSE;
3463 CPDF_Object *pObj = GetObject(dwPageObjNum, pHints, &bExist);
3464 if (!pObj) {
3465 if (bExist) {
3466 UnavailObjList.Add(dwPageObjNum);
3467 }
3468 continue;
3469 }
3470 if (pObj->GetType() == PDFOBJ_ARRAY) {
3471 CPDF_Array *pArray = pObj->GetArray();
3472 if (pArray) {
3473 FX_INT32 iSize = pArray->GetCount();
3474 CPDF_Object *pItem = NULL;
3475 for (FX_INT32 j = 0; j < iSize; ++j) {
3476 pItem = pArray->GetElement(j);
3477 if (pItem && pItem->GetType() == PDFOBJ_REFERENCE) {
3478 UnavailObjList.Add(((CPDF_Reference *)pItem)->GetRefObjNum());
3479 }
3480 }
3481 }
3482 }
3483 if (pObj->GetType() != PDFOBJ_DICTIONARY) {
3484 pObj->Release();
3485 continue;
3486 }
3487 CFX_ByteString type = pObj->GetDict()->GetString(FX_BSTRC("Type"));
3488 if (type == FX_BSTRC("Pages")) {
3489 m_PagesArray.Add(pObj);
3490 continue;
3491 }
3492 pObj->Release();
3493 }
3494 m_PageObjList.RemoveAll();
3495 if (UnavailObjList.GetSize()) {
3496 m_PageObjList.Append(UnavailObjList);
3497 return FALSE;
3498 }
3499 FX_DWORD iPages = m_PagesArray.GetSize();
3500 for (FX_DWORD i = 0; i < iPages; i++) {
3501 CPDF_Object *pPages = (CPDF_Object *)m_PagesArray.GetAt(i);
3502 if (!pPages) {
3503 continue;
3504 }
3505 if (!GetPageKids(m_pCurrentParser, pPages)) {
3506 pPages->Release();
3507 while (++i < iPages) {
3508 pPages = (CPDF_Object *)m_PagesArray.GetAt(i);
3509 pPages->Release();
3510 }
3511 m_PagesArray.RemoveAll();
3512 m_docStatus = PDF_DATAAVAIL_ERROR;
3513 return FALSE;
3514 }
3515 pPages->Release();
3516 }
3517 m_PagesArray.RemoveAll();
3518 if (!m_PageObjList.GetSize()) {
3519 m_docStatus = PDF_DATAAVAIL_DONE;
3520 }
3521 return TRUE;
3522 }
GetPageKids(CPDF_Parser * pParser,CPDF_Object * pPages)3523 FX_BOOL CPDF_DataAvail::GetPageKids(CPDF_Parser *pParser, CPDF_Object *pPages)
3524 {
3525 if (!pParser) {
3526 m_docStatus = PDF_DATAAVAIL_ERROR;
3527 return FALSE;
3528 }
3529 CPDF_Dictionary* pDict = pPages->GetDict();
3530 CPDF_Object *pKids = pDict ? pDict->GetElement(FX_BSTRC("Kids")) : NULL;
3531 if (!pKids) {
3532 return TRUE;
3533 }
3534 switch (pKids->GetType()) {
3535 case PDFOBJ_REFERENCE: {
3536 CPDF_Reference *pKid = (CPDF_Reference *)pKids;
3537 m_PageObjList.Add(pKid->GetRefObjNum());
3538 }
3539 break;
3540 case PDFOBJ_ARRAY: {
3541 CPDF_Array *pKidsArray = (CPDF_Array *)pKids;
3542 for (FX_DWORD i = 0; i < pKidsArray->GetCount(); ++i) {
3543 CPDF_Object *pKid = (CPDF_Object *)pKidsArray->GetElement(i);
3544 if (pKid && pKid->GetType() == PDFOBJ_REFERENCE) {
3545 m_PageObjList.Add(((CPDF_Reference *)pKid)->GetRefObjNum());
3546 }
3547 }
3548 }
3549 break;
3550 default:
3551 m_docStatus = PDF_DATAAVAIL_ERROR;
3552 return FALSE;
3553 }
3554 return TRUE;
3555 }
CheckPages(IFX_DownloadHints * pHints)3556 FX_BOOL CPDF_DataAvail::CheckPages(IFX_DownloadHints* pHints)
3557 {
3558 FX_BOOL bExist = FALSE;
3559 CPDF_Object *pPages = GetObject(m_PagesObjNum, pHints, &bExist);
3560 if (!bExist) {
3561 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3562 return TRUE;
3563 }
3564 if (!pPages) {
3565 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3566 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3567 return TRUE;
3568 }
3569 return FALSE;
3570 }
3571 if (!GetPageKids(m_pCurrentParser, pPages)) {
3572 pPages->Release();
3573 m_docStatus = PDF_DATAAVAIL_ERROR;
3574 return FALSE;
3575 }
3576 pPages->Release();
3577 m_docStatus = PDF_DATAAVAIL_PAGE;
3578 return TRUE;
3579 }
CheckHeader(IFX_DownloadHints * pHints)3580 FX_BOOL CPDF_DataAvail::CheckHeader(IFX_DownloadHints* pHints)
3581 {
3582 FX_DWORD req_size = 1024;
3583 if ((FX_FILESIZE)req_size > m_dwFileLen) {
3584 req_size = (FX_DWORD)m_dwFileLen;
3585 }
3586 if (m_pFileAvail->IsDataAvail(0, req_size)) {
3587 FX_BYTE buffer[1024];
3588 m_pFileRead->ReadBlock(buffer, 0, req_size);
3589 if (IsLinearizedFile(buffer, req_size)) {
3590 m_docStatus = PDF_DATAAVAIL_FIRSTPAGE;
3591 } else {
3592 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
3593 return FALSE;
3594 }
3595 m_docStatus = PDF_DATAAVAIL_END;
3596 }
3597 return TRUE;
3598 }
3599 pHints->AddSegment(0, req_size);
3600 return FALSE;
3601 }
CheckFirstPage(IFX_DownloadHints * pHints)3602 FX_BOOL CPDF_DataAvail::CheckFirstPage(IFX_DownloadHints *pHints)
3603 {
3604 CPDF_Dictionary* pDict = m_pLinearized->GetDict();
3605 CPDF_Object *pEndOffSet = pDict ? pDict->GetElement(FX_BSTRC("E")) : NULL;
3606 if (!pEndOffSet) {
3607 m_docStatus = PDF_DATAAVAIL_ERROR;
3608 return FALSE;
3609 }
3610 CPDF_Object *pXRefOffset = pDict ? pDict->GetElement(FX_BSTRC("T")) : NULL;
3611 if (!pXRefOffset) {
3612 m_docStatus = PDF_DATAAVAIL_ERROR;
3613 return FALSE;
3614 }
3615 CPDF_Object *pFileLen = pDict ? pDict->GetElement(FX_BSTRC("L")) : NULL;
3616 if (!pFileLen) {
3617 m_docStatus = PDF_DATAAVAIL_ERROR;
3618 return FALSE;
3619 }
3620 FX_BOOL bNeedDownLoad = FALSE;
3621 if (pEndOffSet->GetType() == PDFOBJ_NUMBER) {
3622 FX_DWORD dwEnd = pEndOffSet->GetInteger();
3623 dwEnd += 512;
3624 if ((FX_FILESIZE)dwEnd > m_dwFileLen) {
3625 dwEnd = (FX_DWORD)m_dwFileLen;
3626 }
3627 FX_INT32 iStartPos = (FX_INT32)(m_dwFileLen > 1024 ? 1024 : m_dwFileLen);
3628 FX_INT32 iSize = dwEnd > 1024 ? dwEnd - 1024 : 0;
3629 if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) {
3630 pHints->AddSegment(iStartPos, iSize);
3631 bNeedDownLoad = TRUE;
3632 }
3633 }
3634 m_dwLastXRefOffset = 0;
3635 FX_FILESIZE dwFileLen = 0;
3636 if (pXRefOffset->GetType() == PDFOBJ_NUMBER) {
3637 m_dwLastXRefOffset = pXRefOffset->GetInteger();
3638 }
3639 if (pFileLen->GetType() == PDFOBJ_NUMBER) {
3640 dwFileLen = pFileLen->GetInteger();
3641 }
3642 if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset, (FX_DWORD)(dwFileLen - m_dwLastXRefOffset))) {
3643 if (m_docStatus == PDF_DATAAVAIL_FIRSTPAGE) {
3644 FX_DWORD dwSize = (FX_DWORD)(dwFileLen - m_dwLastXRefOffset);
3645 FX_FILESIZE offset = m_dwLastXRefOffset;
3646 if (dwSize < 512 && dwFileLen > 512) {
3647 dwSize = 512;
3648 offset = dwFileLen - 512;
3649 }
3650 pHints->AddSegment(offset, dwSize);
3651 }
3652 } else {
3653 m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE;
3654 }
3655 if (!bNeedDownLoad && m_docStatus == PDF_DATAAVAIL_FIRSTPAGE_PREPARE) {
3656 m_docStatus = PDF_DATAAVAIL_DONE;
3657 return TRUE;
3658 }
3659 m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE;
3660 return FALSE;
3661 }
ParseIndirectObjectAt(FX_FILESIZE pos,FX_DWORD objnum)3662 CPDF_Object * CPDF_DataAvail::ParseIndirectObjectAt(FX_FILESIZE pos, FX_DWORD objnum)
3663 {
3664 FX_FILESIZE SavedPos = m_syntaxParser.SavePos();
3665 m_syntaxParser.RestorePos(pos);
3666 FX_BOOL bIsNumber;
3667 CFX_ByteString word = m_syntaxParser.GetNextWord(bIsNumber);
3668 if (!bIsNumber) {
3669 return NULL;
3670 }
3671 FX_DWORD parser_objnum = FXSYS_atoi(word);
3672 if (objnum && parser_objnum != objnum) {
3673 return NULL;
3674 }
3675 word = m_syntaxParser.GetNextWord(bIsNumber);
3676 if (!bIsNumber) {
3677 return NULL;
3678 }
3679 FX_DWORD gennum = FXSYS_atoi(word);
3680 if (m_syntaxParser.GetKeyword() != FX_BSTRC("obj")) {
3681 m_syntaxParser.RestorePos(SavedPos);
3682 return NULL;
3683 }
3684 CPDF_Object* pObj = m_syntaxParser.GetObject(NULL, objnum, gennum, 0);
3685 m_syntaxParser.RestorePos(SavedPos);
3686 return pObj;
3687 }
IsLinearizedPDF()3688 FX_INT32 CPDF_DataAvail::IsLinearizedPDF()
3689 {
3690 FX_DWORD req_size = 1024;
3691 if (!m_pFileAvail->IsDataAvail(0, req_size)) {
3692 return PDF_UNKNOW_LINEARIZED;
3693 }
3694 if (!m_pFileRead) {
3695 return PDF_NOT_LINEARIZED;
3696 }
3697 FX_FILESIZE dwSize = m_pFileRead->GetSize();
3698 if (dwSize < (FX_FILESIZE)req_size) {
3699 return PDF_UNKNOW_LINEARIZED;
3700 }
3701 FX_BYTE buffer[1024];
3702 m_pFileRead->ReadBlock(buffer, 0, req_size);
3703 if (IsLinearizedFile(buffer, req_size)) {
3704 return PDF_IS_LINEARIZED;
3705 }
3706 return PDF_NOT_LINEARIZED;
3707 }
IsLinearizedFile(FX_LPBYTE pData,FX_DWORD dwLen)3708 FX_BOOL CPDF_DataAvail::IsLinearizedFile(FX_LPBYTE pData, FX_DWORD dwLen)
3709 {
3710 CFX_SmartPointer<IFX_FileStream> file(FX_CreateMemoryStream(pData, (size_t)dwLen, FALSE));
3711 FX_INT32 offset = GetHeaderOffset(file.Get());
3712 if (offset == -1) {
3713 m_docStatus = PDF_DATAAVAIL_ERROR;
3714 return FALSE;
3715 }
3716 m_dwHeaderOffset = offset;
3717 m_syntaxParser.InitParser(file.Get(), offset);
3718 m_syntaxParser.RestorePos(m_syntaxParser.m_HeaderOffset + 9);
3719 FX_BOOL bNumber = FALSE;
3720 CFX_ByteString wordObjNum = m_syntaxParser.GetNextWord(bNumber);
3721 if (!bNumber) {
3722 return FALSE;
3723 }
3724 FX_DWORD objnum = FXSYS_atoi(wordObjNum);
3725 if (m_pLinearized) {
3726 m_pLinearized->Release();
3727 m_pLinearized = NULL;
3728 }
3729 m_pLinearized = ParseIndirectObjectAt(m_syntaxParser.m_HeaderOffset + 9, objnum);
3730 if (!m_pLinearized) {
3731 return FALSE;
3732 }
3733 if (m_pLinearized->GetDict() && m_pLinearized->GetDict()->GetElement(FX_BSTRC("Linearized"))) {
3734 CPDF_Object *pLen = m_pLinearized->GetDict()->GetElement(FX_BSTRC("L"));
3735 if (!pLen) {
3736 return FALSE;
3737 }
3738 if ((FX_FILESIZE)pLen->GetInteger() != m_pFileRead->GetSize()) {
3739 return FALSE;
3740 }
3741 m_bLinearized = TRUE;
3742 CPDF_Object *pNo = m_pLinearized->GetDict()->GetElement(FX_BSTRC("P"));
3743 if (pNo && pNo->GetType() == PDFOBJ_NUMBER) {
3744 m_dwFirstPageNo = pNo->GetInteger();
3745 }
3746 return TRUE;
3747 }
3748 return FALSE;
3749 }
CheckEnd(IFX_DownloadHints * pHints)3750 FX_BOOL CPDF_DataAvail::CheckEnd(IFX_DownloadHints* pHints)
3751 {
3752 FX_DWORD req_pos = (FX_DWORD)(m_dwFileLen > 1024 ? m_dwFileLen - 1024 : 0);
3753 FX_DWORD dwSize = (FX_DWORD)(m_dwFileLen - req_pos);
3754 if (m_pFileAvail->IsDataAvail(req_pos, dwSize)) {
3755 FX_BYTE buffer[1024];
3756 m_pFileRead->ReadBlock(buffer, req_pos, dwSize);
3757 CFX_SmartPointer<IFX_FileStream> file(FX_CreateMemoryStream(buffer, (size_t)dwSize, FALSE));
3758 m_syntaxParser.InitParser(file.Get(), 0);
3759 m_syntaxParser.RestorePos(dwSize - 1);
3760 if (m_syntaxParser.SearchWord(FX_BSTRC("startxref"), TRUE, FALSE, dwSize)) {
3761 FX_BOOL bNumber;
3762 m_syntaxParser.GetNextWord(bNumber);
3763 CFX_ByteString xrefpos_str = m_syntaxParser.GetNextWord(bNumber);
3764 if (!bNumber) {
3765 m_docStatus = PDF_DATAAVAIL_ERROR;
3766 return FALSE;
3767 }
3768 m_dwXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str);
3769 if (!m_dwXRefOffset || m_dwXRefOffset > m_dwFileLen) {
3770 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3771 return TRUE;
3772 }
3773 m_dwLastXRefOffset = m_dwXRefOffset;
3774 SetStartOffset(m_dwXRefOffset);
3775 m_docStatus = PDF_DATAAVAIL_CROSSREF;
3776 return TRUE;
3777 } else {
3778 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
3779 return TRUE;
3780 }
3781 }
3782 pHints->AddSegment(req_pos, dwSize);
3783 return FALSE;
3784 }
CheckCrossRefStream(IFX_DownloadHints * pHints,FX_FILESIZE & xref_offset)3785 FX_INT32 CPDF_DataAvail::CheckCrossRefStream(IFX_DownloadHints* pHints, FX_FILESIZE &xref_offset)
3786 {
3787 xref_offset = 0;
3788 FX_DWORD req_size = (FX_DWORD)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
3789 if (m_pFileAvail->IsDataAvail(m_Pos, req_size)) {
3790 FX_INT32 iSize = (FX_INT32)(m_Pos + req_size - m_dwCurrentXRefSteam);
3791 CFX_BinaryBuf buf(iSize);
3792 FX_LPBYTE pBuf = buf.GetBuffer();
3793 m_pFileRead->ReadBlock(pBuf, m_dwCurrentXRefSteam, iSize);
3794 CFX_SmartPointer<IFX_FileStream> file(FX_CreateMemoryStream(pBuf, (size_t)iSize, FALSE));
3795 m_parser.m_Syntax.InitParser(file.Get(), 0);
3796 FX_BOOL bNumber = FALSE;
3797 CFX_ByteString objnum = m_parser.m_Syntax.GetNextWord(bNumber);
3798 if (!bNumber) {
3799 return -1;
3800 }
3801 FX_DWORD objNum = FXSYS_atoi(objnum);
3802 CPDF_Object *pObj = m_parser.ParseIndirectObjectAt(NULL, 0, objNum, NULL);
3803 if (!pObj) {
3804 m_Pos += m_parser.m_Syntax.SavePos();
3805 return 0;
3806 }
3807 CPDF_Dictionary* pDict = pObj->GetDict();
3808 CPDF_Object *pName = pDict ? pDict->GetElement(FX_BSTRC("Type")) : NULL;
3809 if (pName && pName->GetType() == PDFOBJ_NAME) {
3810 if (pName->GetString() == FX_BSTRC("XRef")) {
3811 m_Pos += m_parser.m_Syntax.SavePos();
3812 xref_offset = pObj->GetDict()->GetInteger(FX_BSTRC("Prev"));
3813 pObj->Release();
3814 return 1;
3815 } else {
3816 pObj->Release();
3817 return -1;
3818 }
3819 }
3820 pObj->Release();
3821 return -1;
3822 }
3823 pHints->AddSegment(m_Pos, req_size);
3824 return 0;
3825 }
SetStartOffset(FX_FILESIZE dwOffset)3826 inline void CPDF_DataAvail::SetStartOffset(FX_FILESIZE dwOffset)
3827 {
3828 m_Pos = dwOffset;
3829 }
3830 #define MAX_WORD_BUFFER 256
GetNextToken(CFX_ByteString & token)3831 FX_BOOL CPDF_DataAvail::GetNextToken(CFX_ByteString &token)
3832 {
3833 m_WordSize = 0;
3834 FX_BYTE ch;
3835 if (!GetNextChar(ch)) {
3836 return FALSE;
3837 }
3838 FX_BYTE type = PDF_CharType[ch];
3839 while (1) {
3840 while (type == 'W') {
3841 if (!GetNextChar(ch)) {
3842 return FALSE;
3843 }
3844 type = PDF_CharType[ch];
3845 }
3846 if (ch != '%') {
3847 break;
3848 }
3849 while (1) {
3850 if (!GetNextChar(ch)) {
3851 return FALSE;
3852 }
3853 if (ch == '\r' || ch == '\n') {
3854 break;
3855 }
3856 }
3857 type = PDF_CharType[ch];
3858 }
3859 if (type == 'D') {
3860 m_WordBuffer[m_WordSize++] = ch;
3861 if (ch == '/') {
3862 while (1) {
3863 if (!GetNextChar(ch)) {
3864 return FALSE;
3865 }
3866 type = PDF_CharType[ch];
3867 if (type != 'R' && type != 'N') {
3868 m_Pos --;
3869 CFX_ByteString ret(m_WordBuffer, m_WordSize);
3870 token = ret;
3871 return TRUE;
3872 }
3873 if (m_WordSize < MAX_WORD_BUFFER) {
3874 m_WordBuffer[m_WordSize++] = ch;
3875 }
3876 }
3877 } else if (ch == '<') {
3878 if (!GetNextChar(ch)) {
3879 return FALSE;
3880 }
3881 if (ch == '<') {
3882 m_WordBuffer[m_WordSize++] = ch;
3883 } else {
3884 m_Pos --;
3885 }
3886 } else if (ch == '>') {
3887 if (!GetNextChar(ch)) {
3888 return FALSE;
3889 }
3890 if (ch == '>') {
3891 m_WordBuffer[m_WordSize++] = ch;
3892 } else {
3893 m_Pos --;
3894 }
3895 }
3896 CFX_ByteString ret(m_WordBuffer, m_WordSize);
3897 token = ret;
3898 return TRUE;
3899 }
3900 while (1) {
3901 if (m_WordSize < MAX_WORD_BUFFER) {
3902 m_WordBuffer[m_WordSize++] = ch;
3903 }
3904 if (!GetNextChar(ch)) {
3905 return FALSE;
3906 }
3907 type = PDF_CharType[ch];
3908 if (type == 'D' || type == 'W') {
3909 m_Pos --;
3910 break;
3911 }
3912 }
3913 CFX_ByteString ret(m_WordBuffer, m_WordSize);
3914 token = ret;
3915 return TRUE;
3916 }
GetNextChar(FX_BYTE & ch)3917 FX_BOOL CPDF_DataAvail::GetNextChar(FX_BYTE &ch)
3918 {
3919 FX_FILESIZE pos = m_Pos;
3920 if (pos >= m_dwFileLen) {
3921 return FALSE;
3922 }
3923 if (m_bufferOffset >= pos || (FX_FILESIZE)(m_bufferOffset + m_bufferSize) <= pos) {
3924 FX_FILESIZE read_pos = pos;
3925 FX_DWORD read_size = 512;
3926 if ((FX_FILESIZE)read_size > m_dwFileLen) {
3927 read_size = (FX_DWORD)m_dwFileLen;
3928 }
3929 if ((FX_FILESIZE)(read_pos + read_size) > m_dwFileLen) {
3930 read_pos = m_dwFileLen - read_size;
3931 }
3932 if (!m_pFileRead->ReadBlock(m_bufferData, read_pos, read_size)) {
3933 return FALSE;
3934 }
3935 m_bufferOffset = read_pos;
3936 m_bufferSize = read_size;
3937 }
3938 ch = m_bufferData[pos - m_bufferOffset];
3939 m_Pos ++;
3940 return TRUE;
3941 }
CheckCrossRefItem(IFX_DownloadHints * pHints)3942 FX_BOOL CPDF_DataAvail::CheckCrossRefItem(IFX_DownloadHints *pHints)
3943 {
3944 FX_INT32 iSize = 0;
3945 CFX_ByteString token;
3946 while (1) {
3947 if (!GetNextToken(token)) {
3948 iSize = (FX_INT32)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
3949 pHints->AddSegment(m_Pos, iSize);
3950 return FALSE;
3951 }
3952 if (token == "trailer") {
3953 m_dwTrailerOffset = m_Pos;
3954 m_docStatus = PDF_DATAAVAIL_TRAILER;
3955 return TRUE;
3956 }
3957 }
3958 }
CheckAllCrossRefStream(IFX_DownloadHints * pHints)3959 FX_BOOL CPDF_DataAvail::CheckAllCrossRefStream(IFX_DownloadHints *pHints)
3960 {
3961 FX_FILESIZE xref_offset = 0;
3962 FX_INT32 nRet = CheckCrossRefStream(pHints, xref_offset);
3963 if (nRet == 1) {
3964 if (!xref_offset) {
3965 m_docStatus = PDF_DATAAVAIL_LOADALLCRSOSSREF;
3966 } else {
3967 m_dwCurrentXRefSteam = xref_offset;
3968 m_Pos = xref_offset;
3969 }
3970 return TRUE;
3971 } else if (nRet == -1) {
3972 m_docStatus = PDF_DATAAVAIL_ERROR;
3973 }
3974 return FALSE;
3975 }
CheckCrossRef(IFX_DownloadHints * pHints)3976 FX_BOOL CPDF_DataAvail::CheckCrossRef(IFX_DownloadHints* pHints)
3977 {
3978 FX_INT32 iSize = 0;
3979 CFX_ByteString token;
3980 if (!GetNextToken(token)) {
3981 iSize = (FX_INT32)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
3982 pHints->AddSegment(m_Pos, iSize);
3983 return FALSE;
3984 }
3985 if (token == "xref") {
3986 m_CrossOffset.InsertAt(0, m_dwXRefOffset);
3987 while (1) {
3988 if (!GetNextToken(token)) {
3989 iSize = (FX_INT32)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
3990 pHints->AddSegment(m_Pos, iSize);
3991 m_docStatus = PDF_DATAAVAIL_CROSSREF_ITEM;
3992 return FALSE;
3993 }
3994 if (token == "trailer") {
3995 m_dwTrailerOffset = m_Pos;
3996 m_docStatus = PDF_DATAAVAIL_TRAILER;
3997 return TRUE;
3998 }
3999 }
4000 } else {
4001 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
4002 return TRUE;
4003 }
4004 return FALSE;
4005 }
CheckTrailerAppend(IFX_DownloadHints * pHints)4006 FX_BOOL CPDF_DataAvail::CheckTrailerAppend(IFX_DownloadHints* pHints)
4007 {
4008 if (m_Pos < m_dwFileLen) {
4009 FX_FILESIZE dwAppendPos = m_Pos + m_syntaxParser.SavePos();
4010 FX_INT32 iSize = (FX_INT32)(dwAppendPos + 512 > m_dwFileLen ? m_dwFileLen - dwAppendPos : 512);
4011 if (!m_pFileAvail->IsDataAvail(dwAppendPos, iSize)) {
4012 pHints->AddSegment(dwAppendPos, iSize);
4013 return FALSE;
4014 }
4015 }
4016 if (m_dwPrevXRefOffset) {
4017 SetStartOffset(m_dwPrevXRefOffset);
4018 m_docStatus = PDF_DATAAVAIL_CROSSREF;
4019 } else {
4020 m_docStatus = PDF_DATAAVAIL_LOADALLCRSOSSREF;
4021 }
4022 return TRUE;
4023 }
CheckTrailer(IFX_DownloadHints * pHints)4024 FX_BOOL CPDF_DataAvail::CheckTrailer(IFX_DownloadHints* pHints)
4025 {
4026 FX_INT32 iTrailerSize = (FX_INT32)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
4027 if (m_pFileAvail->IsDataAvail(m_Pos, iTrailerSize)) {
4028 FX_INT32 iSize = (FX_INT32)(m_Pos + iTrailerSize - m_dwTrailerOffset);
4029 CFX_BinaryBuf buf(iSize);
4030 FX_LPBYTE pBuf = buf.GetBuffer();
4031 if (!pBuf) {
4032 m_docStatus = PDF_DATAAVAIL_ERROR;
4033 return FALSE;
4034 }
4035 if (!m_pFileRead->ReadBlock(pBuf, m_dwTrailerOffset, iSize)) {
4036 return FALSE;
4037 }
4038 CFX_SmartPointer<IFX_FileStream> file(FX_CreateMemoryStream(pBuf, (size_t)iSize, FALSE));
4039 m_syntaxParser.InitParser(file.Get(), 0);
4040 CPDF_Object *pTrailer = m_syntaxParser.GetObject(NULL, 0, 0, 0);
4041 if (!pTrailer) {
4042 m_Pos += m_syntaxParser.SavePos();
4043 pHints->AddSegment(m_Pos, iTrailerSize);
4044 return FALSE;
4045 }
4046 if (pTrailer->GetType() != PDFOBJ_DICTIONARY) {
4047 return FALSE;
4048 }
4049 CPDF_Dictionary *pTrailerDict = pTrailer->GetDict();
4050 if (pTrailerDict) {
4051 CPDF_Object *pEncrypt = pTrailerDict->GetElement("Encrypt");
4052 if (pEncrypt && pEncrypt->GetType() == PDFOBJ_REFERENCE) {
4053 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
4054 pTrailer->Release();
4055 return TRUE;
4056 }
4057 }
4058 FX_DWORD xrefpos = GetDirectInteger(pTrailer->GetDict(), FX_BSTRC("Prev"));
4059 if (xrefpos) {
4060 m_dwPrevXRefOffset = GetDirectInteger(pTrailer->GetDict(), FX_BSTRC("XRefStm"));
4061 pTrailer->Release();
4062 if (m_dwPrevXRefOffset) {
4063 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
4064 } else {
4065 m_dwPrevXRefOffset = xrefpos;
4066 if (m_dwPrevXRefOffset >= m_dwFileLen) {
4067 m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
4068 } else {
4069 SetStartOffset(m_dwPrevXRefOffset);
4070 m_docStatus = PDF_DATAAVAIL_TRAILER_APPEND;
4071 }
4072 }
4073 return TRUE;
4074 } else {
4075 m_dwPrevXRefOffset = 0;
4076 m_docStatus = PDF_DATAAVAIL_TRAILER_APPEND;
4077 pTrailer->Release();
4078 }
4079 return TRUE;
4080 }
4081 pHints->AddSegment(m_Pos, iTrailerSize);
4082 return FALSE;
4083 }
CheckPage(FX_INT32 iPage,IFX_DownloadHints * pHints)4084 FX_BOOL CPDF_DataAvail::CheckPage(FX_INT32 iPage, IFX_DownloadHints* pHints)
4085 {
4086 while (TRUE) {
4087 switch (m_docStatus) {
4088 case PDF_DATAAVAIL_PAGETREE:
4089 if (!LoadDocPages(pHints)) {
4090 return FALSE;
4091 }
4092 break;
4093 case PDF_DATAAVAIL_PAGE:
4094 if (!LoadDocPage(iPage, pHints)) {
4095 return FALSE;
4096 }
4097 break;
4098 case PDF_DATAAVAIL_ERROR:
4099 return LoadAllFile(pHints);
4100 default:
4101 m_bPagesTreeLoad = TRUE;
4102 m_bPagesLoad = TRUE;
4103 m_bCurPageDictLoadOK = TRUE;
4104 m_docStatus = PDF_DATAAVAIL_PAGE;
4105 return TRUE;
4106 }
4107 }
4108 }
CheckArrayPageNode(FX_DWORD dwPageNo,CPDF_PageNode * pPageNode,IFX_DownloadHints * pHints)4109 FX_BOOL CPDF_DataAvail::CheckArrayPageNode(FX_DWORD dwPageNo, CPDF_PageNode *pPageNode, IFX_DownloadHints* pHints)
4110 {
4111 FX_BOOL bExist = FALSE;
4112 CPDF_Object *pPages = GetObject(dwPageNo, pHints, &bExist);
4113 if (!bExist) {
4114 m_docStatus = PDF_DATAAVAIL_ERROR;
4115 return FALSE;
4116 }
4117 if (!pPages) {
4118 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
4119 m_docStatus = PDF_DATAAVAIL_ERROR;
4120 return FALSE;
4121 }
4122 return FALSE;
4123 }
4124 if (pPages->GetType() != PDFOBJ_ARRAY) {
4125 pPages->Release();
4126 m_docStatus = PDF_DATAAVAIL_ERROR;
4127 return FALSE;
4128 }
4129 pPageNode->m_type = PDF_PAGENODE_PAGES;
4130 CPDF_Array* pArray = (CPDF_Array*)pPages;
4131 for (FX_DWORD i = 0; i < pArray->GetCount(); ++i) {
4132 CPDF_Object *pKid = (CPDF_Object *)pArray->GetElement(i);
4133 if (!pKid || pKid->GetType() != PDFOBJ_REFERENCE) {
4134 continue;
4135 }
4136 CPDF_PageNode *pNode = new CPDF_PageNode();
4137 pPageNode->m_childNode.Add(pNode);
4138 pNode->m_dwPageNo = ((CPDF_Reference*)pKid)->GetRefObjNum();
4139 }
4140 pPages->Release();
4141 return TRUE;
4142 }
CheckUnkownPageNode(FX_DWORD dwPageNo,CPDF_PageNode * pPageNode,IFX_DownloadHints * pHints)4143 FX_BOOL CPDF_DataAvail::CheckUnkownPageNode(FX_DWORD dwPageNo, CPDF_PageNode *pPageNode, IFX_DownloadHints* pHints)
4144 {
4145 FX_BOOL bExist = FALSE;
4146 CPDF_Object *pPage = GetObject(dwPageNo, pHints, &bExist);
4147 if (!bExist) {
4148 m_docStatus = PDF_DATAAVAIL_ERROR;
4149 return FALSE;
4150 }
4151 if (!pPage) {
4152 if (m_docStatus == PDF_DATAAVAIL_ERROR) {
4153 m_docStatus = PDF_DATAAVAIL_ERROR;
4154 return FALSE;
4155 }
4156 return FALSE;
4157 }
4158 if (pPage->GetType() == PDFOBJ_ARRAY) {
4159 pPageNode->m_dwPageNo = dwPageNo;
4160 pPageNode->m_type = PDF_PAGENODE_ARRAY;
4161 pPage->Release();
4162 return TRUE;
4163 }
4164 if (pPage->GetType() != PDFOBJ_DICTIONARY) {
4165 pPage->Release();
4166 m_docStatus = PDF_DATAAVAIL_ERROR;
4167 return FALSE;
4168 }
4169 pPageNode->m_dwPageNo = dwPageNo;
4170 CPDF_Dictionary* pDict = pPage->GetDict();
4171 CFX_ByteString type = pDict ? pDict->GetString(FX_BSTRC("Type")) : CFX_ByteString();
4172 if (type == FX_BSTRC("Pages")) {
4173 pPageNode->m_type = PDF_PAGENODE_PAGES;
4174 CPDF_Object *pKids = pDict->GetElement(FX_BSTRC("Kids"));
4175 if (!pKids) {
4176 m_docStatus = PDF_DATAAVAIL_PAGE;
4177 return TRUE;
4178 }
4179 switch (pKids->GetType()) {
4180 case PDFOBJ_REFERENCE: {
4181 CPDF_Reference *pKid = (CPDF_Reference *)pKids;
4182 CPDF_PageNode *pNode = new CPDF_PageNode();
4183 pPageNode->m_childNode.Add(pNode);
4184 pNode->m_dwPageNo = pKid->GetRefObjNum();
4185 }
4186 break;
4187 case PDFOBJ_ARRAY: {
4188 CPDF_Array *pKidsArray = (CPDF_Array *)pKids;
4189 for (FX_DWORD i = 0; i < pKidsArray->GetCount(); ++i) {
4190 CPDF_Object *pKid = (CPDF_Object *)pKidsArray->GetElement(i);
4191 if (!pKid || pKid->GetType() != PDFOBJ_REFERENCE) {
4192 continue;
4193 }
4194 CPDF_PageNode *pNode = new CPDF_PageNode();
4195 pPageNode->m_childNode.Add(pNode);
4196 pNode->m_dwPageNo = ((CPDF_Reference*)pKid)->GetRefObjNum();
4197 }
4198 }
4199 break;
4200 default:
4201 break;
4202 }
4203 } else if (type == FX_BSTRC("Page")) {
4204 pPageNode->m_type = PDF_PAGENODE_PAGE;
4205 } else {
4206 pPage->Release();
4207 m_docStatus = PDF_DATAAVAIL_ERROR;
4208 return FALSE;
4209 }
4210 pPage->Release();
4211 return TRUE;
4212 }
CheckPageNode(CPDF_PageNode & pageNodes,FX_INT32 iPage,FX_INT32 & iCount,IFX_DownloadHints * pHints)4213 FX_BOOL CPDF_DataAvail::CheckPageNode(CPDF_PageNode &pageNodes, FX_INT32 iPage, FX_INT32 &iCount, IFX_DownloadHints* pHints)
4214 {
4215 FX_INT32 iSize = pageNodes.m_childNode.GetSize();
4216 if (iSize <= 0 || iPage >= iSize) {
4217 m_docStatus = PDF_DATAAVAIL_ERROR;
4218 return FALSE;
4219 }
4220 for (FX_INT32 i = 0; i < iSize; ++i) {
4221 CPDF_PageNode *pNode = (CPDF_PageNode*)pageNodes.m_childNode.GetAt(i);
4222 if (!pNode) {
4223 continue;
4224 }
4225 switch (pNode->m_type) {
4226 case PDF_PAGENODE_UNKOWN:
4227 if (!CheckUnkownPageNode(pNode->m_dwPageNo, pNode, pHints)) {
4228 return FALSE;
4229 }
4230 --i;
4231 break;
4232 case PDF_PAGENODE_PAGE:
4233 iCount++;
4234 if (iPage == iCount && m_pDocument) {
4235 m_pDocument->m_PageList.SetAt(iPage, pNode->m_dwPageNo);
4236 }
4237 break;
4238 case PDF_PAGENODE_PAGES:
4239 if (!CheckPageNode(*pNode, iPage, iCount, pHints)) {
4240 return FALSE;
4241 }
4242 break;
4243 case PDF_PAGENODE_ARRAY:
4244 if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode, pHints)) {
4245 return FALSE;
4246 }
4247 --i;
4248 break;
4249 }
4250 if (iPage == iCount) {
4251 m_docStatus = PDF_DATAAVAIL_DONE;
4252 return TRUE;
4253 }
4254 }
4255 return TRUE;
4256 }
LoadDocPage(FX_INT32 iPage,IFX_DownloadHints * pHints)4257 FX_BOOL CPDF_DataAvail::LoadDocPage(FX_INT32 iPage, IFX_DownloadHints* pHints)
4258 {
4259 if (m_pDocument->GetPageCount() <= iPage || m_pDocument->m_PageList.GetAt(iPage)) {
4260 m_docStatus = PDF_DATAAVAIL_DONE;
4261 return TRUE;
4262 }
4263 if (m_pageNodes.m_type == PDF_PAGENODE_PAGE) {
4264 if (iPage == 0) {
4265 m_docStatus = PDF_DATAAVAIL_DONE;
4266 return TRUE;
4267 }
4268 m_docStatus = PDF_DATAAVAIL_ERROR;
4269 return TRUE;
4270 }
4271 FX_INT32 iCount = -1;
4272 return CheckPageNode(m_pageNodes, iPage, iCount, pHints);
4273 }
CheckPageCount(IFX_DownloadHints * pHints)4274 FX_BOOL CPDF_DataAvail::CheckPageCount(IFX_DownloadHints* pHints)
4275 {
4276 FX_BOOL bExist = FALSE;
4277 CPDF_Object *pPages = GetObject(m_PagesObjNum, pHints, &bExist);
4278 if (!bExist) {
4279 m_docStatus = PDF_DATAAVAIL_ERROR;
4280 return FALSE;
4281 }
4282 if (!pPages) {
4283 return FALSE;
4284 }
4285 CPDF_Dictionary* pPagesDict = pPages->GetDict();
4286 if (!pPagesDict) {
4287 pPages->Release();
4288 m_docStatus = PDF_DATAAVAIL_ERROR;
4289 return FALSE;
4290 }
4291 if (!pPagesDict->KeyExist(FX_BSTRC("Kids"))) {
4292 pPages->Release();
4293 return TRUE;
4294 }
4295 int count = pPagesDict->GetInteger(FX_BSTRC("Count"));
4296 if (count > 0) {
4297 pPages->Release();
4298 return TRUE;
4299 }
4300 pPages->Release();
4301 return FALSE;
4302 }
LoadDocPages(IFX_DownloadHints * pHints)4303 FX_BOOL CPDF_DataAvail::LoadDocPages(IFX_DownloadHints* pHints)
4304 {
4305 if (!CheckUnkownPageNode(m_PagesObjNum, &m_pageNodes, pHints)) {
4306 return FALSE;
4307 }
4308 if (CheckPageCount(pHints)) {
4309 m_docStatus = PDF_DATAAVAIL_PAGE;
4310 return TRUE;
4311 } else {
4312 m_bTotalLoadPageTree = TRUE;
4313 }
4314 return FALSE;
4315 }
LoadPages(IFX_DownloadHints * pHints)4316 FX_BOOL CPDF_DataAvail::LoadPages(IFX_DownloadHints* pHints)
4317 {
4318 while (!m_bPagesTreeLoad) {
4319 if (!CheckPageStatus(pHints)) {
4320 return FALSE;
4321 }
4322 }
4323 if (m_bPagesLoad) {
4324 return TRUE;
4325 }
4326 m_pDocument->LoadPages();
4327 return FALSE;
4328 }
CheckLinearizedData(IFX_DownloadHints * pHints)4329 FX_BOOL CPDF_DataAvail::CheckLinearizedData(IFX_DownloadHints* pHints)
4330 {
4331 if (m_bLinearedDataOK) {
4332 return TRUE;
4333 }
4334
4335 if (!m_bMainXRefLoadTried) {
4336 FX_SAFE_DWORD data_size = m_dwFileLen;
4337 data_size -= m_dwLastXRefOffset;
4338 if (!data_size.IsValid()) {
4339 return FALSE;
4340 }
4341 if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset, data_size.ValueOrDie())) {
4342 pHints->AddSegment(m_dwLastXRefOffset, data_size.ValueOrDie());
4343 return FALSE;
4344 }
4345 FX_DWORD dwRet = ((CPDF_Parser *)m_pDocument->GetParser())->LoadLinearizedMainXRefTable();
4346 m_bMainXRefLoadTried = TRUE;
4347 if (dwRet != PDFPARSE_ERROR_SUCCESS) {
4348 return FALSE;
4349 }
4350 if (!PreparePageItem()) {
4351 return FALSE;
4352 }
4353 m_bMainXRefLoadedOK = TRUE;
4354 m_bLinearedDataOK = TRUE;
4355 }
4356
4357 return m_bLinearedDataOK;
4358 }
CheckPageAnnots(FX_INT32 iPage,IFX_DownloadHints * pHints)4359 FX_BOOL CPDF_DataAvail::CheckPageAnnots(FX_INT32 iPage, IFX_DownloadHints* pHints)
4360 {
4361 if (!m_objs_array.GetSize()) {
4362 m_objs_array.RemoveAll();
4363 m_objnum_array.RemoveAll();
4364 CPDF_Dictionary *pPageDict = m_pDocument->GetPage(iPage);
4365 if (!pPageDict) {
4366 return TRUE;
4367 }
4368 CPDF_Object *pAnnots = pPageDict->GetElement(FX_BSTRC("Annots"));
4369 if (!pAnnots) {
4370 return TRUE;
4371 }
4372 CFX_PtrArray obj_array;
4373 obj_array.Add(pAnnots);
4374 FX_BOOL bRet = IsObjectsAvail(obj_array, FALSE, pHints, m_objs_array);
4375 if (bRet) {
4376 m_objs_array.RemoveAll();
4377 }
4378 return bRet;
4379 } else {
4380 CFX_PtrArray new_objs_array;
4381 FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
4382 m_objs_array.RemoveAll();
4383 if (!bRet) {
4384 m_objs_array.Append(new_objs_array);
4385 }
4386 return bRet;
4387 }
4388 }
CheckLinearizedFirstPage(FX_INT32 iPage,IFX_DownloadHints * pHints)4389 FX_BOOL CPDF_DataAvail::CheckLinearizedFirstPage(FX_INT32 iPage, IFX_DownloadHints* pHints)
4390 {
4391 if (!m_bAnnotsLoad) {
4392 if (!CheckPageAnnots(iPage, pHints)) {
4393 return FALSE;
4394 }
4395 m_bAnnotsLoad = TRUE;
4396 }
4397 if (m_bAnnotsLoad)
4398 if (!CheckLinearizedData(pHints)) {
4399 return FALSE;
4400 }
4401 m_bPageLoadedOK = FALSE;
4402 return TRUE;
4403 }
HaveResourceAncestor(CPDF_Dictionary * pDict)4404 FX_BOOL CPDF_DataAvail::HaveResourceAncestor(CPDF_Dictionary *pDict)
4405 {
4406 CFX_AutoRestorer<int> restorer(&s_CurrentDataAvailRecursionDepth);
4407 if (++s_CurrentDataAvailRecursionDepth > kMaxDataAvailRecursionDepth) {
4408 return FALSE;
4409 }
4410 CPDF_Object *pParent = pDict->GetElement("Parent");
4411 if (!pParent) {
4412 return FALSE;
4413 }
4414 CPDF_Dictionary *pParentDict = pParent->GetDict();
4415 if (!pParentDict) {
4416 return FALSE;
4417 }
4418 CPDF_Object *pRet = pParentDict->GetElement("Resources");
4419 if (pRet) {
4420 m_pPageResource = pRet;
4421 return TRUE;
4422 }
4423 return HaveResourceAncestor(pParentDict);
4424 }
IsPageAvail(FX_INT32 iPage,IFX_DownloadHints * pHints)4425 FX_BOOL CPDF_DataAvail::IsPageAvail(FX_INT32 iPage, IFX_DownloadHints* pHints)
4426 {
4427 if (!m_pDocument) {
4428 return FALSE;
4429 }
4430 if (IsFirstCheck(iPage)) {
4431 m_bCurPageDictLoadOK = FALSE;
4432 m_bPageLoadedOK = FALSE;
4433 m_bAnnotsLoad = FALSE;
4434 m_bNeedDownLoadResource = FALSE;
4435 m_objs_array.RemoveAll();
4436 m_objnum_array.RemoveAll();
4437 }
4438 if (m_pagesLoadState == NULL) {
4439 m_pagesLoadState = new CFX_CMapDWordToDWord();
4440 }
4441 FX_DWORD dwPageLoad = 0;
4442 if (m_pagesLoadState->Lookup(iPage, dwPageLoad) && dwPageLoad != 0) {
4443 return TRUE;
4444 }
4445 if (m_bLinearized) {
4446 if ((FX_DWORD)iPage == m_dwFirstPageNo) {
4447 m_pagesLoadState->SetAt(iPage, TRUE);
4448 return TRUE;
4449 }
4450 if (!CheckLinearizedData(pHints)) {
4451 return FALSE;
4452 }
4453 if (m_bMainXRefLoadedOK) {
4454 if (m_bTotalLoadPageTree) {
4455 if (!LoadPages(pHints)) {
4456 return FALSE;
4457 }
4458 } else {
4459 if (!m_bCurPageDictLoadOK && !CheckPage(iPage, pHints)) {
4460 return FALSE;
4461 }
4462 }
4463 } else {
4464 if (!LoadAllFile(pHints)) {
4465 return FALSE;
4466 }
4467 ((CPDF_Parser *)m_pDocument->GetParser())->RebuildCrossRef();
4468 ResetFirstCheck(iPage);
4469 return TRUE;
4470 }
4471 } else {
4472 if (!m_bTotalLoadPageTree) {
4473 if (!m_bCurPageDictLoadOK && !CheckPage(iPage, pHints)) {
4474 return FALSE;
4475 }
4476 }
4477 }
4478 if (m_bHaveAcroForm && !m_bAcroFormLoad) {
4479 if (!CheckAcroFormSubObject(pHints)) {
4480 return FALSE;
4481 }
4482 m_bAcroFormLoad = TRUE;
4483 }
4484 if (!m_bPageLoadedOK) {
4485 if (!m_objs_array.GetSize()) {
4486 m_objs_array.RemoveAll();
4487 m_objnum_array.RemoveAll();
4488 m_pPageDict = m_pDocument->GetPage(iPage);
4489 if (!m_pPageDict) {
4490 ResetFirstCheck(iPage);
4491 return TRUE;
4492 }
4493 CFX_PtrArray obj_array;
4494 obj_array.Add(m_pPageDict);
4495 FX_BOOL bRet = IsObjectsAvail(obj_array, TRUE, pHints, m_objs_array);
4496 if (bRet) {
4497 m_objs_array.RemoveAll();
4498 m_bPageLoadedOK = TRUE;
4499 } else {
4500 return bRet;
4501 }
4502 } else {
4503 CFX_PtrArray new_objs_array;
4504 FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
4505 m_objs_array.RemoveAll();
4506 if (bRet) {
4507 m_bPageLoadedOK = TRUE;
4508 } else {
4509 m_objs_array.Append(new_objs_array);
4510 return bRet;
4511 }
4512 }
4513 }
4514 if (m_bPageLoadedOK) {
4515 if (!m_bAnnotsLoad) {
4516 if (!CheckPageAnnots(iPage, pHints)) {
4517 return FALSE;
4518 }
4519 m_bAnnotsLoad = TRUE;
4520 }
4521 }
4522 if (m_pPageDict && !m_bNeedDownLoadResource) {
4523 m_pPageResource = m_pPageDict->GetElement("Resources");
4524 if (!m_pPageResource) {
4525 m_bNeedDownLoadResource = HaveResourceAncestor(m_pPageDict);
4526 } else {
4527 m_bNeedDownLoadResource = TRUE;
4528 }
4529 }
4530 if (m_bNeedDownLoadResource) {
4531 FX_BOOL bRet = CheckResources(pHints);
4532 if (!bRet) {
4533 return FALSE;
4534 }
4535 m_bNeedDownLoadResource = FALSE;
4536 }
4537 m_bPageLoadedOK = FALSE;
4538 m_bAnnotsLoad = FALSE;
4539 m_bCurPageDictLoadOK = FALSE;
4540 ResetFirstCheck(iPage);
4541 m_pagesLoadState->SetAt(iPage, TRUE);
4542 return TRUE;
4543 }
CheckResources(IFX_DownloadHints * pHints)4544 FX_BOOL CPDF_DataAvail::CheckResources(IFX_DownloadHints* pHints)
4545 {
4546 if (!m_objs_array.GetSize()) {
4547 m_objs_array.RemoveAll();
4548 CFX_PtrArray obj_array;
4549 obj_array.Add(m_pPageResource);
4550 FX_BOOL bRet = IsObjectsAvail(obj_array, TRUE, pHints, m_objs_array);
4551 if (bRet) {
4552 m_objs_array.RemoveAll();
4553 }
4554 return bRet;
4555 } else {
4556 CFX_PtrArray new_objs_array;
4557 FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
4558 m_objs_array.RemoveAll();
4559 if (!bRet) {
4560 m_objs_array.Append(new_objs_array);
4561 }
4562 return bRet;
4563 }
4564 }
GetLinearizedMainXRefInfo(FX_FILESIZE * pPos,FX_DWORD * pSize)4565 void CPDF_DataAvail::GetLinearizedMainXRefInfo(FX_FILESIZE *pPos, FX_DWORD *pSize)
4566 {
4567 if (pPos) {
4568 *pPos = m_dwLastXRefOffset;
4569 }
4570 if (pSize) {
4571 *pSize = (FX_DWORD)(m_dwFileLen - m_dwLastXRefOffset);
4572 }
4573 }
IsFormAvail(IFX_DownloadHints * pHints)4574 FX_INT32 CPDF_DataAvail::IsFormAvail(IFX_DownloadHints *pHints)
4575 {
4576 if (!m_pDocument) {
4577 return PDFFORM_AVAIL;
4578 }
4579 if (!m_bLinearizedFormParamLoad) {
4580 CPDF_Dictionary *pRoot = m_pDocument->GetRoot();
4581 if (!pRoot) {
4582 return PDFFORM_AVAIL;
4583 }
4584 CPDF_Object *pAcroForm = pRoot->GetElement(FX_BSTRC("AcroForm"));
4585 if (!pAcroForm) {
4586 return PDFFORM_NOTEXIST;
4587 }
4588 if (!CheckLinearizedData(pHints)) {
4589 return PDFFORM_NOTAVAIL;
4590 }
4591 if (!m_objs_array.GetSize()) {
4592 m_objs_array.Add(pAcroForm->GetDict());
4593 }
4594 m_bLinearizedFormParamLoad = TRUE;
4595 }
4596 CFX_PtrArray new_objs_array;
4597 FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array);
4598 m_objs_array.RemoveAll();
4599 if (!bRet) {
4600 m_objs_array.Append(new_objs_array);
4601 return PDFFORM_NOTAVAIL;
4602 }
4603 return PDFFORM_AVAIL;
4604 }
AddObjNum(FX_DWORD dwObjNum)4605 void CPDF_SortObjNumArray::AddObjNum(FX_DWORD dwObjNum)
4606 {
4607 FX_INT32 iNext = 0;
4608 if (BinarySearch(dwObjNum, iNext)) {
4609 return;
4610 }
4611 m_number_array.InsertAt(iNext, dwObjNum);
4612 }
Find(FX_DWORD dwObjNum)4613 FX_BOOL CPDF_SortObjNumArray::Find(FX_DWORD dwObjNum)
4614 {
4615 FX_INT32 iNext = 0;
4616 return BinarySearch(dwObjNum, iNext);
4617 }
BinarySearch(FX_DWORD value,FX_INT32 & iNext)4618 FX_BOOL CPDF_SortObjNumArray::BinarySearch(FX_DWORD value, FX_INT32 &iNext)
4619 {
4620 FX_INT32 iLen = m_number_array.GetSize();
4621 FX_INT32 iLow = 0;
4622 FX_INT32 iHigh = iLen - 1;
4623 FX_INT32 iMid = 0;
4624 while (iLow <= iHigh) {
4625 iMid = (iLow + iHigh) / 2;
4626 if (m_number_array.GetAt(iMid) == value) {
4627 iNext = iMid;
4628 return TRUE;
4629 } else if (m_number_array.GetAt(iMid) > value) {
4630 iHigh = iMid - 1;
4631 } else if (m_number_array.GetAt(iMid) < value) {
4632 iLow = iMid + 1;
4633 }
4634 }
4635 iNext = iLow;
4636 return FALSE;
4637 }
~CPDF_PageNode()4638 CPDF_PageNode::~CPDF_PageNode()
4639 {
4640 FX_INT32 iSize = m_childNode.GetSize();
4641 for (FX_INT32 i = 0; i < iSize; ++i) {
4642 CPDF_PageNode *pNode = (CPDF_PageNode*)m_childNode[i];
4643 if (pNode) {
4644 delete pNode;
4645 }
4646 }
4647 m_childNode.RemoveAll();
4648 }
4649