1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #ifndef CORE_INCLUDE_FPDFAPI_FPDF_PARSER_H_
8 #define CORE_INCLUDE_FPDFAPI_FPDF_PARSER_H_
9
10 #include <map>
11 #include <memory>
12 #include <set>
13
14 #include "core/include/fpdfapi/fpdf_objects.h"
15 #include "core/include/fxcrt/fx_system.h"
16
// Forward declarations for class types that the declarations in this header
// name only through pointers or references.
class CFX_Font;
class CFX_Matrix;
class CPDF_ColorSpace;
class CPDF_CryptoHandler;
class CPDF_DocPageData;
class CPDF_DocRenderData;
class CPDF_Font;
class CPDF_FontEncoding;
class CPDF_IccProfile;
class CPDF_Image;
class CPDF_Object;
class CPDF_Parser;
class CPDF_Pattern;
class CPDF_SecurityHandler;
class CPDF_StandardSecurityHandler;
class IFX_FileRead;
33
// Permission bit flags. Every FPDFPERM_* constant has exactly one bit set, so
// they can be OR-ed together and tested with a bitwise AND.
// NOTE(review): the values look like the user-access permission bits of the
// PDF encryption dictionary -- confirm against the PDF specification.
#define FPDFPERM_PRINT          0x0004
#define FPDFPERM_MODIFY         0x0008
#define FPDFPERM_EXTRACT        0x0010
#define FPDFPERM_ANNOT_FORM     0x0020
#define FPDFPERM_FILL_FORM      0x0100
#define FPDFPERM_EXTRACT_ACCESS 0x0200
#define FPDFPERM_ASSEMBLE       0x0400
#define FPDFPERM_PRINT_HIGH     0x0800

// NOTE(review): presumably the largest page count this library accepts --
// confirm against the code that uses it.
#define FPDF_PAGE_MAX_NUM 0xFFFFF
43
// Character classification table with one entry per byte value. Do not index
// it directly; call the PDFCharIs*() accessors that follow instead.
extern const char PDF_CharType[256];
46
PDFCharIsWhitespace(uint8_t c)47 inline bool PDFCharIsWhitespace(uint8_t c) {
48 return PDF_CharType[c] == 'W';
49 }
PDFCharIsNumeric(uint8_t c)50 inline bool PDFCharIsNumeric(uint8_t c) {
51 return PDF_CharType[c] == 'N';
52 }
PDFCharIsDelimiter(uint8_t c)53 inline bool PDFCharIsDelimiter(uint8_t c) {
54 return PDF_CharType[c] == 'D';
55 }
PDFCharIsOther(uint8_t c)56 inline bool PDFCharIsOther(uint8_t c) {
57 return PDF_CharType[c] == 'R';
58 }
59
// Returns true when |c| is a carriage return or a line feed. Unlike the other
// PDFCharIs*() helpers this one does not consult PDF_CharType.
inline bool PDFCharIsLineEnding(uint8_t c) {
  switch (c) {
    case '\r':
    case '\n':
      return true;
    default:
      return false;
  }
}
63
64 // Indexed by 8-bit char code, contains unicode code points.
65 extern const FX_WORD PDFDocEncoding[256];
66
67 class CPDF_Document : public CFX_PrivateData, public CPDF_IndirectObjectHolder {
68 public:
69 CPDF_Document();
70 explicit CPDF_Document(CPDF_Parser* pParser);
71
72 ~CPDF_Document();
73
GetParser()74 CPDF_Parser* GetParser() const { return m_pParser; }
75
GetRoot()76 CPDF_Dictionary* GetRoot() const { return m_pRootDict; }
77
GetInfo()78 CPDF_Dictionary* GetInfo() const { return m_pInfoDict; }
79
GetID(CFX_ByteString & id1,CFX_ByteString & id2)80 void GetID(CFX_ByteString& id1, CFX_ByteString& id2) const {
81 id1 = m_ID1;
82 id2 = m_ID2;
83 }
84
85 int GetPageCount() const;
86
87 CPDF_Dictionary* GetPage(int iPage);
88
89 int GetPageIndex(FX_DWORD objnum);
90
91 FX_DWORD GetUserPermissions(FX_BOOL bCheckRevision = FALSE) const;
92
93 FX_BOOL IsOwner() const;
94
GetPageData()95 CPDF_DocPageData* GetPageData() { return GetValidatePageData(); }
96
97 void ClearPageData();
98
99 void RemoveColorSpaceFromPageData(CPDF_Object* pObject);
100
GetRenderData()101 CPDF_DocRenderData* GetRenderData() { return GetValidateRenderData(); }
102
103 void ClearRenderData();
104
105 void ClearRenderFont();
106
107 FX_BOOL IsFormStream(FX_DWORD objnum, FX_BOOL& bForm) const;
108
109 // |pFontDict| must not be null.
110 CPDF_Font* LoadFont(CPDF_Dictionary* pFontDict);
111
112 CPDF_ColorSpace* LoadColorSpace(CPDF_Object* pCSObj,
113 CPDF_Dictionary* pResources = NULL);
114
115 CPDF_Pattern* LoadPattern(CPDF_Object* pObj,
116 FX_BOOL bShading,
117 const CFX_Matrix* matrix = NULL);
118
119 CPDF_Image* LoadImageF(CPDF_Object* pObj);
120
121 CPDF_StreamAcc* LoadFontFile(CPDF_Stream* pStream);
122
123 CPDF_IccProfile* LoadIccProfile(CPDF_Stream* pStream);
124
125 #if _FXM_PLATFORM_ == _FXM_PLATFORM_WINDOWS_
126
127 CPDF_Font* AddWindowsFont(LOGFONTA* pLogFont,
128 FX_BOOL bVert,
129 FX_BOOL bTranslateName = FALSE);
130 CPDF_Font* AddWindowsFont(LOGFONTW* pLogFont,
131 FX_BOOL bVert,
132 FX_BOOL bTranslateName = FALSE);
133 #endif
134 #if _FXM_PLATFORM_ == _FXM_PLATFORM_APPLE_
135 CPDF_Font* AddMacFont(CTFontRef pFont,
136 FX_BOOL bVert,
137 FX_BOOL bTranslateName = FALSE);
138 #endif
139
140 CPDF_Font* AddStandardFont(const FX_CHAR* font, CPDF_FontEncoding* pEncoding);
141
142 CPDF_Font* AddFont(CFX_Font* pFont, int charset, FX_BOOL bVert);
143
144 void CreateNewDoc();
145
146 CPDF_Dictionary* CreateNewPage(int iPage);
147
148 void DeletePage(int iPage);
149
150 void LoadDoc();
151 void LoadAsynDoc(CPDF_Dictionary* pLinearized);
152 void LoadPages();
153
154 protected:
155 CPDF_Dictionary* m_pRootDict;
156
157 CPDF_Dictionary* m_pInfoDict;
158
159 CFX_ByteString m_ID1;
160
161 CFX_ByteString m_ID2;
162
163 FX_BOOL m_bLinearized;
164
165 FX_DWORD m_dwFirstPageNo;
166
167 FX_DWORD m_dwFirstPageObjNum;
168
169 CFX_DWordArray m_PageList;
170
171 int _GetPageCount() const;
172 CPDF_Dictionary* _FindPDFPage(CPDF_Dictionary* pPages,
173 int iPage,
174 int nPagesToGo,
175 int level);
176 int _FindPageIndex(CPDF_Dictionary* pNode,
177 FX_DWORD& skip_count,
178 FX_DWORD objnum,
179 int& index,
180 int level = 0);
181 FX_BOOL IsContentUsedElsewhere(FX_DWORD objnum, CPDF_Dictionary* pPageDict);
182 FX_BOOL CheckOCGVisible(CPDF_Dictionary* pOCG, FX_BOOL bPrinting);
183 CPDF_DocPageData* GetValidatePageData();
184 CPDF_DocRenderData* GetValidateRenderData();
185 friend class CPDF_Creator;
186 friend class CPDF_Parser;
187 friend class CPDF_DataAvail;
188 friend class CPDF_OCContext;
189
190 CPDF_DocPageData* m_pDocPage;
191
192 CPDF_DocRenderData* m_pDocRender;
193 };
194
// Word-type codes, numbered consecutively from 0.
// NOTE(review): presumably the values reported through the |type| out-param
// of CPDF_SimpleParser::ParseWord -- confirm against the implementation.
#define PDFWORD_EOF       0
#define PDFWORD_NUMBER    1
#define PDFWORD_TEXT      2
#define PDFWORD_DELIMITER 3
#define PDFWORD_NAME      4
200 class CPDF_SimpleParser {
201 public:
202 CPDF_SimpleParser(const uint8_t* pData, FX_DWORD dwSize);
203
204 CPDF_SimpleParser(const CFX_ByteStringC& str);
205
206 CFX_ByteStringC GetWord();
207
208 FX_BOOL SearchToken(const CFX_ByteStringC& token);
209
210 FX_BOOL SkipWord(const CFX_ByteStringC& token);
211
212 FX_BOOL FindTagPair(const CFX_ByteStringC& start_token,
213 const CFX_ByteStringC& end_token,
214 FX_DWORD& start_pos,
215 FX_DWORD& end_pos);
216
217 FX_BOOL FindTagParam(const CFX_ByteStringC& token, int nParams);
218
GetPos()219 FX_DWORD GetPos() { return m_dwCurPos; }
220
SetPos(FX_DWORD pos)221 void SetPos(FX_DWORD pos) {
222 ASSERT(pos <= m_dwSize);
223 m_dwCurPos = pos;
224 }
225
226 private:
227 void ParseWord(const uint8_t*& pStart, FX_DWORD& dwSize, int& type);
228
229 const uint8_t* m_pData;
230
231 FX_DWORD m_dwSize;
232
233 FX_DWORD m_dwCurPos;
234 };
235 class CPDF_SyntaxParser {
236 public:
237 CPDF_SyntaxParser();
238 virtual ~CPDF_SyntaxParser();
239
240 void InitParser(IFX_FileRead* pFileAccess, FX_DWORD HeaderOffset);
241
SavePos()242 FX_FILESIZE SavePos() { return m_Pos; }
243
RestorePos(FX_FILESIZE pos)244 void RestorePos(FX_FILESIZE pos) { m_Pos = pos; }
245
246 CPDF_Object* GetObject(CPDF_IndirectObjectHolder* pObjList,
247 FX_DWORD objnum,
248 FX_DWORD gennum,
249 PARSE_CONTEXT* pContext,
250 FX_BOOL bDecrypt);
251
252 CPDF_Object* GetObjectByStrict(CPDF_IndirectObjectHolder* pObjList,
253 FX_DWORD objnum,
254 FX_DWORD gennum,
255 PARSE_CONTEXT* pContext);
256
257 int GetDirectNum();
258
259 CFX_ByteString GetString(FX_DWORD objnum, FX_DWORD gennum);
260
261 CFX_ByteString GetName();
262
263 CFX_ByteString GetKeyword();
264
265 void GetBinary(uint8_t* buffer, FX_DWORD size);
266
267 void ToNextLine();
268
269 void ToNextWord();
270
271 FX_BOOL SearchWord(const CFX_ByteStringC& word,
272 FX_BOOL bWholeWord,
273 FX_BOOL bForward,
274 FX_FILESIZE limit);
275
276 int SearchMultiWord(const CFX_ByteStringC& words,
277 FX_BOOL bWholeWord,
278 FX_FILESIZE limit);
279
280 FX_FILESIZE FindTag(const CFX_ByteStringC& tag, FX_FILESIZE limit);
281
SetEncrypt(CPDF_CryptoHandler * pCryptoHandler)282 void SetEncrypt(CPDF_CryptoHandler* pCryptoHandler) {
283 m_pCryptoHandler.reset(pCryptoHandler);
284 }
285
IsEncrypted()286 FX_BOOL IsEncrypted() { return m_pCryptoHandler != NULL; }
287
288 FX_BOOL GetCharAt(FX_FILESIZE pos, uint8_t& ch);
289
290 FX_BOOL ReadBlock(uint8_t* pBuf, FX_DWORD size);
291
292 CFX_ByteString GetNextWord(bool* bIsNumber);
293
294 protected:
295 friend class CPDF_Parser;
296 friend class CPDF_DataAvail;
297 friend class fpdf_parser_parser_ReadHexString_Test;
298
299 static const int kParserMaxRecursionDepth = 64;
300 static int s_CurrentRecursionDepth;
301
302 virtual FX_BOOL GetNextChar(uint8_t& ch);
303
304 FX_BOOL GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch);
305
306 void GetNextWordInternal(bool* bIsNumber);
307
308 bool IsWholeWord(FX_FILESIZE startpos,
309 FX_FILESIZE limit,
310 const CFX_ByteStringC& tag,
311 FX_BOOL checkKeyword);
312
313 CFX_ByteString ReadString();
314
315 CFX_ByteString ReadHexString();
316
317 unsigned int ReadEOLMarkers(FX_FILESIZE pos);
318
319 CPDF_Stream* ReadStream(CPDF_Dictionary* pDict,
320 PARSE_CONTEXT* pContext,
321 FX_DWORD objnum,
322 FX_DWORD gennum);
323
324 FX_FILESIZE m_Pos;
325
326 FX_BOOL m_bFileStream;
327
328 int m_MetadataObjnum;
329
330 IFX_FileRead* m_pFileAccess;
331
332 FX_DWORD m_HeaderOffset;
333
334 FX_FILESIZE m_FileLen;
335
336 uint8_t* m_pFileBuf;
337
338 FX_DWORD m_BufSize;
339
340 FX_FILESIZE m_BufOffset;
341
342 std::unique_ptr<CPDF_CryptoHandler> m_pCryptoHandler;
343
344 uint8_t m_WordBuffer[257];
345
346 FX_DWORD m_WordSize;
347
348 FX_FILESIZE m_dwWordPos;
349 };
350
// Parse-option constants with distinct single bits.
// NOTE(review): presumably OR-ed into PARSE_CONTEXT::m_Flags -- confirm
// against the parser implementation.
#define PDFPARSE_TYPEONLY 1
#define PDFPARSE_NOSTREAM 2
353 struct PARSE_CONTEXT {
354 uint32_t m_Flags;
355 FX_FILESIZE m_DictStart;
356 FX_FILESIZE m_DictEnd;
357 FX_FILESIZE m_DataStart;
358 FX_FILESIZE m_DataEnd;
359 };
360
// Parser status codes, numbered consecutively from 0 (success).
// NOTE(review): presumably the values returned by CPDF_Parser::StartParse and
// StartAsynParse -- confirm against the implementation.
#define PDFPARSE_ERROR_SUCCESS  0
#define PDFPARSE_ERROR_FILE     1
#define PDFPARSE_ERROR_FORMAT   2
#define PDFPARSE_ERROR_PASSWORD 3
#define PDFPARSE_ERROR_HANDLER  4
#define PDFPARSE_ERROR_CERT     5
367
368 class CPDF_Parser {
369 public:
370 CPDF_Parser();
371 ~CPDF_Parser();
372
373 FX_DWORD StartParse(IFX_FileRead* pFile,
374 FX_BOOL bReParse = FALSE,
375 FX_BOOL bOwnFileRead = TRUE);
376
377 void CloseParser(FX_BOOL bReParse = FALSE);
378
379 FX_DWORD GetPermissions(FX_BOOL bCheckRevision = FALSE);
380
381 FX_BOOL IsOwner();
382
SetPassword(const FX_CHAR * password)383 void SetPassword(const FX_CHAR* password) { m_Password = password; }
384
GetPassword()385 CFX_ByteString GetPassword() { return m_Password; }
386
GetCryptoHandler()387 CPDF_CryptoHandler* GetCryptoHandler() {
388 return m_Syntax.m_pCryptoHandler.get();
389 }
390
391 void SetSecurityHandler(CPDF_SecurityHandler* pSecurityHandler,
392 FX_BOOL bForced = FALSE);
393
GetTrailer()394 CPDF_Dictionary* GetTrailer() { return m_pTrailer; }
395
GetLastXRefOffset()396 FX_FILESIZE GetLastXRefOffset() { return m_LastXRefOffset; }
397
GetDocument()398 CPDF_Document* GetDocument() { return m_pDocument; }
399
400 FX_DWORD GetRootObjNum();
401 FX_DWORD GetInfoObjNum();
402 CPDF_Array* GetIDArray();
403
GetEncryptDict()404 CPDF_Dictionary* GetEncryptDict() { return m_pEncryptDict; }
405
406 CPDF_Object* ParseIndirectObject(CPDF_IndirectObjectHolder* pObjList,
407 FX_DWORD objnum,
408 PARSE_CONTEXT* pContext = NULL);
409 FX_DWORD GetLastObjNum() const;
410 bool IsValidObjectNumber(FX_DWORD objnum) const;
411 FX_BOOL IsFormStream(FX_DWORD objnum, FX_BOOL& bForm);
412
413 FX_FILESIZE GetObjectOffset(FX_DWORD objnum) const;
414
415 FX_FILESIZE GetObjectSize(FX_DWORD objnum) const;
416
417 void GetIndirectBinary(FX_DWORD objnum, uint8_t*& pBuffer, FX_DWORD& size);
418
GetFileAccess()419 IFX_FileRead* GetFileAccess() const { return m_Syntax.m_pFileAccess; }
420
GetFileVersion()421 int GetFileVersion() const { return m_FileVersion; }
422
IsXRefStream()423 FX_BOOL IsXRefStream() const { return m_bXRefStream; }
424 CPDF_Object* ParseIndirectObjectAt(CPDF_IndirectObjectHolder* pObjList,
425 FX_FILESIZE pos,
426 FX_DWORD objnum,
427 PARSE_CONTEXT* pContext);
428
429 CPDF_Object* ParseIndirectObjectAtByStrict(
430 CPDF_IndirectObjectHolder* pObjList,
431 FX_FILESIZE pos,
432 FX_DWORD objnum,
433 PARSE_CONTEXT* pContext,
434 FX_FILESIZE* pResultPos);
435
436 FX_DWORD StartAsynParse(IFX_FileRead* pFile,
437 FX_BOOL bReParse = FALSE,
438 FX_BOOL bOwnFileRead = TRUE);
439
GetFirstPageNo()440 FX_DWORD GetFirstPageNo() { return m_dwFirstPageNo; }
441
442 protected:
443 CPDF_Object* ParseDirect(CPDF_Object* pObj);
444
445 FX_BOOL LoadAllCrossRefV4(FX_FILESIZE pos);
446
447 FX_BOOL LoadAllCrossRefV5(FX_FILESIZE pos);
448
449 bool LoadCrossRefV4(FX_FILESIZE pos, FX_FILESIZE streampos, FX_BOOL bSkip);
450
451 FX_BOOL LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef);
452
453 CPDF_Dictionary* LoadTrailerV4();
454
455 FX_BOOL RebuildCrossRef();
456
457 FX_DWORD SetEncryptHandler();
458
459 void ReleaseEncryptHandler();
460
461 FX_BOOL LoadLinearizedAllCrossRefV4(FX_FILESIZE pos, FX_DWORD dwObjCount);
462
463 FX_BOOL LoadLinearizedCrossRefV4(FX_FILESIZE pos, FX_DWORD dwObjCount);
464
465 FX_BOOL LoadLinearizedAllCrossRefV5(FX_FILESIZE pos);
466
467 FX_DWORD LoadLinearizedMainXRefTable();
468
469 CPDF_StreamAcc* GetObjectStream(FX_DWORD number);
470
471 FX_BOOL IsLinearizedFile(IFX_FileRead* pFileAccess, FX_DWORD offset);
472
473 bool FindPosInOffsets(FX_FILESIZE pos) const;
474
475 void SetEncryptDictionary(CPDF_Dictionary* pDict);
476
477 FX_FILESIZE GetObjectPositionOrZero(FX_DWORD objnum) const;
478 void ShrinkObjectMap(FX_DWORD size);
479
480 CPDF_Document* m_pDocument;
481
482 CPDF_SyntaxParser m_Syntax;
483 FX_BOOL m_bOwnFileRead;
484 int m_FileVersion;
485
486 CPDF_Dictionary* m_pTrailer;
487
488 CPDF_Dictionary* m_pEncryptDict;
489
490 FX_FILESIZE m_LastXRefOffset;
491
492 FX_BOOL m_bXRefStream;
493
494 std::unique_ptr<CPDF_SecurityHandler> m_pSecurityHandler;
495
496 FX_BOOL m_bForceUseSecurityHandler;
497
498 CFX_ByteString m_bsRecipient;
499
500 CFX_ByteString m_FilePath;
501
502 CFX_ByteString m_Password;
503
504 struct ObjectInfo {
ObjectInfoObjectInfo505 ObjectInfo() : pos(0) {}
506
507 FX_FILESIZE pos;
508 // TODO(thestig): Use fields below in place of |m_V5Type| and |m_ObjVersion|
509 #if 0
510 uint8_t type;
511 uint16_t gennum;
512 #endif
513 };
514 std::map<FX_DWORD, ObjectInfo> m_ObjectInfo;
515
516 CFX_ByteArray m_V5Type;
517 CFX_WordArray m_ObjVersion;
518
519 CFX_FileSizeArray m_SortedOffset;
520
521 CFX_ArrayTemplate<CPDF_Dictionary*> m_Trailers;
522
523 FX_BOOL m_bVersionUpdated;
524
525 CPDF_Object* m_pLinearized;
526
527 FX_DWORD m_dwFirstPageNo;
528
529 FX_DWORD m_dwXrefStartObjNum;
530
531 // A map of object numbers to indirect streams. Map owns the streams.
532 CFX_MapPtrToPtr m_ObjectStreamMap;
533
534 // Mapping of object numbers to offsets. The offsets are relative to the first
535 // object in the stream.
536 using StreamObjectCache = std::map<FX_DWORD, FX_DWORD>;
537
538 // Mapping of streams to their object caches. This is valid as long as the
539 // streams in |m_ObjectStreamMap| are valid.
540 std::map<CPDF_StreamAcc*, StreamObjectCache> m_ObjCache;
541
542 // All indirect object numbers that are being parsed.
543 std::set<FX_DWORD> m_ParsingObjNums;
544
545 friend class CPDF_Creator;
546 friend class CPDF_DataAvail;
547 };
548
// Cipher identifiers, numbered consecutively from 0.
// NOTE(review): presumably the values reported through the |cipher| out-param
// of GetCryptInfo(); the difference between AES and AES2 is not visible in
// this header -- confirm against the crypto implementation.
#define FXCIPHER_NONE 0
#define FXCIPHER_RC4  1
#define FXCIPHER_AES  2
#define FXCIPHER_AES2 3
553 class CPDF_SecurityHandler {
554 public:
~CPDF_SecurityHandler()555 virtual ~CPDF_SecurityHandler() {}
556
557 virtual FX_BOOL OnInit(CPDF_Parser* pParser,
558 CPDF_Dictionary* pEncryptDict) = 0;
559
560 virtual FX_DWORD GetPermissions() = 0;
561
562 virtual FX_BOOL IsOwner() = 0;
563
564 virtual FX_BOOL GetCryptInfo(int& cipher,
565 const uint8_t*& buffer,
566 int& keylen) = 0;
567
IsMetadataEncrypted()568 virtual FX_BOOL IsMetadataEncrypted() { return TRUE; }
569
570 virtual CPDF_CryptoHandler* CreateCryptoHandler() = 0;
571
GetStandardHandler()572 virtual CPDF_StandardSecurityHandler* GetStandardHandler() { return NULL; }
573 };
// Default value for the |type| parameter of the public
// CPDF_StandardSecurityHandler::OnCreate() overloads.
#define PDF_ENCRYPT_CONTENT 0
575 class CPDF_StandardSecurityHandler : public CPDF_SecurityHandler {
576 public:
577 CPDF_StandardSecurityHandler();
578 ~CPDF_StandardSecurityHandler() override;
579
580 // CPDF_SecurityHandler
581 FX_BOOL OnInit(CPDF_Parser* pParser, CPDF_Dictionary* pEncryptDict) override;
582 FX_DWORD GetPermissions() override;
IsOwner()583 FX_BOOL IsOwner() override { return m_bOwner; }
584 FX_BOOL GetCryptInfo(int& cipher,
585 const uint8_t*& buffer,
586 int& keylen) override;
587 FX_BOOL IsMetadataEncrypted() override;
588 CPDF_CryptoHandler* CreateCryptoHandler() override;
GetStandardHandler()589 CPDF_StandardSecurityHandler* GetStandardHandler() override { return this; }
590
591 void OnCreate(CPDF_Dictionary* pEncryptDict,
592 CPDF_Array* pIdArray,
593 const uint8_t* user_pass,
594 FX_DWORD user_size,
595 const uint8_t* owner_pass,
596 FX_DWORD owner_size,
597 FX_DWORD type = PDF_ENCRYPT_CONTENT);
598
599 void OnCreate(CPDF_Dictionary* pEncryptDict,
600 CPDF_Array* pIdArray,
601 const uint8_t* user_pass,
602 FX_DWORD user_size,
603 FX_DWORD type = PDF_ENCRYPT_CONTENT);
604
605 CFX_ByteString GetUserPassword(const uint8_t* owner_pass, FX_DWORD pass_size);
606 CFX_ByteString GetUserPassword(const uint8_t* owner_pass,
607 FX_DWORD pass_size,
608 int32_t key_len);
GetVersion()609 int GetVersion() { return m_Version; }
GetRevision()610 int GetRevision() { return m_Revision; }
611
612 int CheckPassword(const uint8_t* password,
613 FX_DWORD pass_size,
614 FX_BOOL bOwner,
615 uint8_t* key);
616 int CheckPassword(const uint8_t* password,
617 FX_DWORD pass_size,
618 FX_BOOL bOwner,
619 uint8_t* key,
620 int key_len);
621
622 private:
623 int m_Version;
624
625 int m_Revision;
626
627 CPDF_Parser* m_pParser;
628
629 CPDF_Dictionary* m_pEncryptDict;
630
631 FX_BOOL LoadDict(CPDF_Dictionary* pEncryptDict);
632 FX_BOOL LoadDict(CPDF_Dictionary* pEncryptDict,
633 FX_DWORD type,
634 int& cipher,
635 int& key_len);
636
637 FX_BOOL CheckUserPassword(const uint8_t* password,
638 FX_DWORD pass_size,
639 FX_BOOL bIgnoreEncryptMeta,
640 uint8_t* key,
641 int32_t key_len);
642
643 FX_BOOL CheckOwnerPassword(const uint8_t* password,
644 FX_DWORD pass_size,
645 uint8_t* key,
646 int32_t key_len);
647 FX_BOOL AES256_CheckPassword(const uint8_t* password,
648 FX_DWORD size,
649 FX_BOOL bOwner,
650 uint8_t* key);
651 void AES256_SetPassword(CPDF_Dictionary* pEncryptDict,
652 const uint8_t* password,
653 FX_DWORD size,
654 FX_BOOL bOwner,
655 const uint8_t* key);
656 void AES256_SetPerms(CPDF_Dictionary* pEncryptDict,
657 FX_DWORD permission,
658 FX_BOOL bEncryptMetadata,
659 const uint8_t* key);
660 void OnCreate(CPDF_Dictionary* pEncryptDict,
661 CPDF_Array* pIdArray,
662 const uint8_t* user_pass,
663 FX_DWORD user_size,
664 const uint8_t* owner_pass,
665 FX_DWORD owner_size,
666 FX_BOOL bDefault,
667 FX_DWORD type);
668 FX_BOOL CheckSecurity(int32_t key_len);
669
670 FX_BOOL m_bOwner;
671
672 FX_DWORD m_Permissions;
673
674 int m_Cipher;
675
676 uint8_t m_EncryptKey[32];
677
678 int m_KeyLen;
679 };
680 class CPDF_CryptoHandler {
681 public:
~CPDF_CryptoHandler()682 virtual ~CPDF_CryptoHandler() {}
683
684 virtual FX_BOOL Init(CPDF_Dictionary* pEncryptDict,
685 CPDF_SecurityHandler* pSecurityHandler) = 0;
686
687 virtual FX_DWORD DecryptGetSize(FX_DWORD src_size) = 0;
688
689 virtual void* DecryptStart(FX_DWORD objnum, FX_DWORD gennum) = 0;
690
691 virtual FX_BOOL DecryptStream(void* context,
692 const uint8_t* src_buf,
693 FX_DWORD src_size,
694 CFX_BinaryBuf& dest_buf) = 0;
695
696 virtual FX_BOOL DecryptFinish(void* context, CFX_BinaryBuf& dest_buf) = 0;
697
698 virtual FX_DWORD EncryptGetSize(FX_DWORD objnum,
699 FX_DWORD version,
700 const uint8_t* src_buf,
701 FX_DWORD src_size) = 0;
702
703 virtual FX_BOOL EncryptContent(FX_DWORD objnum,
704 FX_DWORD version,
705 const uint8_t* src_buf,
706 FX_DWORD src_size,
707 uint8_t* dest_buf,
708 FX_DWORD& dest_size) = 0;
709
710 void Decrypt(FX_DWORD objnum, FX_DWORD version, CFX_ByteString& str);
711 };
712 class CPDF_StandardCryptoHandler : public CPDF_CryptoHandler {
713 public:
714 CPDF_StandardCryptoHandler();
715 ~CPDF_StandardCryptoHandler() override;
716
717 // CPDF_CryptoHandler
718 FX_BOOL Init(CPDF_Dictionary* pEncryptDict,
719 CPDF_SecurityHandler* pSecurityHandler) override;
720 FX_DWORD DecryptGetSize(FX_DWORD src_size) override;
721 void* DecryptStart(FX_DWORD objnum, FX_DWORD gennum) override;
722 FX_BOOL DecryptStream(void* context,
723 const uint8_t* src_buf,
724 FX_DWORD src_size,
725 CFX_BinaryBuf& dest_buf) override;
726 FX_BOOL DecryptFinish(void* context, CFX_BinaryBuf& dest_buf) override;
727 FX_DWORD EncryptGetSize(FX_DWORD objnum,
728 FX_DWORD version,
729 const uint8_t* src_buf,
730 FX_DWORD src_size) override;
731 FX_BOOL EncryptContent(FX_DWORD objnum,
732 FX_DWORD version,
733 const uint8_t* src_buf,
734 FX_DWORD src_size,
735 uint8_t* dest_buf,
736 FX_DWORD& dest_size) override;
737
738 FX_BOOL Init(int cipher, const uint8_t* key, int keylen);
739
740 protected:
741 virtual void CryptBlock(FX_BOOL bEncrypt,
742 FX_DWORD objnum,
743 FX_DWORD gennum,
744 const uint8_t* src_buf,
745 FX_DWORD src_size,
746 uint8_t* dest_buf,
747 FX_DWORD& dest_size);
748 virtual void* CryptStart(FX_DWORD objnum, FX_DWORD gennum, FX_BOOL bEncrypt);
749 virtual FX_BOOL CryptStream(void* context,
750 const uint8_t* src_buf,
751 FX_DWORD src_size,
752 CFX_BinaryBuf& dest_buf,
753 FX_BOOL bEncrypt);
754 virtual FX_BOOL CryptFinish(void* context,
755 CFX_BinaryBuf& dest_buf,
756 FX_BOOL bEncrypt);
757
758 uint8_t m_EncryptKey[32];
759
760 int m_KeyLen;
761
762 int m_Cipher;
763
764 uint8_t* m_pAESContext;
765 };
766 class CPDF_Point {
767 public:
CPDF_Point(FX_FLOAT xx,FX_FLOAT yy)768 CPDF_Point(FX_FLOAT xx, FX_FLOAT yy) {
769 x = xx;
770 y = yy;
771 }
772
773 FX_FLOAT x;
774
775 FX_FLOAT y;
776 };
777
778 #define CPDF_Rect CFX_FloatRect
779 CFX_ByteString PDF_NameDecode(const CFX_ByteStringC& orig);
780 CFX_ByteString PDF_NameDecode(const CFX_ByteString& orig);
781 CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig);
782 CFX_ByteString PDF_EncodeString(const CFX_ByteString& src,
783 FX_BOOL bHex = FALSE);
784 CFX_WideString PDF_DecodeText(const uint8_t* pData,
785 FX_DWORD size,
786 CFX_CharMap* pCharMap = NULL);
787 inline CFX_WideString PDF_DecodeText(const CFX_ByteString& bstr,
788 CFX_CharMap* pCharMap = NULL) {
789 return PDF_DecodeText((const uint8_t*)bstr.c_str(), bstr.GetLength(),
790 pCharMap);
791 }
792 CFX_ByteString PDF_EncodeText(const FX_WCHAR* pString,
793 int len = -1,
794 CFX_CharMap* pCharMap = NULL);
795 inline CFX_ByteString PDF_EncodeText(const CFX_WideString& str,
796 CFX_CharMap* pCharMap = NULL) {
797 return PDF_EncodeText(str.c_str(), str.GetLength(), pCharMap);
798 }
799 FX_FLOAT PDF_ClipFloat(FX_FLOAT f);
800 class CFDF_Document : public CPDF_IndirectObjectHolder {
801 public:
802 static CFDF_Document* CreateNewDoc();
803 static CFDF_Document* ParseFile(IFX_FileRead* pFile,
804 FX_BOOL bOwnFile = FALSE);
805 static CFDF_Document* ParseMemory(const uint8_t* pData, FX_DWORD size);
806
807 ~CFDF_Document();
808
809 FX_BOOL WriteBuf(CFX_ByteTextBuf& buf) const;
810
GetRoot()811 CPDF_Dictionary* GetRoot() const { return m_pRootDict; }
812
813 CFX_WideString GetWin32Path() const;
814
815 protected:
816 CFDF_Document();
817 void ParseStream(IFX_FileRead* pFile, FX_BOOL bOwnFile);
818 CPDF_Dictionary* m_pRootDict;
819 IFX_FileRead* m_pFile;
820 FX_BOOL m_bOwnFile;
821 };
822
823 CFX_WideString FPDF_FileSpec_GetWin32Path(const CPDF_Object* pFileSpec);
824 void FPDF_FileSpec_SetWin32Path(CPDF_Object* pFileSpec,
825 const CFX_WideString& fullpath);
826
827 void FlateEncode(const uint8_t* src_buf,
828 FX_DWORD src_size,
829 uint8_t*& dest_buf,
830 FX_DWORD& dest_size);
831 void FlateEncode(const uint8_t* src_buf,
832 FX_DWORD src_size,
833 int predictor,
834 int Colors,
835 int BitsPerComponent,
836 int Columns,
837 uint8_t*& dest_buf,
838 FX_DWORD& dest_size);
839 FX_DWORD FlateDecode(const uint8_t* src_buf,
840 FX_DWORD src_size,
841 uint8_t*& dest_buf,
842 FX_DWORD& dest_size);
843 FX_DWORD RunLengthDecode(const uint8_t* src_buf,
844 FX_DWORD src_size,
845 uint8_t*& dest_buf,
846 FX_DWORD& dest_size);
847 bool IsSignatureDict(const CPDF_Dictionary* pDict);
848
849 class CPDF_NumberTree {
850 public:
CPDF_NumberTree(CPDF_Dictionary * pRoot)851 CPDF_NumberTree(CPDF_Dictionary* pRoot) { m_pRoot = pRoot; }
852
853 CPDF_Object* LookupValue(int num);
854
855 protected:
856 CPDF_Dictionary* m_pRoot;
857 };
858
859 class IFX_FileAvail {
860 public:
~IFX_FileAvail()861 virtual ~IFX_FileAvail() {}
862 virtual FX_BOOL IsDataAvail(FX_FILESIZE offset, FX_DWORD size) = 0;
863 };
864 class IFX_DownloadHints {
865 public:
~IFX_DownloadHints()866 virtual ~IFX_DownloadHints() {}
867 virtual void AddSegment(FX_FILESIZE offset, FX_DWORD size) = 0;
868 };
869
870 class IPDF_DataAvail {
871 public:
872 // Must match PDF_DATA_* definitions in public/fpdf_dataavail.h, but cannot
873 // #include that header. fpdfsdk/src/fpdf_dataavail.cpp has static_asserts
874 // to make sure the two sets of values match.
875 enum DocAvailStatus {
876 DataError = -1, // PDF_DATA_ERROR
877 DataNotAvailable = 0, // PDF_DATA_NOTAVAIL
878 DataAvailable = 1, // PDF_DATA_AVAIL
879 };
880
881 // Must match PDF_*LINEAR* definitions in public/fpdf_dataavail.h, but cannot
882 // #include that header. fpdfsdk/src/fpdf_dataavail.cpp has static_asserts
883 // to make sure the two sets of values match.
884 enum DocLinearizationStatus {
885 LinearizationUnknown = -1, // PDF_LINEARIZATION_UNKNOWN
886 NotLinearized = 0, // PDF_NOT_LINEARIZED
887 Linearized = 1, // PDF_LINEARIZED
888 };
889
890 // Must match PDF_FORM_* definitions in public/fpdf_dataavail.h, but cannot
891 // #include that header. fpdfsdk/src/fpdf_dataavail.cpp has static_asserts
892 // to make sure the two sets of values match.
893 enum DocFormStatus {
894 FormError = -1, // PDF_FORM_ERROR
895 FormNotAvailable = 0, // PDF_FORM_NOTAVAIL
896 FormAvailable = 1, // PDF_FORM_AVAIL
897 FormNotExist = 2, // PDF_FORM_NOTEXIST
898 };
899
900 static IPDF_DataAvail* Create(IFX_FileAvail* pFileAvail,
901 IFX_FileRead* pFileRead);
~IPDF_DataAvail()902 virtual ~IPDF_DataAvail() {}
903
GetFileAvail()904 IFX_FileAvail* GetFileAvail() const { return m_pFileAvail; }
GetFileRead()905 IFX_FileRead* GetFileRead() const { return m_pFileRead; }
906
907 virtual DocAvailStatus IsDocAvail(IFX_DownloadHints* pHints) = 0;
908 virtual void SetDocument(CPDF_Document* pDoc) = 0;
909 virtual DocAvailStatus IsPageAvail(int iPage, IFX_DownloadHints* pHints) = 0;
910 virtual FX_BOOL IsLinearized() = 0;
911 virtual DocFormStatus IsFormAvail(IFX_DownloadHints* pHints) = 0;
912 virtual DocLinearizationStatus IsLinearizedPDF() = 0;
913 virtual void GetLinearizedMainXRefInfo(FX_FILESIZE* pPos,
914 FX_DWORD* pSize) = 0;
915
916 protected:
917 IPDF_DataAvail(IFX_FileAvail* pFileAvail, IFX_FileRead* pFileRead);
918
919 IFX_FileAvail* m_pFileAvail;
920 IFX_FileRead* m_pFileRead;
921 };
922
// Kind tag stored in CPDF_PageNode::m_type. Values are consecutive from 0.
// The spelling "UNKOWN" is part of the existing identifier and is kept.
enum PDF_PAGENODE_TYPE {
  PDF_PAGENODE_UNKOWN = 0,
  PDF_PAGENODE_PAGE = 1,
  PDF_PAGENODE_PAGES = 2,
  PDF_PAGENODE_ARRAY = 3,
};
929 class CPDF_PageNode {
930 public:
CPDF_PageNode()931 CPDF_PageNode() : m_type(PDF_PAGENODE_UNKOWN) {}
932 ~CPDF_PageNode();
933 PDF_PAGENODE_TYPE m_type;
934 FX_DWORD m_dwPageNo;
935 CFX_ArrayTemplate<CPDF_PageNode*> m_childNode;
936 };
// Status values, consecutive from 0. The numbering is spelled out so it stays
// visible at a glance; it matches the implicit numbering of the declaration
// order.
// NOTE(review): presumably the states of the data-availability state machine
// (CPDF_DataAvail) -- confirm against its implementation.
enum PDF_DATAAVAIL_STATUS {
  PDF_DATAAVAIL_HEADER = 0,
  PDF_DATAAVAIL_FIRSTPAGE = 1,
  PDF_DATAAVAIL_FIRSTPAGE_PREPARE = 2,
  PDF_DATAAVAIL_HINTTABLE = 3,
  PDF_DATAAVAIL_END = 4,
  PDF_DATAAVAIL_CROSSREF = 5,
  PDF_DATAAVAIL_CROSSREF_ITEM = 6,
  PDF_DATAAVAIL_CROSSREF_STREAM = 7,
  PDF_DATAAVAIL_TRAILER = 8,
  PDF_DATAAVAIL_LOADALLCROSSREF = 9,
  PDF_DATAAVAIL_ROOT = 10,
  PDF_DATAAVAIL_INFO = 11,
  PDF_DATAAVAIL_ACROFORM = 12,
  PDF_DATAAVAIL_ACROFORM_SUBOBJECT = 13,
  PDF_DATAAVAIL_PAGETREE = 14,
  PDF_DATAAVAIL_PAGE = 15,
  PDF_DATAAVAIL_PAGE_LATERLOAD = 16,
  PDF_DATAAVAIL_RESOURCES = 17,
  PDF_DATAAVAIL_DONE = 18,
  PDF_DATAAVAIL_ERROR = 19,
  PDF_DATAAVAIL_LOADALLFILE = 20,
  PDF_DATAAVAIL_TRAILER_APPEND = 21
};
961
962 // Public for testing.
963 FX_DWORD A85Decode(const uint8_t* src_buf,
964 FX_DWORD src_size,
965 uint8_t*& dest_buf,
966 FX_DWORD& dest_size);
967 // Public for testing.
968 FX_DWORD HexDecode(const uint8_t* src_buf,
969 FX_DWORD src_size,
970 uint8_t*& dest_buf,
971 FX_DWORD& dest_size);
972 // Public for testing.
973 FX_DWORD FPDFAPI_FlateOrLZWDecode(FX_BOOL bLZW,
974 const uint8_t* src_buf,
975 FX_DWORD src_size,
976 CPDF_Dictionary* pParams,
977 FX_DWORD estimated_size,
978 uint8_t*& dest_buf,
979 FX_DWORD& dest_size);
980 FX_BOOL PDF_DataDecode(const uint8_t* src_buf,
981 FX_DWORD src_size,
982 const CPDF_Dictionary* pDict,
983 uint8_t*& dest_buf,
984 FX_DWORD& dest_size,
985 CFX_ByteString& ImageEncoding,
986 CPDF_Dictionary*& pImageParms,
987 FX_DWORD estimated_size,
988 FX_BOOL bImageAcc);
989
990 #endif // CORE_INCLUDE_FPDFAPI_FPDF_PARSER_H_
991