1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
8
9 #include <algorithm>
10 #include <utility>
11 #include <vector>
12
13 #include "core/fpdfapi/cpdf_modulemgr.h"
14 #include "core/fpdfapi/parser/cpdf_array.h"
15 #include "core/fpdfapi/parser/cpdf_boolean.h"
16 #include "core/fpdfapi/parser/cpdf_crypto_handler.h"
17 #include "core/fpdfapi/parser/cpdf_dictionary.h"
18 #include "core/fpdfapi/parser/cpdf_name.h"
19 #include "core/fpdfapi/parser/cpdf_null.h"
20 #include "core/fpdfapi/parser/cpdf_number.h"
21 #include "core/fpdfapi/parser/cpdf_reference.h"
22 #include "core/fpdfapi/parser/cpdf_stream.h"
23 #include "core/fpdfapi/parser/cpdf_string.h"
24 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
25 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
26 #include "core/fxcrt/fx_ext.h"
27 #include "third_party/base/numerics/safe_math.h"
28 #include "third_party/base/ptr_util.h"
29
namespace {

// States of the literal-string scanner used by
// CPDF_SyntaxParser::ReadString().
enum class ReadStatus {
  Normal,          // Ordinary characters; parenthesis nesting is tracked.
  Backslash,       // A '\' has just been consumed.
  Octal,           // First octal digit of an escape has been read.
  FinishOctal,     // Second octal digit read; a third one may follow.
  CarriageReturn,  // "\<CR>" seen; a directly following LF is swallowed.
};

}  // namespace
35
// static
// Number of GetObject() / GetObjectForStrict() calls currently nested on the
// stack. Being a static member, the counter is shared by every parser
// instance; both functions give up (return nullptr) once it exceeds
// kParserMaxRecursionDepth.
// NOTE(review): this is a plain int with no synchronization visible in this
// file - confirm parsers are never driven from several threads at once.
int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;
38
// Default constructor: delegates to the pool-taking constructor, passing a
// default-constructed CFX_WeakPtr<CFX_ByteStringPool>.
CPDF_SyntaxParser::CPDF_SyntaxParser()
    : CPDF_SyntaxParser(CFX_WeakPtr<CFX_ByteStringPool>()) {}
41
// Remembers |pPool|, which is later handed to the CPDF_String, CPDF_Name and
// CPDF_Dictionary objects this parser creates, and sets the read-buffer size
// to CPDF_ModuleMgr::kFileBufSize. No file is attached and no buffer is
// allocated here; InitParser() does both.
CPDF_SyntaxParser::CPDF_SyntaxParser(
    const CFX_WeakPtr<CFX_ByteStringPool>& pPool)
    : m_MetadataObjnum(0),
      m_pFileAccess(nullptr),
      m_pFileBuf(nullptr),
      m_BufSize(CPDF_ModuleMgr::kFileBufSize),
      m_pPool(pPool) {}
49
// Releases the read buffer that InitParser() allocated with FX_Alloc.
CPDF_SyntaxParser::~CPDF_SyntaxParser() {
  FX_Free(m_pFileBuf);
}
53
GetCharAt(FX_FILESIZE pos,uint8_t & ch)54 bool CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {
55 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
56 m_Pos = pos;
57 return GetNextChar(ch);
58 }
59
ReadChar(FX_FILESIZE read_pos,uint32_t read_size)60 bool CPDF_SyntaxParser::ReadChar(FX_FILESIZE read_pos, uint32_t read_size) {
61 if (static_cast<FX_FILESIZE>(read_pos + read_size) > m_FileLen) {
62 if (m_FileLen < static_cast<FX_FILESIZE>(read_size)) {
63 read_pos = 0;
64 read_size = static_cast<uint32_t>(m_FileLen);
65 } else {
66 read_pos = m_FileLen - read_size;
67 }
68 }
69 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))
70 return false;
71
72 m_BufOffset = read_pos;
73 return true;
74 }
75
GetNextChar(uint8_t & ch)76 bool CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {
77 FX_FILESIZE pos = m_Pos + m_HeaderOffset;
78 if (pos >= m_FileLen)
79 return false;
80
81 if (CheckPosition(pos)) {
82 FX_FILESIZE read_pos = pos;
83 uint32_t read_size = m_BufSize;
84 read_size = std::min(read_size, static_cast<uint32_t>(m_FileLen));
85 if (!ReadChar(read_pos, read_size))
86 return false;
87 }
88 ch = m_pFileBuf[pos - m_BufOffset];
89 m_Pos++;
90 return true;
91 }
92
GetCharAtBackward(FX_FILESIZE pos,uint8_t & ch)93 bool CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) {
94 pos += m_HeaderOffset;
95 if (pos >= m_FileLen)
96 return false;
97
98 if (CheckPosition(pos)) {
99 FX_FILESIZE read_pos;
100 if (pos < static_cast<FX_FILESIZE>(m_BufSize))
101 read_pos = 0;
102 else
103 read_pos = pos - m_BufSize + 1;
104 uint32_t read_size = m_BufSize;
105 if (!ReadChar(read_pos, read_size))
106 return false;
107 }
108 ch = m_pFileBuf[pos - m_BufOffset];
109 return true;
110 }
111
ReadBlock(uint8_t * pBuf,uint32_t size)112 bool CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, uint32_t size) {
113 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size))
114 return false;
115 m_Pos += size;
116 return true;
117 }
118
GetNextWordInternal(bool * bIsNumber)119 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {
120 m_WordSize = 0;
121 if (bIsNumber)
122 *bIsNumber = true;
123
124 uint8_t ch;
125 if (!GetNextChar(ch))
126 return;
127
128 while (1) {
129 while (PDFCharIsWhitespace(ch)) {
130 if (!GetNextChar(ch))
131 return;
132 }
133
134 if (ch != '%')
135 break;
136
137 while (1) {
138 if (!GetNextChar(ch))
139 return;
140 if (PDFCharIsLineEnding(ch))
141 break;
142 }
143 }
144
145 if (PDFCharIsDelimiter(ch)) {
146 if (bIsNumber)
147 *bIsNumber = false;
148
149 m_WordBuffer[m_WordSize++] = ch;
150 if (ch == '/') {
151 while (1) {
152 if (!GetNextChar(ch))
153 return;
154
155 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
156 m_Pos--;
157 return;
158 }
159
160 if (m_WordSize < sizeof(m_WordBuffer) - 1)
161 m_WordBuffer[m_WordSize++] = ch;
162 }
163 } else if (ch == '<') {
164 if (!GetNextChar(ch))
165 return;
166
167 if (ch == '<')
168 m_WordBuffer[m_WordSize++] = ch;
169 else
170 m_Pos--;
171 } else if (ch == '>') {
172 if (!GetNextChar(ch))
173 return;
174
175 if (ch == '>')
176 m_WordBuffer[m_WordSize++] = ch;
177 else
178 m_Pos--;
179 }
180 return;
181 }
182
183 while (1) {
184 if (m_WordSize < sizeof(m_WordBuffer) - 1)
185 m_WordBuffer[m_WordSize++] = ch;
186
187 if (!PDFCharIsNumeric(ch)) {
188 if (bIsNumber)
189 *bIsNumber = false;
190 }
191
192 if (!GetNextChar(ch))
193 return;
194
195 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
196 m_Pos--;
197 break;
198 }
199 }
200 }
201
ReadString()202 CFX_ByteString CPDF_SyntaxParser::ReadString() {
203 uint8_t ch;
204 if (!GetNextChar(ch))
205 return CFX_ByteString();
206
207 CFX_ByteTextBuf buf;
208 int32_t parlevel = 0;
209 ReadStatus status = ReadStatus::Normal;
210 int32_t iEscCode = 0;
211 while (1) {
212 switch (status) {
213 case ReadStatus::Normal:
214 if (ch == ')') {
215 if (parlevel == 0)
216 return buf.MakeString();
217 parlevel--;
218 } else if (ch == '(') {
219 parlevel++;
220 }
221 if (ch == '\\')
222 status = ReadStatus::Backslash;
223 else
224 buf.AppendChar(ch);
225 break;
226 case ReadStatus::Backslash:
227 if (ch >= '0' && ch <= '7') {
228 iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
229 status = ReadStatus::Octal;
230 break;
231 }
232
233 if (ch == 'n') {
234 buf.AppendChar('\n');
235 } else if (ch == 'r') {
236 buf.AppendChar('\r');
237 } else if (ch == 't') {
238 buf.AppendChar('\t');
239 } else if (ch == 'b') {
240 buf.AppendChar('\b');
241 } else if (ch == 'f') {
242 buf.AppendChar('\f');
243 } else if (ch == '\r') {
244 status = ReadStatus::CarriageReturn;
245 break;
246 } else if (ch != '\n') {
247 buf.AppendChar(ch);
248 }
249 status = ReadStatus::Normal;
250 break;
251 case ReadStatus::Octal:
252 if (ch >= '0' && ch <= '7') {
253 iEscCode =
254 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
255 status = ReadStatus::FinishOctal;
256 } else {
257 buf.AppendChar(iEscCode);
258 status = ReadStatus::Normal;
259 continue;
260 }
261 break;
262 case ReadStatus::FinishOctal:
263 status = ReadStatus::Normal;
264 if (ch >= '0' && ch <= '7') {
265 iEscCode =
266 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
267 buf.AppendChar(iEscCode);
268 } else {
269 buf.AppendChar(iEscCode);
270 continue;
271 }
272 break;
273 case ReadStatus::CarriageReturn:
274 status = ReadStatus::Normal;
275 if (ch != '\n')
276 continue;
277 break;
278 }
279
280 if (!GetNextChar(ch))
281 break;
282 }
283
284 GetNextChar(ch);
285 return buf.MakeString();
286 }
287
ReadHexString()288 CFX_ByteString CPDF_SyntaxParser::ReadHexString() {
289 uint8_t ch;
290 if (!GetNextChar(ch))
291 return CFX_ByteString();
292
293 CFX_ByteTextBuf buf;
294 bool bFirst = true;
295 uint8_t code = 0;
296 while (1) {
297 if (ch == '>')
298 break;
299
300 if (std::isxdigit(ch)) {
301 int val = FXSYS_toHexDigit(ch);
302 if (bFirst) {
303 code = val * 16;
304 } else {
305 code += val;
306 buf.AppendByte(code);
307 }
308 bFirst = !bFirst;
309 }
310
311 if (!GetNextChar(ch))
312 break;
313 }
314 if (!bFirst)
315 buf.AppendByte(code);
316
317 return buf.MakeString();
318 }
319
ToNextLine()320 void CPDF_SyntaxParser::ToNextLine() {
321 uint8_t ch;
322 while (GetNextChar(ch)) {
323 if (ch == '\n')
324 break;
325
326 if (ch == '\r') {
327 GetNextChar(ch);
328 if (ch != '\n')
329 --m_Pos;
330 break;
331 }
332 }
333 }
334
ToNextWord()335 void CPDF_SyntaxParser::ToNextWord() {
336 uint8_t ch;
337 if (!GetNextChar(ch))
338 return;
339
340 while (1) {
341 while (PDFCharIsWhitespace(ch)) {
342 if (!GetNextChar(ch))
343 return;
344 }
345
346 if (ch != '%')
347 break;
348
349 while (1) {
350 if (!GetNextChar(ch))
351 return;
352 if (PDFCharIsLineEnding(ch))
353 break;
354 }
355 }
356 m_Pos--;
357 }
358
GetNextWord(bool * bIsNumber)359 CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {
360 GetNextWordInternal(bIsNumber);
361 return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize);
362 }
363
// Returns the next token for callers that do not care whether it is numeric;
// equivalent to GetNextWord(nullptr).
CFX_ByteString CPDF_SyntaxParser::GetKeyword() {
  return GetNextWord(nullptr);
}
367
GetObject(CPDF_IndirectObjectHolder * pObjList,uint32_t objnum,uint32_t gennum,bool bDecrypt)368 std::unique_ptr<CPDF_Object> CPDF_SyntaxParser::GetObject(
369 CPDF_IndirectObjectHolder* pObjList,
370 uint32_t objnum,
371 uint32_t gennum,
372 bool bDecrypt) {
373 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
374 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
375 return nullptr;
376
377 FX_FILESIZE SavedObjPos = m_Pos;
378 bool bIsNumber;
379 CFX_ByteString word = GetNextWord(&bIsNumber);
380 if (word.GetLength() == 0)
381 return nullptr;
382
383 if (bIsNumber) {
384 FX_FILESIZE SavedPos = m_Pos;
385 CFX_ByteString nextword = GetNextWord(&bIsNumber);
386 if (bIsNumber) {
387 CFX_ByteString nextword2 = GetNextWord(nullptr);
388 if (nextword2 == "R") {
389 uint32_t objnum = FXSYS_atoui(word.c_str());
390 if (objnum == CPDF_Object::kInvalidObjNum)
391 return nullptr;
392 return pdfium::MakeUnique<CPDF_Reference>(pObjList, objnum);
393 }
394 }
395 m_Pos = SavedPos;
396 return pdfium::MakeUnique<CPDF_Number>(word.AsStringC());
397 }
398
399 if (word == "true" || word == "false")
400 return pdfium::MakeUnique<CPDF_Boolean>(word == "true");
401
402 if (word == "null")
403 return pdfium::MakeUnique<CPDF_Null>();
404
405 if (word == "(") {
406 CFX_ByteString str = ReadString();
407 if (m_pCryptoHandler && bDecrypt)
408 m_pCryptoHandler->Decrypt(objnum, gennum, str);
409 return pdfium::MakeUnique<CPDF_String>(m_pPool, str, false);
410 }
411 if (word == "<") {
412 CFX_ByteString str = ReadHexString();
413 if (m_pCryptoHandler && bDecrypt)
414 m_pCryptoHandler->Decrypt(objnum, gennum, str);
415 return pdfium::MakeUnique<CPDF_String>(m_pPool, str, true);
416 }
417 if (word == "[") {
418 std::unique_ptr<CPDF_Array> pArray = pdfium::MakeUnique<CPDF_Array>();
419 while (std::unique_ptr<CPDF_Object> pObj =
420 GetObject(pObjList, objnum, gennum, true)) {
421 pArray->Add(std::move(pObj));
422 }
423 return std::move(pArray);
424 }
425 if (word[0] == '/') {
426 return pdfium::MakeUnique<CPDF_Name>(
427 m_pPool,
428 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
429 }
430 if (word == "<<") {
431 int32_t nKeys = 0;
432 FX_FILESIZE dwSignValuePos = 0;
433 std::unique_ptr<CPDF_Dictionary> pDict =
434 pdfium::MakeUnique<CPDF_Dictionary>(m_pPool);
435 while (1) {
436 CFX_ByteString key = GetNextWord(nullptr);
437 if (key.IsEmpty())
438 return nullptr;
439
440 FX_FILESIZE SavedPos = m_Pos - key.GetLength();
441 if (key == ">>")
442 break;
443
444 if (key == "endobj") {
445 m_Pos = SavedPos;
446 break;
447 }
448 if (key[0] != '/')
449 continue;
450
451 ++nKeys;
452 key = PDF_NameDecode(key);
453 if (key.IsEmpty())
454 continue;
455
456 if (key == "/Contents")
457 dwSignValuePos = m_Pos;
458
459 std::unique_ptr<CPDF_Object> pObj =
460 GetObject(pObjList, objnum, gennum, true);
461 if (!pObj)
462 continue;
463
464 CFX_ByteString keyNoSlash(key.raw_str() + 1, key.GetLength() - 1);
465 pDict->SetFor(keyNoSlash, std::move(pObj));
466 }
467
468 // Only when this is a signature dictionary and has contents, we reset the
469 // contents to the un-decrypted form.
470 if (pDict->IsSignatureDict() && dwSignValuePos) {
471 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
472 m_Pos = dwSignValuePos;
473 pDict->SetFor("Contents", GetObject(pObjList, objnum, gennum, false));
474 }
475
476 FX_FILESIZE SavedPos = m_Pos;
477 CFX_ByteString nextword = GetNextWord(nullptr);
478 if (nextword != "stream") {
479 m_Pos = SavedPos;
480 return std::move(pDict);
481 }
482 return ReadStream(std::move(pDict), objnum, gennum);
483 }
484 if (word == ">>")
485 m_Pos = SavedObjPos;
486
487 return nullptr;
488 }
489
GetObjectForStrict(CPDF_IndirectObjectHolder * pObjList,uint32_t objnum,uint32_t gennum)490 std::unique_ptr<CPDF_Object> CPDF_SyntaxParser::GetObjectForStrict(
491 CPDF_IndirectObjectHolder* pObjList,
492 uint32_t objnum,
493 uint32_t gennum) {
494 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
495 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
496 return nullptr;
497
498 FX_FILESIZE SavedObjPos = m_Pos;
499 bool bIsNumber;
500 CFX_ByteString word = GetNextWord(&bIsNumber);
501 if (word.GetLength() == 0)
502 return nullptr;
503
504 if (bIsNumber) {
505 FX_FILESIZE SavedPos = m_Pos;
506 CFX_ByteString nextword = GetNextWord(&bIsNumber);
507 if (bIsNumber) {
508 CFX_ByteString nextword2 = GetNextWord(nullptr);
509 if (nextword2 == "R") {
510 uint32_t objnum = FXSYS_atoui(word.c_str());
511 if (objnum == CPDF_Object::kInvalidObjNum)
512 return nullptr;
513 return pdfium::MakeUnique<CPDF_Reference>(pObjList, objnum);
514 }
515 }
516 m_Pos = SavedPos;
517 return pdfium::MakeUnique<CPDF_Number>(word.AsStringC());
518 }
519
520 if (word == "true" || word == "false")
521 return pdfium::MakeUnique<CPDF_Boolean>(word == "true");
522
523 if (word == "null")
524 return pdfium::MakeUnique<CPDF_Null>();
525
526 if (word == "(") {
527 CFX_ByteString str = ReadString();
528 if (m_pCryptoHandler)
529 m_pCryptoHandler->Decrypt(objnum, gennum, str);
530 return pdfium::MakeUnique<CPDF_String>(m_pPool, str, false);
531 }
532 if (word == "<") {
533 CFX_ByteString str = ReadHexString();
534 if (m_pCryptoHandler)
535 m_pCryptoHandler->Decrypt(objnum, gennum, str);
536 return pdfium::MakeUnique<CPDF_String>(m_pPool, str, true);
537 }
538 if (word == "[") {
539 std::unique_ptr<CPDF_Array> pArray = pdfium::MakeUnique<CPDF_Array>();
540 while (std::unique_ptr<CPDF_Object> pObj =
541 GetObject(pObjList, objnum, gennum, true)) {
542 pArray->Add(std::move(pObj));
543 }
544 return m_WordBuffer[0] == ']' ? std::move(pArray) : nullptr;
545 }
546 if (word[0] == '/') {
547 return pdfium::MakeUnique<CPDF_Name>(
548 m_pPool,
549 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
550 }
551 if (word == "<<") {
552 std::unique_ptr<CPDF_Dictionary> pDict =
553 pdfium::MakeUnique<CPDF_Dictionary>(m_pPool);
554 while (1) {
555 FX_FILESIZE SavedPos = m_Pos;
556 CFX_ByteString key = GetNextWord(nullptr);
557 if (key.IsEmpty())
558 return nullptr;
559
560 if (key == ">>")
561 break;
562
563 if (key == "endobj") {
564 m_Pos = SavedPos;
565 break;
566 }
567 if (key[0] != '/')
568 continue;
569
570 key = PDF_NameDecode(key);
571 std::unique_ptr<CPDF_Object> obj(
572 GetObject(pObjList, objnum, gennum, true));
573 if (!obj) {
574 uint8_t ch;
575 while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) {
576 continue;
577 }
578 return nullptr;
579 }
580
581 if (key.GetLength() > 1) {
582 pDict->SetFor(CFX_ByteString(key.c_str() + 1, key.GetLength() - 1),
583 std::move(obj));
584 }
585 }
586
587 FX_FILESIZE SavedPos = m_Pos;
588 CFX_ByteString nextword = GetNextWord(nullptr);
589 if (nextword != "stream") {
590 m_Pos = SavedPos;
591 return std::move(pDict);
592 }
593 return ReadStream(std::move(pDict), objnum, gennum);
594 }
595 if (word == ">>")
596 m_Pos = SavedObjPos;
597
598 return nullptr;
599 }
600
ReadEOLMarkers(FX_FILESIZE pos)601 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {
602 unsigned char byte1 = 0;
603 unsigned char byte2 = 0;
604
605 GetCharAt(pos, byte1);
606 GetCharAt(pos + 1, byte2);
607
608 if (byte1 == '\r' && byte2 == '\n')
609 return 2;
610
611 if (byte1 == '\r' || byte1 == '\n')
612 return 1;
613
614 return 0;
615 }
616
// Builds a CPDF_Stream from the data that follows a "stream" keyword which
// the caller has already consumed.
//
// |pDict|  is the stream dictionary parsed just before the keyword; ownership
//          moves into the returned stream.
// |objnum|, |gennum| identify the enclosing indirect object and are handed to
//          the crypto handler.
//
// The data length is taken from the dictionary's "Length" entry. When no
// crypto handler applies, that length is verified by checking that
// "endstream" follows the data; if it does not (or no usable length exists),
// the length is recomputed by searching for "endstream" / "endobj" and the
// dictionary's "Length" is overwritten with the result.
//
// Returns nullptr when no non-negative length can be established.
std::unique_ptr<CPDF_Stream> CPDF_SyntaxParser::ReadStream(
    std::unique_ptr<CPDF_Dictionary> pDict,
    uint32_t objnum,
    uint32_t gennum) {
  CPDF_Object* pLenObj = pDict->GetObjectFor("Length");
  FX_FILESIZE len = -1;
  CPDF_Reference* pLenObjRef = ToReference(pLenObj);

  // True unless "Length" is a reference that either has no object list or
  // points back at this very object (|objnum|); in those cases the entry is
  // not evaluated and |len| stays -1.
  bool differingObjNum = !pLenObjRef || (pLenObjRef->GetObjList() &&
                                         pLenObjRef->GetRefObjNum() != objnum);
  if (pLenObj && differingObjNum)
    len = pLenObj->GetInteger();

  // Locate the start of stream.
  ToNextLine();
  FX_FILESIZE streamStartPos = m_Pos;

  const CFX_ByteStringC kEndStreamStr("endstream");
  const CFX_ByteStringC kEndObjStr("endobj");

  // The object whose number equals m_MetadataObjnum is never decrypted.
  CPDF_CryptoHandler* pCryptoHandler =
      objnum == m_MetadataObjnum ? nullptr : m_pCryptoHandler.get();
  // NOTE(review): when a crypto handler applies, the whole verification block
  // below is skipped and |len| from the dictionary is used as-is.
  if (!pCryptoHandler) {
    bool bSearchForKeyword = true;
    if (len >= 0) {
      // Jump over the claimed data; the jump is skipped when it would
      // overflow or reach the end of the file.
      pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
      pos += len;
      if (pos.IsValid() && pos.ValueOrDie() < m_FileLen)
        m_Pos = pos.ValueOrDie();

      m_Pos += ReadEOLMarkers(m_Pos);
      // Clear the part of the word buffer that is compared below, so bytes
      // left over from an earlier token cannot cause a match.
      FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);
      GetNextWordInternal(nullptr);
      // Earlier version of PDF specification doesn't require EOL marker before
      // 'endstream' keyword. If keyword 'endstream' follows the bytes in
      // specified length, it signals the end of stream.
      if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.raw_str(),
                       kEndStreamStr.GetLength()) == 0) {
        bSearchForKeyword = false;
      }
    }

    if (bSearchForKeyword) {
      // If len is not available, len needs to be calculated
      // by searching the keywords "endstream" or "endobj".
      m_Pos = streamStartPos;
      FX_FILESIZE endStreamOffset = 0;
      while (endStreamOffset >= 0) {
        endStreamOffset = FindTag(kEndStreamStr, 0);

        // Can't find "endstream".
        if (endStreamOffset < 0)
          break;

        // Stop searching when "endstream" is found.
        // FindTag() leaves m_Pos just past the match; its return value is
        // relative to where that particular call started, so the offset from
        // the stream start is recomputed from m_Pos here.
        if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen,
                        kEndStreamStr, true)) {
          endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength();
          break;
        }
      }

      // Same search again, this time for "endobj".
      m_Pos = streamStartPos;
      FX_FILESIZE endObjOffset = 0;
      while (endObjOffset >= 0) {
        endObjOffset = FindTag(kEndObjStr, 0);

        // Can't find "endobj".
        if (endObjOffset < 0)
          break;

        // Stop searching when "endobj" is found.
        if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr,
                        true)) {
          endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength();
          break;
        }
      }

      // Can't find "endstream" or "endobj".
      if (endStreamOffset < 0 && endObjOffset < 0)
        return nullptr;

      // Use whichever keyword was found; when both were, the earlier one
      // marks the end of the data.
      if (endStreamOffset < 0 && endObjOffset >= 0) {
        // Correct the position of end stream.
        endStreamOffset = endObjOffset;
      } else if (endStreamOffset >= 0 && endObjOffset < 0) {
        // Correct the position of end obj.
        endObjOffset = endStreamOffset;
      } else if (endStreamOffset > endObjOffset) {
        endStreamOffset = endObjOffset;
      }
      len = endStreamOffset;

      // An EOL marker directly in front of the keyword is not stream data:
      // drop a trailing CR LF, or else a single trailing CR or LF.
      int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
      if (numMarkers == 2) {
        len -= 2;
      } else {
        numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);
        if (numMarkers == 1) {
          len -= 1;
        }
      }
      if (len < 0)
        return nullptr;

      // Record the recomputed length in the dictionary.
      pDict->SetNewFor<CPDF_Number>("Length", static_cast<int>(len));
    }
    m_Pos = streamStartPos;
  }
  if (len < 0)
    return nullptr;

  std::unique_ptr<uint8_t, FxFreeDeleter> pData;
  if (len > 0) {
    pData.reset(FX_Alloc(uint8_t, len));
    // NOTE(review): the result of ReadBlock() is not checked here.
    ReadBlock(pData.get(), len);
    if (pCryptoHandler) {
      CFX_BinaryBuf dest_buf;
      dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len));

      void* context = pCryptoHandler->DecryptStart(objnum, gennum);
      pCryptoHandler->DecryptStream(context, pData.get(), len, dest_buf);
      pCryptoHandler->DecryptFinish(context, dest_buf);
      // From here on the stream holds the decrypted bytes and their size.
      len = dest_buf.GetSize();
      pData = dest_buf.DetachBuffer();
    }
  }

  auto pStream =
      pdfium::MakeUnique<CPDF_Stream>(std::move(pData), len, std::move(pDict));
  // |streamStartPos| is reused: it now marks the position right after the
  // stream data.
  streamStartPos = m_Pos;
  FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);
  GetNextWordInternal(nullptr);

  // If the word after the data is exactly "endobj" followed by an EOL marker,
  // rewind so the caller sees it; otherwise the word (normally "endstream")
  // stays consumed.
  int numMarkers = ReadEOLMarkers(m_Pos);
  if (m_WordSize == static_cast<unsigned int>(kEndObjStr.GetLength()) &&
      numMarkers != 0 &&
      FXSYS_memcmp(m_WordBuffer, kEndObjStr.raw_str(),
                   kEndObjStr.GetLength()) == 0) {
    m_Pos = streamStartPos;
  }
  return pStream;
}
761
InitParser(const CFX_RetainPtr<IFX_SeekableReadStream> & pFileAccess,uint32_t HeaderOffset)762 void CPDF_SyntaxParser::InitParser(
763 const CFX_RetainPtr<IFX_SeekableReadStream>& pFileAccess,
764 uint32_t HeaderOffset) {
765 FX_Free(m_pFileBuf);
766
767 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize);
768 m_HeaderOffset = HeaderOffset;
769 m_FileLen = pFileAccess->GetSize();
770 m_Pos = 0;
771 m_pFileAccess = pFileAccess;
772 m_BufOffset = 0;
773 pFileAccess->ReadBlock(m_pFileBuf, 0,
774 std::min(m_BufSize, static_cast<uint32_t>(m_FileLen)));
775 }
776
GetDirectNum()777 uint32_t CPDF_SyntaxParser::GetDirectNum() {
778 bool bIsNumber;
779 GetNextWordInternal(&bIsNumber);
780 if (!bIsNumber)
781 return 0;
782
783 m_WordBuffer[m_WordSize] = 0;
784 return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer));
785 }
786
IsWholeWord(FX_FILESIZE startpos,FX_FILESIZE limit,const CFX_ByteStringC & tag,bool checkKeyword)787 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
788 FX_FILESIZE limit,
789 const CFX_ByteStringC& tag,
790 bool checkKeyword) {
791 const uint32_t taglen = tag.GetLength();
792
793 bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);
794 bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&
795 !PDFCharIsWhitespace(tag[taglen - 1]);
796
797 uint8_t ch;
798 if (bCheckRight && startpos + (int32_t)taglen <= limit &&
799 GetCharAt(startpos + (int32_t)taglen, ch)) {
800 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
801 (checkKeyword && PDFCharIsDelimiter(ch))) {
802 return false;
803 }
804 }
805
806 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {
807 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
808 (checkKeyword && PDFCharIsDelimiter(ch))) {
809 return false;
810 }
811 }
812 return true;
813 }
814
815 // TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards
816 // and drop the bool.
SearchWord(const CFX_ByteStringC & tag,bool bWholeWord,bool bForward,FX_FILESIZE limit)817 bool CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,
818 bool bWholeWord,
819 bool bForward,
820 FX_FILESIZE limit) {
821 int32_t taglen = tag.GetLength();
822 if (taglen == 0)
823 return false;
824
825 FX_FILESIZE pos = m_Pos;
826 int32_t offset = 0;
827 if (!bForward)
828 offset = taglen - 1;
829
830 const uint8_t* tag_data = tag.raw_str();
831 uint8_t byte;
832 while (1) {
833 if (bForward) {
834 if (limit && pos >= m_Pos + limit)
835 return false;
836
837 if (!GetCharAt(pos, byte))
838 return false;
839
840 } else {
841 if (limit && pos <= m_Pos - limit)
842 return false;
843
844 if (!GetCharAtBackward(pos, byte))
845 return false;
846 }
847
848 if (byte == tag_data[offset]) {
849 if (bForward) {
850 offset++;
851 if (offset < taglen) {
852 pos++;
853 continue;
854 }
855 } else {
856 offset--;
857 if (offset >= 0) {
858 pos--;
859 continue;
860 }
861 }
862
863 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;
864 if (!bWholeWord || IsWholeWord(startpos, limit, tag, false)) {
865 m_Pos = startpos;
866 return true;
867 }
868 }
869
870 if (bForward) {
871 offset = byte == tag_data[0] ? 1 : 0;
872 pos++;
873 } else {
874 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1;
875 pos--;
876 }
877
878 if (pos < 0)
879 return false;
880 }
881
882 return false;
883 }
884
FindTag(const CFX_ByteStringC & tag,FX_FILESIZE limit)885 FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag,
886 FX_FILESIZE limit) {
887 int32_t taglen = tag.GetLength();
888 int32_t match = 0;
889 limit += m_Pos;
890 FX_FILESIZE startpos = m_Pos;
891
892 while (1) {
893 uint8_t ch;
894 if (!GetNextChar(ch))
895 return -1;
896
897 if (ch == tag[match]) {
898 match++;
899 if (match == taglen)
900 return m_Pos - startpos - taglen;
901 } else {
902 match = ch == tag[0] ? 1 : 0;
903 }
904
905 if (limit && m_Pos == limit)
906 return -1;
907 }
908 return -1;
909 }
910
// Takes ownership of |pCryptoHandler|, replacing any handler held before.
// The handler is used by GetObject() and GetObjectForStrict() to decrypt
// strings and by ReadStream() to decrypt stream data.
void CPDF_SyntaxParser::SetEncrypt(
    std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) {
  m_pCryptoHandler = std::move(pCryptoHandler);
}
915