1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
8 
9 #include <algorithm>
10 #include <sstream>
11 #include <utility>
12 #include <vector>
13 
14 #include "core/fpdfapi/cpdf_modulemgr.h"
15 #include "core/fpdfapi/parser/cpdf_array.h"
16 #include "core/fpdfapi/parser/cpdf_boolean.h"
17 #include "core/fpdfapi/parser/cpdf_crypto_handler.h"
18 #include "core/fpdfapi/parser/cpdf_dictionary.h"
19 #include "core/fpdfapi/parser/cpdf_name.h"
20 #include "core/fpdfapi/parser/cpdf_null.h"
21 #include "core/fpdfapi/parser/cpdf_number.h"
22 #include "core/fpdfapi/parser/cpdf_read_validator.h"
23 #include "core/fpdfapi/parser/cpdf_reference.h"
24 #include "core/fpdfapi/parser/cpdf_stream.h"
25 #include "core/fpdfapi/parser/cpdf_string.h"
26 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
27 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
28 #include "core/fxcrt/autorestorer.h"
29 #include "core/fxcrt/cfx_binarybuf.h"
30 #include "core/fxcrt/fx_extension.h"
31 #include "third_party/base/numerics/safe_math.h"
32 #include "third_party/base/ptr_util.h"
33 
namespace {

// States of the literal-string scanner used by
// CPDF_SyntaxParser::ReadString().
enum class ReadStatus {
  Normal,          // Not inside an escape sequence.
  Backslash,       // A '\' has just been read.
  Octal,           // One octal digit of an escape has been read.
  FinishOctal,     // Two octal digits of an escape have been read.
  CarriageReturn,  // A '\' followed by '\r' has been read.
};

}  // namespace
39 
40 // static
41 int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;
42 
CPDF_SyntaxParser()43 CPDF_SyntaxParser::CPDF_SyntaxParser()
44     : CPDF_SyntaxParser(WeakPtr<ByteStringPool>()) {}
45 
CPDF_SyntaxParser(const WeakPtr<ByteStringPool> & pPool)46 CPDF_SyntaxParser::CPDF_SyntaxParser(const WeakPtr<ByteStringPool>& pPool)
47     : m_pFileAccess(nullptr), m_pPool(pPool) {}
48 
~CPDF_SyntaxParser()49 CPDF_SyntaxParser::~CPDF_SyntaxParser() {
50 }
51 
GetCharAt(FX_FILESIZE pos,uint8_t & ch)52 bool CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {
53   AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
54   m_Pos = pos;
55   return GetNextChar(ch);
56 }
57 
ReadBlockAt(FX_FILESIZE read_pos)58 bool CPDF_SyntaxParser::ReadBlockAt(FX_FILESIZE read_pos) {
59   if (read_pos >= m_FileLen)
60     return false;
61   size_t read_size = CPDF_ModuleMgr::kFileBufSize;
62   FX_SAFE_FILESIZE safe_end = read_pos;
63   safe_end += read_size;
64   if (!safe_end.IsValid() || safe_end.ValueOrDie() > m_FileLen)
65     read_size = m_FileLen - read_pos;
66 
67   m_pFileBuf.resize(read_size);
68   if (!m_pFileAccess->ReadBlock(m_pFileBuf.data(), read_pos, read_size)) {
69     m_pFileBuf.clear();
70     return false;
71   }
72 
73   m_BufOffset = read_pos;
74   return true;
75 }
76 
GetNextChar(uint8_t & ch)77 bool CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {
78   FX_FILESIZE pos = m_Pos + m_HeaderOffset;
79   if (pos >= m_FileLen)
80     return false;
81 
82   if (!IsPositionRead(pos) && !ReadBlockAt(pos))
83     return false;
84 
85   ch = m_pFileBuf[pos - m_BufOffset];
86   m_Pos++;
87   return true;
88 }
89 
GetCharAtBackward(FX_FILESIZE pos,uint8_t * ch)90 bool CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t* ch) {
91   pos += m_HeaderOffset;
92   if (pos >= m_FileLen)
93     return false;
94 
95   if (!IsPositionRead(pos)) {
96     FX_FILESIZE block_start = 0;
97     if (pos >= CPDF_ModuleMgr::kFileBufSize)
98       block_start = pos - CPDF_ModuleMgr::kFileBufSize + 1;
99     if (!ReadBlockAt(block_start) || !IsPositionRead(pos))
100       return false;
101   }
102   *ch = m_pFileBuf[pos - m_BufOffset];
103   return true;
104 }
105 
ReadBlock(uint8_t * pBuf,uint32_t size)106 bool CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, uint32_t size) {
107   if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size))
108     return false;
109   m_Pos += size;
110   return true;
111 }
112 
GetNextWordInternal(bool * bIsNumber)113 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {
114   m_WordSize = 0;
115   if (bIsNumber)
116     *bIsNumber = true;
117 
118   ToNextWord();
119   uint8_t ch;
120   if (!GetNextChar(ch))
121     return;
122 
123   if (PDFCharIsDelimiter(ch)) {
124     if (bIsNumber)
125       *bIsNumber = false;
126 
127     m_WordBuffer[m_WordSize++] = ch;
128     if (ch == '/') {
129       while (1) {
130         if (!GetNextChar(ch))
131           return;
132 
133         if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
134           m_Pos--;
135           return;
136         }
137 
138         if (m_WordSize < sizeof(m_WordBuffer) - 1)
139           m_WordBuffer[m_WordSize++] = ch;
140       }
141     } else if (ch == '<') {
142       if (!GetNextChar(ch))
143         return;
144 
145       if (ch == '<')
146         m_WordBuffer[m_WordSize++] = ch;
147       else
148         m_Pos--;
149     } else if (ch == '>') {
150       if (!GetNextChar(ch))
151         return;
152 
153       if (ch == '>')
154         m_WordBuffer[m_WordSize++] = ch;
155       else
156         m_Pos--;
157     }
158     return;
159   }
160 
161   while (1) {
162     if (m_WordSize < sizeof(m_WordBuffer) - 1)
163       m_WordBuffer[m_WordSize++] = ch;
164 
165     if (!PDFCharIsNumeric(ch)) {
166       if (bIsNumber)
167         *bIsNumber = false;
168     }
169 
170     if (!GetNextChar(ch))
171       return;
172 
173     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
174       m_Pos--;
175       break;
176     }
177   }
178 }
179 
ReadString()180 ByteString CPDF_SyntaxParser::ReadString() {
181   uint8_t ch;
182   if (!GetNextChar(ch))
183     return ByteString();
184 
185   std::ostringstream buf;
186   int32_t parlevel = 0;
187   ReadStatus status = ReadStatus::Normal;
188   int32_t iEscCode = 0;
189   while (1) {
190     switch (status) {
191       case ReadStatus::Normal:
192         if (ch == ')') {
193           if (parlevel == 0)
194             return ByteString(buf);
195           parlevel--;
196         } else if (ch == '(') {
197           parlevel++;
198         }
199         if (ch == '\\')
200           status = ReadStatus::Backslash;
201         else
202           buf << static_cast<char>(ch);
203         break;
204       case ReadStatus::Backslash:
205         if (ch >= '0' && ch <= '7') {
206           iEscCode = FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
207           status = ReadStatus::Octal;
208           break;
209         }
210 
211         if (ch == '\r') {
212           status = ReadStatus::CarriageReturn;
213           break;
214         }
215         if (ch == 'n') {
216           buf << '\n';
217         } else if (ch == 'r') {
218           buf << '\r';
219         } else if (ch == 't') {
220           buf << '\t';
221         } else if (ch == 'b') {
222           buf << '\b';
223         } else if (ch == 'f') {
224           buf << '\f';
225         } else if (ch != '\n') {
226           buf << static_cast<char>(ch);
227         }
228         status = ReadStatus::Normal;
229         break;
230       case ReadStatus::Octal:
231         if (ch >= '0' && ch <= '7') {
232           iEscCode =
233               iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
234           status = ReadStatus::FinishOctal;
235         } else {
236           buf << static_cast<char>(iEscCode);
237           status = ReadStatus::Normal;
238           continue;
239         }
240         break;
241       case ReadStatus::FinishOctal:
242         status = ReadStatus::Normal;
243         if (ch >= '0' && ch <= '7') {
244           iEscCode =
245               iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
246           buf << static_cast<char>(iEscCode);
247         } else {
248           buf << static_cast<char>(iEscCode);
249           continue;
250         }
251         break;
252       case ReadStatus::CarriageReturn:
253         status = ReadStatus::Normal;
254         if (ch != '\n')
255           continue;
256         break;
257     }
258 
259     if (!GetNextChar(ch))
260       break;
261   }
262 
263   GetNextChar(ch);
264   return ByteString(buf);
265 }
266 
ReadHexString()267 ByteString CPDF_SyntaxParser::ReadHexString() {
268   uint8_t ch;
269   if (!GetNextChar(ch))
270     return ByteString();
271 
272   std::ostringstream buf;
273   bool bFirst = true;
274   uint8_t code = 0;
275   while (1) {
276     if (ch == '>')
277       break;
278 
279     if (std::isxdigit(ch)) {
280       int val = FXSYS_HexCharToInt(ch);
281       if (bFirst) {
282         code = val * 16;
283       } else {
284         code += val;
285         buf << static_cast<char>(code);
286       }
287       bFirst = !bFirst;
288     }
289 
290     if (!GetNextChar(ch))
291       break;
292   }
293   if (!bFirst)
294     buf << static_cast<char>(code);
295 
296   return ByteString(buf);
297 }
298 
ToNextLine()299 void CPDF_SyntaxParser::ToNextLine() {
300   uint8_t ch;
301   while (GetNextChar(ch)) {
302     if (ch == '\n')
303       break;
304 
305     if (ch == '\r') {
306       GetNextChar(ch);
307       if (ch != '\n')
308         --m_Pos;
309       break;
310     }
311   }
312 }
313 
ToNextWord()314 void CPDF_SyntaxParser::ToNextWord() {
315   uint8_t ch;
316   if (!GetNextChar(ch))
317     return;
318 
319   while (1) {
320     while (PDFCharIsWhitespace(ch)) {
321       if (!GetNextChar(ch))
322         return;
323     }
324 
325     if (ch != '%')
326       break;
327 
328     while (1) {
329       if (!GetNextChar(ch))
330         return;
331       if (PDFCharIsLineEnding(ch))
332         break;
333     }
334   }
335   m_Pos--;
336 }
337 
GetNextWord(bool * bIsNumber)338 ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {
339   const CPDF_ReadValidator::Session read_session(GetValidator().Get());
340   GetNextWordInternal(bIsNumber);
341   ByteString ret;
342   if (!GetValidator()->has_read_problems())
343     ret = ByteString(m_WordBuffer, m_WordSize);
344   return ret;
345 }
346 
PeekNextWord(bool * bIsNumber)347 ByteString CPDF_SyntaxParser::PeekNextWord(bool* bIsNumber) {
348   AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
349   return GetNextWord(bIsNumber);
350 }
351 
GetKeyword()352 ByteString CPDF_SyntaxParser::GetKeyword() {
353   return GetNextWord(nullptr);
354 }
355 
GetObjectBody(CPDF_IndirectObjectHolder * pObjList)356 std::unique_ptr<CPDF_Object> CPDF_SyntaxParser::GetObjectBody(
357     CPDF_IndirectObjectHolder* pObjList) {
358   const CPDF_ReadValidator::Session read_session(GetValidator().Get());
359   auto result = GetObjectBodyInternal(pObjList, ParseType::kLoose);
360   if (GetValidator()->has_read_problems())
361     return nullptr;
362   return result;
363 }
364 
GetObjectBodyInternal(CPDF_IndirectObjectHolder * pObjList,ParseType parse_type)365 std::unique_ptr<CPDF_Object> CPDF_SyntaxParser::GetObjectBodyInternal(
366     CPDF_IndirectObjectHolder* pObjList,
367     ParseType parse_type) {
368   AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
369   if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
370     return nullptr;
371 
372   FX_FILESIZE SavedObjPos = m_Pos;
373   bool bIsNumber;
374   ByteString word = GetNextWord(&bIsNumber);
375   if (word.GetLength() == 0)
376     return nullptr;
377 
378   if (bIsNumber) {
379     FX_FILESIZE SavedPos = m_Pos;
380     ByteString nextword = GetNextWord(&bIsNumber);
381     if (bIsNumber) {
382       ByteString nextword2 = GetNextWord(nullptr);
383       if (nextword2 == "R") {
384         uint32_t refnum = FXSYS_atoui(word.c_str());
385         if (refnum == CPDF_Object::kInvalidObjNum)
386           return nullptr;
387         return pdfium::MakeUnique<CPDF_Reference>(pObjList, refnum);
388       }
389     }
390     m_Pos = SavedPos;
391     return pdfium::MakeUnique<CPDF_Number>(word.AsStringView());
392   }
393 
394   if (word == "true" || word == "false")
395     return pdfium::MakeUnique<CPDF_Boolean>(word == "true");
396 
397   if (word == "null")
398     return pdfium::MakeUnique<CPDF_Null>();
399 
400   if (word == "(") {
401     ByteString str = ReadString();
402     return pdfium::MakeUnique<CPDF_String>(m_pPool, str, false);
403   }
404   if (word == "<") {
405     ByteString str = ReadHexString();
406     return pdfium::MakeUnique<CPDF_String>(m_pPool, str, true);
407   }
408   if (word == "[") {
409     auto pArray = pdfium::MakeUnique<CPDF_Array>();
410     while (std::unique_ptr<CPDF_Object> pObj =
411                GetObjectBodyInternal(pObjList, ParseType::kLoose)) {
412       pArray->Add(std::move(pObj));
413     }
414     return (parse_type == ParseType::kLoose || m_WordBuffer[0] == ']')
415                ? std::move(pArray)
416                : nullptr;
417   }
418   if (word[0] == '/') {
419     return pdfium::MakeUnique<CPDF_Name>(
420         m_pPool,
421         PDF_NameDecode(ByteStringView(m_WordBuffer + 1, m_WordSize - 1)));
422   }
423   if (word == "<<") {
424     std::unique_ptr<CPDF_Dictionary> pDict =
425         pdfium::MakeUnique<CPDF_Dictionary>(m_pPool);
426     while (1) {
427       ByteString key = GetNextWord(nullptr);
428       if (key.IsEmpty())
429         return nullptr;
430 
431       FX_FILESIZE SavedPos = m_Pos - key.GetLength();
432       if (key == ">>")
433         break;
434 
435       if (key == "endobj") {
436         m_Pos = SavedPos;
437         break;
438       }
439       if (key[0] != '/')
440         continue;
441 
442       key = PDF_NameDecode(key);
443 
444       if (key.IsEmpty() && parse_type == ParseType::kLoose)
445         continue;
446 
447       std::unique_ptr<CPDF_Object> pObj =
448           GetObjectBodyInternal(pObjList, ParseType::kLoose);
449       if (!pObj) {
450         if (parse_type == ParseType::kLoose)
451           continue;
452 
453         ToNextLine();
454         return nullptr;
455       }
456 
457       if (!key.IsEmpty()) {
458         ByteString keyNoSlash(key.raw_str() + 1, key.GetLength() - 1);
459         pDict->SetFor(keyNoSlash, std::move(pObj));
460       }
461     }
462 
463     FX_FILESIZE SavedPos = m_Pos;
464     ByteString nextword = GetNextWord(nullptr);
465     if (nextword != "stream") {
466       m_Pos = SavedPos;
467       return std::move(pDict);
468     }
469     return ReadStream(std::move(pDict));
470   }
471   if (word == ">>")
472     m_Pos = SavedObjPos;
473 
474   return nullptr;
475 }
476 
GetIndirectObject(CPDF_IndirectObjectHolder * pObjList,ParseType parse_type)477 std::unique_ptr<CPDF_Object> CPDF_SyntaxParser::GetIndirectObject(
478     CPDF_IndirectObjectHolder* pObjList,
479     ParseType parse_type) {
480   const CPDF_ReadValidator::Session read_session(GetValidator().Get());
481   const FX_FILESIZE saved_pos = GetPos();
482   bool is_number = false;
483   ByteString word = GetNextWord(&is_number);
484   if (!is_number || word.IsEmpty()) {
485     SetPos(saved_pos);
486     return nullptr;
487   }
488   const uint32_t parser_objnum = FXSYS_atoui(word.c_str());
489 
490   word = GetNextWord(&is_number);
491   if (!is_number || word.IsEmpty()) {
492     SetPos(saved_pos);
493     return nullptr;
494   }
495   const uint32_t parser_gennum = FXSYS_atoui(word.c_str());
496 
497   if (GetKeyword() != "obj") {
498     SetPos(saved_pos);
499     return nullptr;
500   }
501 
502   std::unique_ptr<CPDF_Object> pObj =
503       GetObjectBodyInternal(pObjList, parse_type);
504   if (pObj) {
505     pObj->SetObjNum(parser_objnum);
506     pObj->SetGenNum(parser_gennum);
507   }
508 
509   return GetValidator()->has_read_problems() ? nullptr : std::move(pObj);
510 }
511 
ReadEOLMarkers(FX_FILESIZE pos)512 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {
513   unsigned char byte1 = 0;
514   unsigned char byte2 = 0;
515 
516   GetCharAt(pos, byte1);
517   GetCharAt(pos + 1, byte2);
518 
519   if (byte1 == '\r' && byte2 == '\n')
520     return 2;
521 
522   if (byte1 == '\r' || byte1 == '\n')
523     return 1;
524 
525   return 0;
526 }
527 
ReadStream(std::unique_ptr<CPDF_Dictionary> pDict)528 std::unique_ptr<CPDF_Stream> CPDF_SyntaxParser::ReadStream(
529     std::unique_ptr<CPDF_Dictionary> pDict) {
530   const CPDF_Number* pLenObj = ToNumber(pDict->GetDirectObjectFor("Length"));
531   FX_FILESIZE len = pLenObj ? pLenObj->GetInteger() : -1;
532 
533   // Locate the start of stream.
534   ToNextLine();
535   FX_FILESIZE streamStartPos = m_Pos;
536 
537   const ByteStringView kEndStreamStr("endstream");
538   const ByteStringView kEndObjStr("endobj");
539 
540     bool bSearchForKeyword = true;
541     if (len >= 0) {
542       pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
543       pos += len;
544       if (pos.IsValid() && pos.ValueOrDie() < m_FileLen)
545         m_Pos = pos.ValueOrDie();
546 
547       m_Pos += ReadEOLMarkers(m_Pos);
548       memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);
549       GetNextWordInternal(nullptr);
550       // Earlier version of PDF specification doesn't require EOL marker before
551       // 'endstream' keyword. If keyword 'endstream' follows the bytes in
552       // specified length, it signals the end of stream.
553       if (memcmp(m_WordBuffer, kEndStreamStr.raw_str(),
554                  kEndStreamStr.GetLength()) == 0) {
555         bSearchForKeyword = false;
556       }
557     }
558 
559     if (bSearchForKeyword) {
560       // If len is not available, len needs to be calculated
561       // by searching the keywords "endstream" or "endobj".
562       m_Pos = streamStartPos;
563       FX_FILESIZE endStreamOffset = 0;
564       while (endStreamOffset >= 0) {
565         endStreamOffset = FindTag(kEndStreamStr, 0);
566 
567         // Can't find "endstream".
568         if (endStreamOffset < 0)
569           break;
570 
571         // Stop searching when "endstream" is found.
572         if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen,
573                         kEndStreamStr, true)) {
574           endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength();
575           break;
576         }
577       }
578 
579       m_Pos = streamStartPos;
580       FX_FILESIZE endObjOffset = 0;
581       while (endObjOffset >= 0) {
582         endObjOffset = FindTag(kEndObjStr, 0);
583 
584         // Can't find "endobj".
585         if (endObjOffset < 0)
586           break;
587 
588         // Stop searching when "endobj" is found.
589         if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr,
590                         true)) {
591           endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength();
592           break;
593         }
594       }
595 
596       // Can't find "endstream" or "endobj".
597       if (endStreamOffset < 0 && endObjOffset < 0)
598         return nullptr;
599 
600       if (endStreamOffset < 0 && endObjOffset >= 0) {
601         // Correct the position of end stream.
602         endStreamOffset = endObjOffset;
603       } else if (endStreamOffset >= 0 && endObjOffset < 0) {
604         // Correct the position of end obj.
605         endObjOffset = endStreamOffset;
606       } else if (endStreamOffset > endObjOffset) {
607         endStreamOffset = endObjOffset;
608       }
609       len = endStreamOffset;
610 
611       int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
612       if (numMarkers == 2) {
613         len -= 2;
614       } else {
615         numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);
616         if (numMarkers == 1) {
617           len -= 1;
618         }
619       }
620       if (len < 0)
621         return nullptr;
622 
623       pDict->SetNewFor<CPDF_Number>("Length", static_cast<int>(len));
624     }
625     m_Pos = streamStartPos;
626 
627   // Read up to the end of the buffer. Note, we allow zero length streams as
628   // we need to pass them through when we are importing pages into a new
629   // document.
630   len = std::min(len, m_FileLen - m_Pos - m_HeaderOffset);
631   if (len < 0)
632     return nullptr;
633 
634   std::unique_ptr<uint8_t, FxFreeDeleter> pData;
635   if (len > 0) {
636     pData.reset(FX_Alloc(uint8_t, len));
637     ReadBlock(pData.get(), len);
638   }
639   auto pStream =
640       pdfium::MakeUnique<CPDF_Stream>(std::move(pData), len, std::move(pDict));
641   streamStartPos = m_Pos;
642   memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);
643   GetNextWordInternal(nullptr);
644 
645   int numMarkers = ReadEOLMarkers(m_Pos);
646   if (m_WordSize == static_cast<unsigned int>(kEndObjStr.GetLength()) &&
647       numMarkers != 0 &&
648       memcmp(m_WordBuffer, kEndObjStr.raw_str(), kEndObjStr.GetLength()) == 0) {
649     m_Pos = streamStartPos;
650   }
651   return pStream;
652 }
653 
InitParser(const RetainPtr<IFX_SeekableReadStream> & pFileAccess,uint32_t HeaderOffset)654 void CPDF_SyntaxParser::InitParser(
655     const RetainPtr<IFX_SeekableReadStream>& pFileAccess,
656     uint32_t HeaderOffset) {
657   ASSERT(pFileAccess);
658   return InitParserWithValidator(
659       pdfium::MakeRetain<CPDF_ReadValidator>(pFileAccess, nullptr),
660       HeaderOffset);
661 }
662 
InitParserWithValidator(const RetainPtr<CPDF_ReadValidator> & validator,uint32_t HeaderOffset)663 void CPDF_SyntaxParser::InitParserWithValidator(
664     const RetainPtr<CPDF_ReadValidator>& validator,
665     uint32_t HeaderOffset) {
666   ASSERT(validator);
667   m_pFileBuf.clear();
668   m_HeaderOffset = HeaderOffset;
669   m_FileLen = validator->GetSize();
670   m_Pos = 0;
671   m_pFileAccess = validator;
672   m_BufOffset = 0;
673 }
674 
GetDirectNum()675 uint32_t CPDF_SyntaxParser::GetDirectNum() {
676   bool bIsNumber;
677   GetNextWordInternal(&bIsNumber);
678   if (!bIsNumber)
679     return 0;
680 
681   m_WordBuffer[m_WordSize] = 0;
682   return FXSYS_atoui(reinterpret_cast<const char*>(m_WordBuffer));
683 }
684 
IsWholeWord(FX_FILESIZE startpos,FX_FILESIZE limit,const ByteStringView & tag,bool checkKeyword)685 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
686                                     FX_FILESIZE limit,
687                                     const ByteStringView& tag,
688                                     bool checkKeyword) {
689   const uint32_t taglen = tag.GetLength();
690 
691   bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);
692   bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&
693                      !PDFCharIsWhitespace(tag[taglen - 1]);
694 
695   uint8_t ch;
696   if (bCheckRight && startpos + (int32_t)taglen <= limit &&
697       GetCharAt(startpos + (int32_t)taglen, ch)) {
698     if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
699         (checkKeyword && PDFCharIsDelimiter(ch))) {
700       return false;
701     }
702   }
703 
704   if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {
705     if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
706         (checkKeyword && PDFCharIsDelimiter(ch))) {
707       return false;
708     }
709   }
710   return true;
711 }
712 
BackwardsSearchToWord(const ByteStringView & tag,FX_FILESIZE limit)713 bool CPDF_SyntaxParser::BackwardsSearchToWord(const ByteStringView& tag,
714                                               FX_FILESIZE limit) {
715   int32_t taglen = tag.GetLength();
716   if (taglen == 0)
717     return false;
718 
719   FX_FILESIZE pos = m_Pos;
720   int32_t offset = taglen - 1;
721   while (1) {
722     if (limit && pos <= m_Pos - limit)
723       return false;
724 
725     uint8_t byte;
726     if (!GetCharAtBackward(pos, &byte))
727       return false;
728 
729     if (byte == tag[offset]) {
730       offset--;
731       if (offset >= 0) {
732         pos--;
733         continue;
734       }
735       if (IsWholeWord(pos, limit, tag, false)) {
736         m_Pos = pos;
737         return true;
738       }
739     }
740     offset = byte == tag[taglen - 1] ? taglen - 2 : taglen - 1;
741     pos--;
742     if (pos < 0)
743       return false;
744   }
745 }
746 
FindTag(const ByteStringView & tag,FX_FILESIZE limit)747 FX_FILESIZE CPDF_SyntaxParser::FindTag(const ByteStringView& tag,
748                                        FX_FILESIZE limit) {
749   int32_t taglen = tag.GetLength();
750   int32_t match = 0;
751   limit += m_Pos;
752   FX_FILESIZE startpos = m_Pos;
753 
754   while (1) {
755     uint8_t ch;
756     if (!GetNextChar(ch))
757       return -1;
758 
759     if (ch == tag[match]) {
760       match++;
761       if (match == taglen)
762         return m_Pos - startpos - taglen;
763     } else {
764       match = ch == tag[0] ? 1 : 0;
765     }
766 
767     if (limit && m_Pos == limit)
768       return -1;
769   }
770   return -1;
771 }
772 
GetFileAccess() const773 RetainPtr<IFX_SeekableReadStream> CPDF_SyntaxParser::GetFileAccess() const {
774   return m_pFileAccess;
775 }
776 
IsPositionRead(FX_FILESIZE pos) const777 bool CPDF_SyntaxParser::IsPositionRead(FX_FILESIZE pos) const {
778   return m_BufOffset <= pos &&
779          pos < static_cast<FX_FILESIZE>(m_BufOffset + m_pFileBuf.size());
780 }
781