1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/page/cpdf_streamparser.h"
8 
9 #include <limits.h>
10 
11 #include <algorithm>
12 #include <memory>
13 #include <sstream>
14 #include <utility>
15 
16 #include "core/fpdfapi/cpdf_modulemgr.h"
17 #include "core/fpdfapi/page/cpdf_docpagedata.h"
18 #include "core/fpdfapi/parser/cpdf_array.h"
19 #include "core/fpdfapi/parser/cpdf_boolean.h"
20 #include "core/fpdfapi/parser/cpdf_dictionary.h"
21 #include "core/fpdfapi/parser/cpdf_document.h"
22 #include "core/fpdfapi/parser/cpdf_name.h"
23 #include "core/fpdfapi/parser/cpdf_null.h"
24 #include "core/fpdfapi/parser/cpdf_number.h"
25 #include "core/fpdfapi/parser/cpdf_stream.h"
26 #include "core/fpdfapi/parser/cpdf_string.h"
27 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
28 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
29 #include "core/fxcodec/codec/ccodec_jpegmodule.h"
30 #include "core/fxcodec/codec/ccodec_scanlinedecoder.h"
31 #include "core/fxcrt/fx_extension.h"
32 
33 namespace {
34 
// Deepest dictionary/array nesting that ReadNextObject() will recurse into.
constexpr uint32_t kMaxNestedParsingLevel = 512;

// Upper bound on the number of bytes stored in the word buffer for one token.
constexpr uint32_t kMaxWordBuffer = 256;

// Longest string, in bytes, handed back by ReadString() / ReadHexString().
constexpr size_t kMaxStringLength = 32767;
38 
DecodeAllScanlines(std::unique_ptr<CCodec_ScanlineDecoder> pDecoder,uint8_t ** dest_buf,uint32_t * dest_size)39 uint32_t DecodeAllScanlines(std::unique_ptr<CCodec_ScanlineDecoder> pDecoder,
40                             uint8_t** dest_buf,
41                             uint32_t* dest_size) {
42   if (!pDecoder)
43     return FX_INVALID_OFFSET;
44   int ncomps = pDecoder->CountComps();
45   int bpc = pDecoder->GetBPC();
46   int width = pDecoder->GetWidth();
47   int height = pDecoder->GetHeight();
48   int pitch = (width * ncomps * bpc + 7) / 8;
49   if (height == 0 || pitch > (1 << 30) / height)
50     return FX_INVALID_OFFSET;
51 
52   *dest_buf = FX_Alloc2D(uint8_t, pitch, height);
53   *dest_size = pitch * height;  // Safe since checked alloc returned.
54   for (int row = 0; row < height; ++row) {
55     const uint8_t* pLine = pDecoder->GetScanline(row);
56     if (!pLine)
57       break;
58 
59     memcpy(*dest_buf + row * pitch, pLine, pitch);
60   }
61   return pDecoder->GetSrcOffset();
62 }
63 
DecodeInlineStream(const uint8_t * src_buf,uint32_t limit,int width,int height,const ByteString & decoder,CPDF_Dictionary * pParam,uint8_t ** dest_buf,uint32_t * dest_size)64 uint32_t DecodeInlineStream(const uint8_t* src_buf,
65                             uint32_t limit,
66                             int width,
67                             int height,
68                             const ByteString& decoder,
69                             CPDF_Dictionary* pParam,
70                             uint8_t** dest_buf,
71                             uint32_t* dest_size) {
72   if (decoder == "CCITTFaxDecode" || decoder == "CCF") {
73     std::unique_ptr<CCodec_ScanlineDecoder> pDecoder =
74         FPDFAPI_CreateFaxDecoder(src_buf, limit, width, height, pParam);
75     return DecodeAllScanlines(std::move(pDecoder), dest_buf, dest_size);
76   }
77   if (decoder == "ASCII85Decode" || decoder == "A85")
78     return A85Decode(src_buf, limit, dest_buf, dest_size);
79   if (decoder == "ASCIIHexDecode" || decoder == "AHx")
80     return HexDecode(src_buf, limit, dest_buf, dest_size);
81   if (decoder == "FlateDecode" || decoder == "Fl") {
82     return FPDFAPI_FlateOrLZWDecode(false, src_buf, limit, pParam, *dest_size,
83                                     dest_buf, dest_size);
84   }
85   if (decoder == "LZWDecode" || decoder == "LZW") {
86     return FPDFAPI_FlateOrLZWDecode(true, src_buf, limit, pParam, 0, dest_buf,
87                                     dest_size);
88   }
89   if (decoder == "DCTDecode" || decoder == "DCT") {
90     std::unique_ptr<CCodec_ScanlineDecoder> pDecoder =
91         CPDF_ModuleMgr::Get()->GetJpegModule()->CreateDecoder(
92             src_buf, limit, width, height, 0,
93             !pParam || pParam->GetIntegerFor("ColorTransform", 1));
94     return DecodeAllScanlines(std::move(pDecoder), dest_buf, dest_size);
95   }
96   if (decoder == "RunLengthDecode" || decoder == "RL")
97     return RunLengthDecode(src_buf, limit, dest_buf, dest_size);
98   *dest_size = 0;
99   *dest_buf = 0;
100   return 0xFFFFFFFF;
101 }
102 
103 }  // namespace
104 
CPDF_StreamParser(const uint8_t * pData,uint32_t dwSize)105 CPDF_StreamParser::CPDF_StreamParser(const uint8_t* pData, uint32_t dwSize)
106     : m_pBuf(pData), m_Size(dwSize), m_Pos(0), m_pPool(nullptr) {}
107 
CPDF_StreamParser(const uint8_t * pData,uint32_t dwSize,const WeakPtr<ByteStringPool> & pPool)108 CPDF_StreamParser::CPDF_StreamParser(const uint8_t* pData,
109                                      uint32_t dwSize,
110                                      const WeakPtr<ByteStringPool>& pPool)
111     : m_pBuf(pData), m_Size(dwSize), m_Pos(0), m_pPool(pPool) {}
112 
~CPDF_StreamParser()113 CPDF_StreamParser::~CPDF_StreamParser() {}
114 
ReadInlineStream(CPDF_Document * pDoc,std::unique_ptr<CPDF_Dictionary> pDict,CPDF_Object * pCSObj)115 std::unique_ptr<CPDF_Stream> CPDF_StreamParser::ReadInlineStream(
116     CPDF_Document* pDoc,
117     std::unique_ptr<CPDF_Dictionary> pDict,
118     CPDF_Object* pCSObj) {
119   if (m_Pos == m_Size)
120     return nullptr;
121 
122   if (PDFCharIsWhitespace(m_pBuf[m_Pos]))
123     m_Pos++;
124 
125   ByteString Decoder;
126   CPDF_Dictionary* pParam = nullptr;
127   CPDF_Object* pFilter = pDict->GetDirectObjectFor("Filter");
128   if (pFilter) {
129     if (CPDF_Array* pArray = pFilter->AsArray()) {
130       Decoder = pArray->GetStringAt(0);
131       CPDF_Array* pParams = pDict->GetArrayFor("DecodeParms");
132       if (pParams)
133         pParam = pParams->GetDictAt(0);
134     } else {
135       Decoder = pFilter->GetString();
136       pParam = pDict->GetDictFor("DecodeParms");
137     }
138   }
139   uint32_t width = pDict->GetIntegerFor("Width");
140   uint32_t height = pDict->GetIntegerFor("Height");
141   uint32_t OrigSize = 0;
142   if (pCSObj) {
143     uint32_t bpc = pDict->GetIntegerFor("BitsPerComponent");
144     uint32_t nComponents = 1;
145     CPDF_ColorSpace* pCS = pDoc->LoadColorSpace(pCSObj);
146     if (pCS) {
147       nComponents = pCS->CountComponents();
148       pDoc->GetPageData()->ReleaseColorSpace(pCSObj);
149     } else {
150       nComponents = 3;
151     }
152     uint32_t pitch = width;
153     if (bpc && pitch > INT_MAX / bpc)
154       return nullptr;
155 
156     pitch *= bpc;
157     if (nComponents && pitch > INT_MAX / nComponents)
158       return nullptr;
159 
160     pitch *= nComponents;
161     if (pitch > INT_MAX - 7)
162       return nullptr;
163 
164     pitch += 7;
165     pitch /= 8;
166     OrigSize = pitch;
167   } else {
168     if (width > INT_MAX - 7)
169       return nullptr;
170 
171     OrigSize = ((width + 7) / 8);
172   }
173   if (height && OrigSize > INT_MAX / height)
174     return nullptr;
175 
176   OrigSize *= height;
177   std::unique_ptr<uint8_t, FxFreeDeleter> pData;
178   uint32_t dwStreamSize;
179   if (Decoder.IsEmpty()) {
180     if (OrigSize > m_Size - m_Pos)
181       OrigSize = m_Size - m_Pos;
182     pData.reset(FX_Alloc(uint8_t, OrigSize));
183     memcpy(pData.get(), m_pBuf + m_Pos, OrigSize);
184     dwStreamSize = OrigSize;
185     m_Pos += OrigSize;
186   } else {
187     uint8_t* pIgnore = nullptr;
188     uint32_t dwDestSize = OrigSize;
189     dwStreamSize =
190         DecodeInlineStream(m_pBuf + m_Pos, m_Size - m_Pos, width, height,
191                            Decoder, pParam, &pIgnore, &dwDestSize);
192     FX_Free(pIgnore);
193     if (static_cast<int>(dwStreamSize) < 0)
194       return nullptr;
195 
196     uint32_t dwSavePos = m_Pos;
197     m_Pos += dwStreamSize;
198     while (1) {
199       uint32_t dwPrevPos = m_Pos;
200       CPDF_StreamParser::SyntaxType type = ParseNextElement();
201       if (type == CPDF_StreamParser::EndOfData)
202         break;
203 
204       if (type != CPDF_StreamParser::Keyword) {
205         dwStreamSize += m_Pos - dwPrevPos;
206         continue;
207       }
208       if (GetWord() == "EI") {
209         m_Pos = dwPrevPos;
210         break;
211       }
212       dwStreamSize += m_Pos - dwPrevPos;
213     }
214     m_Pos = dwSavePos;
215     pData.reset(FX_Alloc(uint8_t, dwStreamSize));
216     memcpy(pData.get(), m_pBuf + m_Pos, dwStreamSize);
217     m_Pos += dwStreamSize;
218   }
219   pDict->SetNewFor<CPDF_Number>("Length", static_cast<int>(dwStreamSize));
220   return pdfium::MakeUnique<CPDF_Stream>(std::move(pData), dwStreamSize,
221                                          std::move(pDict));
222 }
223 
ParseNextElement()224 CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() {
225   m_pLastObj.reset();
226   m_WordSize = 0;
227   if (!PositionIsInBounds())
228     return EndOfData;
229 
230   int ch = m_pBuf[m_Pos++];
231   while (1) {
232     while (PDFCharIsWhitespace(ch)) {
233       if (!PositionIsInBounds())
234         return EndOfData;
235 
236       ch = m_pBuf[m_Pos++];
237     }
238 
239     if (ch != '%')
240       break;
241 
242     while (1) {
243       if (!PositionIsInBounds())
244         return EndOfData;
245 
246       ch = m_pBuf[m_Pos++];
247       if (PDFCharIsLineEnding(ch))
248         break;
249     }
250   }
251 
252   if (PDFCharIsDelimiter(ch) && ch != '/') {
253     m_Pos--;
254     m_pLastObj = ReadNextObject(false, false, 0);
255     return Others;
256   }
257 
258   bool bIsNumber = true;
259   while (1) {
260     if (m_WordSize < kMaxWordBuffer)
261       m_WordBuffer[m_WordSize++] = ch;
262 
263     if (!PDFCharIsNumeric(ch))
264       bIsNumber = false;
265 
266     if (!PositionIsInBounds())
267       break;
268 
269     ch = m_pBuf[m_Pos++];
270 
271     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
272       m_Pos--;
273       break;
274     }
275   }
276 
277   m_WordBuffer[m_WordSize] = 0;
278   if (bIsNumber)
279     return Number;
280 
281   if (m_WordBuffer[0] == '/')
282     return Name;
283 
284   if (m_WordSize == 4) {
285     if (memcmp(m_WordBuffer, "true", 4) == 0) {
286       m_pLastObj = pdfium::MakeUnique<CPDF_Boolean>(true);
287       return Others;
288     }
289     if (memcmp(m_WordBuffer, "null", 4) == 0) {
290       m_pLastObj = pdfium::MakeUnique<CPDF_Null>();
291       return Others;
292     }
293   } else if (m_WordSize == 5) {
294     if (memcmp(m_WordBuffer, "false", 5) == 0) {
295       m_pLastObj = pdfium::MakeUnique<CPDF_Boolean>(false);
296       return Others;
297     }
298   }
299   return Keyword;
300 }
301 
ReadNextObject(bool bAllowNestedArray,bool bInArray,uint32_t dwRecursionLevel)302 std::unique_ptr<CPDF_Object> CPDF_StreamParser::ReadNextObject(
303     bool bAllowNestedArray,
304     bool bInArray,
305     uint32_t dwRecursionLevel) {
306   bool bIsNumber;
307   // Must get the next word before returning to avoid infinite loops.
308   GetNextWord(bIsNumber);
309   if (!m_WordSize || dwRecursionLevel > kMaxNestedParsingLevel)
310     return nullptr;
311 
312   if (bIsNumber) {
313     m_WordBuffer[m_WordSize] = 0;
314     return pdfium::MakeUnique<CPDF_Number>(
315         ByteStringView(m_WordBuffer, m_WordSize));
316   }
317 
318   int first_char = m_WordBuffer[0];
319   if (first_char == '/') {
320     ByteString name =
321         PDF_NameDecode(ByteStringView(m_WordBuffer + 1, m_WordSize - 1));
322     return pdfium::MakeUnique<CPDF_Name>(m_pPool, name);
323   }
324 
325   if (first_char == '(') {
326     ByteString str = ReadString();
327     return pdfium::MakeUnique<CPDF_String>(m_pPool, str, false);
328   }
329 
330   if (first_char == '<') {
331     if (m_WordSize == 1)
332       return pdfium::MakeUnique<CPDF_String>(m_pPool, ReadHexString(), true);
333 
334     auto pDict = pdfium::MakeUnique<CPDF_Dictionary>(m_pPool);
335     while (1) {
336       GetNextWord(bIsNumber);
337       if (m_WordSize == 2 && m_WordBuffer[0] == '>')
338         break;
339 
340       if (!m_WordSize || m_WordBuffer[0] != '/')
341         return nullptr;
342 
343       ByteString key =
344           PDF_NameDecode(ByteStringView(m_WordBuffer + 1, m_WordSize - 1));
345       std::unique_ptr<CPDF_Object> pObj =
346           ReadNextObject(true, bInArray, dwRecursionLevel + 1);
347       if (!pObj)
348         return nullptr;
349 
350       if (!key.IsEmpty())
351         pDict->SetFor(key, std::move(pObj));
352     }
353     return std::move(pDict);
354   }
355 
356   if (first_char == '[') {
357     if ((!bAllowNestedArray && bInArray))
358       return nullptr;
359 
360     auto pArray = pdfium::MakeUnique<CPDF_Array>();
361     while (1) {
362       std::unique_ptr<CPDF_Object> pObj =
363           ReadNextObject(bAllowNestedArray, true, dwRecursionLevel + 1);
364       if (pObj) {
365         pArray->Add(std::move(pObj));
366         continue;
367       }
368       if (!m_WordSize || m_WordBuffer[0] == ']')
369         break;
370     }
371     return std::move(pArray);
372   }
373 
374   if (m_WordSize == 5 && !memcmp(m_WordBuffer, "false", 5))
375     return pdfium::MakeUnique<CPDF_Boolean>(false);
376 
377   if (m_WordSize == 4) {
378     if (memcmp(m_WordBuffer, "true", 4) == 0)
379       return pdfium::MakeUnique<CPDF_Boolean>(true);
380     if (memcmp(m_WordBuffer, "null", 4) == 0)
381       return pdfium::MakeUnique<CPDF_Null>();
382   }
383 
384   return nullptr;
385 }
386 
387 // TODO(npm): the following methods are almost identical in cpdf_syntaxparser
GetNextWord(bool & bIsNumber)388 void CPDF_StreamParser::GetNextWord(bool& bIsNumber) {
389   m_WordSize = 0;
390   bIsNumber = true;
391   if (!PositionIsInBounds())
392     return;
393 
394   int ch = m_pBuf[m_Pos++];
395   while (1) {
396     while (PDFCharIsWhitespace(ch)) {
397       if (!PositionIsInBounds()) {
398         return;
399       }
400       ch = m_pBuf[m_Pos++];
401     }
402 
403     if (ch != '%')
404       break;
405 
406     while (1) {
407       if (!PositionIsInBounds())
408         return;
409       ch = m_pBuf[m_Pos++];
410       if (PDFCharIsLineEnding(ch))
411         break;
412     }
413   }
414 
415   if (PDFCharIsDelimiter(ch)) {
416     bIsNumber = false;
417     m_WordBuffer[m_WordSize++] = ch;
418     if (ch == '/') {
419       while (1) {
420         if (!PositionIsInBounds())
421           return;
422         ch = m_pBuf[m_Pos++];
423         if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
424           m_Pos--;
425           return;
426         }
427 
428         if (m_WordSize < kMaxWordBuffer)
429           m_WordBuffer[m_WordSize++] = ch;
430       }
431     } else if (ch == '<') {
432       if (!PositionIsInBounds())
433         return;
434       ch = m_pBuf[m_Pos++];
435       if (ch == '<')
436         m_WordBuffer[m_WordSize++] = ch;
437       else
438         m_Pos--;
439     } else if (ch == '>') {
440       if (!PositionIsInBounds())
441         return;
442       ch = m_pBuf[m_Pos++];
443       if (ch == '>')
444         m_WordBuffer[m_WordSize++] = ch;
445       else
446         m_Pos--;
447     }
448     return;
449   }
450 
451   while (1) {
452     if (m_WordSize < kMaxWordBuffer)
453       m_WordBuffer[m_WordSize++] = ch;
454     if (!PDFCharIsNumeric(ch))
455       bIsNumber = false;
456 
457     if (!PositionIsInBounds())
458       return;
459     ch = m_pBuf[m_Pos++];
460     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
461       m_Pos--;
462       break;
463     }
464   }
465 }
466 
ReadString()467 ByteString CPDF_StreamParser::ReadString() {
468   if (!PositionIsInBounds())
469     return ByteString();
470 
471   uint8_t ch = m_pBuf[m_Pos++];
472   std::ostringstream buf;
473   int parlevel = 0;
474   int status = 0;
475   int iEscCode = 0;
476   while (1) {
477     switch (status) {
478       case 0:
479         if (ch == ')') {
480           if (parlevel == 0) {
481             return ByteString(
482                 buf.str().c_str(),
483                 std::min(static_cast<size_t>(buf.tellp()), kMaxStringLength));
484           }
485           parlevel--;
486           buf << ')';
487         } else if (ch == '(') {
488           parlevel++;
489           buf << '(';
490         } else if (ch == '\\') {
491           status = 1;
492         } else {
493           buf << static_cast<char>(ch);
494         }
495         break;
496       case 1:
497         if (ch >= '0' && ch <= '7') {
498           iEscCode = FXSYS_DecimalCharToInt(static_cast<char>(ch));
499           status = 2;
500           break;
501         }
502         if (ch == '\r') {
503           status = 4;
504           break;
505         }
506         if (ch == '\n') {
507           // Do nothing.
508         } else if (ch == 'n') {
509           buf << '\n';
510         } else if (ch == 'r') {
511           buf << '\r';
512         } else if (ch == 't') {
513           buf << '\t';
514         } else if (ch == 'b') {
515           buf << '\b';
516         } else if (ch == 'f') {
517           buf << '\f';
518         } else {
519           buf << static_cast<char>(ch);
520         }
521         status = 0;
522         break;
523       case 2:
524         if (ch >= '0' && ch <= '7') {
525           iEscCode =
526               iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<char>(ch));
527           status = 3;
528         } else {
529           buf << static_cast<char>(iEscCode);
530           status = 0;
531           continue;
532         }
533         break;
534       case 3:
535         if (ch >= '0' && ch <= '7') {
536           iEscCode =
537               iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<char>(ch));
538           buf << static_cast<char>(iEscCode);
539           status = 0;
540         } else {
541           buf << static_cast<char>(iEscCode);
542           status = 0;
543           continue;
544         }
545         break;
546       case 4:
547         status = 0;
548         if (ch != '\n')
549           continue;
550         break;
551     }
552     if (!PositionIsInBounds())
553       break;
554 
555     ch = m_pBuf[m_Pos++];
556   }
557   if (PositionIsInBounds())
558     ++m_Pos;
559 
560   return ByteString(
561       buf.str().c_str(),
562       std::min(static_cast<size_t>(buf.tellp()), kMaxStringLength));
563 }
564 
ReadHexString()565 ByteString CPDF_StreamParser::ReadHexString() {
566   if (!PositionIsInBounds())
567     return ByteString();
568 
569   std::ostringstream buf;
570   bool bFirst = true;
571   int code = 0;
572   while (PositionIsInBounds()) {
573     int ch = m_pBuf[m_Pos++];
574 
575     if (ch == '>')
576       break;
577 
578     if (!std::isxdigit(ch))
579       continue;
580 
581     int val = FXSYS_HexCharToInt(ch);
582     if (bFirst) {
583       code = val * 16;
584     } else {
585       code += val;
586       buf << static_cast<uint8_t>(code);
587     }
588     bFirst = !bFirst;
589   }
590   if (!bFirst)
591     buf << static_cast<char>(code);
592 
593   return ByteString(
594       buf.str().c_str(),
595       std::min(static_cast<size_t>(buf.tellp()), kMaxStringLength));
596 }
597 
PositionIsInBounds() const598 bool CPDF_StreamParser::PositionIsInBounds() const {
599   return m_Pos < m_Size;
600 }
601