1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/src/fpdfapi/fpdf_page/pageint.h"
8 
9 #include <limits.h>
10 
11 #include "core/include/fpdfapi/fpdf_module.h"
12 #include "core/include/fpdfapi/fpdf_page.h"
13 #include "core/include/fxcodec/fx_codec.h"
14 #include "core/include/fxcrt/fx_ext.h"
15 #include "core/include/fxcrt/fx_safe_types.h"
16 
namespace {

// Path-construction operators that ParsePathObject() and SkipPathObject()
// recognise directly, without going through the generic operator dispatch.

// One-character operators.
const char kPathOperatorSubpath = 'm';       // Starts a new subpath.
const char kPathOperatorLine = 'l';          // Straight line segment.
const char kPathOperatorCubicBezier1 = 'c';  // Bezier, six operands.
const char kPathOperatorCubicBezier2 = 'v';  // Bezier, first control point is
                                             // the current point.
const char kPathOperatorCubicBezier3 = 'y';  // Bezier, second control point is
                                             // the end point.
const char kPathOperatorClosePath = 'h';     // Closes the current subpath.

// The only two-character operator handled here (NUL-terminated literal).
const char kPathOperatorRectangle[] = "re";

}  // namespace
28 
29 class CPDF_StreamParserAutoClearer {
30  public:
CPDF_StreamParserAutoClearer(CPDF_StreamParser ** scoped_variable,CPDF_StreamParser * new_parser)31   CPDF_StreamParserAutoClearer(CPDF_StreamParser** scoped_variable,
32                                CPDF_StreamParser* new_parser)
33       : scoped_variable_(scoped_variable) {
34     *scoped_variable_ = new_parser;
35   }
~CPDF_StreamParserAutoClearer()36   ~CPDF_StreamParserAutoClearer() { *scoped_variable_ = NULL; }
37 
38  private:
39   CPDF_StreamParser** scoped_variable_;
40 };
Parse(const uint8_t * pData,FX_DWORD dwSize,FX_DWORD max_cost)41 FX_DWORD CPDF_StreamContentParser::Parse(const uint8_t* pData,
42                                          FX_DWORD dwSize,
43                                          FX_DWORD max_cost) {
44   if (m_Level > _FPDF_MAX_FORM_LEVEL_) {
45     return dwSize;
46   }
47   FX_DWORD InitObjCount = m_pObjectList->CountObjects();
48   CPDF_StreamParser syntax(pData, dwSize);
49   CPDF_StreamParserAutoClearer auto_clearer(&m_pSyntax, &syntax);
50   m_CompatCount = 0;
51   while (1) {
52     FX_DWORD cost = m_pObjectList->CountObjects() - InitObjCount;
53     if (max_cost && cost >= max_cost) {
54       break;
55     }
56     switch (syntax.ParseNextElement()) {
57       case CPDF_StreamParser::EndOfData:
58         return m_pSyntax->GetPos();
59       case CPDF_StreamParser::Keyword:
60         OnOperator((char*)syntax.GetWordBuf());
61         ClearAllParams();
62         break;
63       case CPDF_StreamParser::Number:
64         AddNumberParam((char*)syntax.GetWordBuf(), syntax.GetWordSize());
65         break;
66       case CPDF_StreamParser::Name:
67         AddNameParam((const FX_CHAR*)syntax.GetWordBuf() + 1,
68                      syntax.GetWordSize() - 1);
69         break;
70       default:
71         AddObjectParam(syntax.GetObject());
72     }
73   }
74   return m_pSyntax->GetPos();
75 }
76 
Handle_BeginImage()77 void CPDF_StreamContentParser::Handle_BeginImage() {
78   FX_FILESIZE savePos = m_pSyntax->GetPos();
79   CPDF_Dictionary* pDict = new CPDF_Dictionary;
80   while (1) {
81     CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement();
82     if (type == CPDF_StreamParser::Keyword) {
83       CFX_ByteString bsKeyword(m_pSyntax->GetWordBuf(),
84                                m_pSyntax->GetWordSize());
85       if (bsKeyword != "ID") {
86         m_pSyntax->SetPos(savePos);
87         pDict->Release();
88         return;
89       }
90     }
91     if (type != CPDF_StreamParser::Name) {
92       break;
93     }
94     CFX_ByteString key((const FX_CHAR*)m_pSyntax->GetWordBuf() + 1,
95                        m_pSyntax->GetWordSize() - 1);
96     std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pObj(
97         m_pSyntax->ReadNextObject());
98     if (!key.IsEmpty()) {
99       FX_DWORD dwObjNum = pObj ? pObj->GetObjNum() : 0;
100       if (dwObjNum)
101         pDict->SetAtReference(key, m_pDocument, dwObjNum);
102       else
103         pDict->SetAt(key, pObj.release());
104     }
105   }
106   PDF_ReplaceAbbr(pDict);
107   CPDF_Object* pCSObj = NULL;
108   if (pDict->KeyExist("ColorSpace")) {
109     pCSObj = pDict->GetElementValue("ColorSpace");
110     if (pCSObj->IsName()) {
111       CFX_ByteString name = pCSObj->GetString();
112       if (name != "DeviceRGB" && name != "DeviceGray" && name != "DeviceCMYK") {
113         pCSObj = FindResourceObj("ColorSpace", name);
114         if (pCSObj && !pCSObj->GetObjNum()) {
115           pCSObj = pCSObj->Clone();
116           pDict->SetAt("ColorSpace", pCSObj);
117         }
118       }
119     }
120   }
121   CPDF_Stream* pStream = m_pSyntax->ReadInlineStream(
122       m_pDocument, pDict, pCSObj, m_Options.m_bDecodeInlineImage);
123   while (1) {
124     CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement();
125     if (type == CPDF_StreamParser::EndOfData) {
126       break;
127     }
128     if (type != CPDF_StreamParser::Keyword) {
129       continue;
130     }
131     if (m_pSyntax->GetWordSize() == 2 && m_pSyntax->GetWordBuf()[0] == 'E' &&
132         m_pSyntax->GetWordBuf()[1] == 'I') {
133       break;
134     }
135   }
136   if (m_Options.m_bTextOnly) {
137     if (pStream) {
138       pStream->Release();
139     } else {
140       pDict->Release();
141     }
142     return;
143   }
144   pDict->SetAtName("Subtype", "Image");
145   CPDF_ImageObject* pImgObj = AddImage(pStream, NULL, TRUE);
146   if (!pImgObj) {
147     if (pStream) {
148       pStream->Release();
149     } else {
150       pDict->Release();
151     }
152   }
153 }
ParsePathObject()154 void CPDF_StreamContentParser::ParsePathObject() {
155   FX_FLOAT params[6] = {};
156   int nParams = 0;
157   int last_pos = m_pSyntax->GetPos();
158   while (1) {
159     CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement();
160     FX_BOOL bProcessed = TRUE;
161     switch (type) {
162       case CPDF_StreamParser::EndOfData:
163         return;
164       case CPDF_StreamParser::Keyword: {
165         int len = m_pSyntax->GetWordSize();
166         if (len == 1) {
167           switch (m_pSyntax->GetWordBuf()[0]) {
168             case kPathOperatorSubpath:
169               AddPathPoint(params[0], params[1], FXPT_MOVETO);
170               nParams = 0;
171               break;
172             case kPathOperatorLine:
173               AddPathPoint(params[0], params[1], FXPT_LINETO);
174               nParams = 0;
175               break;
176             case kPathOperatorCubicBezier1:
177               AddPathPoint(params[0], params[1], FXPT_BEZIERTO);
178               AddPathPoint(params[2], params[3], FXPT_BEZIERTO);
179               AddPathPoint(params[4], params[5], FXPT_BEZIERTO);
180               nParams = 0;
181               break;
182             case kPathOperatorCubicBezier2:
183               AddPathPoint(m_PathCurrentX, m_PathCurrentY, FXPT_BEZIERTO);
184               AddPathPoint(params[0], params[1], FXPT_BEZIERTO);
185               AddPathPoint(params[2], params[3], FXPT_BEZIERTO);
186               nParams = 0;
187               break;
188             case kPathOperatorCubicBezier3:
189               AddPathPoint(params[0], params[1], FXPT_BEZIERTO);
190               AddPathPoint(params[2], params[3], FXPT_BEZIERTO);
191               AddPathPoint(params[2], params[3], FXPT_BEZIERTO);
192               nParams = 0;
193               break;
194             case kPathOperatorClosePath:
195               Handle_ClosePath();
196               nParams = 0;
197               break;
198             default:
199               bProcessed = FALSE;
200               break;
201           }
202         } else if (len == 2) {
203           if (m_pSyntax->GetWordBuf()[0] == kPathOperatorRectangle[0] &&
204               m_pSyntax->GetWordBuf()[1] == kPathOperatorRectangle[1]) {
205             AddPathRect(params[0], params[1], params[2], params[3]);
206             nParams = 0;
207           } else {
208             bProcessed = FALSE;
209           }
210         } else {
211           bProcessed = FALSE;
212         }
213         if (bProcessed) {
214           last_pos = m_pSyntax->GetPos();
215         }
216         break;
217       }
218       case CPDF_StreamParser::Number: {
219         if (nParams == 6) {
220           break;
221         }
222         FX_BOOL bInteger;
223         int value;
224         FX_atonum(
225             CFX_ByteStringC(m_pSyntax->GetWordBuf(), m_pSyntax->GetWordSize()),
226             bInteger, &value);
227         params[nParams++] = bInteger ? (FX_FLOAT)value : *(FX_FLOAT*)&value;
228         break;
229       }
230       default:
231         bProcessed = FALSE;
232     }
233     if (!bProcessed) {
234       m_pSyntax->SetPos(last_pos);
235       return;
236     }
237   }
238 }
CPDF_StreamParser(const uint8_t * pData,FX_DWORD dwSize)239 CPDF_StreamParser::CPDF_StreamParser(const uint8_t* pData, FX_DWORD dwSize) {
240   m_pBuf = pData;
241   m_Size = dwSize;
242   m_Pos = 0;
243   m_pLastObj = NULL;
244 }
~CPDF_StreamParser()245 CPDF_StreamParser::~CPDF_StreamParser() {
246   if (m_pLastObj) {
247     m_pLastObj->Release();
248   }
249 }
_DecodeAllScanlines(ICodec_ScanlineDecoder * pDecoder,uint8_t * & dest_buf,FX_DWORD & dest_size)250 FX_DWORD _DecodeAllScanlines(ICodec_ScanlineDecoder* pDecoder,
251                              uint8_t*& dest_buf,
252                              FX_DWORD& dest_size) {
253   if (!pDecoder) {
254     return (FX_DWORD)-1;
255   }
256   int ncomps = pDecoder->CountComps();
257   int bpc = pDecoder->GetBPC();
258   int width = pDecoder->GetWidth();
259   int height = pDecoder->GetHeight();
260   int pitch = (width * ncomps * bpc + 7) / 8;
261   if (height == 0 || pitch > (1 << 30) / height) {
262     delete pDecoder;
263     return -1;
264   }
265   dest_buf = FX_Alloc2D(uint8_t, pitch, height);
266   dest_size = pitch * height;  // Safe since checked alloc returned.
267   for (int row = 0; row < height; row++) {
268     const uint8_t* pLine = pDecoder->GetScanline(row);
269     if (!pLine)
270       break;
271 
272     FXSYS_memcpy(dest_buf + row * pitch, pLine, pitch);
273   }
274   FX_DWORD srcoff = pDecoder->GetSrcOffset();
275   delete pDecoder;
276   return srcoff;
277 }
278 ICodec_ScanlineDecoder* FPDFAPI_CreateFaxDecoder(
279     const uint8_t* src_buf,
280     FX_DWORD src_size,
281     int width,
282     int height,
283     const CPDF_Dictionary* pParams);
284 
PDF_DecodeInlineStream(const uint8_t * src_buf,FX_DWORD limit,int width,int height,CFX_ByteString & decoder,CPDF_Dictionary * pParam,uint8_t * & dest_buf,FX_DWORD & dest_size)285 FX_DWORD PDF_DecodeInlineStream(const uint8_t* src_buf,
286                                 FX_DWORD limit,
287                                 int width,
288                                 int height,
289                                 CFX_ByteString& decoder,
290                                 CPDF_Dictionary* pParam,
291                                 uint8_t*& dest_buf,
292                                 FX_DWORD& dest_size) {
293   if (decoder == "CCITTFaxDecode" || decoder == "CCF") {
294     ICodec_ScanlineDecoder* pDecoder =
295         FPDFAPI_CreateFaxDecoder(src_buf, limit, width, height, pParam);
296     return _DecodeAllScanlines(pDecoder, dest_buf, dest_size);
297   }
298   if (decoder == "ASCII85Decode" || decoder == "A85") {
299     return A85Decode(src_buf, limit, dest_buf, dest_size);
300   }
301   if (decoder == "ASCIIHexDecode" || decoder == "AHx") {
302     return HexDecode(src_buf, limit, dest_buf, dest_size);
303   }
304   if (decoder == "FlateDecode" || decoder == "Fl") {
305     return FPDFAPI_FlateOrLZWDecode(FALSE, src_buf, limit, pParam, dest_size,
306                                     dest_buf, dest_size);
307   }
308   if (decoder == "LZWDecode" || decoder == "LZW") {
309     return FPDFAPI_FlateOrLZWDecode(TRUE, src_buf, limit, pParam, 0, dest_buf,
310                                     dest_size);
311   }
312   if (decoder == "DCTDecode" || decoder == "DCT") {
313     ICodec_ScanlineDecoder* pDecoder =
314         CPDF_ModuleMgr::Get()->GetJpegModule()->CreateDecoder(
315             src_buf, limit, width, height, 0,
316             pParam ? pParam->GetInteger("ColorTransform", 1) : 1);
317     return _DecodeAllScanlines(pDecoder, dest_buf, dest_size);
318   }
319   if (decoder == "RunLengthDecode" || decoder == "RL") {
320     return RunLengthDecode(src_buf, limit, dest_buf, dest_size);
321   }
322   dest_size = 0;
323   dest_buf = 0;
324   return (FX_DWORD)-1;
325 }
ReadInlineStream(CPDF_Document * pDoc,CPDF_Dictionary * pDict,CPDF_Object * pCSObj,FX_BOOL bDecode)326 CPDF_Stream* CPDF_StreamParser::ReadInlineStream(CPDF_Document* pDoc,
327                                                  CPDF_Dictionary* pDict,
328                                                  CPDF_Object* pCSObj,
329                                                  FX_BOOL bDecode) {
330   if (m_Pos == m_Size)
331     return nullptr;
332 
333   if (PDFCharIsWhitespace(m_pBuf[m_Pos]))
334     m_Pos++;
335 
336   CFX_ByteString Decoder;
337   CPDF_Dictionary* pParam = nullptr;
338   CPDF_Object* pFilter = pDict->GetElementValue("Filter");
339   if (pFilter) {
340     if (CPDF_Array* pArray = pFilter->AsArray()) {
341       Decoder = pArray->GetString(0);
342       CPDF_Array* pParams = pDict->GetArray("DecodeParms");
343       if (pParams)
344         pParam = pParams->GetDict(0);
345     } else {
346       Decoder = pFilter->GetString();
347       pParam = pDict->GetDict("DecodeParms");
348     }
349   }
350   FX_DWORD width = pDict->GetInteger("Width");
351   FX_DWORD height = pDict->GetInteger("Height");
352   FX_DWORD OrigSize = 0;
353   if (pCSObj) {
354     FX_DWORD bpc = pDict->GetInteger("BitsPerComponent");
355     FX_DWORD nComponents = 1;
356     CPDF_ColorSpace* pCS = pDoc->LoadColorSpace(pCSObj);
357     if (!pCS) {
358       nComponents = 3;
359     } else {
360       nComponents = pCS->CountComponents();
361       pDoc->GetPageData()->ReleaseColorSpace(pCSObj);
362     }
363     FX_DWORD pitch = width;
364     if (bpc && pitch > INT_MAX / bpc) {
365       return NULL;
366     }
367     pitch *= bpc;
368     if (nComponents && pitch > INT_MAX / nComponents) {
369       return NULL;
370     }
371     pitch *= nComponents;
372     if (pitch > INT_MAX - 7) {
373       return NULL;
374     }
375     pitch += 7;
376     pitch /= 8;
377     OrigSize = pitch;
378   } else {
379     if (width > INT_MAX - 7) {
380       return NULL;
381     }
382     OrigSize = ((width + 7) / 8);
383   }
384   if (height && OrigSize > INT_MAX / height) {
385     return NULL;
386   }
387   OrigSize *= height;
388   uint8_t* pData = NULL;
389   FX_DWORD dwStreamSize;
390   if (Decoder.IsEmpty()) {
391     if (OrigSize > m_Size - m_Pos) {
392       OrigSize = m_Size - m_Pos;
393     }
394     pData = FX_Alloc(uint8_t, OrigSize);
395     FXSYS_memcpy(pData, m_pBuf + m_Pos, OrigSize);
396     dwStreamSize = OrigSize;
397     m_Pos += OrigSize;
398   } else {
399     FX_DWORD dwDestSize = OrigSize;
400     dwStreamSize =
401         PDF_DecodeInlineStream(m_pBuf + m_Pos, m_Size - m_Pos, width, height,
402                                Decoder, pParam, pData, dwDestSize);
403     if ((int)dwStreamSize < 0) {
404       FX_Free(pData);
405       return NULL;
406     }
407     if (bDecode) {
408       m_Pos += dwStreamSize;
409       dwStreamSize = dwDestSize;
410       if (CPDF_Array* pArray = pFilter->AsArray()) {
411         pArray->RemoveAt(0);
412         CPDF_Array* pParams = pDict->GetArray("DecodeParms");
413         if (pParams)
414           pParams->RemoveAt(0);
415       } else {
416         pDict->RemoveAt("Filter");
417         pDict->RemoveAt("DecodeParms");
418       }
419     } else {
420       FX_Free(pData);
421       FX_DWORD dwSavePos = m_Pos;
422       m_Pos += dwStreamSize;
423       while (1) {
424         FX_DWORD dwPrevPos = m_Pos;
425         CPDF_StreamParser::SyntaxType type = ParseNextElement();
426         if (type == CPDF_StreamParser::EndOfData) {
427           break;
428         }
429         if (type != CPDF_StreamParser::Keyword) {
430           dwStreamSize += m_Pos - dwPrevPos;
431           continue;
432         }
433         if (GetWordSize() == 2 && GetWordBuf()[0] == 'E' &&
434             GetWordBuf()[1] == 'I') {
435           m_Pos = dwPrevPos;
436           break;
437         }
438         dwStreamSize += m_Pos - dwPrevPos;
439       }
440       m_Pos = dwSavePos;
441       pData = FX_Alloc(uint8_t, dwStreamSize);
442       FXSYS_memcpy(pData, m_pBuf + m_Pos, dwStreamSize);
443       m_Pos += dwStreamSize;
444     }
445   }
446   pDict->SetAtInteger("Length", (int)dwStreamSize);
447   return new CPDF_Stream(pData, dwStreamSize, pDict);
448 }
449 
450 #define MAX_WORD_BUFFER 256
451 #define MAX_STRING_LENGTH 32767
452 #define FXDWORD_TRUE FXDWORD_FROM_LSBFIRST(0x65757274)
453 #define FXDWORD_NULL FXDWORD_FROM_LSBFIRST(0x6c6c756e)
454 #define FXDWORD_FALS FXDWORD_FROM_LSBFIRST(0x736c6166)
ParseNextElement()455 CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() {
456   if (m_pLastObj) {
457     m_pLastObj->Release();
458     m_pLastObj = nullptr;
459   }
460 
461   m_WordSize = 0;
462   FX_BOOL bIsNumber = TRUE;
463   if (!PositionIsInBounds())
464     return EndOfData;
465 
466   int ch = m_pBuf[m_Pos++];
467   while (1) {
468     while (PDFCharIsWhitespace(ch)) {
469       if (!PositionIsInBounds())
470         return EndOfData;
471 
472       ch = m_pBuf[m_Pos++];
473     }
474 
475     if (ch != '%')
476       break;
477 
478     while (1) {
479       if (!PositionIsInBounds())
480         return EndOfData;
481 
482       ch = m_pBuf[m_Pos++];
483       if (PDFCharIsLineEnding(ch))
484         break;
485     }
486   }
487 
488   if (PDFCharIsDelimiter(ch) && ch != '/') {
489     m_Pos--;
490     m_pLastObj = ReadNextObject();
491     return Others;
492   }
493 
494   while (1) {
495     if (m_WordSize < MAX_WORD_BUFFER)
496       m_WordBuffer[m_WordSize++] = ch;
497 
498     if (!PDFCharIsNumeric(ch))
499       bIsNumber = FALSE;
500 
501     if (!PositionIsInBounds())
502       break;
503 
504     ch = m_pBuf[m_Pos++];
505 
506     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
507       m_Pos--;
508       break;
509     }
510   }
511 
512   m_WordBuffer[m_WordSize] = 0;
513   if (bIsNumber)
514     return Number;
515   if (m_WordBuffer[0] == '/')
516     return Name;
517 
518   if (m_WordSize == 4) {
519     if (*(FX_DWORD*)m_WordBuffer == FXDWORD_TRUE) {
520       m_pLastObj = new CPDF_Boolean(TRUE);
521       return Others;
522     }
523     if (*(FX_DWORD*)m_WordBuffer == FXDWORD_NULL) {
524       m_pLastObj = new CPDF_Null;
525       return Others;
526     }
527   } else if (m_WordSize == 5) {
528     if (*(FX_DWORD*)m_WordBuffer == FXDWORD_FALS && m_WordBuffer[4] == 'e') {
529       m_pLastObj = new CPDF_Boolean(FALSE);
530       return Others;
531     }
532   }
533   return Keyword;
534 }
535 
SkipPathObject()536 void CPDF_StreamParser::SkipPathObject() {
537   FX_DWORD command_startpos = m_Pos;
538   if (!PositionIsInBounds())
539     return;
540 
541   int ch = m_pBuf[m_Pos++];
542   while (1) {
543     while (PDFCharIsWhitespace(ch)) {
544       if (!PositionIsInBounds())
545         return;
546       ch = m_pBuf[m_Pos++];
547     }
548 
549     if (!PDFCharIsNumeric(ch)) {
550       m_Pos = command_startpos;
551       return;
552     }
553 
554     while (1) {
555       while (!PDFCharIsWhitespace(ch)) {
556         if (!PositionIsInBounds())
557           return;
558         ch = m_pBuf[m_Pos++];
559       }
560 
561       while (PDFCharIsWhitespace(ch)) {
562         if (!PositionIsInBounds())
563           return;
564         ch = m_pBuf[m_Pos++];
565       }
566 
567       if (PDFCharIsNumeric(ch))
568         continue;
569 
570       FX_DWORD op_startpos = m_Pos - 1;
571       while (!PDFCharIsWhitespace(ch) && !PDFCharIsDelimiter(ch)) {
572         if (!PositionIsInBounds())
573           return;
574         ch = m_pBuf[m_Pos++];
575       }
576 
577       if (m_Pos - op_startpos == 2) {
578         int op = m_pBuf[op_startpos];
579         if (op == kPathOperatorSubpath || op == kPathOperatorLine ||
580             op == kPathOperatorCubicBezier1 ||
581             op == kPathOperatorCubicBezier2 ||
582             op == kPathOperatorCubicBezier3) {
583           command_startpos = m_Pos;
584           break;
585         }
586       } else if (m_Pos - op_startpos == 3) {
587         if (m_pBuf[op_startpos] == kPathOperatorRectangle[0] &&
588             m_pBuf[op_startpos + 1] == kPathOperatorRectangle[1]) {
589           command_startpos = m_Pos;
590           break;
591         }
592       }
593       m_Pos = command_startpos;
594       return;
595     }
596   }
597 }
ReadNextObject(FX_BOOL bAllowNestedArray,FX_BOOL bInArray)598 CPDF_Object* CPDF_StreamParser::ReadNextObject(FX_BOOL bAllowNestedArray,
599                                                FX_BOOL bInArray) {
600   FX_BOOL bIsNumber;
601   GetNextWord(bIsNumber);
602   if (m_WordSize == 0) {
603     return NULL;
604   }
605   if (bIsNumber) {
606     m_WordBuffer[m_WordSize] = 0;
607     return new CPDF_Number(CFX_ByteStringC(m_WordBuffer, m_WordSize));
608   }
609   int first_char = m_WordBuffer[0];
610   if (first_char == '/') {
611     return new CPDF_Name(
612         PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
613   }
614   if (first_char == '(') {
615     return new CPDF_String(ReadString(), FALSE);
616   }
617   if (first_char == '<') {
618     if (m_WordSize == 1) {
619       return new CPDF_String(ReadHexString(), TRUE);
620     }
621     CPDF_Dictionary* pDict = new CPDF_Dictionary;
622     while (1) {
623       GetNextWord(bIsNumber);
624       if (m_WordSize == 0) {
625         pDict->Release();
626         return nullptr;
627       }
628       if (m_WordSize == 2 && m_WordBuffer[0] == '>') {
629         break;
630       }
631       if (m_WordBuffer[0] != '/') {
632         pDict->Release();
633         return nullptr;
634       }
635       CFX_ByteString key =
636           PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1));
637       CPDF_Object* pObj = ReadNextObject(TRUE);
638       if (!pObj) {
639         pDict->Release();
640         return nullptr;
641       }
642       if (!key.IsEmpty()) {
643         pDict->SetAt(key, pObj);
644       } else {
645         pObj->Release();
646       }
647     }
648     return pDict;
649   }
650   if (first_char == '[') {
651     if (!bAllowNestedArray && bInArray) {
652       return NULL;
653     }
654     CPDF_Array* pArray = new CPDF_Array;
655     while (1) {
656       CPDF_Object* pObj = ReadNextObject(bAllowNestedArray, TRUE);
657       if (pObj) {
658         pArray->Add(pObj);
659         continue;
660       }
661 
662       if (m_WordSize == 0 || m_WordBuffer[0] == ']')
663         break;
664     }
665     return pArray;
666   }
667   if (m_WordSize == 4) {
668     if (*(FX_DWORD*)m_WordBuffer == FXDWORD_TRUE) {
669       return new CPDF_Boolean(TRUE);
670     }
671     if (*(FX_DWORD*)m_WordBuffer == FXDWORD_NULL) {
672       return new CPDF_Null;
673     }
674   } else if (m_WordSize == 5) {
675     if (*(FX_DWORD*)m_WordBuffer == FXDWORD_FALS && m_WordBuffer[4] == 'e') {
676       return new CPDF_Boolean(FALSE);
677     }
678   }
679   return NULL;
680 }
GetNextWord(FX_BOOL & bIsNumber)681 void CPDF_StreamParser::GetNextWord(FX_BOOL& bIsNumber) {
682   m_WordSize = 0;
683   bIsNumber = TRUE;
684   if (!PositionIsInBounds())
685     return;
686 
687   int ch = m_pBuf[m_Pos++];
688   while (1) {
689     while (PDFCharIsWhitespace(ch)) {
690       if (!PositionIsInBounds()) {
691         return;
692       }
693       ch = m_pBuf[m_Pos++];
694     }
695 
696     if (ch != '%')
697       break;
698 
699     while (1) {
700       if (!PositionIsInBounds())
701         return;
702       ch = m_pBuf[m_Pos++];
703       if (PDFCharIsLineEnding(ch))
704         break;
705     }
706   }
707 
708   if (PDFCharIsDelimiter(ch)) {
709     bIsNumber = FALSE;
710     m_WordBuffer[m_WordSize++] = ch;
711     if (ch == '/') {
712       while (1) {
713         if (!PositionIsInBounds())
714           return;
715         ch = m_pBuf[m_Pos++];
716         if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
717           m_Pos--;
718           return;
719         }
720 
721         if (m_WordSize < MAX_WORD_BUFFER)
722           m_WordBuffer[m_WordSize++] = ch;
723       }
724     } else if (ch == '<') {
725       if (!PositionIsInBounds())
726         return;
727       ch = m_pBuf[m_Pos++];
728       if (ch == '<')
729         m_WordBuffer[m_WordSize++] = ch;
730       else
731         m_Pos--;
732     } else if (ch == '>') {
733       if (!PositionIsInBounds())
734         return;
735       ch = m_pBuf[m_Pos++];
736       if (ch == '>')
737         m_WordBuffer[m_WordSize++] = ch;
738       else
739         m_Pos--;
740     }
741     return;
742   }
743 
744   while (1) {
745     if (m_WordSize < MAX_WORD_BUFFER)
746       m_WordBuffer[m_WordSize++] = ch;
747     if (!PDFCharIsNumeric(ch))
748       bIsNumber = FALSE;
749 
750     if (!PositionIsInBounds())
751       return;
752     ch = m_pBuf[m_Pos++];
753     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
754       m_Pos--;
755       break;
756     }
757   }
758 }
759 
ReadString()760 CFX_ByteString CPDF_StreamParser::ReadString() {
761   if (!PositionIsInBounds())
762     return CFX_ByteString();
763 
764   int ch = m_pBuf[m_Pos++];
765   CFX_ByteTextBuf buf;
766   int parlevel = 0;
767   int status = 0, iEscCode = 0;
768   while (1) {
769     switch (status) {
770       case 0:
771         if (ch == ')') {
772           if (parlevel == 0) {
773             if (buf.GetLength() > MAX_STRING_LENGTH) {
774               return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH);
775             }
776             return buf.GetByteString();
777           }
778           parlevel--;
779           buf.AppendChar(')');
780         } else if (ch == '(') {
781           parlevel++;
782           buf.AppendChar('(');
783         } else if (ch == '\\') {
784           status = 1;
785         } else {
786           buf.AppendChar((char)ch);
787         }
788         break;
789       case 1:
790         if (ch >= '0' && ch <= '7') {
791           iEscCode = FXSYS_toDecimalDigit(ch);
792           status = 2;
793           break;
794         }
795         if (ch == 'n') {
796           buf.AppendChar('\n');
797         } else if (ch == 'r') {
798           buf.AppendChar('\r');
799         } else if (ch == 't') {
800           buf.AppendChar('\t');
801         } else if (ch == 'b') {
802           buf.AppendChar('\b');
803         } else if (ch == 'f') {
804           buf.AppendChar('\f');
805         } else if (ch == '\r') {
806           status = 4;
807           break;
808         } else if (ch == '\n') {
809         } else {
810           buf.AppendChar(ch);
811         }
812         status = 0;
813         break;
814       case 2:
815         if (ch >= '0' && ch <= '7') {
816           iEscCode = iEscCode * 8 + FXSYS_toDecimalDigit(ch);
817           status = 3;
818         } else {
819           buf.AppendChar(iEscCode);
820           status = 0;
821           continue;
822         }
823         break;
824       case 3:
825         if (ch >= '0' && ch <= '7') {
826           iEscCode = iEscCode * 8 + FXSYS_toDecimalDigit(ch);
827           buf.AppendChar(iEscCode);
828           status = 0;
829         } else {
830           buf.AppendChar(iEscCode);
831           status = 0;
832           continue;
833         }
834         break;
835       case 4:
836         status = 0;
837         if (ch != '\n') {
838           continue;
839         }
840         break;
841     }
842     if (!PositionIsInBounds())
843       break;
844 
845     ch = m_pBuf[m_Pos++];
846   }
847   if (PositionIsInBounds())
848     ch = m_pBuf[m_Pos++];
849 
850   if (buf.GetLength() > MAX_STRING_LENGTH) {
851     return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH);
852   }
853   return buf.GetByteString();
854 }
ReadHexString()855 CFX_ByteString CPDF_StreamParser::ReadHexString() {
856   if (!PositionIsInBounds())
857     return CFX_ByteString();
858 
859   CFX_ByteTextBuf buf;
860   bool bFirst = true;
861   int code = 0;
862   while (PositionIsInBounds()) {
863     int ch = m_pBuf[m_Pos++];
864 
865     if (ch == '>')
866       break;
867 
868     if (!std::isxdigit(ch))
869       continue;
870 
871     int val = FXSYS_toHexDigit(ch);
872     if (bFirst) {
873       code = val * 16;
874     } else {
875       code += val;
876       buf.AppendByte((uint8_t)code);
877     }
878     bFirst = !bFirst;
879   }
880   if (!bFirst)
881     buf.AppendChar((char)code);
882 
883   if (buf.GetLength() > MAX_STRING_LENGTH)
884     return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH);
885 
886   return buf.GetByteString();
887 }
888 
PositionIsInBounds() const889 bool CPDF_StreamParser::PositionIsInBounds() const {
890   return m_Pos < m_Size;
891 }
892 
CPDF_ContentParser()893 CPDF_ContentParser::CPDF_ContentParser()
894     : m_Status(Ready),
895       m_InternalStage(STAGE_GETCONTENT),
896       m_pObjects(nullptr),
897       m_bForm(false),
898       m_pType3Char(nullptr),
899       m_pData(nullptr),
900       m_Size(0),
901       m_CurrentOffset(0) {}
~CPDF_ContentParser()902 CPDF_ContentParser::~CPDF_ContentParser() {
903   if (!m_pSingleStream)
904     FX_Free(m_pData);
905 }
Start(CPDF_Page * pPage,CPDF_ParseOptions * pOptions)906 void CPDF_ContentParser::Start(CPDF_Page* pPage, CPDF_ParseOptions* pOptions) {
907   if (m_Status != Ready || !pPage || !pPage->m_pDocument ||
908       !pPage->m_pFormDict) {
909     m_Status = Done;
910     return;
911   }
912   m_pObjects = pPage;
913   m_bForm = FALSE;
914   if (pOptions) {
915     m_Options = *pOptions;
916   }
917   m_Status = ToBeContinued;
918   m_InternalStage = STAGE_GETCONTENT;
919   m_CurrentOffset = 0;
920 
921   CPDF_Object* pContent = pPage->m_pFormDict->GetElementValue("Contents");
922   if (!pContent) {
923     m_Status = Done;
924     return;
925   }
926   if (CPDF_Stream* pStream = pContent->AsStream()) {
927     m_nStreams = 0;
928     m_pSingleStream.reset(new CPDF_StreamAcc);
929     m_pSingleStream->LoadAllData(pStream, FALSE);
930   } else if (CPDF_Array* pArray = pContent->AsArray()) {
931     m_nStreams = pArray->GetCount();
932     if (m_nStreams)
933       m_StreamArray.resize(m_nStreams);
934     else
935       m_Status = Done;
936   } else {
937     m_Status = Done;
938   }
939 }
Start(CPDF_Form * pForm,CPDF_AllStates * pGraphicStates,CFX_Matrix * pParentMatrix,CPDF_Type3Char * pType3Char,CPDF_ParseOptions * pOptions,int level)940 void CPDF_ContentParser::Start(CPDF_Form* pForm,
941                                CPDF_AllStates* pGraphicStates,
942                                CFX_Matrix* pParentMatrix,
943                                CPDF_Type3Char* pType3Char,
944                                CPDF_ParseOptions* pOptions,
945                                int level) {
946   m_pType3Char = pType3Char;
947   m_pObjects = pForm;
948   m_bForm = TRUE;
949   CFX_Matrix form_matrix = pForm->m_pFormDict->GetMatrix("Matrix");
950   if (pGraphicStates) {
951     form_matrix.Concat(pGraphicStates->m_CTM);
952   }
953   CPDF_Array* pBBox = pForm->m_pFormDict->GetArray("BBox");
954   CFX_FloatRect form_bbox;
955   CPDF_Path ClipPath;
956   if (pBBox) {
957     form_bbox = pBBox->GetRect();
958     ClipPath.New();
959     ClipPath.AppendRect(form_bbox.left, form_bbox.bottom, form_bbox.right,
960                         form_bbox.top);
961     ClipPath.Transform(&form_matrix);
962     if (pParentMatrix) {
963       ClipPath.Transform(pParentMatrix);
964     }
965     form_bbox.Transform(&form_matrix);
966     if (pParentMatrix) {
967       form_bbox.Transform(pParentMatrix);
968     }
969   }
970   CPDF_Dictionary* pResources = pForm->m_pFormDict->GetDict("Resources");
971   m_pParser.reset(new CPDF_StreamContentParser(
972       pForm->m_pDocument, pForm->m_pPageResources, pForm->m_pResources,
973       pParentMatrix, pForm, pResources, &form_bbox, pOptions, pGraphicStates,
974       level));
975   m_pParser->GetCurStates()->m_CTM = form_matrix;
976   m_pParser->GetCurStates()->m_ParentMatrix = form_matrix;
977   if (ClipPath.NotNull()) {
978     m_pParser->GetCurStates()->m_ClipPath.AppendPath(ClipPath, FXFILL_WINDING,
979                                                      TRUE);
980   }
981   if (pForm->m_Transparency & PDFTRANS_GROUP) {
982     CPDF_GeneralStateData* pData =
983         m_pParser->GetCurStates()->m_GeneralState.GetModify();
984     pData->m_BlendType = FXDIB_BLEND_NORMAL;
985     pData->m_StrokeAlpha = 1.0f;
986     pData->m_FillAlpha = 1.0f;
987     pData->m_pSoftMask = NULL;
988   }
989   m_nStreams = 0;
990   m_pSingleStream.reset(new CPDF_StreamAcc);
991   m_pSingleStream->LoadAllData(pForm->m_pFormStream, FALSE);
992   m_pData = (uint8_t*)m_pSingleStream->GetData();
993   m_Size = m_pSingleStream->GetSize();
994   m_Status = ToBeContinued;
995   m_InternalStage = STAGE_PARSE;
996   m_CurrentOffset = 0;
997 }
Continue(IFX_Pause * pPause)998 void CPDF_ContentParser::Continue(IFX_Pause* pPause) {
999   int steps = 0;
1000   while (m_Status == ToBeContinued) {
1001     if (m_InternalStage == STAGE_GETCONTENT) {
1002       if (m_CurrentOffset == m_nStreams) {
1003         if (!m_StreamArray.empty()) {
1004           FX_SAFE_DWORD safeSize = 0;
1005           for (const auto& stream : m_StreamArray) {
1006             safeSize += stream->GetSize();
1007             safeSize += 1;
1008           }
1009           if (!safeSize.IsValid()) {
1010             m_Status = Done;
1011             return;
1012           }
1013           m_Size = safeSize.ValueOrDie();
1014           m_pData = FX_Alloc(uint8_t, m_Size);
1015           FX_DWORD pos = 0;
1016           for (const auto& stream : m_StreamArray) {
1017             FXSYS_memcpy(m_pData + pos, stream->GetData(), stream->GetSize());
1018             pos += stream->GetSize();
1019             m_pData[pos++] = ' ';
1020           }
1021           m_StreamArray.clear();
1022         } else {
1023           m_pData = (uint8_t*)m_pSingleStream->GetData();
1024           m_Size = m_pSingleStream->GetSize();
1025         }
1026         m_InternalStage = STAGE_PARSE;
1027         m_CurrentOffset = 0;
1028       } else {
1029         CPDF_Array* pContent = m_pObjects->m_pFormDict->GetArray("Contents");
1030         m_StreamArray[m_CurrentOffset].reset(new CPDF_StreamAcc);
1031         CPDF_Stream* pStreamObj = ToStream(
1032             pContent ? pContent->GetElementValue(m_CurrentOffset) : nullptr);
1033         m_StreamArray[m_CurrentOffset]->LoadAllData(pStreamObj, FALSE);
1034         m_CurrentOffset++;
1035       }
1036     }
1037     if (m_InternalStage == STAGE_PARSE) {
1038       if (!m_pParser) {
1039         m_pParser.reset(new CPDF_StreamContentParser(
1040             m_pObjects->m_pDocument, m_pObjects->m_pPageResources, nullptr,
1041             nullptr, m_pObjects, m_pObjects->m_pResources, &m_pObjects->m_BBox,
1042             &m_Options, nullptr, 0));
1043         m_pParser->GetCurStates()->m_ColorState.GetModify()->Default();
1044       }
1045       if (m_CurrentOffset >= m_Size) {
1046         m_InternalStage = STAGE_CHECKCLIP;
1047       } else {
1048         m_CurrentOffset +=
1049             m_pParser->Parse(m_pData + m_CurrentOffset,
1050                              m_Size - m_CurrentOffset, PARSE_STEP_LIMIT);
1051       }
1052     }
1053     if (m_InternalStage == STAGE_CHECKCLIP) {
1054       if (m_pType3Char) {
1055         m_pType3Char->m_bColored = m_pParser->IsColored();
1056         m_pType3Char->m_Width =
1057             FXSYS_round(m_pParser->GetType3Data()[0] * 1000);
1058         m_pType3Char->m_BBox.left =
1059             FXSYS_round(m_pParser->GetType3Data()[2] * 1000);
1060         m_pType3Char->m_BBox.bottom =
1061             FXSYS_round(m_pParser->GetType3Data()[3] * 1000);
1062         m_pType3Char->m_BBox.right =
1063             FXSYS_round(m_pParser->GetType3Data()[4] * 1000);
1064         m_pType3Char->m_BBox.top =
1065             FXSYS_round(m_pParser->GetType3Data()[5] * 1000);
1066       }
1067       FX_POSITION pos = m_pObjects->m_ObjectList.GetHeadPosition();
1068       while (pos) {
1069         CPDF_PageObject* pObj =
1070             (CPDF_PageObject*)m_pObjects->m_ObjectList.GetNext(pos);
1071         if (pObj->m_ClipPath.IsNull()) {
1072           continue;
1073         }
1074         if (pObj->m_ClipPath.GetPathCount() != 1) {
1075           continue;
1076         }
1077         if (pObj->m_ClipPath.GetTextCount()) {
1078           continue;
1079         }
1080         CPDF_Path ClipPath = pObj->m_ClipPath.GetPath(0);
1081         if (!ClipPath.IsRect() || pObj->m_Type == PDFPAGE_SHADING) {
1082           continue;
1083         }
1084         CFX_FloatRect old_rect(ClipPath.GetPointX(0), ClipPath.GetPointY(0),
1085                                ClipPath.GetPointX(2), ClipPath.GetPointY(2));
1086         CFX_FloatRect obj_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right,
1087                                pObj->m_Top);
1088         if (old_rect.Contains(obj_rect)) {
1089           pObj->m_ClipPath.SetNull();
1090         }
1091       }
1092       m_Status = Done;
1093       return;
1094     }
1095     steps++;
1096     if (pPause && pPause->NeedToPauseNow()) {
1097       break;
1098     }
1099   }
1100 }
1101