1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "../../../include/fpdfapi/fpdf_page.h"
8 #include "../../../include/fpdfapi/fpdf_module.h"
9 #include "../../../include/fxcodec/fx_codec.h"
10 #include "pageint.h"
11 #include <limits.h>
12 extern const FX_LPCSTR _PDF_OpCharType =
13     "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"
14     "IIVIIIIVIIVIIIIIVVIIIIIIIIIIIIII"
15     "IIVVVVVVIVVVVVVIVVVVVIIVVIIIIIII"
16     "IIVVVVVVVVVVVVVVIVVVIIVVIVVIIIII"
17     "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"
18     "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"
19     "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"
20     "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII";
_PDF_HasInvalidOpChar(FX_LPCSTR op)21 FX_BOOL _PDF_HasInvalidOpChar(FX_LPCSTR op)
22 {
23     if(!op) {
24         return FALSE;
25     }
26     FX_BYTE ch;
27     while((ch = *op++)) {
28         if(_PDF_OpCharType[ch] == 'I') {
29             return TRUE;
30         }
31     }
32     return FALSE;
33 }
34 class CPDF_StreamParserAutoClearer {
35   public:
CPDF_StreamParserAutoClearer(CPDF_StreamParser ** scoped_variable,CPDF_StreamParser * new_parser)36     CPDF_StreamParserAutoClearer(CPDF_StreamParser** scoped_variable, CPDF_StreamParser* new_parser)
37             : scoped_variable_(scoped_variable) {
38         *scoped_variable_ = new_parser;
39     }
~CPDF_StreamParserAutoClearer()40     ~CPDF_StreamParserAutoClearer() { *scoped_variable_ = NULL; }
41   private:
42     CPDF_StreamParser** scoped_variable_;
43 };
Parse(FX_LPCBYTE pData,FX_DWORD dwSize,FX_DWORD max_cost)44 FX_DWORD CPDF_StreamContentParser::Parse(FX_LPCBYTE pData, FX_DWORD dwSize, FX_DWORD max_cost)
45 {
46     if (m_Level > _FPDF_MAX_FORM_LEVEL_) {
47         return dwSize;
48     }
49     FX_DWORD InitObjCount = m_pObjectList->CountObjects();
50     CPDF_StreamParser syntax(pData, dwSize);
51     CPDF_StreamParserAutoClearer auto_clearer(&m_pSyntax, &syntax);
52     m_CompatCount = 0;
53     while (1) {
54         FX_DWORD cost = m_pObjectList->CountObjects() - InitObjCount;
55         if (max_cost && cost >= max_cost) {
56             break;
57         }
58         switch (syntax.ParseNextElement()) {
59             case CPDF_StreamParser::EndOfData:
60                 return m_pSyntax->GetPos();
61             case CPDF_StreamParser::Keyword:
62                 if(!OnOperator((char*)syntax.GetWordBuf()) && _PDF_HasInvalidOpChar((char*)syntax.GetWordBuf())) {
63                     m_bAbort = TRUE;
64                 }
65                 if (m_bAbort) {
66                     return m_pSyntax->GetPos();
67                 }
68                 ClearAllParams();
69                 break;
70             case CPDF_StreamParser::Number:
71                 AddNumberParam((char*)syntax.GetWordBuf(), syntax.GetWordSize());
72                 break;
73             case CPDF_StreamParser::Name:
74                 AddNameParam((FX_LPCSTR)syntax.GetWordBuf() + 1, syntax.GetWordSize() - 1);
75                 break;
76             default:
77                 AddObjectParam(syntax.GetObject());
78         }
79     }
80     return m_pSyntax->GetPos();
81 }
82 void _PDF_ReplaceAbbr(CPDF_Object* pObj);
Handle_BeginImage()83 void CPDF_StreamContentParser::Handle_BeginImage()
84 {
85     FX_FILESIZE savePos = m_pSyntax->GetPos();
86     CPDF_Dictionary* pDict = CPDF_Dictionary::Create();
87     while (1) {
88         CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement();
89         if (type == CPDF_StreamParser::Keyword) {
90             CFX_ByteString bsKeyword(m_pSyntax->GetWordBuf(), m_pSyntax->GetWordSize());
91             if (bsKeyword != FX_BSTRC("ID")) {
92                 m_pSyntax->SetPos(savePos);
93                 pDict->Release();
94                 return;
95             }
96         }
97         if (type != CPDF_StreamParser::Name) {
98             break;
99         }
100         CFX_ByteString key((FX_LPCSTR)m_pSyntax->GetWordBuf() + 1, m_pSyntax->GetWordSize() - 1);
101         CPDF_Object* pObj = m_pSyntax->ReadNextObject();
102         if (!key.IsEmpty()) {
103             pDict->SetAt(key, pObj, m_pDocument);
104         } else if (pObj) {
105             pObj->Release();
106         }
107     }
108     _PDF_ReplaceAbbr(pDict);
109     CPDF_Object* pCSObj = NULL;
110     if (pDict->KeyExist(FX_BSTRC("ColorSpace"))) {
111         pCSObj = pDict->GetElementValue(FX_BSTRC("ColorSpace"));
112         if (pCSObj->GetType() == PDFOBJ_NAME) {
113             CFX_ByteString name = pCSObj->GetString();
114             if (name != FX_BSTRC("DeviceRGB") && name != FX_BSTRC("DeviceGray") && name != FX_BSTRC("DeviceCMYK")) {
115                 pCSObj = FindResourceObj(FX_BSTRC("ColorSpace"), name);
116                 if (pCSObj && !pCSObj->GetObjNum()) {
117                     pCSObj = pCSObj->Clone();
118                     pDict->SetAt(FX_BSTRC("ColorSpace"), pCSObj, m_pDocument);
119                 }
120             }
121         }
122     }
123     CPDF_Stream* pStream = m_pSyntax->ReadInlineStream(m_pDocument, pDict, pCSObj, m_Options.m_bDecodeInlineImage);
124     while (1) {
125         CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement();
126         if (type == CPDF_StreamParser::EndOfData) {
127             break;
128         }
129         if (type != CPDF_StreamParser::Keyword) {
130             continue;
131         }
132         if (m_pSyntax->GetWordSize() == 2 && m_pSyntax->GetWordBuf()[0] == 'E' &&
133                 m_pSyntax->GetWordBuf()[1] == 'I') {
134             break;
135         }
136     }
137     if (m_Options.m_bTextOnly) {
138         if (pStream) {
139             pStream->Release();
140         } else {
141             pDict->Release();
142         }
143         return;
144     }
145     pDict->SetAtName(FX_BSTRC("Subtype"), FX_BSTRC("Image"));
146     CPDF_ImageObject *pImgObj = AddImage(pStream, NULL, TRUE);
147     if (!pImgObj) {
148         if (pStream) {
149             pStream->Release();
150         } else {
151             pDict->Release();
152         }
153     }
154 }
ParsePathObject()155 void CPDF_StreamContentParser::ParsePathObject()
156 {
157     FX_FLOAT params[6] = {0};
158     int nParams = 0;
159     int last_pos = m_pSyntax->GetPos();
160     while (1) {
161         CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement();
162         FX_BOOL bProcessed = TRUE;
163         switch (type) {
164             case CPDF_StreamParser::EndOfData:
165                 return;
166             case CPDF_StreamParser::Keyword: {
167                     int len = m_pSyntax->GetWordSize();
168                     if (len == 1) {
169                         switch (m_pSyntax->GetWordBuf()[0]) {
170                             case 'm':
171                                 AddPathPoint(params[0], params[1], FXPT_MOVETO);
172                                 nParams = 0;
173                                 break;
174                             case 'l':
175                                 AddPathPoint(params[0], params[1], FXPT_LINETO);
176                                 nParams = 0;
177                                 break;
178                             case 'c':
179                                 AddPathPoint(params[0], params[1], FXPT_BEZIERTO);
180                                 AddPathPoint(params[2], params[3], FXPT_BEZIERTO);
181                                 AddPathPoint(params[4], params[5], FXPT_BEZIERTO);
182                                 nParams = 0;
183                                 break;
184                             case 'v':
185                                 AddPathPoint(m_PathCurrentX, m_PathCurrentY, FXPT_BEZIERTO);
186                                 AddPathPoint(params[0], params[1], FXPT_BEZIERTO);
187                                 AddPathPoint(params[2], params[3], FXPT_BEZIERTO);
188                                 nParams = 0;
189                                 break;
190                             case 'y':
191                                 AddPathPoint(params[0], params[1], FXPT_BEZIERTO);
192                                 AddPathPoint(params[2], params[3], FXPT_BEZIERTO);
193                                 AddPathPoint(params[2], params[3], FXPT_BEZIERTO);
194                                 nParams = 0;
195                                 break;
196                             case 'h':
197                                 Handle_ClosePath();
198                                 nParams = 0;
199                                 break;
200                             default:
201                                 bProcessed = FALSE;
202                                 break;
203                         }
204                     } else if (len == 2) {
205                         if (m_pSyntax->GetWordBuf()[0] == 'r' && m_pSyntax->GetWordBuf()[1] == 'e') {
206                             AddPathRect(params[0], params[1], params[2], params[3]);
207                             nParams = 0;
208                         } else {
209                             bProcessed = FALSE;
210                         }
211                     } else {
212                         bProcessed = FALSE;
213                     }
214                     if (bProcessed) {
215                         last_pos = m_pSyntax->GetPos();
216                     }
217                     break;
218                 }
219             case CPDF_StreamParser::Number: {
220                     if (nParams == 6) {
221                         break;
222                     }
223                     FX_BOOL bInteger;
224                     int value;
225                     FX_atonum(CFX_ByteStringC(m_pSyntax->GetWordBuf(), m_pSyntax->GetWordSize()), bInteger, &value);
226                     params[nParams++] = bInteger ? (FX_FLOAT)value : *(FX_FLOAT*)&value;
227                     break;
228                 }
229             default:
230                 bProcessed = FALSE;
231         }
232         if (!bProcessed) {
233             m_pSyntax->SetPos(last_pos);
234             return;
235         }
236     }
237 }
CPDF_StreamParser(const FX_BYTE * pData,FX_DWORD dwSize)238 CPDF_StreamParser::CPDF_StreamParser(const FX_BYTE* pData, FX_DWORD dwSize)
239 {
240     m_pBuf = pData;
241     m_Size = dwSize;
242     m_Pos = 0;
243     m_pLastObj = NULL;
244 }
~CPDF_StreamParser()245 CPDF_StreamParser::~CPDF_StreamParser()
246 {
247     if (m_pLastObj) {
248         m_pLastObj->Release();
249     }
250 }
_DecodeAllScanlines(ICodec_ScanlineDecoder * pDecoder,FX_LPBYTE & dest_buf,FX_DWORD & dest_size)251 FX_DWORD _DecodeAllScanlines(ICodec_ScanlineDecoder* pDecoder, FX_LPBYTE& dest_buf, FX_DWORD& dest_size)
252 {
253     if (pDecoder == NULL) {
254         return (FX_DWORD) - 1;
255     }
256     int ncomps = pDecoder->CountComps();
257     int bpc = pDecoder->GetBPC();
258     int width = pDecoder->GetWidth();
259     int height = pDecoder->GetHeight();
260     int pitch = (width * ncomps * bpc + 7) / 8;
261     if (height == 0 || pitch > (1 << 30) / height) {
262         delete pDecoder;
263         return -1;
264     }
265     dest_buf = FX_Alloc2D(FX_BYTE, pitch, height);
266     dest_size = pitch * height;  // Safe since checked alloc returned.
267     for (int row = 0; row < height; row ++) {
268         FX_LPBYTE pLine = pDecoder->GetScanline(row);
269         if (pLine == NULL) {
270             break;
271         }
272         FXSYS_memcpy32(dest_buf + row * pitch, pLine, pitch);
273     }
274     FX_DWORD srcoff = pDecoder->GetSrcOffset();
275     delete pDecoder;
276     return srcoff;
277 }
278 ICodec_ScanlineDecoder* FPDFAPI_CreateFaxDecoder(FX_LPCBYTE src_buf, FX_DWORD src_size, int width, int height,
279         const CPDF_Dictionary* pParams);
280 FX_DWORD _A85Decode(const FX_BYTE* src_buf, FX_DWORD src_size, FX_LPBYTE& dest_buf, FX_DWORD& dest_size);
281 FX_DWORD _HexDecode(const FX_BYTE* src_buf, FX_DWORD src_size, FX_LPBYTE& dest_buf, FX_DWORD& dest_size);
282 FX_DWORD FPDFAPI_FlateOrLZWDecode(FX_BOOL bLZW, const FX_BYTE* src_buf, FX_DWORD src_size, CPDF_Dictionary* pParams,
283                                   FX_DWORD estimated_size, FX_LPBYTE& dest_buf, FX_DWORD& dest_size);
PDF_DecodeInlineStream(const FX_BYTE * src_buf,FX_DWORD limit,int width,int height,CFX_ByteString & decoder,CPDF_Dictionary * pParam,FX_LPBYTE & dest_buf,FX_DWORD & dest_size)284 FX_DWORD PDF_DecodeInlineStream(const FX_BYTE* src_buf, FX_DWORD limit,
285                                 int width, int height, CFX_ByteString& decoder,
286                                 CPDF_Dictionary* pParam, FX_LPBYTE& dest_buf, FX_DWORD& dest_size)
287 {
288     if (decoder == FX_BSTRC("CCITTFaxDecode") || decoder == FX_BSTRC("CCF")) {
289         ICodec_ScanlineDecoder* pDecoder = FPDFAPI_CreateFaxDecoder(src_buf, limit, width, height, pParam);
290         return _DecodeAllScanlines(pDecoder, dest_buf, dest_size);
291     } else if (decoder == FX_BSTRC("ASCII85Decode") || decoder == FX_BSTRC("A85")) {
292         return _A85Decode(src_buf, limit, dest_buf, dest_size);
293     } else if (decoder == FX_BSTRC("ASCIIHexDecode") || decoder == FX_BSTRC("AHx")) {
294         return _HexDecode(src_buf, limit, dest_buf, dest_size);
295     } else if (decoder == FX_BSTRC("FlateDecode") || decoder == FX_BSTRC("Fl")) {
296         return FPDFAPI_FlateOrLZWDecode(FALSE, src_buf, limit, pParam, dest_size, dest_buf, dest_size);
297     } else if (decoder == FX_BSTRC("LZWDecode") || decoder == FX_BSTRC("LZW")) {
298         return FPDFAPI_FlateOrLZWDecode(TRUE, src_buf, limit, pParam, 0, dest_buf, dest_size);
299     } else if (decoder == FX_BSTRC("DCTDecode") || decoder == FX_BSTRC("DCT")) {
300         ICodec_ScanlineDecoder* pDecoder = CPDF_ModuleMgr::Get()->GetJpegModule()->CreateDecoder(
301                                                src_buf, limit, width, height, 0, pParam ? pParam->GetInteger(FX_BSTRC("ColorTransform"), 1) : 1);
302         return _DecodeAllScanlines(pDecoder, dest_buf, dest_size);
303     } else if (decoder == FX_BSTRC("RunLengthDecode") || decoder == FX_BSTRC("RL")) {
304         return RunLengthDecode(src_buf, limit, dest_buf, dest_size);
305     }
306     dest_size = 0;
307     dest_buf = 0;
308     return (FX_DWORD) - 1;
309 }
ReadInlineStream(CPDF_Document * pDoc,CPDF_Dictionary * pDict,CPDF_Object * pCSObj,FX_BOOL bDecode)310 CPDF_Stream* CPDF_StreamParser::ReadInlineStream(CPDF_Document* pDoc, CPDF_Dictionary* pDict, CPDF_Object* pCSObj, FX_BOOL bDecode)
311 {
312     if (m_Pos == m_Size) {
313         return NULL;
314     }
315     if (PDF_CharType[m_pBuf[m_Pos]] == 'W') {
316         m_Pos ++;
317     }
318     CFX_ByteString Decoder;
319     CPDF_Dictionary* pParam = NULL;
320     CPDF_Object* pFilter = pDict->GetElementValue(FX_BSTRC("Filter"));
321     if (pFilter == NULL) {
322     } else if (pFilter->GetType() == PDFOBJ_ARRAY) {
323         Decoder = ((CPDF_Array*)pFilter)->GetString(0);
324         CPDF_Array* pParams = pDict->GetArray(FX_BSTRC("DecodeParms"));
325         if (pParams) {
326             pParam = pParams->GetDict(0);
327         }
328     } else {
329         Decoder = pFilter->GetString();
330         pParam = pDict->GetDict(FX_BSTRC("DecodeParms"));
331     }
332     FX_DWORD width = pDict->GetInteger(FX_BSTRC("Width"));
333     FX_DWORD height = pDict->GetInteger(FX_BSTRC("Height"));
334     FX_DWORD OrigSize = 0;
335     if (pCSObj != NULL) {
336         FX_DWORD bpc = pDict->GetInteger(FX_BSTRC("BitsPerComponent"));
337         FX_DWORD nComponents = 1;
338         CPDF_ColorSpace* pCS = pDoc->LoadColorSpace(pCSObj);
339         if (pCS == NULL) {
340             nComponents = 3;
341         } else {
342             nComponents = pCS->CountComponents();
343             pDoc->GetPageData()->ReleaseColorSpace(pCSObj);
344         }
345         FX_DWORD pitch = width;
346         if (bpc && pitch > INT_MAX / bpc) {
347             return NULL;
348         }
349         pitch *= bpc;
350         if (nComponents && pitch > INT_MAX / nComponents) {
351             return NULL;
352         }
353         pitch *= nComponents;
354         if (pitch > INT_MAX - 7) {
355             return NULL;
356         }
357         pitch += 7;
358         pitch /= 8;
359         OrigSize = pitch;
360     } else {
361         if (width > INT_MAX - 7) {
362             return NULL;
363         }
364         OrigSize = ((width + 7) / 8);
365     }
366     if (height && OrigSize > INT_MAX / height) {
367         return NULL;
368     }
369     OrigSize *= height;
370     FX_LPBYTE pData = NULL;
371     FX_DWORD dwStreamSize;
372     if (Decoder.IsEmpty()) {
373         if (OrigSize > m_Size - m_Pos) {
374             OrigSize = m_Size - m_Pos;
375         }
376         pData = FX_Alloc(FX_BYTE, OrigSize);
377         FXSYS_memcpy32(pData, m_pBuf + m_Pos, OrigSize);
378         dwStreamSize = OrigSize;
379         m_Pos += OrigSize;
380     } else {
381         FX_DWORD dwDestSize = OrigSize;
382         dwStreamSize = PDF_DecodeInlineStream(m_pBuf + m_Pos, m_Size - m_Pos, width, height, Decoder, pParam,
383                                               pData, dwDestSize);
384         if ((int)dwStreamSize < 0) {
385             return NULL;
386         }
387         if (bDecode) {
388             m_Pos += dwStreamSize;
389             dwStreamSize = dwDestSize;
390             if (pFilter->GetType() == PDFOBJ_ARRAY) {
391                 ((CPDF_Array*)pFilter)->RemoveAt(0);
392                 CPDF_Array* pParams = pDict->GetArray(FX_BSTRC("DecodeParms"));
393                 if (pParams) {
394                     pParams->RemoveAt(0);
395                 }
396             } else {
397                 pDict->RemoveAt(FX_BSTRC("Filter"));
398                 pDict->RemoveAt(FX_BSTRC("DecodeParms"));
399             }
400         } else {
401             if (pData) {
402                 FX_Free(pData);
403             }
404             FX_DWORD dwSavePos = m_Pos;
405             m_Pos += dwStreamSize;
406             while (1) {
407                 FX_DWORD dwPrevPos = m_Pos;
408                 CPDF_StreamParser::SyntaxType type = ParseNextElement();
409                 if (type == CPDF_StreamParser::EndOfData) {
410                     break;
411                 }
412                 if (type != CPDF_StreamParser::Keyword) {
413                     dwStreamSize += m_Pos - dwPrevPos;
414                     continue;
415                 }
416                 if (GetWordSize() == 2 && GetWordBuf()[0] == 'E' &&
417                         GetWordBuf()[1] == 'I') {
418                     m_Pos = dwPrevPos;
419                     break;
420                 }
421                 dwStreamSize += m_Pos - dwPrevPos;
422             }
423             m_Pos = dwSavePos;
424             pData = FX_Alloc(FX_BYTE, dwStreamSize);
425             FXSYS_memcpy32(pData, m_pBuf + m_Pos, dwStreamSize);
426             m_Pos += dwStreamSize;
427         }
428     }
429     pDict->SetAtInteger(FX_BSTRC("Length"), (int)dwStreamSize);
430     return CPDF_Stream::Create(pData, dwStreamSize, pDict);
431 }
432 #define MAX_WORD_BUFFER 256
433 #define MAX_STRING_LENGTH	32767
434 #define FXDWORD_TRUE FXDWORD_FROM_LSBFIRST(0x65757274)
435 #define FXDWORD_NULL FXDWORD_FROM_LSBFIRST(0x6c6c756e)
436 #define FXDWORD_FALS FXDWORD_FROM_LSBFIRST(0x736c6166)
ParseNextElement()437 CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement()
438 {
439     if (m_pLastObj) {
440         m_pLastObj->Release();
441         m_pLastObj = NULL;
442     }
443     m_WordSize = 0;
444     FX_BOOL bIsNumber = TRUE;
445     if (m_Pos >= m_Size) {
446         return EndOfData;
447     }
448     int ch = m_pBuf[m_Pos++];
449     int type = PDF_CharType[ch];
450     while (1) {
451         while (type == 'W') {
452             if (m_Size <= m_Pos) {
453                 return EndOfData;
454             }
455             ch = m_pBuf[m_Pos++];
456             type = PDF_CharType[ch];
457         }
458         if (ch != '%') {
459             break;
460         }
461         while (1) {
462             if (m_Size <= m_Pos) {
463                 return EndOfData;
464             }
465             ch = m_pBuf[m_Pos++];
466             if (ch == '\r' || ch == '\n') {
467                 break;
468             }
469         }
470         type = PDF_CharType[ch];
471     }
472     if (type == 'D' && ch != '/') {
473         m_Pos --;
474         m_pLastObj = ReadNextObject();
475         return Others;
476     }
477     while (1) {
478         if (m_WordSize < MAX_WORD_BUFFER) {
479             m_WordBuffer[m_WordSize++] = ch;
480         }
481         if (type != 'N') {
482             bIsNumber = FALSE;
483         }
484         if (m_Size <= m_Pos) {
485             break;
486         }
487         ch = m_pBuf[m_Pos++];
488         type = PDF_CharType[ch];
489         if (type == 'D' || type == 'W') {
490             m_Pos --;
491             break;
492         }
493     }
494     m_WordBuffer[m_WordSize] = 0;
495     if (bIsNumber) {
496         return Number;
497     }
498     if (m_WordBuffer[0] == '/') {
499         return Name;
500     }
501     if (m_WordSize == 4) {
502         if (*(FX_DWORD*)m_WordBuffer == FXDWORD_TRUE) {
503             m_pLastObj = CPDF_Boolean::Create(TRUE);
504             return Others;
505         }
506         if (*(FX_DWORD*)m_WordBuffer == FXDWORD_NULL) {
507             m_pLastObj = CPDF_Null::Create();
508             return Others;
509         }
510     } else if (m_WordSize == 5) {
511         if (*(FX_DWORD*)m_WordBuffer == FXDWORD_FALS && m_WordBuffer[4] == 'e') {
512             m_pLastObj = CPDF_Boolean::Create(FALSE);
513             return Others;
514         }
515     }
516     return Keyword;
517 }
SkipPathObject()518 void CPDF_StreamParser::SkipPathObject()
519 {
520     FX_DWORD command_startpos = m_Pos;
521     if (m_Pos >= m_Size) {
522         return;
523     }
524     int ch = m_pBuf[m_Pos++];
525     int type = PDF_CharType[ch];
526     while (1) {
527         while (type == 'W') {
528             if (m_Pos >= m_Size) {
529                 return;
530             }
531             ch = m_pBuf[m_Pos++];
532             type = PDF_CharType[ch];
533         }
534         if (type != 'N') {
535             m_Pos = command_startpos;
536             return;
537         }
538         while (1) {
539             while (type != 'W') {
540                 if (m_Pos >= m_Size) {
541                     return;
542                 }
543                 ch = m_pBuf[m_Pos++];
544                 type = PDF_CharType[ch];
545             }
546             while (type == 'W') {
547                 if (m_Pos >= m_Size) {
548                     return;
549                 }
550                 ch = m_pBuf[m_Pos++];
551                 type = PDF_CharType[ch];
552             }
553             if (type == 'N') {
554                 continue;
555             }
556             FX_DWORD op_startpos = m_Pos - 1;
557             while (type != 'W' && type != 'D') {
558                 if (m_Pos >= m_Size) {
559                     return;
560                 }
561                 ch = m_pBuf[m_Pos++];
562                 type = PDF_CharType[ch];
563             }
564             if (m_Pos - op_startpos == 2) {
565                 int op = m_pBuf[op_startpos];
566                 if (op == 'm' || op == 'l' || op == 'c' || op == 'v' || op == 'y') {
567                     command_startpos = m_Pos;
568                     break;
569                 }
570             } else if (m_Pos - op_startpos == 3) {
571                 if (m_pBuf[op_startpos] == 'r' && m_pBuf[op_startpos + 1] == 'e') {
572                     command_startpos = m_Pos;
573                     break;
574                 }
575             }
576             m_Pos = command_startpos;
577             return;
578         }
579     }
580 }
ReadNextObject(FX_BOOL bAllowNestedArray,FX_BOOL bInArray)581 CPDF_Object* CPDF_StreamParser::ReadNextObject(FX_BOOL bAllowNestedArray, FX_BOOL bInArray)
582 {
583     FX_BOOL bIsNumber;
584     GetNextWord(bIsNumber);
585     if (m_WordSize == 0) {
586         return NULL;
587     }
588     if (bIsNumber) {
589         m_WordBuffer[m_WordSize] = 0;
590         return CPDF_Number::Create(CFX_ByteStringC(m_WordBuffer, m_WordSize));
591     }
592     int first_char = m_WordBuffer[0];
593     if (first_char == '/') {
594         return CPDF_Name::Create(PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
595     }
596     if (first_char == '(') {
597         return CPDF_String::Create(ReadString());
598     }
599     if (first_char == '<') {
600         if (m_WordSize == 1) {
601             return CPDF_String::Create(ReadHexString(), TRUE);
602         }
603         CPDF_Dictionary* pDict = CPDF_Dictionary::Create();
604         while (1) {
605             GetNextWord(bIsNumber);
606             if (m_WordSize == 0) {
607                 pDict->Release();
608                 return NULL;
609             }
610             if (m_WordSize == 2 && m_WordBuffer[0] == '>') {
611                 break;
612             }
613             if (m_WordBuffer[0] != '/') {
614                 pDict->Release();
615                 return NULL;
616             }
617             CFX_ByteString key = PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1));
618             CPDF_Object* pObj = ReadNextObject(TRUE);
619             if (pObj == NULL) {
620                 if (pDict) {
621                     pDict->Release();
622                 }
623                 return NULL;
624             }
625             if (!key.IsEmpty()) {
626                 pDict->SetAt(key, pObj);
627             } else {
628                 pObj->Release();
629             }
630         }
631         return pDict;
632     }
633     if (first_char == '[') {
634         if (!bAllowNestedArray && bInArray) {
635             return NULL;
636         }
637         CPDF_Array* pArray = CPDF_Array::Create();
638         while (1) {
639             CPDF_Object* pObj = ReadNextObject(bAllowNestedArray, TRUE);
640             if (pObj == NULL) {
641                 if (m_WordSize == 0 || m_WordBuffer[0] == ']') {
642                     return pArray;
643                 }
644                 if (m_WordBuffer[0] == '[') {
645                     continue;
646                 }
647             } else {
648                 pArray->Add(pObj);
649             }
650         }
651     }
652     if (m_WordSize == 4) {
653         if (*(FX_DWORD*)m_WordBuffer == FXDWORD_TRUE) {
654             return CPDF_Boolean::Create(TRUE);
655         }
656         if (*(FX_DWORD*)m_WordBuffer == FXDWORD_NULL) {
657             return CPDF_Null::Create();
658         }
659     } else if (m_WordSize == 5) {
660         if (*(FX_DWORD*)m_WordBuffer == FXDWORD_FALS && m_WordBuffer[4] == 'e') {
661             return CPDF_Boolean::Create(FALSE);
662         }
663     }
664     return NULL;
665 }
GetNextWord(FX_BOOL & bIsNumber)666 void CPDF_StreamParser::GetNextWord(FX_BOOL& bIsNumber)
667 {
668     m_WordSize = 0;
669     bIsNumber = TRUE;
670     if (m_Size <= m_Pos) {
671         return;
672     }
673     int ch = m_pBuf[m_Pos++];
674     int type = PDF_CharType[ch];
675     while (1) {
676         while (type == 'W') {
677             if (m_Size <= m_Pos) {
678                 return;
679             }
680             ch = m_pBuf[m_Pos++];
681             type = PDF_CharType[ch];
682         }
683         if (ch != '%') {
684             break;
685         }
686         while (1) {
687             if (m_Size <= m_Pos) {
688                 return;
689             }
690             ch = m_pBuf[m_Pos++];
691             if (ch == '\r' || ch == '\n') {
692                 break;
693             }
694         }
695         type = PDF_CharType[ch];
696     }
697     if (type == 'D') {
698         bIsNumber = FALSE;
699         m_WordBuffer[m_WordSize++] = ch;
700         if (ch == '/') {
701             while (1) {
702                 if (m_Size <= m_Pos) {
703                     return;
704                 }
705                 ch = m_pBuf[m_Pos++];
706                 type = PDF_CharType[ch];
707                 if (type != 'R' && type != 'N') {
708                     m_Pos --;
709                     return;
710                 }
711                 if (m_WordSize < MAX_WORD_BUFFER) {
712                     m_WordBuffer[m_WordSize++] = ch;
713                 }
714             }
715         } else if (ch == '<') {
716             if (m_Size <= m_Pos) {
717                 return;
718             }
719             ch = m_pBuf[m_Pos++];
720             if (ch == '<') {
721                 m_WordBuffer[m_WordSize++] = ch;
722             } else {
723                 m_Pos --;
724             }
725         } else if (ch == '>') {
726             if (m_Size <= m_Pos) {
727                 return;
728             }
729             ch = m_pBuf[m_Pos++];
730             if (ch == '>') {
731                 m_WordBuffer[m_WordSize++] = ch;
732             } else {
733                 m_Pos --;
734             }
735         }
736         return;
737     }
738     while (1) {
739         if (m_WordSize < MAX_WORD_BUFFER) {
740             m_WordBuffer[m_WordSize++] = ch;
741         }
742         if (type != 'N') {
743             bIsNumber = FALSE;
744         }
745         if (m_Size <= m_Pos) {
746             return;
747         }
748         ch = m_pBuf[m_Pos++];
749         type = PDF_CharType[ch];
750         if (type == 'D' || type == 'W') {
751             m_Pos --;
752             break;
753         }
754     }
755 }
ReadString()756 CFX_ByteString CPDF_StreamParser::ReadString()
757 {
758     if (m_Size <= m_Pos) {
759         return CFX_ByteString();
760     }
761     int ch = m_pBuf[m_Pos++];
762     CFX_ByteTextBuf buf;
763     int parlevel = 0;
764     int status = 0, iEscCode = 0;
765     while (1) {
766         switch (status) {
767             case 0:
768                 if (ch == ')') {
769                     if (parlevel == 0) {
770                         if (buf.GetLength() > MAX_STRING_LENGTH) {
771                             return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH);
772                         }
773                         return buf.GetByteString();
774                     }
775                     parlevel --;
776                     buf.AppendChar(')');
777                 } else if (ch == '(') {
778                     parlevel ++;
779                     buf.AppendChar('(');
780                 } else if (ch == '\\') {
781                     status = 1;
782                 } else {
783                     buf.AppendChar((char)ch);
784                 }
785                 break;
786             case 1:
787                 if (ch >= '0' && ch <= '7') {
788                     iEscCode = ch - '0';
789                     status = 2;
790                     break;
791                 }
792                 if (ch == 'n') {
793                     buf.AppendChar('\n');
794                 } else if (ch == 'r') {
795                     buf.AppendChar('\r');
796                 } else if (ch == 't') {
797                     buf.AppendChar('\t');
798                 } else if (ch == 'b') {
799                     buf.AppendChar('\b');
800                 } else if (ch == 'f') {
801                     buf.AppendChar('\f');
802                 } else if (ch == '\r') {
803                     status = 4;
804                     break;
805                 } else if (ch == '\n') {
806                 } else {
807                     buf.AppendChar(ch);
808                 }
809                 status = 0;
810                 break;
811             case 2:
812                 if (ch >= '0' && ch <= '7') {
813                     iEscCode = iEscCode * 8 + ch - '0';
814                     status = 3;
815                 } else {
816                     buf.AppendChar(iEscCode);
817                     status = 0;
818                     continue;
819                 }
820                 break;
821             case 3:
822                 if (ch >= '0' && ch <= '7') {
823                     iEscCode = iEscCode * 8 + ch - '0';
824                     buf.AppendChar(iEscCode);
825                     status = 0;
826                 } else {
827                     buf.AppendChar(iEscCode);
828                     status = 0;
829                     continue;
830                 }
831                 break;
832             case 4:
833                 status = 0;
834                 if (ch != '\n') {
835                     continue;
836                 }
837                 break;
838         }
839         if (m_Size <= m_Pos) {
840             break;
841         }
842         ch = m_pBuf[m_Pos++];
843     }
844     if (m_Size > m_Pos) {
845         ch = m_pBuf[m_Pos++];
846     }
847     if (buf.GetLength() > MAX_STRING_LENGTH) {
848         return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH);
849     }
850     return buf.GetByteString();
851 }
ReadHexString()852 CFX_ByteString CPDF_StreamParser::ReadHexString()
853 {
854     if (m_Size <= m_Pos) {
855         return CFX_ByteString();
856     }
857     int ch = m_pBuf[m_Pos++];
858     CFX_ByteTextBuf buf;
859     FX_BOOL bFirst = TRUE;
860     int code = 0;
861     while (1) {
862         if (ch == '>') {
863             break;
864         }
865         if (ch >= '0' && ch <= '9') {
866             if (bFirst) {
867                 code = (ch - '0') * 16;
868             } else {
869                 code += ch - '0';
870                 buf.AppendChar((char)code);
871             }
872             bFirst = !bFirst;
873         } else if (ch >= 'A' && ch <= 'F') {
874             if (bFirst) {
875                 code = (ch - 'A' + 10) * 16;
876             } else {
877                 code += ch - 'A' + 10;
878                 buf.AppendChar((char)code);
879             }
880             bFirst = !bFirst;
881         } else if (ch >= 'a' && ch <= 'f') {
882             if (bFirst) {
883                 code = (ch - 'a' + 10) * 16;
884             } else {
885                 code += ch - 'a' + 10;
886                 buf.AppendChar((char)code);
887             }
888             bFirst = !bFirst;
889         }
890         if (m_Size <= m_Pos) {
891             break;
892         }
893         ch = m_pBuf[m_Pos++];
894     }
895     if (!bFirst) {
896         buf.AppendChar((char)code);
897     }
898     if (buf.GetLength() > MAX_STRING_LENGTH) {
899         return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH);
900     }
901     return buf.GetByteString();
902 }
// Values stored in CPDF_ContentParser::m_InternalStage while the parser's
// status is ToBeContinued. CPDF_ContentParser::Continue() dispatches on them.
// GETCONTENT: the page's content stream(s) are being loaded (and, for an
//             array of streams, concatenated into one buffer).
#define PAGEPARSE_STAGE_GETCONTENT		1
// PARSE: the loaded buffer is being fed to CPDF_StreamContentParser::Parse().
#define PAGEPARSE_STAGE_PARSE			2
// CHECKCLIP: final pass that copies Type3 glyph data and drops clip paths
//            that fully contain their object; ends with status Done.
#define PAGEPARSE_STAGE_CHECKCLIP		3
CPDF_ContentParser()906 CPDF_ContentParser::CPDF_ContentParser()
907 {
908     m_pParser = NULL;
909     m_pStreamArray = NULL;
910     m_pSingleStream = NULL;
911     m_pData = NULL;
912     m_Status = Ready;
913     m_pType3Char = NULL;
914 }
~CPDF_ContentParser()915 CPDF_ContentParser::~CPDF_ContentParser()
916 {
917     Clear();
918 }
Clear()919 void CPDF_ContentParser::Clear()
920 {
921     if (m_pParser) {
922         delete m_pParser;
923     }
924     if (m_pSingleStream) {
925         delete m_pSingleStream;
926     }
927     if (m_pStreamArray) {
928         for (FX_DWORD i = 0; i < m_nStreams; i ++)
929             if (m_pStreamArray[i]) {
930                 delete m_pStreamArray[i];
931             }
932         FX_Free(m_pStreamArray);
933     }
934     if (m_pData && m_pSingleStream == NULL) {
935         FX_Free((void*)m_pData);
936     }
937     m_pParser = NULL;
938     m_pStreamArray = NULL;
939     m_pSingleStream = NULL;
940     m_pData = NULL;
941     m_Status = Ready;
942 }
Start(CPDF_Page * pPage,CPDF_ParseOptions * pOptions)943 void CPDF_ContentParser::Start(CPDF_Page* pPage, CPDF_ParseOptions* pOptions)
944 {
945     if (m_Status != Ready || pPage == NULL || pPage->m_pDocument == NULL || pPage->m_pFormDict == NULL) {
946         m_Status = Done;
947         return;
948     }
949     m_pObjects = pPage;
950     m_bForm = FALSE;
951     if (pOptions) {
952         m_Options = *pOptions;
953     }
954     m_Status = ToBeContinued;
955     m_InternalStage = PAGEPARSE_STAGE_GETCONTENT;
956     m_CurrentOffset = 0;
957     CPDF_Object* pContent = pPage->m_pFormDict->GetElementValue(FX_BSTRC("Contents"));
958     if (pContent == NULL) {
959         m_Status = Done;
960         return;
961     }
962     if (pContent->GetType() == PDFOBJ_STREAM) {
963         m_nStreams = 0;
964         m_pSingleStream = new CPDF_StreamAcc;
965         m_pSingleStream->LoadAllData((CPDF_Stream*)pContent, FALSE);
966     } else if (pContent->GetType() == PDFOBJ_ARRAY) {
967         CPDF_Array* pArray = (CPDF_Array*)pContent;
968         m_nStreams = pArray->GetCount();
969         if (m_nStreams == 0) {
970             m_Status = Done;
971             return;
972         }
973         m_pStreamArray = FX_Alloc(CPDF_StreamAcc*, m_nStreams);
974     } else {
975         m_Status = Done;
976         return;
977     }
978 }
Start(CPDF_Form * pForm,CPDF_AllStates * pGraphicStates,CFX_AffineMatrix * pParentMatrix,CPDF_Type3Char * pType3Char,CPDF_ParseOptions * pOptions,int level)979 void CPDF_ContentParser::Start(CPDF_Form* pForm, CPDF_AllStates* pGraphicStates,
980                                CFX_AffineMatrix* pParentMatrix, CPDF_Type3Char* pType3Char, CPDF_ParseOptions* pOptions, int level)
981 {
982     m_pType3Char = pType3Char;
983     m_pObjects = pForm;
984     m_bForm = TRUE;
985     CFX_AffineMatrix form_matrix = pForm->m_pFormDict->GetMatrix(FX_BSTRC("Matrix"));
986     if (pGraphicStates) {
987         form_matrix.Concat(pGraphicStates->m_CTM);
988     }
989     CPDF_Array* pBBox = pForm->m_pFormDict->GetArray(FX_BSTRC("BBox"));
990     CFX_FloatRect form_bbox;
991     CPDF_Path ClipPath;
992     if (pBBox) {
993         form_bbox = pBBox->GetRect();
994         ClipPath.New();
995         ClipPath.AppendRect(form_bbox.left, form_bbox.bottom, form_bbox.right, form_bbox.top);
996         ClipPath.Transform(&form_matrix);
997         if (pParentMatrix) {
998             ClipPath.Transform(pParentMatrix);
999         }
1000         form_bbox.Transform(&form_matrix);
1001         if (pParentMatrix) {
1002             form_bbox.Transform(pParentMatrix);
1003         }
1004     }
1005     CPDF_Dictionary* pResources = pForm->m_pFormDict->GetDict(FX_BSTRC("Resources"));
1006     m_pParser = new CPDF_StreamContentParser;
1007     m_pParser->Initialize();
1008     m_pParser->PrepareParse(pForm->m_pDocument, pForm->m_pPageResources, pForm->m_pResources, pParentMatrix, pForm,
1009                             pResources, &form_bbox, pOptions, pGraphicStates, level);
1010     m_pParser->m_pCurStates->m_CTM = form_matrix;
1011     m_pParser->m_pCurStates->m_ParentMatrix = form_matrix;
1012     if (ClipPath.NotNull()) {
1013         m_pParser->m_pCurStates->m_ClipPath.AppendPath(ClipPath, FXFILL_WINDING, TRUE);
1014     }
1015     if (pForm->m_Transparency & PDFTRANS_GROUP) {
1016         CPDF_GeneralStateData* pData = m_pParser->m_pCurStates->m_GeneralState.GetModify();
1017         pData->m_BlendType = FXDIB_BLEND_NORMAL;
1018         pData->m_StrokeAlpha = 1.0f;
1019         pData->m_FillAlpha = 1.0f;
1020         pData->m_pSoftMask = NULL;
1021     }
1022     m_nStreams = 0;
1023     m_pSingleStream = new CPDF_StreamAcc;
1024     if (pForm->m_pDocument) {
1025         m_pSingleStream->LoadAllData(pForm->m_pFormStream, FALSE);
1026     } else {
1027         m_pSingleStream->LoadAllData(pForm->m_pFormStream, FALSE);
1028     }
1029     m_pData = (FX_LPBYTE)m_pSingleStream->GetData();
1030     m_Size = m_pSingleStream->GetSize();
1031     m_Status = ToBeContinued;
1032     m_InternalStage = PAGEPARSE_STAGE_PARSE;
1033     m_CurrentOffset = 0;
1034 }
Continue(IFX_Pause * pPause)1035 void CPDF_ContentParser::Continue(IFX_Pause* pPause)
1036 {
1037     int steps = 0;
1038     while (m_Status == ToBeContinued) {
1039         if (m_InternalStage == PAGEPARSE_STAGE_GETCONTENT) {
1040             if (m_CurrentOffset == m_nStreams) {
1041                 if (m_pStreamArray) {
1042                     m_Size = 0;
1043                     FX_DWORD i;
1044                     for (i = 0; i < m_nStreams; i ++) {
1045                         FX_DWORD size = m_pStreamArray[i]->GetSize();
1046                         if (m_Size + size + 1 <= m_Size) {
1047                             m_Status = Done;
1048                             return;
1049                         }
1050                         m_Size += size + 1;
1051                     }
1052                     m_pData = FX_Alloc(FX_BYTE, m_Size);
1053                     FX_DWORD pos = 0;
1054                     for (i = 0; i < m_nStreams; i ++) {
1055                         FXSYS_memcpy32(m_pData + pos, m_pStreamArray[i]->GetData(), m_pStreamArray[i]->GetSize());
1056                         pos += m_pStreamArray[i]->GetSize() + 1;
1057                         m_pData[pos - 1] = ' ';
1058                         delete m_pStreamArray[i];
1059                     }
1060                     FX_Free(m_pStreamArray);
1061                     m_pStreamArray = NULL;
1062                 } else {
1063                     m_pData = (FX_LPBYTE)m_pSingleStream->GetData();
1064                     m_Size = m_pSingleStream->GetSize();
1065                 }
1066                 m_InternalStage = PAGEPARSE_STAGE_PARSE;
1067                 m_CurrentOffset = 0;
1068             } else {
1069                 CPDF_Array* pContent = m_pObjects->m_pFormDict->GetArray(FX_BSTRC("Contents"));
1070                 m_pStreamArray[m_CurrentOffset] = new CPDF_StreamAcc;
1071                 CPDF_Stream* pStreamObj = (CPDF_Stream*)(pContent ? pContent->GetElementValue(m_CurrentOffset) : NULL);
1072                 m_pStreamArray[m_CurrentOffset]->LoadAllData(pStreamObj, FALSE);
1073                 m_CurrentOffset ++;
1074             }
1075         }
1076         if (m_InternalStage == PAGEPARSE_STAGE_PARSE) {
1077             if (m_pParser == NULL) {
1078                 m_pParser = new CPDF_StreamContentParser;
1079                 m_pParser->Initialize();
1080                 m_pParser->PrepareParse(m_pObjects->m_pDocument, m_pObjects->m_pPageResources, NULL, NULL, m_pObjects,
1081                                         m_pObjects->m_pResources, &m_pObjects->m_BBox, &m_Options, NULL, 0);
1082                 m_pParser->m_pCurStates->m_ColorState.GetModify()->Default();
1083             }
1084             if (m_CurrentOffset >= m_Size) {
1085                 m_InternalStage = PAGEPARSE_STAGE_CHECKCLIP;
1086             } else {
1087                 m_CurrentOffset += m_pParser->Parse(m_pData + m_CurrentOffset, m_Size - m_CurrentOffset, PARSE_STEP_LIMIT);
1088                 if (m_pParser->m_bAbort) {
1089                     m_InternalStage = PAGEPARSE_STAGE_CHECKCLIP;
1090                     continue;
1091                 }
1092             }
1093         }
1094         if (m_InternalStage == PAGEPARSE_STAGE_CHECKCLIP) {
1095             if (m_pType3Char) {
1096                 m_pType3Char->m_bColored = m_pParser->m_bColored;
1097                 m_pType3Char->m_Width = FXSYS_round(m_pParser->m_Type3Data[0] * 1000);
1098                 m_pType3Char->m_BBox.left = FXSYS_round(m_pParser->m_Type3Data[2] * 1000);
1099                 m_pType3Char->m_BBox.bottom = FXSYS_round(m_pParser->m_Type3Data[3] * 1000);
1100                 m_pType3Char->m_BBox.right = FXSYS_round(m_pParser->m_Type3Data[4] * 1000);
1101                 m_pType3Char->m_BBox.top = FXSYS_round(m_pParser->m_Type3Data[5] * 1000);
1102             }
1103             FX_POSITION pos = m_pObjects->m_ObjectList.GetHeadPosition();
1104             while (pos) {
1105                 CPDF_PageObject* pObj = (CPDF_PageObject*)m_pObjects->m_ObjectList.GetNext(pos);
1106                 if (pObj->m_ClipPath.IsNull()) {
1107                     continue;
1108                 }
1109                 if (pObj->m_ClipPath.GetPathCount() != 1) {
1110                     continue;
1111                 }
1112                 if (pObj->m_ClipPath.GetTextCount()) {
1113                     continue;
1114                 }
1115                 CPDF_Path ClipPath = pObj->m_ClipPath.GetPath(0);
1116                 if (!ClipPath.IsRect() || pObj->m_Type == PDFPAGE_SHADING) {
1117                     continue;
1118                 }
1119                 CFX_FloatRect old_rect(ClipPath.GetPointX(0), ClipPath.GetPointY(0),
1120                                        ClipPath.GetPointX(2), ClipPath.GetPointY(2));
1121                 CFX_FloatRect obj_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right, pObj->m_Top);
1122                 if (old_rect.Contains(obj_rect)) {
1123                     pObj->m_ClipPath.SetNull();
1124                 }
1125             }
1126             m_Status = Done;
1127             return;
1128         }
1129         steps ++;
1130         if (pPause && pPause->NeedToPauseNow()) {
1131             break;
1132         }
1133     }
1134 }
EstimateProgress()1135 int CPDF_ContentParser::EstimateProgress()
1136 {
1137     if (m_Status == Ready) {
1138         return 0;
1139     }
1140     if (m_Status == Done) {
1141         return 100;
1142     }
1143     if (m_InternalStage == PAGEPARSE_STAGE_GETCONTENT) {
1144         return 10;
1145     }
1146     if (m_InternalStage == PAGEPARSE_STAGE_CHECKCLIP) {
1147         return 90;
1148     }
1149     return 10 + 80 * m_CurrentOffset / m_Size;
1150 }
1151