1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "../../../include/fpdfapi/fpdf_page.h"
8 #include "../../../include/fpdfapi/fpdf_module.h"
9 #include "../../../include/fxcodec/fx_codec.h"
10 #include "pageint.h"
11 #include <limits.h>
12 extern const FX_LPCSTR _PDF_OpCharType =
13 "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"
14 "IIVIIIIVIIVIIIIIVVIIIIIIIIIIIIII"
15 "IIVVVVVVIVVVVVVIVVVVVIIVVIIIIIII"
16 "IIVVVVVVVVVVVVVVIVVVIIVVIVVIIIII"
17 "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"
18 "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"
19 "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"
20 "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII";
_PDF_HasInvalidOpChar(FX_LPCSTR op)21 FX_BOOL _PDF_HasInvalidOpChar(FX_LPCSTR op)
22 {
23 if(!op) {
24 return FALSE;
25 }
26 FX_BYTE ch;
27 while((ch = *op++)) {
28 if(_PDF_OpCharType[ch] == 'I') {
29 return TRUE;
30 }
31 }
32 return FALSE;
33 }
34 class CPDF_StreamParserAutoClearer {
35 public:
CPDF_StreamParserAutoClearer(CPDF_StreamParser ** scoped_variable,CPDF_StreamParser * new_parser)36 CPDF_StreamParserAutoClearer(CPDF_StreamParser** scoped_variable, CPDF_StreamParser* new_parser)
37 : scoped_variable_(scoped_variable) {
38 *scoped_variable_ = new_parser;
39 }
~CPDF_StreamParserAutoClearer()40 ~CPDF_StreamParserAutoClearer() { *scoped_variable_ = NULL; }
41 private:
42 CPDF_StreamParser** scoped_variable_;
43 };
Parse(FX_LPCBYTE pData,FX_DWORD dwSize,FX_DWORD max_cost)44 FX_DWORD CPDF_StreamContentParser::Parse(FX_LPCBYTE pData, FX_DWORD dwSize, FX_DWORD max_cost)
45 {
46 if (m_Level > _FPDF_MAX_FORM_LEVEL_) {
47 return dwSize;
48 }
49 FX_DWORD InitObjCount = m_pObjectList->CountObjects();
50 CPDF_StreamParser syntax(pData, dwSize);
51 CPDF_StreamParserAutoClearer auto_clearer(&m_pSyntax, &syntax);
52 m_CompatCount = 0;
53 while (1) {
54 FX_DWORD cost = m_pObjectList->CountObjects() - InitObjCount;
55 if (max_cost && cost >= max_cost) {
56 break;
57 }
58 switch (syntax.ParseNextElement()) {
59 case CPDF_StreamParser::EndOfData:
60 return m_pSyntax->GetPos();
61 case CPDF_StreamParser::Keyword:
62 if(!OnOperator((char*)syntax.GetWordBuf()) && _PDF_HasInvalidOpChar((char*)syntax.GetWordBuf())) {
63 m_bAbort = TRUE;
64 }
65 if (m_bAbort) {
66 return m_pSyntax->GetPos();
67 }
68 ClearAllParams();
69 break;
70 case CPDF_StreamParser::Number:
71 AddNumberParam((char*)syntax.GetWordBuf(), syntax.GetWordSize());
72 break;
73 case CPDF_StreamParser::Name:
74 AddNameParam((FX_LPCSTR)syntax.GetWordBuf() + 1, syntax.GetWordSize() - 1);
75 break;
76 default:
77 AddObjectParam(syntax.GetObject());
78 }
79 }
80 return m_pSyntax->GetPos();
81 }
82 void _PDF_ReplaceAbbr(CPDF_Object* pObj);
Handle_BeginImage()83 void CPDF_StreamContentParser::Handle_BeginImage()
84 {
85 FX_FILESIZE savePos = m_pSyntax->GetPos();
86 CPDF_Dictionary* pDict = CPDF_Dictionary::Create();
87 while (1) {
88 CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement();
89 if (type == CPDF_StreamParser::Keyword) {
90 CFX_ByteString bsKeyword(m_pSyntax->GetWordBuf(), m_pSyntax->GetWordSize());
91 if (bsKeyword != FX_BSTRC("ID")) {
92 m_pSyntax->SetPos(savePos);
93 pDict->Release();
94 return;
95 }
96 }
97 if (type != CPDF_StreamParser::Name) {
98 break;
99 }
100 CFX_ByteString key((FX_LPCSTR)m_pSyntax->GetWordBuf() + 1, m_pSyntax->GetWordSize() - 1);
101 CPDF_Object* pObj = m_pSyntax->ReadNextObject();
102 if (!key.IsEmpty()) {
103 pDict->SetAt(key, pObj, m_pDocument);
104 } else if (pObj) {
105 pObj->Release();
106 }
107 }
108 _PDF_ReplaceAbbr(pDict);
109 CPDF_Object* pCSObj = NULL;
110 if (pDict->KeyExist(FX_BSTRC("ColorSpace"))) {
111 pCSObj = pDict->GetElementValue(FX_BSTRC("ColorSpace"));
112 if (pCSObj->GetType() == PDFOBJ_NAME) {
113 CFX_ByteString name = pCSObj->GetString();
114 if (name != FX_BSTRC("DeviceRGB") && name != FX_BSTRC("DeviceGray") && name != FX_BSTRC("DeviceCMYK")) {
115 pCSObj = FindResourceObj(FX_BSTRC("ColorSpace"), name);
116 if (pCSObj && !pCSObj->GetObjNum()) {
117 pCSObj = pCSObj->Clone();
118 pDict->SetAt(FX_BSTRC("ColorSpace"), pCSObj, m_pDocument);
119 }
120 }
121 }
122 }
123 CPDF_Stream* pStream = m_pSyntax->ReadInlineStream(m_pDocument, pDict, pCSObj, m_Options.m_bDecodeInlineImage);
124 while (1) {
125 CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement();
126 if (type == CPDF_StreamParser::EndOfData) {
127 break;
128 }
129 if (type != CPDF_StreamParser::Keyword) {
130 continue;
131 }
132 if (m_pSyntax->GetWordSize() == 2 && m_pSyntax->GetWordBuf()[0] == 'E' &&
133 m_pSyntax->GetWordBuf()[1] == 'I') {
134 break;
135 }
136 }
137 if (m_Options.m_bTextOnly) {
138 if (pStream) {
139 pStream->Release();
140 } else {
141 pDict->Release();
142 }
143 return;
144 }
145 pDict->SetAtName(FX_BSTRC("Subtype"), FX_BSTRC("Image"));
146 CPDF_ImageObject *pImgObj = AddImage(pStream, NULL, TRUE);
147 if (!pImgObj) {
148 if (pStream) {
149 pStream->Release();
150 } else {
151 pDict->Release();
152 }
153 }
154 }
ParsePathObject()155 void CPDF_StreamContentParser::ParsePathObject()
156 {
157 FX_FLOAT params[6] = {0};
158 int nParams = 0;
159 int last_pos = m_pSyntax->GetPos();
160 while (1) {
161 CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement();
162 FX_BOOL bProcessed = TRUE;
163 switch (type) {
164 case CPDF_StreamParser::EndOfData:
165 return;
166 case CPDF_StreamParser::Keyword: {
167 int len = m_pSyntax->GetWordSize();
168 if (len == 1) {
169 switch (m_pSyntax->GetWordBuf()[0]) {
170 case 'm':
171 AddPathPoint(params[0], params[1], FXPT_MOVETO);
172 nParams = 0;
173 break;
174 case 'l':
175 AddPathPoint(params[0], params[1], FXPT_LINETO);
176 nParams = 0;
177 break;
178 case 'c':
179 AddPathPoint(params[0], params[1], FXPT_BEZIERTO);
180 AddPathPoint(params[2], params[3], FXPT_BEZIERTO);
181 AddPathPoint(params[4], params[5], FXPT_BEZIERTO);
182 nParams = 0;
183 break;
184 case 'v':
185 AddPathPoint(m_PathCurrentX, m_PathCurrentY, FXPT_BEZIERTO);
186 AddPathPoint(params[0], params[1], FXPT_BEZIERTO);
187 AddPathPoint(params[2], params[3], FXPT_BEZIERTO);
188 nParams = 0;
189 break;
190 case 'y':
191 AddPathPoint(params[0], params[1], FXPT_BEZIERTO);
192 AddPathPoint(params[2], params[3], FXPT_BEZIERTO);
193 AddPathPoint(params[2], params[3], FXPT_BEZIERTO);
194 nParams = 0;
195 break;
196 case 'h':
197 Handle_ClosePath();
198 nParams = 0;
199 break;
200 default:
201 bProcessed = FALSE;
202 break;
203 }
204 } else if (len == 2) {
205 if (m_pSyntax->GetWordBuf()[0] == 'r' && m_pSyntax->GetWordBuf()[1] == 'e') {
206 AddPathRect(params[0], params[1], params[2], params[3]);
207 nParams = 0;
208 } else {
209 bProcessed = FALSE;
210 }
211 } else {
212 bProcessed = FALSE;
213 }
214 if (bProcessed) {
215 last_pos = m_pSyntax->GetPos();
216 }
217 break;
218 }
219 case CPDF_StreamParser::Number: {
220 if (nParams == 6) {
221 break;
222 }
223 FX_BOOL bInteger;
224 int value;
225 FX_atonum(CFX_ByteStringC(m_pSyntax->GetWordBuf(), m_pSyntax->GetWordSize()), bInteger, &value);
226 params[nParams++] = bInteger ? (FX_FLOAT)value : *(FX_FLOAT*)&value;
227 break;
228 }
229 default:
230 bProcessed = FALSE;
231 }
232 if (!bProcessed) {
233 m_pSyntax->SetPos(last_pos);
234 return;
235 }
236 }
237 }
CPDF_StreamParser(const FX_BYTE * pData,FX_DWORD dwSize)238 CPDF_StreamParser::CPDF_StreamParser(const FX_BYTE* pData, FX_DWORD dwSize)
239 {
240 m_pBuf = pData;
241 m_Size = dwSize;
242 m_Pos = 0;
243 m_pLastObj = NULL;
244 }
~CPDF_StreamParser()245 CPDF_StreamParser::~CPDF_StreamParser()
246 {
247 if (m_pLastObj) {
248 m_pLastObj->Release();
249 }
250 }
_DecodeAllScanlines(ICodec_ScanlineDecoder * pDecoder,FX_LPBYTE & dest_buf,FX_DWORD & dest_size)251 FX_DWORD _DecodeAllScanlines(ICodec_ScanlineDecoder* pDecoder, FX_LPBYTE& dest_buf, FX_DWORD& dest_size)
252 {
253 if (pDecoder == NULL) {
254 return (FX_DWORD) - 1;
255 }
256 int ncomps = pDecoder->CountComps();
257 int bpc = pDecoder->GetBPC();
258 int width = pDecoder->GetWidth();
259 int height = pDecoder->GetHeight();
260 int pitch = (width * ncomps * bpc + 7) / 8;
261 if (height == 0 || pitch > (1 << 30) / height) {
262 delete pDecoder;
263 return -1;
264 }
265 dest_buf = FX_Alloc2D(FX_BYTE, pitch, height);
266 dest_size = pitch * height; // Safe since checked alloc returned.
267 for (int row = 0; row < height; row ++) {
268 FX_LPBYTE pLine = pDecoder->GetScanline(row);
269 if (pLine == NULL) {
270 break;
271 }
272 FXSYS_memcpy32(dest_buf + row * pitch, pLine, pitch);
273 }
274 FX_DWORD srcoff = pDecoder->GetSrcOffset();
275 delete pDecoder;
276 return srcoff;
277 }
278 ICodec_ScanlineDecoder* FPDFAPI_CreateFaxDecoder(FX_LPCBYTE src_buf, FX_DWORD src_size, int width, int height,
279 const CPDF_Dictionary* pParams);
280 FX_DWORD _A85Decode(const FX_BYTE* src_buf, FX_DWORD src_size, FX_LPBYTE& dest_buf, FX_DWORD& dest_size);
281 FX_DWORD _HexDecode(const FX_BYTE* src_buf, FX_DWORD src_size, FX_LPBYTE& dest_buf, FX_DWORD& dest_size);
282 FX_DWORD FPDFAPI_FlateOrLZWDecode(FX_BOOL bLZW, const FX_BYTE* src_buf, FX_DWORD src_size, CPDF_Dictionary* pParams,
283 FX_DWORD estimated_size, FX_LPBYTE& dest_buf, FX_DWORD& dest_size);
PDF_DecodeInlineStream(const FX_BYTE * src_buf,FX_DWORD limit,int width,int height,CFX_ByteString & decoder,CPDF_Dictionary * pParam,FX_LPBYTE & dest_buf,FX_DWORD & dest_size)284 FX_DWORD PDF_DecodeInlineStream(const FX_BYTE* src_buf, FX_DWORD limit,
285 int width, int height, CFX_ByteString& decoder,
286 CPDF_Dictionary* pParam, FX_LPBYTE& dest_buf, FX_DWORD& dest_size)
287 {
288 if (decoder == FX_BSTRC("CCITTFaxDecode") || decoder == FX_BSTRC("CCF")) {
289 ICodec_ScanlineDecoder* pDecoder = FPDFAPI_CreateFaxDecoder(src_buf, limit, width, height, pParam);
290 return _DecodeAllScanlines(pDecoder, dest_buf, dest_size);
291 } else if (decoder == FX_BSTRC("ASCII85Decode") || decoder == FX_BSTRC("A85")) {
292 return _A85Decode(src_buf, limit, dest_buf, dest_size);
293 } else if (decoder == FX_BSTRC("ASCIIHexDecode") || decoder == FX_BSTRC("AHx")) {
294 return _HexDecode(src_buf, limit, dest_buf, dest_size);
295 } else if (decoder == FX_BSTRC("FlateDecode") || decoder == FX_BSTRC("Fl")) {
296 return FPDFAPI_FlateOrLZWDecode(FALSE, src_buf, limit, pParam, dest_size, dest_buf, dest_size);
297 } else if (decoder == FX_BSTRC("LZWDecode") || decoder == FX_BSTRC("LZW")) {
298 return FPDFAPI_FlateOrLZWDecode(TRUE, src_buf, limit, pParam, 0, dest_buf, dest_size);
299 } else if (decoder == FX_BSTRC("DCTDecode") || decoder == FX_BSTRC("DCT")) {
300 ICodec_ScanlineDecoder* pDecoder = CPDF_ModuleMgr::Get()->GetJpegModule()->CreateDecoder(
301 src_buf, limit, width, height, 0, pParam ? pParam->GetInteger(FX_BSTRC("ColorTransform"), 1) : 1);
302 return _DecodeAllScanlines(pDecoder, dest_buf, dest_size);
303 } else if (decoder == FX_BSTRC("RunLengthDecode") || decoder == FX_BSTRC("RL")) {
304 return RunLengthDecode(src_buf, limit, dest_buf, dest_size);
305 }
306 dest_size = 0;
307 dest_buf = 0;
308 return (FX_DWORD) - 1;
309 }
ReadInlineStream(CPDF_Document * pDoc,CPDF_Dictionary * pDict,CPDF_Object * pCSObj,FX_BOOL bDecode)310 CPDF_Stream* CPDF_StreamParser::ReadInlineStream(CPDF_Document* pDoc, CPDF_Dictionary* pDict, CPDF_Object* pCSObj, FX_BOOL bDecode)
311 {
312 if (m_Pos == m_Size) {
313 return NULL;
314 }
315 if (PDF_CharType[m_pBuf[m_Pos]] == 'W') {
316 m_Pos ++;
317 }
318 CFX_ByteString Decoder;
319 CPDF_Dictionary* pParam = NULL;
320 CPDF_Object* pFilter = pDict->GetElementValue(FX_BSTRC("Filter"));
321 if (pFilter == NULL) {
322 } else if (pFilter->GetType() == PDFOBJ_ARRAY) {
323 Decoder = ((CPDF_Array*)pFilter)->GetString(0);
324 CPDF_Array* pParams = pDict->GetArray(FX_BSTRC("DecodeParms"));
325 if (pParams) {
326 pParam = pParams->GetDict(0);
327 }
328 } else {
329 Decoder = pFilter->GetString();
330 pParam = pDict->GetDict(FX_BSTRC("DecodeParms"));
331 }
332 FX_DWORD width = pDict->GetInteger(FX_BSTRC("Width"));
333 FX_DWORD height = pDict->GetInteger(FX_BSTRC("Height"));
334 FX_DWORD OrigSize = 0;
335 if (pCSObj != NULL) {
336 FX_DWORD bpc = pDict->GetInteger(FX_BSTRC("BitsPerComponent"));
337 FX_DWORD nComponents = 1;
338 CPDF_ColorSpace* pCS = pDoc->LoadColorSpace(pCSObj);
339 if (pCS == NULL) {
340 nComponents = 3;
341 } else {
342 nComponents = pCS->CountComponents();
343 pDoc->GetPageData()->ReleaseColorSpace(pCSObj);
344 }
345 FX_DWORD pitch = width;
346 if (bpc && pitch > INT_MAX / bpc) {
347 return NULL;
348 }
349 pitch *= bpc;
350 if (nComponents && pitch > INT_MAX / nComponents) {
351 return NULL;
352 }
353 pitch *= nComponents;
354 if (pitch > INT_MAX - 7) {
355 return NULL;
356 }
357 pitch += 7;
358 pitch /= 8;
359 OrigSize = pitch;
360 } else {
361 if (width > INT_MAX - 7) {
362 return NULL;
363 }
364 OrigSize = ((width + 7) / 8);
365 }
366 if (height && OrigSize > INT_MAX / height) {
367 return NULL;
368 }
369 OrigSize *= height;
370 FX_LPBYTE pData = NULL;
371 FX_DWORD dwStreamSize;
372 if (Decoder.IsEmpty()) {
373 if (OrigSize > m_Size - m_Pos) {
374 OrigSize = m_Size - m_Pos;
375 }
376 pData = FX_Alloc(FX_BYTE, OrigSize);
377 FXSYS_memcpy32(pData, m_pBuf + m_Pos, OrigSize);
378 dwStreamSize = OrigSize;
379 m_Pos += OrigSize;
380 } else {
381 FX_DWORD dwDestSize = OrigSize;
382 dwStreamSize = PDF_DecodeInlineStream(m_pBuf + m_Pos, m_Size - m_Pos, width, height, Decoder, pParam,
383 pData, dwDestSize);
384 if ((int)dwStreamSize < 0) {
385 return NULL;
386 }
387 if (bDecode) {
388 m_Pos += dwStreamSize;
389 dwStreamSize = dwDestSize;
390 if (pFilter->GetType() == PDFOBJ_ARRAY) {
391 ((CPDF_Array*)pFilter)->RemoveAt(0);
392 CPDF_Array* pParams = pDict->GetArray(FX_BSTRC("DecodeParms"));
393 if (pParams) {
394 pParams->RemoveAt(0);
395 }
396 } else {
397 pDict->RemoveAt(FX_BSTRC("Filter"));
398 pDict->RemoveAt(FX_BSTRC("DecodeParms"));
399 }
400 } else {
401 if (pData) {
402 FX_Free(pData);
403 }
404 FX_DWORD dwSavePos = m_Pos;
405 m_Pos += dwStreamSize;
406 while (1) {
407 FX_DWORD dwPrevPos = m_Pos;
408 CPDF_StreamParser::SyntaxType type = ParseNextElement();
409 if (type == CPDF_StreamParser::EndOfData) {
410 break;
411 }
412 if (type != CPDF_StreamParser::Keyword) {
413 dwStreamSize += m_Pos - dwPrevPos;
414 continue;
415 }
416 if (GetWordSize() == 2 && GetWordBuf()[0] == 'E' &&
417 GetWordBuf()[1] == 'I') {
418 m_Pos = dwPrevPos;
419 break;
420 }
421 dwStreamSize += m_Pos - dwPrevPos;
422 }
423 m_Pos = dwSavePos;
424 pData = FX_Alloc(FX_BYTE, dwStreamSize);
425 FXSYS_memcpy32(pData, m_pBuf + m_Pos, dwStreamSize);
426 m_Pos += dwStreamSize;
427 }
428 }
429 pDict->SetAtInteger(FX_BSTRC("Length"), (int)dwStreamSize);
430 return CPDF_Stream::Create(pData, dwStreamSize, pDict);
431 }
432 #define MAX_WORD_BUFFER 256
433 #define MAX_STRING_LENGTH 32767
434 #define FXDWORD_TRUE FXDWORD_FROM_LSBFIRST(0x65757274)
435 #define FXDWORD_NULL FXDWORD_FROM_LSBFIRST(0x6c6c756e)
436 #define FXDWORD_FALS FXDWORD_FROM_LSBFIRST(0x736c6166)
ParseNextElement()437 CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement()
438 {
439 if (m_pLastObj) {
440 m_pLastObj->Release();
441 m_pLastObj = NULL;
442 }
443 m_WordSize = 0;
444 FX_BOOL bIsNumber = TRUE;
445 if (m_Pos >= m_Size) {
446 return EndOfData;
447 }
448 int ch = m_pBuf[m_Pos++];
449 int type = PDF_CharType[ch];
450 while (1) {
451 while (type == 'W') {
452 if (m_Size <= m_Pos) {
453 return EndOfData;
454 }
455 ch = m_pBuf[m_Pos++];
456 type = PDF_CharType[ch];
457 }
458 if (ch != '%') {
459 break;
460 }
461 while (1) {
462 if (m_Size <= m_Pos) {
463 return EndOfData;
464 }
465 ch = m_pBuf[m_Pos++];
466 if (ch == '\r' || ch == '\n') {
467 break;
468 }
469 }
470 type = PDF_CharType[ch];
471 }
472 if (type == 'D' && ch != '/') {
473 m_Pos --;
474 m_pLastObj = ReadNextObject();
475 return Others;
476 }
477 while (1) {
478 if (m_WordSize < MAX_WORD_BUFFER) {
479 m_WordBuffer[m_WordSize++] = ch;
480 }
481 if (type != 'N') {
482 bIsNumber = FALSE;
483 }
484 if (m_Size <= m_Pos) {
485 break;
486 }
487 ch = m_pBuf[m_Pos++];
488 type = PDF_CharType[ch];
489 if (type == 'D' || type == 'W') {
490 m_Pos --;
491 break;
492 }
493 }
494 m_WordBuffer[m_WordSize] = 0;
495 if (bIsNumber) {
496 return Number;
497 }
498 if (m_WordBuffer[0] == '/') {
499 return Name;
500 }
501 if (m_WordSize == 4) {
502 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_TRUE) {
503 m_pLastObj = CPDF_Boolean::Create(TRUE);
504 return Others;
505 }
506 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_NULL) {
507 m_pLastObj = CPDF_Null::Create();
508 return Others;
509 }
510 } else if (m_WordSize == 5) {
511 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_FALS && m_WordBuffer[4] == 'e') {
512 m_pLastObj = CPDF_Boolean::Create(FALSE);
513 return Others;
514 }
515 }
516 return Keyword;
517 }
SkipPathObject()518 void CPDF_StreamParser::SkipPathObject()
519 {
520 FX_DWORD command_startpos = m_Pos;
521 if (m_Pos >= m_Size) {
522 return;
523 }
524 int ch = m_pBuf[m_Pos++];
525 int type = PDF_CharType[ch];
526 while (1) {
527 while (type == 'W') {
528 if (m_Pos >= m_Size) {
529 return;
530 }
531 ch = m_pBuf[m_Pos++];
532 type = PDF_CharType[ch];
533 }
534 if (type != 'N') {
535 m_Pos = command_startpos;
536 return;
537 }
538 while (1) {
539 while (type != 'W') {
540 if (m_Pos >= m_Size) {
541 return;
542 }
543 ch = m_pBuf[m_Pos++];
544 type = PDF_CharType[ch];
545 }
546 while (type == 'W') {
547 if (m_Pos >= m_Size) {
548 return;
549 }
550 ch = m_pBuf[m_Pos++];
551 type = PDF_CharType[ch];
552 }
553 if (type == 'N') {
554 continue;
555 }
556 FX_DWORD op_startpos = m_Pos - 1;
557 while (type != 'W' && type != 'D') {
558 if (m_Pos >= m_Size) {
559 return;
560 }
561 ch = m_pBuf[m_Pos++];
562 type = PDF_CharType[ch];
563 }
564 if (m_Pos - op_startpos == 2) {
565 int op = m_pBuf[op_startpos];
566 if (op == 'm' || op == 'l' || op == 'c' || op == 'v' || op == 'y') {
567 command_startpos = m_Pos;
568 break;
569 }
570 } else if (m_Pos - op_startpos == 3) {
571 if (m_pBuf[op_startpos] == 'r' && m_pBuf[op_startpos + 1] == 'e') {
572 command_startpos = m_Pos;
573 break;
574 }
575 }
576 m_Pos = command_startpos;
577 return;
578 }
579 }
580 }
ReadNextObject(FX_BOOL bAllowNestedArray,FX_BOOL bInArray)581 CPDF_Object* CPDF_StreamParser::ReadNextObject(FX_BOOL bAllowNestedArray, FX_BOOL bInArray)
582 {
583 FX_BOOL bIsNumber;
584 GetNextWord(bIsNumber);
585 if (m_WordSize == 0) {
586 return NULL;
587 }
588 if (bIsNumber) {
589 m_WordBuffer[m_WordSize] = 0;
590 return CPDF_Number::Create(CFX_ByteStringC(m_WordBuffer, m_WordSize));
591 }
592 int first_char = m_WordBuffer[0];
593 if (first_char == '/') {
594 return CPDF_Name::Create(PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
595 }
596 if (first_char == '(') {
597 return CPDF_String::Create(ReadString());
598 }
599 if (first_char == '<') {
600 if (m_WordSize == 1) {
601 return CPDF_String::Create(ReadHexString(), TRUE);
602 }
603 CPDF_Dictionary* pDict = CPDF_Dictionary::Create();
604 while (1) {
605 GetNextWord(bIsNumber);
606 if (m_WordSize == 0) {
607 pDict->Release();
608 return NULL;
609 }
610 if (m_WordSize == 2 && m_WordBuffer[0] == '>') {
611 break;
612 }
613 if (m_WordBuffer[0] != '/') {
614 pDict->Release();
615 return NULL;
616 }
617 CFX_ByteString key = PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1));
618 CPDF_Object* pObj = ReadNextObject(TRUE);
619 if (pObj == NULL) {
620 if (pDict) {
621 pDict->Release();
622 }
623 return NULL;
624 }
625 if (!key.IsEmpty()) {
626 pDict->SetAt(key, pObj);
627 } else {
628 pObj->Release();
629 }
630 }
631 return pDict;
632 }
633 if (first_char == '[') {
634 if (!bAllowNestedArray && bInArray) {
635 return NULL;
636 }
637 CPDF_Array* pArray = CPDF_Array::Create();
638 while (1) {
639 CPDF_Object* pObj = ReadNextObject(bAllowNestedArray, TRUE);
640 if (pObj == NULL) {
641 if (m_WordSize == 0 || m_WordBuffer[0] == ']') {
642 return pArray;
643 }
644 if (m_WordBuffer[0] == '[') {
645 continue;
646 }
647 } else {
648 pArray->Add(pObj);
649 }
650 }
651 }
652 if (m_WordSize == 4) {
653 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_TRUE) {
654 return CPDF_Boolean::Create(TRUE);
655 }
656 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_NULL) {
657 return CPDF_Null::Create();
658 }
659 } else if (m_WordSize == 5) {
660 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_FALS && m_WordBuffer[4] == 'e') {
661 return CPDF_Boolean::Create(FALSE);
662 }
663 }
664 return NULL;
665 }
GetNextWord(FX_BOOL & bIsNumber)666 void CPDF_StreamParser::GetNextWord(FX_BOOL& bIsNumber)
667 {
668 m_WordSize = 0;
669 bIsNumber = TRUE;
670 if (m_Size <= m_Pos) {
671 return;
672 }
673 int ch = m_pBuf[m_Pos++];
674 int type = PDF_CharType[ch];
675 while (1) {
676 while (type == 'W') {
677 if (m_Size <= m_Pos) {
678 return;
679 }
680 ch = m_pBuf[m_Pos++];
681 type = PDF_CharType[ch];
682 }
683 if (ch != '%') {
684 break;
685 }
686 while (1) {
687 if (m_Size <= m_Pos) {
688 return;
689 }
690 ch = m_pBuf[m_Pos++];
691 if (ch == '\r' || ch == '\n') {
692 break;
693 }
694 }
695 type = PDF_CharType[ch];
696 }
697 if (type == 'D') {
698 bIsNumber = FALSE;
699 m_WordBuffer[m_WordSize++] = ch;
700 if (ch == '/') {
701 while (1) {
702 if (m_Size <= m_Pos) {
703 return;
704 }
705 ch = m_pBuf[m_Pos++];
706 type = PDF_CharType[ch];
707 if (type != 'R' && type != 'N') {
708 m_Pos --;
709 return;
710 }
711 if (m_WordSize < MAX_WORD_BUFFER) {
712 m_WordBuffer[m_WordSize++] = ch;
713 }
714 }
715 } else if (ch == '<') {
716 if (m_Size <= m_Pos) {
717 return;
718 }
719 ch = m_pBuf[m_Pos++];
720 if (ch == '<') {
721 m_WordBuffer[m_WordSize++] = ch;
722 } else {
723 m_Pos --;
724 }
725 } else if (ch == '>') {
726 if (m_Size <= m_Pos) {
727 return;
728 }
729 ch = m_pBuf[m_Pos++];
730 if (ch == '>') {
731 m_WordBuffer[m_WordSize++] = ch;
732 } else {
733 m_Pos --;
734 }
735 }
736 return;
737 }
738 while (1) {
739 if (m_WordSize < MAX_WORD_BUFFER) {
740 m_WordBuffer[m_WordSize++] = ch;
741 }
742 if (type != 'N') {
743 bIsNumber = FALSE;
744 }
745 if (m_Size <= m_Pos) {
746 return;
747 }
748 ch = m_pBuf[m_Pos++];
749 type = PDF_CharType[ch];
750 if (type == 'D' || type == 'W') {
751 m_Pos --;
752 break;
753 }
754 }
755 }
ReadString()756 CFX_ByteString CPDF_StreamParser::ReadString()
757 {
758 if (m_Size <= m_Pos) {
759 return CFX_ByteString();
760 }
761 int ch = m_pBuf[m_Pos++];
762 CFX_ByteTextBuf buf;
763 int parlevel = 0;
764 int status = 0, iEscCode = 0;
765 while (1) {
766 switch (status) {
767 case 0:
768 if (ch == ')') {
769 if (parlevel == 0) {
770 if (buf.GetLength() > MAX_STRING_LENGTH) {
771 return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH);
772 }
773 return buf.GetByteString();
774 }
775 parlevel --;
776 buf.AppendChar(')');
777 } else if (ch == '(') {
778 parlevel ++;
779 buf.AppendChar('(');
780 } else if (ch == '\\') {
781 status = 1;
782 } else {
783 buf.AppendChar((char)ch);
784 }
785 break;
786 case 1:
787 if (ch >= '0' && ch <= '7') {
788 iEscCode = ch - '0';
789 status = 2;
790 break;
791 }
792 if (ch == 'n') {
793 buf.AppendChar('\n');
794 } else if (ch == 'r') {
795 buf.AppendChar('\r');
796 } else if (ch == 't') {
797 buf.AppendChar('\t');
798 } else if (ch == 'b') {
799 buf.AppendChar('\b');
800 } else if (ch == 'f') {
801 buf.AppendChar('\f');
802 } else if (ch == '\r') {
803 status = 4;
804 break;
805 } else if (ch == '\n') {
806 } else {
807 buf.AppendChar(ch);
808 }
809 status = 0;
810 break;
811 case 2:
812 if (ch >= '0' && ch <= '7') {
813 iEscCode = iEscCode * 8 + ch - '0';
814 status = 3;
815 } else {
816 buf.AppendChar(iEscCode);
817 status = 0;
818 continue;
819 }
820 break;
821 case 3:
822 if (ch >= '0' && ch <= '7') {
823 iEscCode = iEscCode * 8 + ch - '0';
824 buf.AppendChar(iEscCode);
825 status = 0;
826 } else {
827 buf.AppendChar(iEscCode);
828 status = 0;
829 continue;
830 }
831 break;
832 case 4:
833 status = 0;
834 if (ch != '\n') {
835 continue;
836 }
837 break;
838 }
839 if (m_Size <= m_Pos) {
840 break;
841 }
842 ch = m_pBuf[m_Pos++];
843 }
844 if (m_Size > m_Pos) {
845 ch = m_pBuf[m_Pos++];
846 }
847 if (buf.GetLength() > MAX_STRING_LENGTH) {
848 return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH);
849 }
850 return buf.GetByteString();
851 }
ReadHexString()852 CFX_ByteString CPDF_StreamParser::ReadHexString()
853 {
854 if (m_Size <= m_Pos) {
855 return CFX_ByteString();
856 }
857 int ch = m_pBuf[m_Pos++];
858 CFX_ByteTextBuf buf;
859 FX_BOOL bFirst = TRUE;
860 int code = 0;
861 while (1) {
862 if (ch == '>') {
863 break;
864 }
865 if (ch >= '0' && ch <= '9') {
866 if (bFirst) {
867 code = (ch - '0') * 16;
868 } else {
869 code += ch - '0';
870 buf.AppendChar((char)code);
871 }
872 bFirst = !bFirst;
873 } else if (ch >= 'A' && ch <= 'F') {
874 if (bFirst) {
875 code = (ch - 'A' + 10) * 16;
876 } else {
877 code += ch - 'A' + 10;
878 buf.AppendChar((char)code);
879 }
880 bFirst = !bFirst;
881 } else if (ch >= 'a' && ch <= 'f') {
882 if (bFirst) {
883 code = (ch - 'a' + 10) * 16;
884 } else {
885 code += ch - 'a' + 10;
886 buf.AppendChar((char)code);
887 }
888 bFirst = !bFirst;
889 }
890 if (m_Size <= m_Pos) {
891 break;
892 }
893 ch = m_pBuf[m_Pos++];
894 }
895 if (!bFirst) {
896 buf.AppendChar((char)code);
897 }
898 if (buf.GetLength() > MAX_STRING_LENGTH) {
899 return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH);
900 }
901 return buf.GetByteString();
902 }
// Values stored in CPDF_ContentParser::m_InternalStage while a parse is in
// progress, in the order in which the stages are entered.
enum {
    PAGEPARSE_STAGE_GETCONTENT = 1,   // loading the content stream(s)
    PAGEPARSE_STAGE_PARSE = 2,        // parsing the loaded data
    PAGEPARSE_STAGE_CHECKCLIP = 3     // post-processing of clip paths
};
CPDF_ContentParser()906 CPDF_ContentParser::CPDF_ContentParser()
907 {
908 m_pParser = NULL;
909 m_pStreamArray = NULL;
910 m_pSingleStream = NULL;
911 m_pData = NULL;
912 m_Status = Ready;
913 m_pType3Char = NULL;
914 }
~CPDF_ContentParser()915 CPDF_ContentParser::~CPDF_ContentParser()
916 {
917 Clear();
918 }
Clear()919 void CPDF_ContentParser::Clear()
920 {
921 if (m_pParser) {
922 delete m_pParser;
923 }
924 if (m_pSingleStream) {
925 delete m_pSingleStream;
926 }
927 if (m_pStreamArray) {
928 for (FX_DWORD i = 0; i < m_nStreams; i ++)
929 if (m_pStreamArray[i]) {
930 delete m_pStreamArray[i];
931 }
932 FX_Free(m_pStreamArray);
933 }
934 if (m_pData && m_pSingleStream == NULL) {
935 FX_Free((void*)m_pData);
936 }
937 m_pParser = NULL;
938 m_pStreamArray = NULL;
939 m_pSingleStream = NULL;
940 m_pData = NULL;
941 m_Status = Ready;
942 }
Start(CPDF_Page * pPage,CPDF_ParseOptions * pOptions)943 void CPDF_ContentParser::Start(CPDF_Page* pPage, CPDF_ParseOptions* pOptions)
944 {
945 if (m_Status != Ready || pPage == NULL || pPage->m_pDocument == NULL || pPage->m_pFormDict == NULL) {
946 m_Status = Done;
947 return;
948 }
949 m_pObjects = pPage;
950 m_bForm = FALSE;
951 if (pOptions) {
952 m_Options = *pOptions;
953 }
954 m_Status = ToBeContinued;
955 m_InternalStage = PAGEPARSE_STAGE_GETCONTENT;
956 m_CurrentOffset = 0;
957 CPDF_Object* pContent = pPage->m_pFormDict->GetElementValue(FX_BSTRC("Contents"));
958 if (pContent == NULL) {
959 m_Status = Done;
960 return;
961 }
962 if (pContent->GetType() == PDFOBJ_STREAM) {
963 m_nStreams = 0;
964 m_pSingleStream = new CPDF_StreamAcc;
965 m_pSingleStream->LoadAllData((CPDF_Stream*)pContent, FALSE);
966 } else if (pContent->GetType() == PDFOBJ_ARRAY) {
967 CPDF_Array* pArray = (CPDF_Array*)pContent;
968 m_nStreams = pArray->GetCount();
969 if (m_nStreams == 0) {
970 m_Status = Done;
971 return;
972 }
973 m_pStreamArray = FX_Alloc(CPDF_StreamAcc*, m_nStreams);
974 } else {
975 m_Status = Done;
976 return;
977 }
978 }
Start(CPDF_Form * pForm,CPDF_AllStates * pGraphicStates,CFX_AffineMatrix * pParentMatrix,CPDF_Type3Char * pType3Char,CPDF_ParseOptions * pOptions,int level)979 void CPDF_ContentParser::Start(CPDF_Form* pForm, CPDF_AllStates* pGraphicStates,
980 CFX_AffineMatrix* pParentMatrix, CPDF_Type3Char* pType3Char, CPDF_ParseOptions* pOptions, int level)
981 {
982 m_pType3Char = pType3Char;
983 m_pObjects = pForm;
984 m_bForm = TRUE;
985 CFX_AffineMatrix form_matrix = pForm->m_pFormDict->GetMatrix(FX_BSTRC("Matrix"));
986 if (pGraphicStates) {
987 form_matrix.Concat(pGraphicStates->m_CTM);
988 }
989 CPDF_Array* pBBox = pForm->m_pFormDict->GetArray(FX_BSTRC("BBox"));
990 CFX_FloatRect form_bbox;
991 CPDF_Path ClipPath;
992 if (pBBox) {
993 form_bbox = pBBox->GetRect();
994 ClipPath.New();
995 ClipPath.AppendRect(form_bbox.left, form_bbox.bottom, form_bbox.right, form_bbox.top);
996 ClipPath.Transform(&form_matrix);
997 if (pParentMatrix) {
998 ClipPath.Transform(pParentMatrix);
999 }
1000 form_bbox.Transform(&form_matrix);
1001 if (pParentMatrix) {
1002 form_bbox.Transform(pParentMatrix);
1003 }
1004 }
1005 CPDF_Dictionary* pResources = pForm->m_pFormDict->GetDict(FX_BSTRC("Resources"));
1006 m_pParser = new CPDF_StreamContentParser;
1007 m_pParser->Initialize();
1008 m_pParser->PrepareParse(pForm->m_pDocument, pForm->m_pPageResources, pForm->m_pResources, pParentMatrix, pForm,
1009 pResources, &form_bbox, pOptions, pGraphicStates, level);
1010 m_pParser->m_pCurStates->m_CTM = form_matrix;
1011 m_pParser->m_pCurStates->m_ParentMatrix = form_matrix;
1012 if (ClipPath.NotNull()) {
1013 m_pParser->m_pCurStates->m_ClipPath.AppendPath(ClipPath, FXFILL_WINDING, TRUE);
1014 }
1015 if (pForm->m_Transparency & PDFTRANS_GROUP) {
1016 CPDF_GeneralStateData* pData = m_pParser->m_pCurStates->m_GeneralState.GetModify();
1017 pData->m_BlendType = FXDIB_BLEND_NORMAL;
1018 pData->m_StrokeAlpha = 1.0f;
1019 pData->m_FillAlpha = 1.0f;
1020 pData->m_pSoftMask = NULL;
1021 }
1022 m_nStreams = 0;
1023 m_pSingleStream = new CPDF_StreamAcc;
1024 if (pForm->m_pDocument) {
1025 m_pSingleStream->LoadAllData(pForm->m_pFormStream, FALSE);
1026 } else {
1027 m_pSingleStream->LoadAllData(pForm->m_pFormStream, FALSE);
1028 }
1029 m_pData = (FX_LPBYTE)m_pSingleStream->GetData();
1030 m_Size = m_pSingleStream->GetSize();
1031 m_Status = ToBeContinued;
1032 m_InternalStage = PAGEPARSE_STAGE_PARSE;
1033 m_CurrentOffset = 0;
1034 }
Continue(IFX_Pause * pPause)1035 void CPDF_ContentParser::Continue(IFX_Pause* pPause)
1036 {
1037 int steps = 0;
1038 while (m_Status == ToBeContinued) {
1039 if (m_InternalStage == PAGEPARSE_STAGE_GETCONTENT) {
1040 if (m_CurrentOffset == m_nStreams) {
1041 if (m_pStreamArray) {
1042 m_Size = 0;
1043 FX_DWORD i;
1044 for (i = 0; i < m_nStreams; i ++) {
1045 FX_DWORD size = m_pStreamArray[i]->GetSize();
1046 if (m_Size + size + 1 <= m_Size) {
1047 m_Status = Done;
1048 return;
1049 }
1050 m_Size += size + 1;
1051 }
1052 m_pData = FX_Alloc(FX_BYTE, m_Size);
1053 FX_DWORD pos = 0;
1054 for (i = 0; i < m_nStreams; i ++) {
1055 FXSYS_memcpy32(m_pData + pos, m_pStreamArray[i]->GetData(), m_pStreamArray[i]->GetSize());
1056 pos += m_pStreamArray[i]->GetSize() + 1;
1057 m_pData[pos - 1] = ' ';
1058 delete m_pStreamArray[i];
1059 }
1060 FX_Free(m_pStreamArray);
1061 m_pStreamArray = NULL;
1062 } else {
1063 m_pData = (FX_LPBYTE)m_pSingleStream->GetData();
1064 m_Size = m_pSingleStream->GetSize();
1065 }
1066 m_InternalStage = PAGEPARSE_STAGE_PARSE;
1067 m_CurrentOffset = 0;
1068 } else {
1069 CPDF_Array* pContent = m_pObjects->m_pFormDict->GetArray(FX_BSTRC("Contents"));
1070 m_pStreamArray[m_CurrentOffset] = new CPDF_StreamAcc;
1071 CPDF_Stream* pStreamObj = (CPDF_Stream*)(pContent ? pContent->GetElementValue(m_CurrentOffset) : NULL);
1072 m_pStreamArray[m_CurrentOffset]->LoadAllData(pStreamObj, FALSE);
1073 m_CurrentOffset ++;
1074 }
1075 }
1076 if (m_InternalStage == PAGEPARSE_STAGE_PARSE) {
1077 if (m_pParser == NULL) {
1078 m_pParser = new CPDF_StreamContentParser;
1079 m_pParser->Initialize();
1080 m_pParser->PrepareParse(m_pObjects->m_pDocument, m_pObjects->m_pPageResources, NULL, NULL, m_pObjects,
1081 m_pObjects->m_pResources, &m_pObjects->m_BBox, &m_Options, NULL, 0);
1082 m_pParser->m_pCurStates->m_ColorState.GetModify()->Default();
1083 }
1084 if (m_CurrentOffset >= m_Size) {
1085 m_InternalStage = PAGEPARSE_STAGE_CHECKCLIP;
1086 } else {
1087 m_CurrentOffset += m_pParser->Parse(m_pData + m_CurrentOffset, m_Size - m_CurrentOffset, PARSE_STEP_LIMIT);
1088 if (m_pParser->m_bAbort) {
1089 m_InternalStage = PAGEPARSE_STAGE_CHECKCLIP;
1090 continue;
1091 }
1092 }
1093 }
1094 if (m_InternalStage == PAGEPARSE_STAGE_CHECKCLIP) {
1095 if (m_pType3Char) {
1096 m_pType3Char->m_bColored = m_pParser->m_bColored;
1097 m_pType3Char->m_Width = FXSYS_round(m_pParser->m_Type3Data[0] * 1000);
1098 m_pType3Char->m_BBox.left = FXSYS_round(m_pParser->m_Type3Data[2] * 1000);
1099 m_pType3Char->m_BBox.bottom = FXSYS_round(m_pParser->m_Type3Data[3] * 1000);
1100 m_pType3Char->m_BBox.right = FXSYS_round(m_pParser->m_Type3Data[4] * 1000);
1101 m_pType3Char->m_BBox.top = FXSYS_round(m_pParser->m_Type3Data[5] * 1000);
1102 }
1103 FX_POSITION pos = m_pObjects->m_ObjectList.GetHeadPosition();
1104 while (pos) {
1105 CPDF_PageObject* pObj = (CPDF_PageObject*)m_pObjects->m_ObjectList.GetNext(pos);
1106 if (pObj->m_ClipPath.IsNull()) {
1107 continue;
1108 }
1109 if (pObj->m_ClipPath.GetPathCount() != 1) {
1110 continue;
1111 }
1112 if (pObj->m_ClipPath.GetTextCount()) {
1113 continue;
1114 }
1115 CPDF_Path ClipPath = pObj->m_ClipPath.GetPath(0);
1116 if (!ClipPath.IsRect() || pObj->m_Type == PDFPAGE_SHADING) {
1117 continue;
1118 }
1119 CFX_FloatRect old_rect(ClipPath.GetPointX(0), ClipPath.GetPointY(0),
1120 ClipPath.GetPointX(2), ClipPath.GetPointY(2));
1121 CFX_FloatRect obj_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right, pObj->m_Top);
1122 if (old_rect.Contains(obj_rect)) {
1123 pObj->m_ClipPath.SetNull();
1124 }
1125 }
1126 m_Status = Done;
1127 return;
1128 }
1129 steps ++;
1130 if (pPause && pPause->NeedToPauseNow()) {
1131 break;
1132 }
1133 }
1134 }
EstimateProgress()1135 int CPDF_ContentParser::EstimateProgress()
1136 {
1137 if (m_Status == Ready) {
1138 return 0;
1139 }
1140 if (m_Status == Done) {
1141 return 100;
1142 }
1143 if (m_InternalStage == PAGEPARSE_STAGE_GETCONTENT) {
1144 return 10;
1145 }
1146 if (m_InternalStage == PAGEPARSE_STAGE_CHECKCLIP) {
1147 return 90;
1148 }
1149 return 10 + 80 * m_CurrentOffset / m_Size;
1150 }
1151