1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/src/fpdfapi/fpdf_page/pageint.h"
8
9 #include <limits.h>
10
11 #include "core/include/fpdfapi/fpdf_module.h"
12 #include "core/include/fpdfapi/fpdf_page.h"
13 #include "core/include/fxcodec/fx_codec.h"
14 #include "core/include/fxcrt/fx_ext.h"
15 #include "core/include/fxcrt/fx_safe_types.h"
16
namespace {

// Single-byte path operators matched by ParsePathObject() and
// SkipPathObject() against the first byte of a one-character keyword.
const char kPathOperatorSubpath = 'm';       // Begins a subpath (move-to).
const char kPathOperatorLine = 'l';          // Appends a line segment.
const char kPathOperatorCubicBezier1 = 'c';  // Bezier with 3 explicit points.
const char kPathOperatorCubicBezier2 = 'v';  // Bezier starting at current pt.
const char kPathOperatorCubicBezier3 = 'y';  // Bezier with repeated end point.
const char kPathOperatorClosePath = 'h';     // Closes the current subpath.

// Two-byte rectangle operator; callers compare it byte by byte.
const char kPathOperatorRectangle[] = "re";

}  // namespace
28
// Redundant with the project header, but keeps this guard self-contained; it
// only ever handles CPDF_StreamParser through pointers.
class CPDF_StreamParser;

// Scoped guard: stores |new_parser| into |*scoped_variable| on construction
// and resets |*scoped_variable| to null on destruction, so the published
// pointer never outlives the parser it refers to.
class CPDF_StreamParserAutoClearer {
 public:
  CPDF_StreamParserAutoClearer(CPDF_StreamParser** scoped_variable,
                               CPDF_StreamParser* new_parser)
      : scoped_variable_(scoped_variable) {
    *scoped_variable_ = new_parser;
  }
  ~CPDF_StreamParserAutoClearer() { *scoped_variable_ = nullptr; }

  // A copy would clear the same slot a second time, possibly after another
  // guard has re-populated it, so copying is disallowed.
  CPDF_StreamParserAutoClearer(const CPDF_StreamParserAutoClearer&) = delete;
  CPDF_StreamParserAutoClearer& operator=(const CPDF_StreamParserAutoClearer&) =
      delete;

 private:
  CPDF_StreamParser** const scoped_variable_;
};
Parse(const uint8_t * pData,FX_DWORD dwSize,FX_DWORD max_cost)41 FX_DWORD CPDF_StreamContentParser::Parse(const uint8_t* pData,
42 FX_DWORD dwSize,
43 FX_DWORD max_cost) {
44 if (m_Level > _FPDF_MAX_FORM_LEVEL_) {
45 return dwSize;
46 }
47 FX_DWORD InitObjCount = m_pObjectList->CountObjects();
48 CPDF_StreamParser syntax(pData, dwSize);
49 CPDF_StreamParserAutoClearer auto_clearer(&m_pSyntax, &syntax);
50 m_CompatCount = 0;
51 while (1) {
52 FX_DWORD cost = m_pObjectList->CountObjects() - InitObjCount;
53 if (max_cost && cost >= max_cost) {
54 break;
55 }
56 switch (syntax.ParseNextElement()) {
57 case CPDF_StreamParser::EndOfData:
58 return m_pSyntax->GetPos();
59 case CPDF_StreamParser::Keyword:
60 OnOperator((char*)syntax.GetWordBuf());
61 ClearAllParams();
62 break;
63 case CPDF_StreamParser::Number:
64 AddNumberParam((char*)syntax.GetWordBuf(), syntax.GetWordSize());
65 break;
66 case CPDF_StreamParser::Name:
67 AddNameParam((const FX_CHAR*)syntax.GetWordBuf() + 1,
68 syntax.GetWordSize() - 1);
69 break;
70 default:
71 AddObjectParam(syntax.GetObject());
72 }
73 }
74 return m_pSyntax->GetPos();
75 }
76
Handle_BeginImage()77 void CPDF_StreamContentParser::Handle_BeginImage() {
78 FX_FILESIZE savePos = m_pSyntax->GetPos();
79 CPDF_Dictionary* pDict = new CPDF_Dictionary;
80 while (1) {
81 CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement();
82 if (type == CPDF_StreamParser::Keyword) {
83 CFX_ByteString bsKeyword(m_pSyntax->GetWordBuf(),
84 m_pSyntax->GetWordSize());
85 if (bsKeyword != "ID") {
86 m_pSyntax->SetPos(savePos);
87 pDict->Release();
88 return;
89 }
90 }
91 if (type != CPDF_StreamParser::Name) {
92 break;
93 }
94 CFX_ByteString key((const FX_CHAR*)m_pSyntax->GetWordBuf() + 1,
95 m_pSyntax->GetWordSize() - 1);
96 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pObj(
97 m_pSyntax->ReadNextObject());
98 if (!key.IsEmpty()) {
99 FX_DWORD dwObjNum = pObj ? pObj->GetObjNum() : 0;
100 if (dwObjNum)
101 pDict->SetAtReference(key, m_pDocument, dwObjNum);
102 else
103 pDict->SetAt(key, pObj.release());
104 }
105 }
106 PDF_ReplaceAbbr(pDict);
107 CPDF_Object* pCSObj = NULL;
108 if (pDict->KeyExist("ColorSpace")) {
109 pCSObj = pDict->GetElementValue("ColorSpace");
110 if (pCSObj->IsName()) {
111 CFX_ByteString name = pCSObj->GetString();
112 if (name != "DeviceRGB" && name != "DeviceGray" && name != "DeviceCMYK") {
113 pCSObj = FindResourceObj("ColorSpace", name);
114 if (pCSObj && !pCSObj->GetObjNum()) {
115 pCSObj = pCSObj->Clone();
116 pDict->SetAt("ColorSpace", pCSObj);
117 }
118 }
119 }
120 }
121 CPDF_Stream* pStream = m_pSyntax->ReadInlineStream(
122 m_pDocument, pDict, pCSObj, m_Options.m_bDecodeInlineImage);
123 while (1) {
124 CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement();
125 if (type == CPDF_StreamParser::EndOfData) {
126 break;
127 }
128 if (type != CPDF_StreamParser::Keyword) {
129 continue;
130 }
131 if (m_pSyntax->GetWordSize() == 2 && m_pSyntax->GetWordBuf()[0] == 'E' &&
132 m_pSyntax->GetWordBuf()[1] == 'I') {
133 break;
134 }
135 }
136 if (m_Options.m_bTextOnly) {
137 if (pStream) {
138 pStream->Release();
139 } else {
140 pDict->Release();
141 }
142 return;
143 }
144 pDict->SetAtName("Subtype", "Image");
145 CPDF_ImageObject* pImgObj = AddImage(pStream, NULL, TRUE);
146 if (!pImgObj) {
147 if (pStream) {
148 pStream->Release();
149 } else {
150 pDict->Release();
151 }
152 }
153 }
ParsePathObject()154 void CPDF_StreamContentParser::ParsePathObject() {
155 FX_FLOAT params[6] = {};
156 int nParams = 0;
157 int last_pos = m_pSyntax->GetPos();
158 while (1) {
159 CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement();
160 FX_BOOL bProcessed = TRUE;
161 switch (type) {
162 case CPDF_StreamParser::EndOfData:
163 return;
164 case CPDF_StreamParser::Keyword: {
165 int len = m_pSyntax->GetWordSize();
166 if (len == 1) {
167 switch (m_pSyntax->GetWordBuf()[0]) {
168 case kPathOperatorSubpath:
169 AddPathPoint(params[0], params[1], FXPT_MOVETO);
170 nParams = 0;
171 break;
172 case kPathOperatorLine:
173 AddPathPoint(params[0], params[1], FXPT_LINETO);
174 nParams = 0;
175 break;
176 case kPathOperatorCubicBezier1:
177 AddPathPoint(params[0], params[1], FXPT_BEZIERTO);
178 AddPathPoint(params[2], params[3], FXPT_BEZIERTO);
179 AddPathPoint(params[4], params[5], FXPT_BEZIERTO);
180 nParams = 0;
181 break;
182 case kPathOperatorCubicBezier2:
183 AddPathPoint(m_PathCurrentX, m_PathCurrentY, FXPT_BEZIERTO);
184 AddPathPoint(params[0], params[1], FXPT_BEZIERTO);
185 AddPathPoint(params[2], params[3], FXPT_BEZIERTO);
186 nParams = 0;
187 break;
188 case kPathOperatorCubicBezier3:
189 AddPathPoint(params[0], params[1], FXPT_BEZIERTO);
190 AddPathPoint(params[2], params[3], FXPT_BEZIERTO);
191 AddPathPoint(params[2], params[3], FXPT_BEZIERTO);
192 nParams = 0;
193 break;
194 case kPathOperatorClosePath:
195 Handle_ClosePath();
196 nParams = 0;
197 break;
198 default:
199 bProcessed = FALSE;
200 break;
201 }
202 } else if (len == 2) {
203 if (m_pSyntax->GetWordBuf()[0] == kPathOperatorRectangle[0] &&
204 m_pSyntax->GetWordBuf()[1] == kPathOperatorRectangle[1]) {
205 AddPathRect(params[0], params[1], params[2], params[3]);
206 nParams = 0;
207 } else {
208 bProcessed = FALSE;
209 }
210 } else {
211 bProcessed = FALSE;
212 }
213 if (bProcessed) {
214 last_pos = m_pSyntax->GetPos();
215 }
216 break;
217 }
218 case CPDF_StreamParser::Number: {
219 if (nParams == 6) {
220 break;
221 }
222 FX_BOOL bInteger;
223 int value;
224 FX_atonum(
225 CFX_ByteStringC(m_pSyntax->GetWordBuf(), m_pSyntax->GetWordSize()),
226 bInteger, &value);
227 params[nParams++] = bInteger ? (FX_FLOAT)value : *(FX_FLOAT*)&value;
228 break;
229 }
230 default:
231 bProcessed = FALSE;
232 }
233 if (!bProcessed) {
234 m_pSyntax->SetPos(last_pos);
235 return;
236 }
237 }
238 }
CPDF_StreamParser(const uint8_t * pData,FX_DWORD dwSize)239 CPDF_StreamParser::CPDF_StreamParser(const uint8_t* pData, FX_DWORD dwSize) {
240 m_pBuf = pData;
241 m_Size = dwSize;
242 m_Pos = 0;
243 m_pLastObj = NULL;
244 }
~CPDF_StreamParser()245 CPDF_StreamParser::~CPDF_StreamParser() {
246 if (m_pLastObj) {
247 m_pLastObj->Release();
248 }
249 }
_DecodeAllScanlines(ICodec_ScanlineDecoder * pDecoder,uint8_t * & dest_buf,FX_DWORD & dest_size)250 FX_DWORD _DecodeAllScanlines(ICodec_ScanlineDecoder* pDecoder,
251 uint8_t*& dest_buf,
252 FX_DWORD& dest_size) {
253 if (!pDecoder) {
254 return (FX_DWORD)-1;
255 }
256 int ncomps = pDecoder->CountComps();
257 int bpc = pDecoder->GetBPC();
258 int width = pDecoder->GetWidth();
259 int height = pDecoder->GetHeight();
260 int pitch = (width * ncomps * bpc + 7) / 8;
261 if (height == 0 || pitch > (1 << 30) / height) {
262 delete pDecoder;
263 return -1;
264 }
265 dest_buf = FX_Alloc2D(uint8_t, pitch, height);
266 dest_size = pitch * height; // Safe since checked alloc returned.
267 for (int row = 0; row < height; row++) {
268 const uint8_t* pLine = pDecoder->GetScanline(row);
269 if (!pLine)
270 break;
271
272 FXSYS_memcpy(dest_buf + row * pitch, pLine, pitch);
273 }
274 FX_DWORD srcoff = pDecoder->GetSrcOffset();
275 delete pDecoder;
276 return srcoff;
277 }
278 ICodec_ScanlineDecoder* FPDFAPI_CreateFaxDecoder(
279 const uint8_t* src_buf,
280 FX_DWORD src_size,
281 int width,
282 int height,
283 const CPDF_Dictionary* pParams);
284
PDF_DecodeInlineStream(const uint8_t * src_buf,FX_DWORD limit,int width,int height,CFX_ByteString & decoder,CPDF_Dictionary * pParam,uint8_t * & dest_buf,FX_DWORD & dest_size)285 FX_DWORD PDF_DecodeInlineStream(const uint8_t* src_buf,
286 FX_DWORD limit,
287 int width,
288 int height,
289 CFX_ByteString& decoder,
290 CPDF_Dictionary* pParam,
291 uint8_t*& dest_buf,
292 FX_DWORD& dest_size) {
293 if (decoder == "CCITTFaxDecode" || decoder == "CCF") {
294 ICodec_ScanlineDecoder* pDecoder =
295 FPDFAPI_CreateFaxDecoder(src_buf, limit, width, height, pParam);
296 return _DecodeAllScanlines(pDecoder, dest_buf, dest_size);
297 }
298 if (decoder == "ASCII85Decode" || decoder == "A85") {
299 return A85Decode(src_buf, limit, dest_buf, dest_size);
300 }
301 if (decoder == "ASCIIHexDecode" || decoder == "AHx") {
302 return HexDecode(src_buf, limit, dest_buf, dest_size);
303 }
304 if (decoder == "FlateDecode" || decoder == "Fl") {
305 return FPDFAPI_FlateOrLZWDecode(FALSE, src_buf, limit, pParam, dest_size,
306 dest_buf, dest_size);
307 }
308 if (decoder == "LZWDecode" || decoder == "LZW") {
309 return FPDFAPI_FlateOrLZWDecode(TRUE, src_buf, limit, pParam, 0, dest_buf,
310 dest_size);
311 }
312 if (decoder == "DCTDecode" || decoder == "DCT") {
313 ICodec_ScanlineDecoder* pDecoder =
314 CPDF_ModuleMgr::Get()->GetJpegModule()->CreateDecoder(
315 src_buf, limit, width, height, 0,
316 pParam ? pParam->GetInteger("ColorTransform", 1) : 1);
317 return _DecodeAllScanlines(pDecoder, dest_buf, dest_size);
318 }
319 if (decoder == "RunLengthDecode" || decoder == "RL") {
320 return RunLengthDecode(src_buf, limit, dest_buf, dest_size);
321 }
322 dest_size = 0;
323 dest_buf = 0;
324 return (FX_DWORD)-1;
325 }
ReadInlineStream(CPDF_Document * pDoc,CPDF_Dictionary * pDict,CPDF_Object * pCSObj,FX_BOOL bDecode)326 CPDF_Stream* CPDF_StreamParser::ReadInlineStream(CPDF_Document* pDoc,
327 CPDF_Dictionary* pDict,
328 CPDF_Object* pCSObj,
329 FX_BOOL bDecode) {
330 if (m_Pos == m_Size)
331 return nullptr;
332
333 if (PDFCharIsWhitespace(m_pBuf[m_Pos]))
334 m_Pos++;
335
336 CFX_ByteString Decoder;
337 CPDF_Dictionary* pParam = nullptr;
338 CPDF_Object* pFilter = pDict->GetElementValue("Filter");
339 if (pFilter) {
340 if (CPDF_Array* pArray = pFilter->AsArray()) {
341 Decoder = pArray->GetString(0);
342 CPDF_Array* pParams = pDict->GetArray("DecodeParms");
343 if (pParams)
344 pParam = pParams->GetDict(0);
345 } else {
346 Decoder = pFilter->GetString();
347 pParam = pDict->GetDict("DecodeParms");
348 }
349 }
350 FX_DWORD width = pDict->GetInteger("Width");
351 FX_DWORD height = pDict->GetInteger("Height");
352 FX_DWORD OrigSize = 0;
353 if (pCSObj) {
354 FX_DWORD bpc = pDict->GetInteger("BitsPerComponent");
355 FX_DWORD nComponents = 1;
356 CPDF_ColorSpace* pCS = pDoc->LoadColorSpace(pCSObj);
357 if (!pCS) {
358 nComponents = 3;
359 } else {
360 nComponents = pCS->CountComponents();
361 pDoc->GetPageData()->ReleaseColorSpace(pCSObj);
362 }
363 FX_DWORD pitch = width;
364 if (bpc && pitch > INT_MAX / bpc) {
365 return NULL;
366 }
367 pitch *= bpc;
368 if (nComponents && pitch > INT_MAX / nComponents) {
369 return NULL;
370 }
371 pitch *= nComponents;
372 if (pitch > INT_MAX - 7) {
373 return NULL;
374 }
375 pitch += 7;
376 pitch /= 8;
377 OrigSize = pitch;
378 } else {
379 if (width > INT_MAX - 7) {
380 return NULL;
381 }
382 OrigSize = ((width + 7) / 8);
383 }
384 if (height && OrigSize > INT_MAX / height) {
385 return NULL;
386 }
387 OrigSize *= height;
388 uint8_t* pData = NULL;
389 FX_DWORD dwStreamSize;
390 if (Decoder.IsEmpty()) {
391 if (OrigSize > m_Size - m_Pos) {
392 OrigSize = m_Size - m_Pos;
393 }
394 pData = FX_Alloc(uint8_t, OrigSize);
395 FXSYS_memcpy(pData, m_pBuf + m_Pos, OrigSize);
396 dwStreamSize = OrigSize;
397 m_Pos += OrigSize;
398 } else {
399 FX_DWORD dwDestSize = OrigSize;
400 dwStreamSize =
401 PDF_DecodeInlineStream(m_pBuf + m_Pos, m_Size - m_Pos, width, height,
402 Decoder, pParam, pData, dwDestSize);
403 if ((int)dwStreamSize < 0) {
404 FX_Free(pData);
405 return NULL;
406 }
407 if (bDecode) {
408 m_Pos += dwStreamSize;
409 dwStreamSize = dwDestSize;
410 if (CPDF_Array* pArray = pFilter->AsArray()) {
411 pArray->RemoveAt(0);
412 CPDF_Array* pParams = pDict->GetArray("DecodeParms");
413 if (pParams)
414 pParams->RemoveAt(0);
415 } else {
416 pDict->RemoveAt("Filter");
417 pDict->RemoveAt("DecodeParms");
418 }
419 } else {
420 FX_Free(pData);
421 FX_DWORD dwSavePos = m_Pos;
422 m_Pos += dwStreamSize;
423 while (1) {
424 FX_DWORD dwPrevPos = m_Pos;
425 CPDF_StreamParser::SyntaxType type = ParseNextElement();
426 if (type == CPDF_StreamParser::EndOfData) {
427 break;
428 }
429 if (type != CPDF_StreamParser::Keyword) {
430 dwStreamSize += m_Pos - dwPrevPos;
431 continue;
432 }
433 if (GetWordSize() == 2 && GetWordBuf()[0] == 'E' &&
434 GetWordBuf()[1] == 'I') {
435 m_Pos = dwPrevPos;
436 break;
437 }
438 dwStreamSize += m_Pos - dwPrevPos;
439 }
440 m_Pos = dwSavePos;
441 pData = FX_Alloc(uint8_t, dwStreamSize);
442 FXSYS_memcpy(pData, m_pBuf + m_Pos, dwStreamSize);
443 m_Pos += dwStreamSize;
444 }
445 }
446 pDict->SetAtInteger("Length", (int)dwStreamSize);
447 return new CPDF_Stream(pData, dwStreamSize, pDict);
448 }
449
450 #define MAX_WORD_BUFFER 256
451 #define MAX_STRING_LENGTH 32767
452 #define FXDWORD_TRUE FXDWORD_FROM_LSBFIRST(0x65757274)
453 #define FXDWORD_NULL FXDWORD_FROM_LSBFIRST(0x6c6c756e)
454 #define FXDWORD_FALS FXDWORD_FROM_LSBFIRST(0x736c6166)
ParseNextElement()455 CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() {
456 if (m_pLastObj) {
457 m_pLastObj->Release();
458 m_pLastObj = nullptr;
459 }
460
461 m_WordSize = 0;
462 FX_BOOL bIsNumber = TRUE;
463 if (!PositionIsInBounds())
464 return EndOfData;
465
466 int ch = m_pBuf[m_Pos++];
467 while (1) {
468 while (PDFCharIsWhitespace(ch)) {
469 if (!PositionIsInBounds())
470 return EndOfData;
471
472 ch = m_pBuf[m_Pos++];
473 }
474
475 if (ch != '%')
476 break;
477
478 while (1) {
479 if (!PositionIsInBounds())
480 return EndOfData;
481
482 ch = m_pBuf[m_Pos++];
483 if (PDFCharIsLineEnding(ch))
484 break;
485 }
486 }
487
488 if (PDFCharIsDelimiter(ch) && ch != '/') {
489 m_Pos--;
490 m_pLastObj = ReadNextObject();
491 return Others;
492 }
493
494 while (1) {
495 if (m_WordSize < MAX_WORD_BUFFER)
496 m_WordBuffer[m_WordSize++] = ch;
497
498 if (!PDFCharIsNumeric(ch))
499 bIsNumber = FALSE;
500
501 if (!PositionIsInBounds())
502 break;
503
504 ch = m_pBuf[m_Pos++];
505
506 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
507 m_Pos--;
508 break;
509 }
510 }
511
512 m_WordBuffer[m_WordSize] = 0;
513 if (bIsNumber)
514 return Number;
515 if (m_WordBuffer[0] == '/')
516 return Name;
517
518 if (m_WordSize == 4) {
519 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_TRUE) {
520 m_pLastObj = new CPDF_Boolean(TRUE);
521 return Others;
522 }
523 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_NULL) {
524 m_pLastObj = new CPDF_Null;
525 return Others;
526 }
527 } else if (m_WordSize == 5) {
528 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_FALS && m_WordBuffer[4] == 'e') {
529 m_pLastObj = new CPDF_Boolean(FALSE);
530 return Others;
531 }
532 }
533 return Keyword;
534 }
535
SkipPathObject()536 void CPDF_StreamParser::SkipPathObject() {
537 FX_DWORD command_startpos = m_Pos;
538 if (!PositionIsInBounds())
539 return;
540
541 int ch = m_pBuf[m_Pos++];
542 while (1) {
543 while (PDFCharIsWhitespace(ch)) {
544 if (!PositionIsInBounds())
545 return;
546 ch = m_pBuf[m_Pos++];
547 }
548
549 if (!PDFCharIsNumeric(ch)) {
550 m_Pos = command_startpos;
551 return;
552 }
553
554 while (1) {
555 while (!PDFCharIsWhitespace(ch)) {
556 if (!PositionIsInBounds())
557 return;
558 ch = m_pBuf[m_Pos++];
559 }
560
561 while (PDFCharIsWhitespace(ch)) {
562 if (!PositionIsInBounds())
563 return;
564 ch = m_pBuf[m_Pos++];
565 }
566
567 if (PDFCharIsNumeric(ch))
568 continue;
569
570 FX_DWORD op_startpos = m_Pos - 1;
571 while (!PDFCharIsWhitespace(ch) && !PDFCharIsDelimiter(ch)) {
572 if (!PositionIsInBounds())
573 return;
574 ch = m_pBuf[m_Pos++];
575 }
576
577 if (m_Pos - op_startpos == 2) {
578 int op = m_pBuf[op_startpos];
579 if (op == kPathOperatorSubpath || op == kPathOperatorLine ||
580 op == kPathOperatorCubicBezier1 ||
581 op == kPathOperatorCubicBezier2 ||
582 op == kPathOperatorCubicBezier3) {
583 command_startpos = m_Pos;
584 break;
585 }
586 } else if (m_Pos - op_startpos == 3) {
587 if (m_pBuf[op_startpos] == kPathOperatorRectangle[0] &&
588 m_pBuf[op_startpos + 1] == kPathOperatorRectangle[1]) {
589 command_startpos = m_Pos;
590 break;
591 }
592 }
593 m_Pos = command_startpos;
594 return;
595 }
596 }
597 }
ReadNextObject(FX_BOOL bAllowNestedArray,FX_BOOL bInArray)598 CPDF_Object* CPDF_StreamParser::ReadNextObject(FX_BOOL bAllowNestedArray,
599 FX_BOOL bInArray) {
600 FX_BOOL bIsNumber;
601 GetNextWord(bIsNumber);
602 if (m_WordSize == 0) {
603 return NULL;
604 }
605 if (bIsNumber) {
606 m_WordBuffer[m_WordSize] = 0;
607 return new CPDF_Number(CFX_ByteStringC(m_WordBuffer, m_WordSize));
608 }
609 int first_char = m_WordBuffer[0];
610 if (first_char == '/') {
611 return new CPDF_Name(
612 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
613 }
614 if (first_char == '(') {
615 return new CPDF_String(ReadString(), FALSE);
616 }
617 if (first_char == '<') {
618 if (m_WordSize == 1) {
619 return new CPDF_String(ReadHexString(), TRUE);
620 }
621 CPDF_Dictionary* pDict = new CPDF_Dictionary;
622 while (1) {
623 GetNextWord(bIsNumber);
624 if (m_WordSize == 0) {
625 pDict->Release();
626 return nullptr;
627 }
628 if (m_WordSize == 2 && m_WordBuffer[0] == '>') {
629 break;
630 }
631 if (m_WordBuffer[0] != '/') {
632 pDict->Release();
633 return nullptr;
634 }
635 CFX_ByteString key =
636 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1));
637 CPDF_Object* pObj = ReadNextObject(TRUE);
638 if (!pObj) {
639 pDict->Release();
640 return nullptr;
641 }
642 if (!key.IsEmpty()) {
643 pDict->SetAt(key, pObj);
644 } else {
645 pObj->Release();
646 }
647 }
648 return pDict;
649 }
650 if (first_char == '[') {
651 if (!bAllowNestedArray && bInArray) {
652 return NULL;
653 }
654 CPDF_Array* pArray = new CPDF_Array;
655 while (1) {
656 CPDF_Object* pObj = ReadNextObject(bAllowNestedArray, TRUE);
657 if (pObj) {
658 pArray->Add(pObj);
659 continue;
660 }
661
662 if (m_WordSize == 0 || m_WordBuffer[0] == ']')
663 break;
664 }
665 return pArray;
666 }
667 if (m_WordSize == 4) {
668 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_TRUE) {
669 return new CPDF_Boolean(TRUE);
670 }
671 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_NULL) {
672 return new CPDF_Null;
673 }
674 } else if (m_WordSize == 5) {
675 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_FALS && m_WordBuffer[4] == 'e') {
676 return new CPDF_Boolean(FALSE);
677 }
678 }
679 return NULL;
680 }
GetNextWord(FX_BOOL & bIsNumber)681 void CPDF_StreamParser::GetNextWord(FX_BOOL& bIsNumber) {
682 m_WordSize = 0;
683 bIsNumber = TRUE;
684 if (!PositionIsInBounds())
685 return;
686
687 int ch = m_pBuf[m_Pos++];
688 while (1) {
689 while (PDFCharIsWhitespace(ch)) {
690 if (!PositionIsInBounds()) {
691 return;
692 }
693 ch = m_pBuf[m_Pos++];
694 }
695
696 if (ch != '%')
697 break;
698
699 while (1) {
700 if (!PositionIsInBounds())
701 return;
702 ch = m_pBuf[m_Pos++];
703 if (PDFCharIsLineEnding(ch))
704 break;
705 }
706 }
707
708 if (PDFCharIsDelimiter(ch)) {
709 bIsNumber = FALSE;
710 m_WordBuffer[m_WordSize++] = ch;
711 if (ch == '/') {
712 while (1) {
713 if (!PositionIsInBounds())
714 return;
715 ch = m_pBuf[m_Pos++];
716 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
717 m_Pos--;
718 return;
719 }
720
721 if (m_WordSize < MAX_WORD_BUFFER)
722 m_WordBuffer[m_WordSize++] = ch;
723 }
724 } else if (ch == '<') {
725 if (!PositionIsInBounds())
726 return;
727 ch = m_pBuf[m_Pos++];
728 if (ch == '<')
729 m_WordBuffer[m_WordSize++] = ch;
730 else
731 m_Pos--;
732 } else if (ch == '>') {
733 if (!PositionIsInBounds())
734 return;
735 ch = m_pBuf[m_Pos++];
736 if (ch == '>')
737 m_WordBuffer[m_WordSize++] = ch;
738 else
739 m_Pos--;
740 }
741 return;
742 }
743
744 while (1) {
745 if (m_WordSize < MAX_WORD_BUFFER)
746 m_WordBuffer[m_WordSize++] = ch;
747 if (!PDFCharIsNumeric(ch))
748 bIsNumber = FALSE;
749
750 if (!PositionIsInBounds())
751 return;
752 ch = m_pBuf[m_Pos++];
753 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
754 m_Pos--;
755 break;
756 }
757 }
758 }
759
ReadString()760 CFX_ByteString CPDF_StreamParser::ReadString() {
761 if (!PositionIsInBounds())
762 return CFX_ByteString();
763
764 int ch = m_pBuf[m_Pos++];
765 CFX_ByteTextBuf buf;
766 int parlevel = 0;
767 int status = 0, iEscCode = 0;
768 while (1) {
769 switch (status) {
770 case 0:
771 if (ch == ')') {
772 if (parlevel == 0) {
773 if (buf.GetLength() > MAX_STRING_LENGTH) {
774 return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH);
775 }
776 return buf.GetByteString();
777 }
778 parlevel--;
779 buf.AppendChar(')');
780 } else if (ch == '(') {
781 parlevel++;
782 buf.AppendChar('(');
783 } else if (ch == '\\') {
784 status = 1;
785 } else {
786 buf.AppendChar((char)ch);
787 }
788 break;
789 case 1:
790 if (ch >= '0' && ch <= '7') {
791 iEscCode = FXSYS_toDecimalDigit(ch);
792 status = 2;
793 break;
794 }
795 if (ch == 'n') {
796 buf.AppendChar('\n');
797 } else if (ch == 'r') {
798 buf.AppendChar('\r');
799 } else if (ch == 't') {
800 buf.AppendChar('\t');
801 } else if (ch == 'b') {
802 buf.AppendChar('\b');
803 } else if (ch == 'f') {
804 buf.AppendChar('\f');
805 } else if (ch == '\r') {
806 status = 4;
807 break;
808 } else if (ch == '\n') {
809 } else {
810 buf.AppendChar(ch);
811 }
812 status = 0;
813 break;
814 case 2:
815 if (ch >= '0' && ch <= '7') {
816 iEscCode = iEscCode * 8 + FXSYS_toDecimalDigit(ch);
817 status = 3;
818 } else {
819 buf.AppendChar(iEscCode);
820 status = 0;
821 continue;
822 }
823 break;
824 case 3:
825 if (ch >= '0' && ch <= '7') {
826 iEscCode = iEscCode * 8 + FXSYS_toDecimalDigit(ch);
827 buf.AppendChar(iEscCode);
828 status = 0;
829 } else {
830 buf.AppendChar(iEscCode);
831 status = 0;
832 continue;
833 }
834 break;
835 case 4:
836 status = 0;
837 if (ch != '\n') {
838 continue;
839 }
840 break;
841 }
842 if (!PositionIsInBounds())
843 break;
844
845 ch = m_pBuf[m_Pos++];
846 }
847 if (PositionIsInBounds())
848 ch = m_pBuf[m_Pos++];
849
850 if (buf.GetLength() > MAX_STRING_LENGTH) {
851 return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH);
852 }
853 return buf.GetByteString();
854 }
ReadHexString()855 CFX_ByteString CPDF_StreamParser::ReadHexString() {
856 if (!PositionIsInBounds())
857 return CFX_ByteString();
858
859 CFX_ByteTextBuf buf;
860 bool bFirst = true;
861 int code = 0;
862 while (PositionIsInBounds()) {
863 int ch = m_pBuf[m_Pos++];
864
865 if (ch == '>')
866 break;
867
868 if (!std::isxdigit(ch))
869 continue;
870
871 int val = FXSYS_toHexDigit(ch);
872 if (bFirst) {
873 code = val * 16;
874 } else {
875 code += val;
876 buf.AppendByte((uint8_t)code);
877 }
878 bFirst = !bFirst;
879 }
880 if (!bFirst)
881 buf.AppendChar((char)code);
882
883 if (buf.GetLength() > MAX_STRING_LENGTH)
884 return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH);
885
886 return buf.GetByteString();
887 }
888
PositionIsInBounds() const889 bool CPDF_StreamParser::PositionIsInBounds() const {
890 return m_Pos < m_Size;
891 }
892
CPDF_ContentParser()893 CPDF_ContentParser::CPDF_ContentParser()
894 : m_Status(Ready),
895 m_InternalStage(STAGE_GETCONTENT),
896 m_pObjects(nullptr),
897 m_bForm(false),
898 m_pType3Char(nullptr),
899 m_pData(nullptr),
900 m_Size(0),
901 m_CurrentOffset(0) {}
~CPDF_ContentParser()902 CPDF_ContentParser::~CPDF_ContentParser() {
903 if (!m_pSingleStream)
904 FX_Free(m_pData);
905 }
Start(CPDF_Page * pPage,CPDF_ParseOptions * pOptions)906 void CPDF_ContentParser::Start(CPDF_Page* pPage, CPDF_ParseOptions* pOptions) {
907 if (m_Status != Ready || !pPage || !pPage->m_pDocument ||
908 !pPage->m_pFormDict) {
909 m_Status = Done;
910 return;
911 }
912 m_pObjects = pPage;
913 m_bForm = FALSE;
914 if (pOptions) {
915 m_Options = *pOptions;
916 }
917 m_Status = ToBeContinued;
918 m_InternalStage = STAGE_GETCONTENT;
919 m_CurrentOffset = 0;
920
921 CPDF_Object* pContent = pPage->m_pFormDict->GetElementValue("Contents");
922 if (!pContent) {
923 m_Status = Done;
924 return;
925 }
926 if (CPDF_Stream* pStream = pContent->AsStream()) {
927 m_nStreams = 0;
928 m_pSingleStream.reset(new CPDF_StreamAcc);
929 m_pSingleStream->LoadAllData(pStream, FALSE);
930 } else if (CPDF_Array* pArray = pContent->AsArray()) {
931 m_nStreams = pArray->GetCount();
932 if (m_nStreams)
933 m_StreamArray.resize(m_nStreams);
934 else
935 m_Status = Done;
936 } else {
937 m_Status = Done;
938 }
939 }
Start(CPDF_Form * pForm,CPDF_AllStates * pGraphicStates,CFX_Matrix * pParentMatrix,CPDF_Type3Char * pType3Char,CPDF_ParseOptions * pOptions,int level)940 void CPDF_ContentParser::Start(CPDF_Form* pForm,
941 CPDF_AllStates* pGraphicStates,
942 CFX_Matrix* pParentMatrix,
943 CPDF_Type3Char* pType3Char,
944 CPDF_ParseOptions* pOptions,
945 int level) {
946 m_pType3Char = pType3Char;
947 m_pObjects = pForm;
948 m_bForm = TRUE;
949 CFX_Matrix form_matrix = pForm->m_pFormDict->GetMatrix("Matrix");
950 if (pGraphicStates) {
951 form_matrix.Concat(pGraphicStates->m_CTM);
952 }
953 CPDF_Array* pBBox = pForm->m_pFormDict->GetArray("BBox");
954 CFX_FloatRect form_bbox;
955 CPDF_Path ClipPath;
956 if (pBBox) {
957 form_bbox = pBBox->GetRect();
958 ClipPath.New();
959 ClipPath.AppendRect(form_bbox.left, form_bbox.bottom, form_bbox.right,
960 form_bbox.top);
961 ClipPath.Transform(&form_matrix);
962 if (pParentMatrix) {
963 ClipPath.Transform(pParentMatrix);
964 }
965 form_bbox.Transform(&form_matrix);
966 if (pParentMatrix) {
967 form_bbox.Transform(pParentMatrix);
968 }
969 }
970 CPDF_Dictionary* pResources = pForm->m_pFormDict->GetDict("Resources");
971 m_pParser.reset(new CPDF_StreamContentParser(
972 pForm->m_pDocument, pForm->m_pPageResources, pForm->m_pResources,
973 pParentMatrix, pForm, pResources, &form_bbox, pOptions, pGraphicStates,
974 level));
975 m_pParser->GetCurStates()->m_CTM = form_matrix;
976 m_pParser->GetCurStates()->m_ParentMatrix = form_matrix;
977 if (ClipPath.NotNull()) {
978 m_pParser->GetCurStates()->m_ClipPath.AppendPath(ClipPath, FXFILL_WINDING,
979 TRUE);
980 }
981 if (pForm->m_Transparency & PDFTRANS_GROUP) {
982 CPDF_GeneralStateData* pData =
983 m_pParser->GetCurStates()->m_GeneralState.GetModify();
984 pData->m_BlendType = FXDIB_BLEND_NORMAL;
985 pData->m_StrokeAlpha = 1.0f;
986 pData->m_FillAlpha = 1.0f;
987 pData->m_pSoftMask = NULL;
988 }
989 m_nStreams = 0;
990 m_pSingleStream.reset(new CPDF_StreamAcc);
991 m_pSingleStream->LoadAllData(pForm->m_pFormStream, FALSE);
992 m_pData = (uint8_t*)m_pSingleStream->GetData();
993 m_Size = m_pSingleStream->GetSize();
994 m_Status = ToBeContinued;
995 m_InternalStage = STAGE_PARSE;
996 m_CurrentOffset = 0;
997 }
Continue(IFX_Pause * pPause)998 void CPDF_ContentParser::Continue(IFX_Pause* pPause) {
999 int steps = 0;
1000 while (m_Status == ToBeContinued) {
1001 if (m_InternalStage == STAGE_GETCONTENT) {
1002 if (m_CurrentOffset == m_nStreams) {
1003 if (!m_StreamArray.empty()) {
1004 FX_SAFE_DWORD safeSize = 0;
1005 for (const auto& stream : m_StreamArray) {
1006 safeSize += stream->GetSize();
1007 safeSize += 1;
1008 }
1009 if (!safeSize.IsValid()) {
1010 m_Status = Done;
1011 return;
1012 }
1013 m_Size = safeSize.ValueOrDie();
1014 m_pData = FX_Alloc(uint8_t, m_Size);
1015 FX_DWORD pos = 0;
1016 for (const auto& stream : m_StreamArray) {
1017 FXSYS_memcpy(m_pData + pos, stream->GetData(), stream->GetSize());
1018 pos += stream->GetSize();
1019 m_pData[pos++] = ' ';
1020 }
1021 m_StreamArray.clear();
1022 } else {
1023 m_pData = (uint8_t*)m_pSingleStream->GetData();
1024 m_Size = m_pSingleStream->GetSize();
1025 }
1026 m_InternalStage = STAGE_PARSE;
1027 m_CurrentOffset = 0;
1028 } else {
1029 CPDF_Array* pContent = m_pObjects->m_pFormDict->GetArray("Contents");
1030 m_StreamArray[m_CurrentOffset].reset(new CPDF_StreamAcc);
1031 CPDF_Stream* pStreamObj = ToStream(
1032 pContent ? pContent->GetElementValue(m_CurrentOffset) : nullptr);
1033 m_StreamArray[m_CurrentOffset]->LoadAllData(pStreamObj, FALSE);
1034 m_CurrentOffset++;
1035 }
1036 }
1037 if (m_InternalStage == STAGE_PARSE) {
1038 if (!m_pParser) {
1039 m_pParser.reset(new CPDF_StreamContentParser(
1040 m_pObjects->m_pDocument, m_pObjects->m_pPageResources, nullptr,
1041 nullptr, m_pObjects, m_pObjects->m_pResources, &m_pObjects->m_BBox,
1042 &m_Options, nullptr, 0));
1043 m_pParser->GetCurStates()->m_ColorState.GetModify()->Default();
1044 }
1045 if (m_CurrentOffset >= m_Size) {
1046 m_InternalStage = STAGE_CHECKCLIP;
1047 } else {
1048 m_CurrentOffset +=
1049 m_pParser->Parse(m_pData + m_CurrentOffset,
1050 m_Size - m_CurrentOffset, PARSE_STEP_LIMIT);
1051 }
1052 }
1053 if (m_InternalStage == STAGE_CHECKCLIP) {
1054 if (m_pType3Char) {
1055 m_pType3Char->m_bColored = m_pParser->IsColored();
1056 m_pType3Char->m_Width =
1057 FXSYS_round(m_pParser->GetType3Data()[0] * 1000);
1058 m_pType3Char->m_BBox.left =
1059 FXSYS_round(m_pParser->GetType3Data()[2] * 1000);
1060 m_pType3Char->m_BBox.bottom =
1061 FXSYS_round(m_pParser->GetType3Data()[3] * 1000);
1062 m_pType3Char->m_BBox.right =
1063 FXSYS_round(m_pParser->GetType3Data()[4] * 1000);
1064 m_pType3Char->m_BBox.top =
1065 FXSYS_round(m_pParser->GetType3Data()[5] * 1000);
1066 }
1067 FX_POSITION pos = m_pObjects->m_ObjectList.GetHeadPosition();
1068 while (pos) {
1069 CPDF_PageObject* pObj =
1070 (CPDF_PageObject*)m_pObjects->m_ObjectList.GetNext(pos);
1071 if (pObj->m_ClipPath.IsNull()) {
1072 continue;
1073 }
1074 if (pObj->m_ClipPath.GetPathCount() != 1) {
1075 continue;
1076 }
1077 if (pObj->m_ClipPath.GetTextCount()) {
1078 continue;
1079 }
1080 CPDF_Path ClipPath = pObj->m_ClipPath.GetPath(0);
1081 if (!ClipPath.IsRect() || pObj->m_Type == PDFPAGE_SHADING) {
1082 continue;
1083 }
1084 CFX_FloatRect old_rect(ClipPath.GetPointX(0), ClipPath.GetPointY(0),
1085 ClipPath.GetPointX(2), ClipPath.GetPointY(2));
1086 CFX_FloatRect obj_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right,
1087 pObj->m_Top);
1088 if (old_rect.Contains(obj_rect)) {
1089 pObj->m_ClipPath.SetNull();
1090 }
1091 }
1092 m_Status = Done;
1093 return;
1094 }
1095 steps++;
1096 if (pPause && pPause->NeedToPauseNow()) {
1097 break;
1098 }
1099 }
1100 }
1101