1 // Copyright 2014 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "../../../include/fpdfapi/fpdf_parser.h"
// Character-class lookup table for the PDF tokenizer, indexed by byte value.
//   'W' - whitespace
//   'N' - numeric (digits and + - .)
//   'D' - delimiter
//   'R' - regular (everything else)
const char PDF_CharType[256] = {
    // 0x00-0x07: NUL SOH STX ETX EOT ENQ ACK BEL
    'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x08-0x0F: BS HT LF VT FF CR SO SI
    'R', 'W', 'W', 'R', 'W', 'W', 'R', 'R',

    // 0x10-0x17: DLE DC1 DC2 DC3 DC4 NAK SYN ETB
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x18-0x1F: CAN EM SUB ESC FS GS RS US
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0x20-0x27: SP ! " # $ % & '
    'W', 'R', 'R', 'R', 'R', 'D', 'R', 'R',
    // 0x28-0x2F: ( ) * + , - . /
    'D', 'D', 'R', 'N', 'R', 'N', 'N', 'D',

    // 0x30-0x37: 0 1 2 3 4 5 6 7
    'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N',
    // 0x38-0x3F: 8 9 : ; < = > ?
    'N', 'N', 'R', 'R', 'D', 'R', 'D', 'R',

    // 0x40-0x47: @ A B C D E F G
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x48-0x4F: H I J K L M N O
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0x50-0x57: P Q R S T U V W
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x58-0x5F: X Y Z [ (backslash) ] ^ _
    'R', 'R', 'R', 'D', 'R', 'D', 'R', 'R',

    // 0x60-0x67: ` a b c d e f g
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x68-0x6F: h i j k l m n o
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',

    // 0x70-0x77: p q r s t u v w
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x78-0x7F: x y z { | } ~ DEL
    'R', 'R', 'R', 'D', 'R', 'D', 'R', 'R',

    // 0x80-0x8F (0x80 is classed as whitespace)
    'W', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0x90-0x9F
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xA0-0xAF
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xB0-0xBF
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xC0-0xCF
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xD0-0xDF
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xE0-0xEF
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    // 0xF0-0xFF (0xFF is classed as whitespace)
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R',
    'R', 'R', 'R', 'R', 'R', 'R', 'R', 'W'
};
42
// Fallback definition used only when no earlier header has defined MAX_PATH.
#if !defined(MAX_PATH)
#define MAX_PATH 4096
#endif
CPDF_SimpleParser(FX_LPCBYTE pData,FX_DWORD dwSize)46 CPDF_SimpleParser::CPDF_SimpleParser(FX_LPCBYTE pData, FX_DWORD dwSize)
47 {
48 m_pData = pData;
49 m_dwSize = dwSize;
50 m_dwCurPos = 0;
51 }
CPDF_SimpleParser(FX_BSTR str)52 CPDF_SimpleParser::CPDF_SimpleParser(FX_BSTR str)
53 {
54 m_pData = str.GetPtr();
55 m_dwSize = str.GetLength();
56 m_dwCurPos = 0;
57 }
ParseWord(FX_LPCBYTE & pStart,FX_DWORD & dwSize,int & type)58 void CPDF_SimpleParser::ParseWord(FX_LPCBYTE& pStart, FX_DWORD& dwSize, int& type)
59 {
60 pStart = NULL;
61 dwSize = 0;
62 type = PDFWORD_EOF;
63 FX_BYTE ch;
64 char chartype;
65 while (1) {
66 if (m_dwSize <= m_dwCurPos) {
67 return;
68 }
69 ch = m_pData[m_dwCurPos++];
70 chartype = PDF_CharType[ch];
71 while (chartype == 'W') {
72 if (m_dwSize <= m_dwCurPos) {
73 return;
74 }
75 ch = m_pData[m_dwCurPos++];
76 chartype = PDF_CharType[ch];
77 }
78 if (ch != '%') {
79 break;
80 }
81 while (1) {
82 if (m_dwSize <= m_dwCurPos) {
83 return;
84 }
85 ch = m_pData[m_dwCurPos++];
86 if (ch == '\r' || ch == '\n') {
87 break;
88 }
89 }
90 chartype = PDF_CharType[ch];
91 }
92 FX_DWORD start_pos = m_dwCurPos - 1;
93 pStart = m_pData + start_pos;
94 if (chartype == 'D') {
95 if (ch == '/') {
96 while (1) {
97 if (m_dwSize <= m_dwCurPos) {
98 return;
99 }
100 ch = m_pData[m_dwCurPos++];
101 chartype = PDF_CharType[ch];
102 if (chartype != 'R' && chartype != 'N') {
103 m_dwCurPos --;
104 dwSize = m_dwCurPos - start_pos;
105 type = PDFWORD_NAME;
106 return;
107 }
108 }
109 } else {
110 type = PDFWORD_DELIMITER;
111 dwSize = 1;
112 if (ch == '<') {
113 if (m_dwSize <= m_dwCurPos) {
114 return;
115 }
116 ch = m_pData[m_dwCurPos++];
117 if (ch == '<') {
118 dwSize = 2;
119 } else {
120 m_dwCurPos --;
121 }
122 } else if (ch == '>') {
123 if (m_dwSize <= m_dwCurPos) {
124 return;
125 }
126 ch = m_pData[m_dwCurPos++];
127 if (ch == '>') {
128 dwSize = 2;
129 } else {
130 m_dwCurPos --;
131 }
132 }
133 }
134 return;
135 }
136 type = PDFWORD_NUMBER;
137 dwSize = 1;
138 while (1) {
139 if (chartype != 'N') {
140 type = PDFWORD_TEXT;
141 }
142 if (m_dwSize <= m_dwCurPos) {
143 return;
144 }
145 ch = m_pData[m_dwCurPos++];
146 chartype = PDF_CharType[ch];
147 if (chartype == 'D' || chartype == 'W') {
148 m_dwCurPos --;
149 break;
150 }
151 dwSize ++;
152 }
153 }
GetWord()154 CFX_ByteStringC CPDF_SimpleParser::GetWord()
155 {
156 FX_LPCBYTE pStart;
157 FX_DWORD dwSize;
158 int type;
159 ParseWord(pStart, dwSize, type);
160 if (dwSize == 1 && pStart[0] == '<') {
161 while (m_dwCurPos < m_dwSize && m_pData[m_dwCurPos] != '>') {
162 m_dwCurPos ++;
163 }
164 if (m_dwCurPos < m_dwSize) {
165 m_dwCurPos ++;
166 }
167 return CFX_ByteStringC(pStart, (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData)));
168 } else if (dwSize == 1 && pStart[0] == '(') {
169 int level = 1;
170 while (m_dwCurPos < m_dwSize) {
171 if (m_pData[m_dwCurPos] == ')') {
172 level --;
173 if (level == 0) {
174 break;
175 }
176 }
177 if (m_pData[m_dwCurPos] == '\\') {
178 if (m_dwSize <= m_dwCurPos) {
179 break;
180 }
181 m_dwCurPos ++;
182 } else if (m_pData[m_dwCurPos] == '(') {
183 level ++;
184 }
185 if (m_dwSize <= m_dwCurPos) {
186 break;
187 }
188 m_dwCurPos ++;
189 }
190 if (m_dwCurPos < m_dwSize) {
191 m_dwCurPos ++;
192 }
193 return CFX_ByteStringC(pStart, (FX_STRSIZE)(m_dwCurPos - (pStart - m_pData)));
194 }
195 return CFX_ByteStringC(pStart, dwSize);
196 }
SearchToken(FX_BSTR token)197 FX_BOOL CPDF_SimpleParser::SearchToken(FX_BSTR token)
198 {
199 int token_len = token.GetLength();
200 while (m_dwCurPos < m_dwSize - token_len) {
201 if (FXSYS_memcmp32(m_pData + m_dwCurPos, token.GetPtr(), token_len) == 0) {
202 break;
203 }
204 m_dwCurPos ++;
205 }
206 if (m_dwCurPos == m_dwSize - token_len) {
207 return FALSE;
208 }
209 m_dwCurPos += token_len;
210 return TRUE;
211 }
SkipWord(FX_BSTR token)212 FX_BOOL CPDF_SimpleParser::SkipWord(FX_BSTR token)
213 {
214 while (1) {
215 CFX_ByteStringC word = GetWord();
216 if (word.IsEmpty()) {
217 return FALSE;
218 }
219 if (word == token) {
220 return TRUE;
221 }
222 }
223 return FALSE;
224 }
FindTagPair(FX_BSTR start_token,FX_BSTR end_token,FX_DWORD & start_pos,FX_DWORD & end_pos)225 FX_BOOL CPDF_SimpleParser::FindTagPair(FX_BSTR start_token, FX_BSTR end_token,
226 FX_DWORD& start_pos, FX_DWORD& end_pos)
227 {
228 if (!start_token.IsEmpty()) {
229 if (!SkipWord(start_token)) {
230 return FALSE;
231 }
232 start_pos = m_dwCurPos;
233 }
234 while (1) {
235 end_pos = m_dwCurPos;
236 CFX_ByteStringC word = GetWord();
237 if (word.IsEmpty()) {
238 return FALSE;
239 }
240 if (word == end_token) {
241 return TRUE;
242 }
243 }
244 return FALSE;
245 }
FindTagParam(FX_BSTR token,int nParams)246 FX_BOOL CPDF_SimpleParser::FindTagParam(FX_BSTR token, int nParams)
247 {
248 nParams ++;
249 FX_DWORD* pBuf = FX_Alloc(FX_DWORD, nParams);
250 int buf_index = 0;
251 int buf_count = 0;
252 while (1) {
253 pBuf[buf_index++] = m_dwCurPos;
254 if (buf_index == nParams) {
255 buf_index = 0;
256 }
257 buf_count ++;
258 if (buf_count > nParams) {
259 buf_count = nParams;
260 }
261 CFX_ByteStringC word = GetWord();
262 if (word.IsEmpty()) {
263 FX_Free(pBuf);
264 return FALSE;
265 }
266 if (word == token) {
267 if (buf_count < nParams) {
268 continue;
269 }
270 m_dwCurPos = pBuf[buf_index];
271 FX_Free(pBuf);
272 return TRUE;
273 }
274 }
275 return FALSE;
276 }
// Converts one hexadecimal digit character (0-9, a-f, A-F) to its value.
// Any other character yields 0.
static int _hex2dec(char ch)
{
    int value = 0;
    if (ch >= 'A' && ch <= 'F') {
        value = 10 + (ch - 'A');
    } else if (ch >= 'a' && ch <= 'f') {
        value = 10 + (ch - 'a');
    } else if (ch >= '0' && ch <= '9') {
        value = ch - '0';
    }
    return value;
}
PDF_NameDecode(FX_BSTR bstr)290 CFX_ByteString PDF_NameDecode(FX_BSTR bstr)
291 {
292 int size = bstr.GetLength();
293 FX_LPCSTR pSrc = bstr.GetCStr();
294 if (FXSYS_memchr(pSrc, '#', size) == NULL) {
295 return bstr;
296 }
297 CFX_ByteString result;
298 FX_LPSTR pDestStart = result.GetBuffer(size);
299 FX_LPSTR pDest = pDestStart;
300 for (int i = 0; i < size; i ++) {
301 if (pSrc[i] == '#' && i < size - 2) {
302 *pDest ++ = _hex2dec(pSrc[i + 1]) * 16 + _hex2dec(pSrc[i + 2]);
303 i += 2;
304 } else {
305 *pDest ++ = pSrc[i];
306 }
307 }
308 result.ReleaseBuffer((FX_STRSIZE)(pDest - pDestStart));
309 return result;
310 }
// CFX_ByteString overload: returns |orig| itself when it contains no '#',
// otherwise forwards to the CFX_ByteStringC overload to decode the escapes.
CFX_ByteString PDF_NameDecode(const CFX_ByteString& orig)
{
    const FX_BOOL bHasEscape =
        FXSYS_memchr(orig.c_str(), '#', orig.GetLength()) != NULL;
    if (bHasEscape) {
        return PDF_NameDecode(CFX_ByteStringC(orig));
    }
    return orig;
}
// Encodes a name for output: every byte that is >= 0x80, is '#', or is
// classed as whitespace or delimiter in PDF_CharType is written as '#'
// followed by two upper-case hex digits; all other bytes are copied through.
// When nothing needs escaping, |orig| itself is returned.
CFX_ByteString PDF_NameEncode(const CFX_ByteString& orig)
{
    static const char kHexDigits[] = "0123456789ABCDEF";
    const FX_BYTE* pSrc = (const FX_BYTE*)orig.c_str();
    const int nSrcLen = orig.GetLength();

    // First pass: count the bytes that need escaping to size the output.
    int nEscapes = 0;
    for (int i = 0; i < nSrcLen; i ++) {
        const FX_BYTE ch = pSrc[i];
        const char cls = PDF_CharType[ch];
        if (ch >= 0x80 || ch == '#' || cls == 'W' || cls == 'D') {
            nEscapes ++;
        }
    }
    if (nEscapes == 0) {
        return orig;
    }

    // Each escaped byte grows from one output byte to three.
    CFX_ByteString encoded;
    FX_LPSTR pOut = encoded.GetBuffer(nSrcLen + 2 * nEscapes);
    FX_LPSTR pCursor = pOut;
    for (int i = 0; i < nSrcLen; i ++) {
        const FX_BYTE ch = pSrc[i];
        const char cls = PDF_CharType[ch];
        if (ch >= 0x80 || ch == '#' || cls == 'W' || cls == 'D') {
            *pCursor++ = '#';
            *pCursor++ = kHexDigits[ch / 16];
            *pCursor++ = kHexDigits[ch % 16];
        } else {
            *pCursor++ = ch;
        }
    }
    *pCursor = 0;
    encoded.ReleaseBuffer();
    return encoded;
}
operator <<(CFX_ByteTextBuf & buf,const CPDF_Object * pObj)354 CFX_ByteTextBuf& operator << (CFX_ByteTextBuf& buf, const CPDF_Object* pObj)
355 {
356 if (pObj == NULL) {
357 buf << FX_BSTRC(" null");
358 return buf;
359 }
360 switch (pObj->GetType()) {
361 case PDFOBJ_NULL:
362 buf << FX_BSTRC(" null");
363 break;
364 case PDFOBJ_BOOLEAN:
365 case PDFOBJ_NUMBER:
366 buf << " " << pObj->GetString();
367 break;
368 case PDFOBJ_STRING: {
369 CFX_ByteString str = pObj->GetString();
370 FX_BOOL bHex = ((CPDF_String*)pObj)->IsHex();
371 buf << PDF_EncodeString(str, bHex);
372 break;
373 }
374 case PDFOBJ_NAME: {
375 CFX_ByteString str = pObj->GetString();
376 buf << FX_BSTRC("/") << PDF_NameEncode(str);
377 break;
378 }
379 case PDFOBJ_REFERENCE: {
380 CPDF_Reference* p = (CPDF_Reference*)pObj;
381 buf << " " << p->GetRefObjNum() << FX_BSTRC(" 0 R ");
382 break;
383 }
384 case PDFOBJ_ARRAY: {
385 CPDF_Array* p = (CPDF_Array*)pObj;
386 buf << FX_BSTRC("[");
387 for (FX_DWORD i = 0; i < p->GetCount(); i ++) {
388 CPDF_Object* pElement = p->GetElement(i);
389 if (pElement->GetObjNum()) {
390 buf << " " << pElement->GetObjNum() << FX_BSTRC(" 0 R");
391 } else {
392 buf << pElement;
393 }
394 }
395 buf << FX_BSTRC("]");
396 break;
397 }
398 case PDFOBJ_DICTIONARY: {
399 CPDF_Dictionary* p = (CPDF_Dictionary*)pObj;
400 buf << FX_BSTRC("<<");
401 FX_POSITION pos = p->GetStartPos();
402 while (pos) {
403 CFX_ByteString key;
404 CPDF_Object* pValue = p->GetNextElement(pos, key);
405 buf << FX_BSTRC("/") << PDF_NameEncode(key);
406 if (pValue->GetObjNum()) {
407 buf << " " << pValue->GetObjNum() << FX_BSTRC(" 0 R ");
408 } else {
409 buf << pValue;
410 }
411 }
412 buf << FX_BSTRC(">>");
413 break;
414 }
415 case PDFOBJ_STREAM: {
416 CPDF_Stream* p = (CPDF_Stream*)pObj;
417 buf << p->GetDict() << FX_BSTRC("stream\r\n");
418 CPDF_StreamAcc acc;
419 acc.LoadAllData(p, TRUE);
420 buf.AppendBlock(acc.GetData(), acc.GetSize());
421 buf << FX_BSTRC("\r\nendstream");
422 break;
423 }
424 default:
425 ASSERT(FALSE);
426 break;
427 }
428 return buf;
429 }
PDF_ClipFloat(FX_FLOAT f)430 FX_FLOAT PDF_ClipFloat(FX_FLOAT f)
431 {
432 if (f < 0) {
433 return 0;
434 }
435 if (f > 1.0f) {
436 return 1.0f;
437 }
438 return f;
439 }
SearchNumberNode(CPDF_Dictionary * pNode,int num)440 static CPDF_Object* SearchNumberNode(CPDF_Dictionary* pNode, int num)
441 {
442 CPDF_Array* pLimits = pNode->GetArray("Limits");
443 if (pLimits && (num < pLimits->GetInteger(0) || num > pLimits->GetInteger(1))) {
444 return NULL;
445 }
446 CPDF_Array* pNumbers = pNode->GetArray("Nums");
447 if (pNumbers) {
448 FX_DWORD dwCount = pNumbers->GetCount() / 2;
449 for (FX_DWORD i = 0; i < dwCount; i ++) {
450 int index = pNumbers->GetInteger(i * 2);
451 if (num == index) {
452 return pNumbers->GetElementValue(i * 2 + 1);
453 }
454 if (index > num) {
455 break;
456 }
457 }
458 return NULL;
459 }
460 CPDF_Array* pKids = pNode->GetArray("Kids");
461 if (pKids == NULL) {
462 return NULL;
463 }
464 for (FX_DWORD i = 0; i < pKids->GetCount(); i ++) {
465 CPDF_Dictionary* pKid = pKids->GetDict(i);
466 if (pKid == NULL) {
467 continue;
468 }
469 CPDF_Object* pFound = SearchNumberNode(pKid, num);
470 if (pFound) {
471 return pFound;
472 }
473 }
474 return NULL;
475 }
LookupValue(int num)476 CPDF_Object* CPDF_NumberTree::LookupValue(int num)
477 {
478 return SearchNumberNode(m_pRoot, num);
479 }
480