1 /*
2 * Copyright 2013 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "SkPdfNativeDoc.h"
9
10 #include <stdio.h>
11 #include <string.h>
12 #include <sys/types.h>
13 #include <sys/stat.h>
14
15 #include "SkPdfMapper_autogen.h"
16 #include "SkPdfNativeObject.h"
17 #include "SkPdfNativeTokenizer.h"
18 #include "SkPdfReporter.h"
19 #include "SkStream.h"
20
// TODO(edisonn): for some reason on mac these files are not found here, but are found from headers
22 //#include "SkPdfFileTrailerDictionary_autogen.h"
23 //#include "SkPdfCatalogDictionary_autogen.h"
24 //#include "SkPdfPageObjectDictionary_autogen.h"
25 //#include "SkPdfPageTreeNodeDictionary_autogen.h"
26 #include "SkPdfHeaders_autogen.h"
27
// Returns the size in bytes that stat() reports for |filename|, or -1 when stat() fails.
static long getFileSize(const char* filename)
{
    struct stat info;
    if (stat(filename, &info) != 0) {
        return -1;
    }
    return (long)info.st_size;
}
34
lineHome(const unsigned char * start,const unsigned char * current)35 static const unsigned char* lineHome(const unsigned char* start, const unsigned char* current) {
36 while (current > start && !isPdfEOL(*(current - 1))) {
37 current--;
38 }
39 return current;
40 }
41
previousLineHome(const unsigned char * start,const unsigned char * current)42 static const unsigned char* previousLineHome(const unsigned char* start,
43 const unsigned char* current) {
44 if (current > start && isPdfEOL(*(current - 1))) {
45 current--;
46 }
47
48 // allows CR+LF, LF+CR but not two CR+CR or LF+LF
49 if (current > start && isPdfEOL(*(current - 1)) && *current != *(current - 1)) {
50 current--;
51 }
52
53 while (current > start && !isPdfEOL(*(current - 1))) {
54 current--;
55 }
56
57 return current;
58 }
59
ignoreLine(const unsigned char * current,const unsigned char * end)60 static const unsigned char* ignoreLine(const unsigned char* current, const unsigned char* end) {
61 while (current < end && !isPdfEOL(*current)) {
62 current++;
63 }
64 current++;
65 if (current < end && isPdfEOL(*current) && *current != *(current - 1)) {
66 current++;
67 }
68 return current;
69 }
70
// Global pointer to a document. In this file it is only assigned by the
// SkPdfNativeDoc(const char* path) constructor, which stores |this| in it.
// NOTE(review): nothing in this file resets it when that document is destroyed - confirm how
// users of gDoc deal with that.
SkPdfNativeDoc* gDoc = NULL;
72
SkPdfNativeDoc(SkStream * stream)73 SkPdfNativeDoc::SkPdfNativeDoc(SkStream* stream)
74 : fAllocator(new SkPdfAllocator())
75 , fFileContent(NULL)
76 , fContentLength(0)
77 , fRootCatalogRef(NULL)
78 , fRootCatalog(NULL) {
79 size_t size = stream->getLength();
80 void* ptr = sk_malloc_throw(size);
81 stream->read(ptr, size);
82
83 init(ptr, size);
84 }
85
SkPdfNativeDoc(const char * path)86 SkPdfNativeDoc::SkPdfNativeDoc(const char* path)
87 : fAllocator(new SkPdfAllocator())
88 , fFileContent(NULL)
89 , fContentLength(0)
90 , fRootCatalogRef(NULL)
91 , fRootCatalog(NULL) {
92 gDoc = this;
93 FILE* file = fopen(path, "r");
94 // TODO(edisonn): put this in a function that can return NULL
95 if (file) {
96 size_t size = getFileSize(path);
97 void* content = sk_malloc_throw(size);
98 bool ok = (0 != fread(content, size, 1, file));
99 fclose(file);
100 if (!ok) {
101 sk_free(content);
102 SkPdfReport(kFatalError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
103 "could not read file", NULL, NULL);
104 // TODO(edisonn): not nice to return like this from constructor, create a static
105 // function that can report NULL for failures.
106 return; // Doc will have 0 pages
107 }
108
109 init(content, size);
110 }
111 }
112
init(const void * bytes,size_t length)113 void SkPdfNativeDoc::init(const void* bytes, size_t length) {
114 fFileContent = (const unsigned char*)bytes;
115 fContentLength = length;
116 const unsigned char* eofLine = lineHome(fFileContent, fFileContent + fContentLength - 1);
117 const unsigned char* xrefByteOffsetLine = previousLineHome(fFileContent, eofLine);
118 const unsigned char* xrefstartKeywordLine = previousLineHome(fFileContent, xrefByteOffsetLine);
119
120 if (strcmp((char*)xrefstartKeywordLine, "startxref") != 0) {
121 SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue,
122 "Could not find startxref", NULL, NULL);
123 }
124
125 long xrefByteOffset = atol((const char*)xrefByteOffsetLine);
126
127 bool storeCatalog = true;
128 while (xrefByteOffset >= 0) {
129 const unsigned char* trailerStart = this->readCrossReferenceSection(fFileContent + xrefByteOffset,
130 xrefstartKeywordLine);
131 xrefByteOffset = -1;
132 if (trailerStart < xrefstartKeywordLine) {
133 this->readTrailer(trailerStart, xrefstartKeywordLine, storeCatalog, &xrefByteOffset, false);
134 storeCatalog = false;
135 }
136 }
137
138 // TODO(edisonn): warn/error expect fObjects[fRefCatalogId].fGeneration == fRefCatalogGeneration
139 // TODO(edisonn): security, verify that SkPdfCatalogDictionary is indeed using mapper
140
141 if (fRootCatalogRef) {
142 fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef);
143 if (fRootCatalog != NULL && fRootCatalog->isDictionary() && fRootCatalog->valid()) {
144 SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this);
145 if (tree && tree->isDictionary() && tree->valid()) {
146 fillPages(tree);
147 }
148 }
149 }
150
151 if (pages() == 0) {
152 // TODO(edisonn): probably it would be better to return NULL and make a clean document.
153 loadWithoutXRef();
154 }
155
156 // TODO(edisonn): corrupted pdf, read it from beginning and rebuild
157 // (xref, trailer, or just read all objects)
158 }
159
loadWithoutXRef()160 void SkPdfNativeDoc::loadWithoutXRef() {
161 const unsigned char* current = fFileContent;
162 const unsigned char* end = fFileContent + fContentLength;
163
164 // TODO(edisonn): read pdf version
165 current = ignoreLine(current, end);
166
167 current = skipPdfWhiteSpaces(current, end);
168 while (current < end) {
169 SkPdfNativeObject token;
170 current = nextObject(current, end, &token, NULL, NULL);
171 if (token.isInteger()) {
172 int id = (int)token.intValue();
173
174 token.reset();
175 current = nextObject(current, end, &token, NULL, NULL);
176 // TODO(edisonn): generation ignored for now (used in pdfs with updates)
177 // int generation = (int)token.intValue();
178
179 token.reset();
180 current = nextObject(current, end, &token, NULL, NULL);
181 // TODO(edisonn): keywork must be "obj". Add ability to report error instead ignoring.
182 if (!token.isKeyword("obj")) {
183 SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue,
184 "Could not find obj", NULL, NULL);
185 continue;
186 }
187
188 while (fObjects.count() < id + 1) {
189 reset(fObjects.append());
190 }
191
192 fObjects[id].fOffset = SkToInt(current - fFileContent);
193
194 SkPdfNativeObject* obj = fAllocator->allocObject();
195 current = nextObject(current, end, obj, fAllocator, this);
196
197 fObjects[id].fResolvedReference = obj;
198 fObjects[id].fObj = obj;
199 fObjects[id].fIsReferenceResolved = true;
200 } else if (token.isKeyword("trailer")) {
201 long dummy;
202 current = readTrailer(current, end, true, &dummy, true);
203 } else if (token.isKeyword("startxref")) {
204 token.reset();
205 current = nextObject(current, end, &token, NULL, NULL); // ignore startxref
206 }
207
208 current = skipPdfWhiteSpaces(current, end);
209 }
210
211 // TODO(edisonn): quick hack, detect root catalog. When we implement linearized support we
212 // might not need it.
213 if (!fRootCatalogRef) {
214 for (unsigned int i = 0 ; i < objects(); i++) {
215 SkPdfNativeObject* obj = object(i);
216 SkPdfNativeObject* root = (obj && obj->isDictionary()) ? obj->get("Root") : NULL;
217 if (root && root->isReference()) {
218 fRootCatalogRef = root;
219 }
220 }
221 }
222
223 if (fRootCatalogRef) {
224 fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef);
225 if (fRootCatalog != NULL && fRootCatalog->isDictionary() && fRootCatalog->valid()) {
226 SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this);
227 if (tree && tree->isDictionary() && tree->valid()) {
228 fillPages(tree);
229 }
230 }
231 }
232
233
234 }
235
~SkPdfNativeDoc()236 SkPdfNativeDoc::~SkPdfNativeDoc() {
237 sk_free((void*)fFileContent);
238 delete fAllocator;
239 }
240
readCrossReferenceSection(const unsigned char * xrefStart,const unsigned char * trailerEnd)241 const unsigned char* SkPdfNativeDoc::readCrossReferenceSection(const unsigned char* xrefStart,
242 const unsigned char* trailerEnd) {
243 SkPdfNativeObject xref;
244 const unsigned char* current = nextObject(xrefStart, trailerEnd, &xref, NULL, NULL);
245
246 if (!xref.isKeyword("xref")) {
247 SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue, "Could not find sref",
248 NULL, NULL);
249 return trailerEnd;
250 }
251
252 SkPdfNativeObject token;
253 while (current < trailerEnd) {
254 token.reset();
255 const unsigned char* previous = current;
256 current = nextObject(current, trailerEnd, &token, NULL, NULL);
257 if (!token.isInteger()) {
258 SkPdfReport(kInfo_SkPdfIssueSeverity, kNoIssue_SkPdfIssue,
259 "Done readCrossReferenceSection", NULL, NULL);
260 return previous;
261 }
262
263 int startId = (int)token.intValue();
264 token.reset();
265 current = nextObject(current, trailerEnd, &token, NULL, NULL);
266
267 if (!token.isInteger()) {
268 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readCrossReferenceSection",
269 &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
270 return current;
271 }
272
273 int entries = (int)token.intValue();
274
275 for (int i = 0; i < entries; i++) {
276 token.reset();
277 current = nextObject(current, trailerEnd, &token, NULL, NULL);
278 if (!token.isInteger()) {
279 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
280 "readCrossReferenceSection",
281 &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
282 return current;
283 }
284 int offset = (int)token.intValue();
285
286 token.reset();
287 current = nextObject(current, trailerEnd, &token, NULL, NULL);
288 if (!token.isInteger()) {
289 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
290 "readCrossReferenceSection",
291 &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
292 return current;
293 }
294 int generation = (int)token.intValue();
295
296 token.reset();
297 current = nextObject(current, trailerEnd, &token, NULL, NULL);
298 if (!token.isKeyword() || token.lenstr() != 1 ||
299 (*token.c_str() != 'f' && *token.c_str() != 'n')) {
300 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
301 "readCrossReferenceSection: f or n expected",
302 &token, SkPdfNativeObject::kKeyword_PdfObjectType, NULL);
303 return current;
304 }
305
306 this->addCrossSectionInfo(startId + i, generation, offset, *token.c_str() == 'f');
307 }
308 }
309 SkPdfReport(kInfo_SkPdfIssueSeverity, kNoIssue_SkPdfIssue,
310 "Unexpected end of readCrossReferenceSection", NULL, NULL);
311 return current;
312 }
313
readTrailer(const unsigned char * trailerStart,const unsigned char * trailerEnd,bool storeCatalog,long * prev,bool skipKeyword)314 const unsigned char* SkPdfNativeDoc::readTrailer(const unsigned char* trailerStart,
315 const unsigned char* trailerEnd,
316 bool storeCatalog, long* prev, bool skipKeyword) {
317 *prev = -1;
318
319 const unsigned char* current = trailerStart;
320 if (!skipKeyword) {
321 SkPdfNativeObject trailerKeyword;
322 // Use null allocator, and let it just fail if memory, it should not crash.
323 current = nextObject(current, trailerEnd, &trailerKeyword, NULL, NULL);
324
325 if (!trailerKeyword.isKeyword() || strlen("trailer") != trailerKeyword.lenstr() ||
326 strncmp(trailerKeyword.c_str(), "trailer", strlen("trailer")) != 0) {
327 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
328 "readTrailer: trailer keyword expected",
329 &trailerKeyword,
330 SkPdfNativeObject::kKeyword_PdfObjectType, NULL);
331 return current;
332 }
333 }
334
335 SkPdfNativeObject token;
336 current = nextObject(current, trailerEnd, &token, fAllocator, NULL);
337 if (!token.isDictionary()) {
338 return current;
339 }
340 SkPdfFileTrailerDictionary* trailer = (SkPdfFileTrailerDictionary*)&token;
341 if (!trailer->valid()) {
342 return current;
343 }
344
345 if (storeCatalog) {
346 SkPdfNativeObject* ref = trailer->Root(NULL);
347 if (ref == NULL || !ref->isReference()) {
348 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
349 "readTrailer: unexpected root reference",
350 ref, SkPdfNativeObject::kReference_PdfObjectType, NULL);
351 return current;
352 }
353 fRootCatalogRef = ref;
354 }
355
356 if (trailer->has_Prev()) {
357 *prev = (long)trailer->Prev(NULL);
358 }
359
360 return current;
361 }
362
addCrossSectionInfo(int id,int generation,int offset,bool isFreed)363 void SkPdfNativeDoc::addCrossSectionInfo(int id, int generation, int offset, bool isFreed) {
364 // TODO(edisonn): security here, verify id
365 while (fObjects.count() < id + 1) {
366 this->reset(fObjects.append());
367 }
368
369 fObjects[id].fOffset = offset;
370 fObjects[id].fObj = NULL;
371 fObjects[id].fResolvedReference = NULL;
372 fObjects[id].fIsReferenceResolved = false;
373 }
374
readObject(int id)375 SkPdfNativeObject* SkPdfNativeDoc::readObject(int id/*, int expectedGeneration*/) {
376 long startOffset = fObjects[id].fOffset;
377 //long endOffset = fObjects[id].fOffsetEnd;
378 // TODO(edisonn): use hinted endOffset
379 const unsigned char* current = fFileContent + startOffset;
380 const unsigned char* end = fFileContent + fContentLength;
381
382 SkPdfNativeTokenizer tokenizer(current, (int) (end - current), fAllocator, this);
383
384 SkPdfNativeObject idObj;
385 SkPdfNativeObject generationObj;
386 SkPdfNativeObject objKeyword;
387 SkPdfNativeObject* dict = fAllocator->allocObject();
388
389 current = nextObject(current, end, &idObj, NULL, NULL);
390 if (current >= end) {
391 SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, "reading id",
392 NULL, NULL);
393 return NULL;
394 }
395
396 current = nextObject(current, end, &generationObj, NULL, NULL);
397 if (current >= end) {
398 SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
399 "reading generation", NULL, NULL);
400 return NULL;
401 }
402
403 current = nextObject(current, end, &objKeyword, NULL, NULL);
404 if (current >= end) {
405 SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
406 "reading keyword obj", NULL, NULL);
407 return NULL;
408 }
409
410 if (!idObj.isInteger() || id != idObj.intValue()) {
411 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readObject: unexpected id",
412 &idObj, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
413 }
414
415 // TODO(edisonn): verify that the generation is the right one
416 if (!generationObj.isInteger() /* || generation != generationObj.intValue()*/) {
417 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
418 "readObject: unexpected generation",
419 &generationObj, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
420 }
421
422 if (!objKeyword.isKeyword() || strcmp(objKeyword.c_str(), "obj") != 0) {
423 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
424 "readObject: unexpected obj keyword",
425 &objKeyword, SkPdfNativeObject::kKeyword_PdfObjectType, NULL);
426 }
427
428 current = nextObject(current, end, dict, fAllocator, this);
429
430 // TODO(edisonn): report warning/error - verify that the last token is endobj
431
432 return dict;
433 }
434
fillPages(SkPdfPageTreeNodeDictionary * tree)435 void SkPdfNativeDoc::fillPages(SkPdfPageTreeNodeDictionary* tree) {
436 SkPdfArray* kids = tree->Kids(this);
437 if (kids == NULL) {
438 *fPages.append() = (SkPdfPageObjectDictionary*)tree;
439 return;
440 }
441
442 int cnt = (int) kids->size();
443 for (int i = 0; i < cnt; i++) {
444 SkPdfNativeObject* obj = resolveReference(kids->objAtAIndex(i));
445 if (fMapper->mapPageObjectDictionary(obj) != kPageObjectDictionary_SkPdfNativeObjectType) {
446 *fPages.append() = (SkPdfPageObjectDictionary*)obj;
447 } else {
448 // TODO(edisonn): verify that it is a page tree indeed
449 fillPages((SkPdfPageTreeNodeDictionary*)obj);
450 }
451 }
452 }
453
pages() const454 int SkPdfNativeDoc::pages() const {
455 return fPages.count();
456 }
457
page(int page)458 SkPdfPageObjectDictionary* SkPdfNativeDoc::page(int page) {
459 SkASSERT(page >= 0 && page < fPages.count());
460 return fPages[page];
461 }
462
463
pageResources(int page)464 SkPdfResourceDictionary* SkPdfNativeDoc::pageResources(int page) {
465 SkASSERT(page >= 0 && page < fPages.count());
466 return fPages[page]->Resources(this);
467 }
468
// TODO(edisonn): Partially implemented.
// Move the logic for inheritable and default values directly into the code generator?
MediaBox(int page)471 SkRect SkPdfNativeDoc::MediaBox(int page) {
472 SkPdfPageObjectDictionary* current = fPages[page];
473 while (!current->has_MediaBox() && current->has_Parent()) {
474 current = (SkPdfPageObjectDictionary*)current->Parent(this);
475 }
476 if (current) {
477 return current->MediaBox(this);
478 }
479 return SkRect::MakeEmpty();
480 }
481
objects() const482 size_t SkPdfNativeDoc::objects() const {
483 return fObjects.count();
484 }
485
object(int i)486 SkPdfNativeObject* SkPdfNativeDoc::object(int i) {
487 SkASSERT(!(i < 0 || i > fObjects.count()));
488
489 if (i < 0 || i > fObjects.count()) {
490 return NULL;
491 }
492
493 if (fObjects[i].fObj == NULL) {
494 fObjects[i].fObj = readObject(i);
495 // TODO(edisonn): For perf, when we read the cross reference sections, we should take
496 // advantage of the boundaries of known objects, to minimize the risk of just parsing a bad
497 // stream, and fail quickly, in case we default to sequential stream read.
498 }
499
500 return fObjects[i].fObj;
501 }
502
mapper() const503 const SkPdfMapper* SkPdfNativeDoc::mapper() const {
504 return fMapper;
505 }
506
createReal(double value) const507 SkPdfReal* SkPdfNativeDoc::createReal(double value) const {
508 SkPdfNativeObject* obj = fAllocator->allocObject();
509 SkPdfNativeObject::makeReal(value, obj);
510 TRACK_OBJECT_SRC(obj);
511 return (SkPdfReal*)obj;
512 }
513
createInteger(int value) const514 SkPdfInteger* SkPdfNativeDoc::createInteger(int value) const {
515 SkPdfNativeObject* obj = fAllocator->allocObject();
516 SkPdfNativeObject::makeInteger(value, obj);
517 TRACK_OBJECT_SRC(obj);
518 return (SkPdfInteger*)obj;
519 }
520
createString(const unsigned char * sz,size_t len) const521 SkPdfString* SkPdfNativeDoc::createString(const unsigned char* sz, size_t len) const {
522 SkPdfNativeObject* obj = fAllocator->allocObject();
523 SkPdfNativeObject::makeString(sz, len, obj);
524 TRACK_OBJECT_SRC(obj);
525 return (SkPdfString*)obj;
526 }
527
allocator() const528 SkPdfAllocator* SkPdfNativeDoc::allocator() const {
529 return fAllocator;
530 }
531
resolveReference(SkPdfNativeObject * ref)532 SkPdfNativeObject* SkPdfNativeDoc::resolveReference(SkPdfNativeObject* ref) {
533 if (ref && ref->isReference()) {
534 int id = ref->referenceId();
535 // TODO(edisonn): generation/updates not supported now
536 //int gen = ref->referenceGeneration();
537
538 // TODO(edisonn): verify id and gen expected
539 if (id < 0 || id >= fObjects.count()) {
540 SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
541 "resolve reference id out of bounds", NULL, NULL);
542 return NULL;
543 }
544
545 if (fObjects[id].fIsReferenceResolved) {
546 SkPdfReportIf(!fObjects[id].fResolvedReference, kIgnoreError_SkPdfIssueSeverity,
547 kBadReference_SkPdfIssue, "ref is NULL", NULL, NULL);
548 return fObjects[id].fResolvedReference;
549 }
550
551 // TODO(edisonn): there are pdfs in the crashing suite that cause a stack overflow
552 // here unless we check for resolved reference on next line.
553 // Determine if the pdf is corrupted, or we have a bug here.
554
555 // Avoids recursive calls
556 fObjects[id].fIsReferenceResolved = true;
557
558 if (fObjects[id].fObj == NULL) {
559 fObjects[id].fObj = readObject(id);
560 }
561
562 if (fObjects[id].fObj != NULL && fObjects[id].fResolvedReference == NULL) {
563 if (!fObjects[id].fObj->isReference()) {
564 fObjects[id].fResolvedReference = fObjects[id].fObj;
565 } else {
566 fObjects[id].fResolvedReference = resolveReference(fObjects[id].fObj);
567 }
568 }
569
570 return fObjects[id].fResolvedReference;
571 }
572
573 return (SkPdfNativeObject*)ref;
574 }
575
bytesUsed() const576 size_t SkPdfNativeDoc::bytesUsed() const {
577 return fAllocator->bytesUsed() +
578 fContentLength +
579 fObjects.count() * sizeof(PublicObjectEntry) +
580 fPages.count() * sizeof(SkPdfPageObjectDictionary*) +
581 sizeof(*this);
582 }
583