1 /*
2  * Copyright 2013 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "SkPdfNativeDoc.h"
9 
10 #include <stdio.h>
11 #include <string.h>
12 #include <sys/types.h>
13 #include <sys/stat.h>
14 
15 #include "SkPdfMapper_autogen.h"
16 #include "SkPdfNativeObject.h"
17 #include "SkPdfNativeTokenizer.h"
18 #include "SkPdfReporter.h"
19 #include "SkStream.h"
20 
// TODO(edisonn): for some reason on mac these files are not found when included here, but are
// found when included through the aggregated autogen header below.
22 //#include "SkPdfFileTrailerDictionary_autogen.h"
23 //#include "SkPdfCatalogDictionary_autogen.h"
24 //#include "SkPdfPageObjectDictionary_autogen.h"
25 //#include "SkPdfPageTreeNodeDictionary_autogen.h"
26 #include "SkPdfHeaders_autogen.h"
27 
// Returns the size in bytes of the file at |filename| as reported by stat(), or -1 when stat()
// fails (for example when the file does not exist).
static long getFileSize(const char* filename)
{
    struct stat info;
    if (stat(filename, &info) != 0) {
        return -1;
    }
    return (long)info.st_size;
}
34 
lineHome(const unsigned char * start,const unsigned char * current)35 static const unsigned char* lineHome(const unsigned char* start, const unsigned char* current) {
36     while (current > start && !isPdfEOL(*(current - 1))) {
37         current--;
38     }
39     return current;
40 }
41 
previousLineHome(const unsigned char * start,const unsigned char * current)42 static const unsigned char* previousLineHome(const unsigned char* start,
43                                              const unsigned char* current) {
44     if (current > start && isPdfEOL(*(current - 1))) {
45         current--;
46     }
47 
48     // allows CR+LF, LF+CR but not two CR+CR or LF+LF
49     if (current > start && isPdfEOL(*(current - 1)) && *current != *(current - 1)) {
50         current--;
51     }
52 
53     while (current > start && !isPdfEOL(*(current - 1))) {
54         current--;
55     }
56 
57     return current;
58 }
59 
ignoreLine(const unsigned char * current,const unsigned char * end)60 static const unsigned char* ignoreLine(const unsigned char* current, const unsigned char* end) {
61     while (current < end && !isPdfEOL(*current)) {
62         current++;
63     }
64     current++;
65     if (current < end && isPdfEOL(*current) && *current != *(current - 1)) {
66         current++;
67     }
68     return current;
69 }
70 
// Global pointer to a document; assigned in SkPdfNativeDoc(const char* path). Its consumers are
// not visible in this file.
// NOTE(review): the SkStream constructor does not assign it - confirm whether that is intended.
SkPdfNativeDoc* gDoc = NULL;
72 
SkPdfNativeDoc(SkStream * stream)73 SkPdfNativeDoc::SkPdfNativeDoc(SkStream* stream)
74         : fAllocator(new SkPdfAllocator())
75         , fFileContent(NULL)
76         , fContentLength(0)
77         , fRootCatalogRef(NULL)
78         , fRootCatalog(NULL) {
79     size_t size = stream->getLength();
80     void* ptr = sk_malloc_throw(size);
81     stream->read(ptr, size);
82 
83     init(ptr, size);
84 }
85 
SkPdfNativeDoc(const char * path)86 SkPdfNativeDoc::SkPdfNativeDoc(const char* path)
87         : fAllocator(new SkPdfAllocator())
88         , fFileContent(NULL)
89         , fContentLength(0)
90         , fRootCatalogRef(NULL)
91         , fRootCatalog(NULL) {
92     gDoc = this;
93     FILE* file = fopen(path, "r");
94     // TODO(edisonn): put this in a function that can return NULL
95     if (file) {
96         size_t size = getFileSize(path);
97         void* content = sk_malloc_throw(size);
98         bool ok = (0 != fread(content, size, 1, file));
99         fclose(file);
100         if (!ok) {
101             sk_free(content);
102             SkPdfReport(kFatalError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
103                         "could not read file", NULL, NULL);
104             // TODO(edisonn): not nice to return like this from constructor, create a static
105             // function that can report NULL for failures.
106             return;  // Doc will have 0 pages
107         }
108 
109         init(content, size);
110     }
111 }
112 
init(const void * bytes,size_t length)113 void SkPdfNativeDoc::init(const void* bytes, size_t length) {
114     fFileContent = (const unsigned char*)bytes;
115     fContentLength = length;
116     const unsigned char* eofLine = lineHome(fFileContent, fFileContent + fContentLength - 1);
117     const unsigned char* xrefByteOffsetLine = previousLineHome(fFileContent, eofLine);
118     const unsigned char* xrefstartKeywordLine = previousLineHome(fFileContent, xrefByteOffsetLine);
119 
120     if (strcmp((char*)xrefstartKeywordLine, "startxref") != 0) {
121         SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue,
122                     "Could not find startxref", NULL, NULL);
123     }
124 
125     long xrefByteOffset = atol((const char*)xrefByteOffsetLine);
126 
127     bool storeCatalog = true;
128     while (xrefByteOffset >= 0) {
129         const unsigned char* trailerStart = this->readCrossReferenceSection(fFileContent + xrefByteOffset,
130                                                                             xrefstartKeywordLine);
131         xrefByteOffset = -1;
132         if (trailerStart < xrefstartKeywordLine) {
133             this->readTrailer(trailerStart, xrefstartKeywordLine, storeCatalog, &xrefByteOffset, false);
134             storeCatalog = false;
135         }
136     }
137 
138     // TODO(edisonn): warn/error expect fObjects[fRefCatalogId].fGeneration == fRefCatalogGeneration
139     // TODO(edisonn): security, verify that SkPdfCatalogDictionary is indeed using mapper
140 
141     if (fRootCatalogRef) {
142         fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef);
143         if (fRootCatalog != NULL && fRootCatalog->isDictionary() && fRootCatalog->valid()) {
144             SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this);
145             if (tree && tree->isDictionary() && tree->valid()) {
146                 fillPages(tree);
147             }
148         }
149     }
150 
151     if (pages() == 0) {
152         // TODO(edisonn): probably it would be better to return NULL and make a clean document.
153         loadWithoutXRef();
154     }
155 
156     // TODO(edisonn): corrupted pdf, read it from beginning and rebuild
157     // (xref, trailer, or just read all objects)
158 }
159 
loadWithoutXRef()160 void SkPdfNativeDoc::loadWithoutXRef() {
161     const unsigned char* current = fFileContent;
162     const unsigned char* end = fFileContent + fContentLength;
163 
164     // TODO(edisonn): read pdf version
165     current = ignoreLine(current, end);
166 
167     current = skipPdfWhiteSpaces(current, end);
168     while (current < end) {
169         SkPdfNativeObject token;
170         current = nextObject(current, end, &token, NULL, NULL);
171         if (token.isInteger()) {
172             int id = (int)token.intValue();
173 
174             token.reset();
175             current = nextObject(current, end, &token, NULL, NULL);
176             // TODO(edisonn): generation ignored for now (used in pdfs with updates)
177             // int generation = (int)token.intValue();
178 
179             token.reset();
180             current = nextObject(current, end, &token, NULL, NULL);
181             // TODO(edisonn): keywork must be "obj". Add ability to report error instead ignoring.
182             if (!token.isKeyword("obj")) {
183                 SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue,
184                             "Could not find obj", NULL, NULL);
185                 continue;
186             }
187 
188             while (fObjects.count() < id + 1) {
189                 reset(fObjects.append());
190             }
191 
192             fObjects[id].fOffset = SkToInt(current - fFileContent);
193 
194             SkPdfNativeObject* obj = fAllocator->allocObject();
195             current = nextObject(current, end, obj, fAllocator, this);
196 
197             fObjects[id].fResolvedReference = obj;
198             fObjects[id].fObj = obj;
199             fObjects[id].fIsReferenceResolved = true;
200         } else if (token.isKeyword("trailer")) {
201             long dummy;
202             current = readTrailer(current, end, true, &dummy, true);
203         } else if (token.isKeyword("startxref")) {
204             token.reset();
205             current = nextObject(current, end, &token, NULL, NULL);  // ignore startxref
206         }
207 
208         current = skipPdfWhiteSpaces(current, end);
209     }
210 
211     // TODO(edisonn): quick hack, detect root catalog. When we implement linearized support we
212     // might not need it.
213     if (!fRootCatalogRef) {
214         for (unsigned int i = 0 ; i < objects(); i++) {
215             SkPdfNativeObject* obj = object(i);
216             SkPdfNativeObject* root = (obj && obj->isDictionary()) ? obj->get("Root") : NULL;
217             if (root && root->isReference()) {
218                 fRootCatalogRef = root;
219             }
220         }
221     }
222 
223     if (fRootCatalogRef) {
224         fRootCatalog = (SkPdfCatalogDictionary*)resolveReference(fRootCatalogRef);
225         if (fRootCatalog != NULL && fRootCatalog->isDictionary() && fRootCatalog->valid()) {
226             SkPdfPageTreeNodeDictionary* tree = fRootCatalog->Pages(this);
227             if (tree && tree->isDictionary() && tree->valid()) {
228                 fillPages(tree);
229             }
230         }
231     }
232 
233 
234 }
235 
~SkPdfNativeDoc()236 SkPdfNativeDoc::~SkPdfNativeDoc() {
237     sk_free((void*)fFileContent);
238     delete fAllocator;
239 }
240 
readCrossReferenceSection(const unsigned char * xrefStart,const unsigned char * trailerEnd)241 const unsigned char* SkPdfNativeDoc::readCrossReferenceSection(const unsigned char* xrefStart,
242                                                                const unsigned char* trailerEnd) {
243     SkPdfNativeObject xref;
244     const unsigned char* current = nextObject(xrefStart, trailerEnd, &xref, NULL, NULL);
245 
246     if (!xref.isKeyword("xref")) {
247         SkPdfReport(kWarning_SkPdfIssueSeverity, kMissingToken_SkPdfIssue, "Could not find sref",
248                     NULL, NULL);
249         return trailerEnd;
250     }
251 
252     SkPdfNativeObject token;
253     while (current < trailerEnd) {
254         token.reset();
255         const unsigned char* previous = current;
256         current = nextObject(current, trailerEnd, &token, NULL, NULL);
257         if (!token.isInteger()) {
258             SkPdfReport(kInfo_SkPdfIssueSeverity, kNoIssue_SkPdfIssue,
259                         "Done readCrossReferenceSection", NULL, NULL);
260             return previous;
261         }
262 
263         int startId = (int)token.intValue();
264         token.reset();
265         current = nextObject(current, trailerEnd, &token, NULL, NULL);
266 
267         if (!token.isInteger()) {
268             SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readCrossReferenceSection",
269                                       &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
270             return current;
271         }
272 
273         int entries = (int)token.intValue();
274 
275         for (int i = 0; i < entries; i++) {
276             token.reset();
277             current = nextObject(current, trailerEnd, &token, NULL, NULL);
278             if (!token.isInteger()) {
279                 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
280                                           "readCrossReferenceSection",
281                                           &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
282                 return current;
283             }
284             int offset = (int)token.intValue();
285 
286             token.reset();
287             current = nextObject(current, trailerEnd, &token, NULL, NULL);
288             if (!token.isInteger()) {
289                 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
290                                           "readCrossReferenceSection",
291                                           &token, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
292                 return current;
293             }
294             int generation = (int)token.intValue();
295 
296             token.reset();
297             current = nextObject(current, trailerEnd, &token, NULL, NULL);
298             if (!token.isKeyword() || token.lenstr() != 1 ||
299                 (*token.c_str() != 'f' && *token.c_str() != 'n')) {
300                 SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
301                                           "readCrossReferenceSection: f or n expected",
302                                           &token, SkPdfNativeObject::kKeyword_PdfObjectType, NULL);
303                 return current;
304             }
305 
306             this->addCrossSectionInfo(startId + i, generation, offset, *token.c_str() == 'f');
307         }
308     }
309     SkPdfReport(kInfo_SkPdfIssueSeverity, kNoIssue_SkPdfIssue,
310                 "Unexpected end of readCrossReferenceSection", NULL, NULL);
311     return current;
312 }
313 
readTrailer(const unsigned char * trailerStart,const unsigned char * trailerEnd,bool storeCatalog,long * prev,bool skipKeyword)314 const unsigned char* SkPdfNativeDoc::readTrailer(const unsigned char* trailerStart,
315                                                  const unsigned char* trailerEnd,
316                                                  bool storeCatalog, long* prev, bool skipKeyword) {
317     *prev = -1;
318 
319     const unsigned char* current = trailerStart;
320     if (!skipKeyword) {
321         SkPdfNativeObject trailerKeyword;
322         // Use null allocator, and let it just fail if memory, it should not crash.
323         current = nextObject(current, trailerEnd, &trailerKeyword, NULL, NULL);
324 
325         if (!trailerKeyword.isKeyword() || strlen("trailer") != trailerKeyword.lenstr() ||
326             strncmp(trailerKeyword.c_str(), "trailer", strlen("trailer")) != 0) {
327             SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
328                                       "readTrailer: trailer keyword expected",
329                                       &trailerKeyword,
330                                       SkPdfNativeObject::kKeyword_PdfObjectType, NULL);
331             return current;
332         }
333     }
334 
335     SkPdfNativeObject token;
336     current = nextObject(current, trailerEnd, &token, fAllocator, NULL);
337     if (!token.isDictionary()) {
338         return current;
339     }
340     SkPdfFileTrailerDictionary* trailer = (SkPdfFileTrailerDictionary*)&token;
341     if (!trailer->valid()) {
342         return current;
343     }
344 
345     if (storeCatalog) {
346         SkPdfNativeObject* ref = trailer->Root(NULL);
347         if (ref == NULL || !ref->isReference()) {
348             SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
349                                       "readTrailer: unexpected root reference",
350                                       ref, SkPdfNativeObject::kReference_PdfObjectType, NULL);
351             return current;
352         }
353         fRootCatalogRef = ref;
354     }
355 
356     if (trailer->has_Prev()) {
357         *prev = (long)trailer->Prev(NULL);
358     }
359 
360     return current;
361 }
362 
addCrossSectionInfo(int id,int generation,int offset,bool isFreed)363 void SkPdfNativeDoc::addCrossSectionInfo(int id, int generation, int offset, bool isFreed) {
364     // TODO(edisonn): security here, verify id
365     while (fObjects.count() < id + 1) {
366         this->reset(fObjects.append());
367     }
368 
369     fObjects[id].fOffset = offset;
370     fObjects[id].fObj = NULL;
371     fObjects[id].fResolvedReference = NULL;
372     fObjects[id].fIsReferenceResolved = false;
373 }
374 
readObject(int id)375 SkPdfNativeObject* SkPdfNativeDoc::readObject(int id/*, int expectedGeneration*/) {
376     long startOffset = fObjects[id].fOffset;
377     //long endOffset = fObjects[id].fOffsetEnd;
378     // TODO(edisonn): use hinted endOffset
379     const unsigned char* current = fFileContent + startOffset;
380     const unsigned char* end = fFileContent + fContentLength;
381 
382     SkPdfNativeTokenizer tokenizer(current, (int) (end - current), fAllocator, this);
383 
384     SkPdfNativeObject idObj;
385     SkPdfNativeObject generationObj;
386     SkPdfNativeObject objKeyword;
387     SkPdfNativeObject* dict = fAllocator->allocObject();
388 
389     current = nextObject(current, end, &idObj, NULL, NULL);
390     if (current >= end) {
391         SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue, "reading id",
392                     NULL, NULL);
393         return NULL;
394     }
395 
396     current = nextObject(current, end, &generationObj, NULL, NULL);
397     if (current >= end) {
398         SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
399                     "reading generation", NULL, NULL);
400         return NULL;
401     }
402 
403     current = nextObject(current, end, &objKeyword, NULL, NULL);
404     if (current >= end) {
405         SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
406                     "reading keyword obj", NULL, NULL);
407         return NULL;
408     }
409 
410     if (!idObj.isInteger() || id != idObj.intValue()) {
411         SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity, "readObject: unexpected id",
412                                   &idObj, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
413     }
414 
415     // TODO(edisonn): verify that the generation is the right one
416     if (!generationObj.isInteger() /* || generation != generationObj.intValue()*/) {
417         SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
418                                   "readObject: unexpected generation",
419                                   &generationObj, SkPdfNativeObject::kInteger_PdfObjectType, NULL);
420     }
421 
422     if (!objKeyword.isKeyword() || strcmp(objKeyword.c_str(), "obj") != 0) {
423         SkPdfReportUnexpectedType(kIgnoreError_SkPdfIssueSeverity,
424                                   "readObject: unexpected obj keyword",
425                                   &objKeyword, SkPdfNativeObject::kKeyword_PdfObjectType, NULL);
426     }
427 
428     current = nextObject(current, end, dict, fAllocator, this);
429 
430     // TODO(edisonn): report warning/error - verify that the last token is endobj
431 
432     return dict;
433 }
434 
fillPages(SkPdfPageTreeNodeDictionary * tree)435 void SkPdfNativeDoc::fillPages(SkPdfPageTreeNodeDictionary* tree) {
436     SkPdfArray* kids = tree->Kids(this);
437     if (kids == NULL) {
438         *fPages.append() = (SkPdfPageObjectDictionary*)tree;
439         return;
440     }
441 
442     int cnt = (int) kids->size();
443     for (int i = 0; i < cnt; i++) {
444         SkPdfNativeObject* obj = resolveReference(kids->objAtAIndex(i));
445         if (fMapper->mapPageObjectDictionary(obj) != kPageObjectDictionary_SkPdfNativeObjectType) {
446             *fPages.append() = (SkPdfPageObjectDictionary*)obj;
447         } else {
448             // TODO(edisonn): verify that it is a page tree indeed
449             fillPages((SkPdfPageTreeNodeDictionary*)obj);
450         }
451     }
452 }
453 
pages() const454 int SkPdfNativeDoc::pages() const {
455     return fPages.count();
456 }
457 
page(int page)458 SkPdfPageObjectDictionary* SkPdfNativeDoc::page(int page) {
459     SkASSERT(page >= 0 && page < fPages.count());
460     return fPages[page];
461 }
462 
463 
pageResources(int page)464 SkPdfResourceDictionary* SkPdfNativeDoc::pageResources(int page) {
465     SkASSERT(page >= 0 && page < fPages.count());
466     return fPages[page]->Resources(this);
467 }
468 
469 // TODO(edisonn): Partial implemented.
470 // Move the logics directly in the code generator for inheritable and default values?
MediaBox(int page)471 SkRect SkPdfNativeDoc::MediaBox(int page) {
472     SkPdfPageObjectDictionary* current = fPages[page];
473     while (!current->has_MediaBox() && current->has_Parent()) {
474         current = (SkPdfPageObjectDictionary*)current->Parent(this);
475     }
476     if (current) {
477         return current->MediaBox(this);
478     }
479     return SkRect::MakeEmpty();
480 }
481 
objects() const482 size_t SkPdfNativeDoc::objects() const {
483     return fObjects.count();
484 }
485 
object(int i)486 SkPdfNativeObject* SkPdfNativeDoc::object(int i) {
487     SkASSERT(!(i < 0 || i > fObjects.count()));
488 
489     if (i < 0 || i > fObjects.count()) {
490         return NULL;
491     }
492 
493     if (fObjects[i].fObj == NULL) {
494         fObjects[i].fObj = readObject(i);
495         // TODO(edisonn): For perf, when we read the cross reference sections, we should take
496         // advantage of the boundaries of known objects, to minimize the risk of just parsing a bad
497         // stream, and fail quickly, in case we default to sequential stream read.
498     }
499 
500     return fObjects[i].fObj;
501 }
502 
mapper() const503 const SkPdfMapper* SkPdfNativeDoc::mapper() const {
504     return fMapper;
505 }
506 
createReal(double value) const507 SkPdfReal* SkPdfNativeDoc::createReal(double value) const {
508     SkPdfNativeObject* obj = fAllocator->allocObject();
509     SkPdfNativeObject::makeReal(value, obj);
510     TRACK_OBJECT_SRC(obj);
511     return (SkPdfReal*)obj;
512 }
513 
createInteger(int value) const514 SkPdfInteger* SkPdfNativeDoc::createInteger(int value) const {
515     SkPdfNativeObject* obj = fAllocator->allocObject();
516     SkPdfNativeObject::makeInteger(value, obj);
517     TRACK_OBJECT_SRC(obj);
518     return (SkPdfInteger*)obj;
519 }
520 
createString(const unsigned char * sz,size_t len) const521 SkPdfString* SkPdfNativeDoc::createString(const unsigned char* sz, size_t len) const {
522     SkPdfNativeObject* obj = fAllocator->allocObject();
523     SkPdfNativeObject::makeString(sz, len, obj);
524     TRACK_OBJECT_SRC(obj);
525     return (SkPdfString*)obj;
526 }
527 
allocator() const528 SkPdfAllocator* SkPdfNativeDoc::allocator() const {
529     return fAllocator;
530 }
531 
resolveReference(SkPdfNativeObject * ref)532 SkPdfNativeObject* SkPdfNativeDoc::resolveReference(SkPdfNativeObject* ref) {
533     if (ref && ref->isReference()) {
534         int id = ref->referenceId();
535         // TODO(edisonn): generation/updates not supported now
536         //int gen = ref->referenceGeneration();
537 
538         // TODO(edisonn): verify id and gen expected
539         if (id < 0 || id >= fObjects.count()) {
540             SkPdfReport(kIgnoreError_SkPdfIssueSeverity, kReadStreamError_SkPdfIssue,
541                         "resolve reference id out of bounds", NULL, NULL);
542             return NULL;
543         }
544 
545         if (fObjects[id].fIsReferenceResolved) {
546             SkPdfReportIf(!fObjects[id].fResolvedReference, kIgnoreError_SkPdfIssueSeverity,
547                           kBadReference_SkPdfIssue, "ref is NULL", NULL, NULL);
548             return fObjects[id].fResolvedReference;
549         }
550 
551         // TODO(edisonn): there are pdfs in the crashing suite that cause a stack overflow
552         // here unless we check for resolved reference on next line.
553         // Determine if the pdf is corrupted, or we have a bug here.
554 
555         // Avoids recursive calls
556         fObjects[id].fIsReferenceResolved = true;
557 
558         if (fObjects[id].fObj == NULL) {
559             fObjects[id].fObj = readObject(id);
560         }
561 
562         if (fObjects[id].fObj != NULL && fObjects[id].fResolvedReference == NULL) {
563             if (!fObjects[id].fObj->isReference()) {
564                 fObjects[id].fResolvedReference = fObjects[id].fObj;
565             } else {
566                 fObjects[id].fResolvedReference = resolveReference(fObjects[id].fObj);
567             }
568         }
569 
570         return fObjects[id].fResolvedReference;
571     }
572 
573     return (SkPdfNativeObject*)ref;
574 }
575 
bytesUsed() const576 size_t SkPdfNativeDoc::bytesUsed() const {
577     return fAllocator->bytesUsed() +
578            fContentLength +
579            fObjects.count() * sizeof(PublicObjectEntry) +
580            fPages.count() * sizeof(SkPdfPageObjectDictionary*) +
581            sizeof(*this);
582 }
583