1 /*
2  * Copyright 2013 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #ifndef SkPdfNativeDoc_DEFINED
9 #define SkPdfNativeDoc_DEFINED
10 
11 #include "SkRect.h"
12 #include "SkTDArray.h"
13 
// Forward declarations. This header refers to these types only through pointers (or not at
// all), so their complete definitions are not needed here.

class SkCanvas;
class SkStream;

// SkPdf* types, in alphabetical order.
class SkPdfAllocator;
class SkPdfCatalogDictionary;
class SkPdfInteger;
class SkPdfMapper;
class SkPdfNativeObject;
class SkPdfNativeTokenizer;
class SkPdfPageObjectDictionary;
class SkPdfPageTreeNodeDictionary;
class SkPdfReal;
class SkPdfResourceDictionary;
class SkPdfString;
30 
// TODO(edisonn): Implement a smart stream that can seek, and that can also fall back to reading
// the bytes in order. For example, we can try to read the stream optimistically, but if there
// are issues in the pdf, we must read the pdf from the beginning, and fix whatever errors we can.
// This would be useful to quickly show page 100 of a pdf (www.example.com/foo.pdf#page100).
// But if the pdf is missing the xref, then we will have to read most of the pdf to be able to
// render page 100.
37 
38 /** \class SkPdfNativeDoc
39  *
40  *  The SkPdfNativeDoc class is used to load a PDF in memory and it represents a PDF Document.
41  *
42  */
43 class SkPdfNativeDoc {
44 private:
45     // Information about public objects in pdf that can be referenced with ID GEN R
46     struct PublicObjectEntry {
47         // Offset in the file where the object starts.
48         long fOffset;
49 
50         // Offset in file where the object ends. Could be used to quickly fail if there is a
51         // problem in pdf structure.
52         // long endOffset;  // TODO(edisonn): determine the end of the object,
53                             // to be used when the doc is corrupted, for fast failure.
54 
55         // Refered object.
56         SkPdfNativeObject* fObj;
57 
58         // If refered object is a reference, we resolve recursively the reference until we find
59         // the real object.
60         SkPdfNativeObject* fResolvedReference;
61 
62         // Used to break a recursive reference to itself.
63         bool fIsReferenceResolved;
64     };
65 
66 public:
67     // TODO(edisonn) should be deprecated
68     SkPdfNativeDoc(const char* path);
69 
70     // TODO(edisonn) should be deprecated
71     // FIXME: Untested.
72     // Does not affect ownership of stream.
73     SkPdfNativeDoc(SkStream* stream);
74 
75     ~SkPdfNativeDoc();
76 
77     // returns the number of pages in the pdf
78     int pages() const;
79 
80     // returns the page resources
81     SkPdfResourceDictionary* pageResources(int page);
82 
83     // returns the page's mediabox i points - the page physical boundaries.
84     SkRect MediaBox(int page);
85 
86     //returns objects that are references and can be queried.
87     size_t objects() const;
88 
89     // returns an object.
90     // TODO(edisonn): pdf updates are not supported yet.
91     //                add generation parameter to support page updates.
92     SkPdfNativeObject* object(int id /*, int generation*/ );
93 
94     // returns the object that holds all the page informnation
95     // TODO(edisonn): pdf updates are not supported yet.
96     //                add generation parameter to support page updates.
97     SkPdfPageObjectDictionary* page(int page/*, int generation*/);
98 
99     // TODO(edisonn): deprecate the mapper - was used when we supported multiple
100     // parsers (podofo)
101     // The mapper maps allows an object to be mapped to a different dictionary type
102     // and it could verify the integrity of the object.
103     const SkPdfMapper* mapper() const;
104 
105     // Allocator of the pdf - this holds all objects that are publicly referenced
106     // and all the objects that they refer
107     SkPdfAllocator* allocator() const;
108 
109     // Allows a renderer to create values to be dumped on the stack for operators to process them.
110     SkPdfReal* createReal(double value) const;
111     SkPdfInteger* createInteger(int value) const;
112     // the string does not own the char*
113     SkPdfString* createString(const unsigned char* sz, size_t len) const;
114 
115     // Resolve a reference object. Will recursively resolve the reference
116     // until a real object is found
117     SkPdfNativeObject* resolveReference(SkPdfNativeObject* ref);
118 
119     // Reports an approximation of all the memory usage.
120     size_t bytesUsed() const;
121 
122 private:
123 
124     // Takes ownership of bytes.
125     void init(const void* bytes, size_t length);
126 
127     // loads a pdf that has missing xref
128     void loadWithoutXRef();
129 
130     const unsigned char* readCrossReferenceSection(const unsigned char* xrefStart,
131                                                    const unsigned char* trailerEnd);
132     const unsigned char* readTrailer(const unsigned char* trailerStart,
133                                      const unsigned char* trailerEnd,
134                                      bool storeCatalog, long* prev, bool skipKeyword);
135 
136     // TODO(edisonn): pdfs with updates not supported right now, generation ignored.
137     void addCrossSectionInfo(int id, int generation, int offset, bool isFreed);
reset(PublicObjectEntry * obj)138     static void reset(PublicObjectEntry* obj) {
139         obj->fObj = NULL;
140         obj->fResolvedReference = NULL;
141         obj->fOffset = -1;
142         obj->fIsReferenceResolved = false;
143     }
144 
145     SkPdfNativeObject* readObject(int id/*, int generation*/);
146 
147     void fillPages(SkPdfPageTreeNodeDictionary* tree);
148 
149     SkPdfAllocator* fAllocator;
150     SkPdfMapper* fMapper;
151     const unsigned char* fFileContent;
152     size_t fContentLength;
153     SkPdfNativeObject* fRootCatalogRef;
154     SkPdfCatalogDictionary* fRootCatalog;
155 
156     mutable SkTDArray<PublicObjectEntry> fObjects;
157     SkTDArray<SkPdfPageObjectDictionary*> fPages;
158 };
159 
160 #endif  // SkPdfNativeDoc_DEFINED
161