1 /*
2  * Copyright 2013 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #ifndef SkPdfNativeTokenizer_DEFINED
9 #define SkPdfNativeTokenizer_DEFINED
10 
#include <math.h>
#include <stdlib.h>
#include <string.h>

#include "SkPdfConfig.h"
#include "SkTDArray.h"
#include "SkTDict.h"
17 
18 // All these constants are defined by the PDF 1.4 Spec.
19 
20 class SkPdfDictionary;
21 class SkPdfImageDictionary;
22 class SkPdfNativeDoc;
23 class SkPdfNativeObject;
24 
25 
26 // White Spaces
#define kNUL_PdfWhiteSpace '\x00'
#define kHT_PdfWhiteSpace  '\x09'
#define kLF_PdfWhiteSpace  '\x0A'
#define kFF_PdfWhiteSpace  '\x0C'
#define kCR_PdfWhiteSpace  '\x0D'
#define kSP_PdfWhiteSpace  '\x20'

// PdfDelimiters
#define kOpenedRoundBracket_PdfDelimiter        '('
#define kClosedRoundBracket_PdfDelimiter        ')'
#define kOpenedInequityBracket_PdfDelimiter     '<'
#define kClosedInequityBracket_PdfDelimiter     '>'
#define kOpenedSquareBracket_PdfDelimiter       '['
#define kClosedSquareBracket_PdfDelimiter       ']'
#define kOpenedCurlyBracket_PdfDelimiter        '{'
#define kClosedCurlyBracket_PdfDelimiter        '}'
#define kNamed_PdfDelimiter                     '/'
#define kComment_PdfDelimiter                   '%'

#define kEscape_PdfSpecial                      '\\'
#define kBackspace_PdfSpecial                   '\x08'

// Character classification helpers.
//
// These are inline functions rather than function-like macros so that the argument is evaluated
// exactly once: an expression with side effects, such as isPdfWhiteSpace(*p++), advances p a
// single time. The parameter is an int (as in <ctype.h>), so char and unsigned char arguments are
// promoted exactly as they were when they were compared against the character constants directly.
//
// TODO(edisonn): what is the fastest way for a compiler/machine type to evaluate these
// expressions? We should evaluate all options; it might even differ from one machine to another.
// 1) expand the expression, let the compiler optimize it
// 2) binary search
// 3) linear search in an array
// 4) vector (e.g. T type[256] .. return type[ch] ...)
// 5) manually build the expression with the least number of operators, e.g. for consecutive
// chars, we can use a binary equal ignoring the last bit

// Returns true if ch is one of the six white-space characters defined above
// (NUL, HT, LF, FF, CR, SP).
static inline bool isPdfWhiteSpace(int ch) {
    switch (ch) {
        case kNUL_PdfWhiteSpace:
        case kHT_PdfWhiteSpace:
        case kLF_PdfWhiteSpace:
        case kFF_PdfWhiteSpace:
        case kCR_PdfWhiteSpace:
        case kSP_PdfWhiteSpace:
            return true;
        default:
            return false;
    }
}

// Returns true if ch is an end-of-line character (LF or CR).
static inline bool isPdfEOL(int ch) {
    return ch == kLF_PdfWhiteSpace || ch == kCR_PdfWhiteSpace;
}

// Returns true if ch is one of the ten delimiter characters defined above:
// ( ) < > [ ] { } / %
static inline bool isPdfDelimiter(int ch) {
    switch (ch) {
        case kOpenedRoundBracket_PdfDelimiter:
        case kClosedRoundBracket_PdfDelimiter:
        case kOpenedInequityBracket_PdfDelimiter:
        case kClosedInequityBracket_PdfDelimiter:
        case kOpenedSquareBracket_PdfDelimiter:
        case kClosedSquareBracket_PdfDelimiter:
        case kOpenedCurlyBracket_PdfDelimiter:
        case kClosedCurlyBracket_PdfDelimiter:
        case kNamed_PdfDelimiter:
        case kComment_PdfDelimiter:
            return true;
        default:
            return false;
    }
}

// Returns true if ch is either a white-space character or a delimiter.
static inline bool isPdfWhiteSpaceOrPdfDelimiter(int ch) {
    return isPdfWhiteSpace(ch) || isPdfDelimiter(ch);
}

// Returns true if ch is an ASCII decimal digit ('0'..'9').
static inline bool isPdfDigit(int ch) {
    return ch >= '0' && ch <= '9';
}

// Returns true if ch can be part of a number: a digit, a sign ('+' or '-'), or '.'.
static inline bool isPdfNumeric(int ch) {
    return isPdfDigit(ch) || ch == '+' || ch == '-' || ch == '.';
}
82 
// Scanning helpers; their definitions live outside this header.
// NOTE(review): judging by the names and signatures, each takes a cursor plus a pointer marking
// the end of the buffer and returns the advanced cursor -- confirm against the implementation.
const unsigned char* skipPdfWhiteSpaces(const unsigned char* buffer,
                                        const unsigned char* end);
const unsigned char* endOfPdfToken(const unsigned char* start,
                                   const unsigned char* end);

// Not referenced anywhere in this header.
// NOTE(review): presumably the number of objects in each block handed out by
// SkPdfAllocator::allocBlock() -- TODO confirm against the implementation.
#define BUFFER_SIZE 1024
87 
88 /** \class SkPdfAllocator
89  *
90  *   An allocator only allocates memory, and it deletes it all when the allocator is destroyed.
91  *   This strategy would allow us not to do any garbage collection while we parse and/or render
92  *   a pdf.
93  *
94  */
95 class SkPdfAllocator {
96 public:
SkPdfAllocator()97     SkPdfAllocator() {
98         fSizeInBytes = sizeof(*this);
99         fCurrent = allocBlock();
100         fCurrentUsed = 0;
101     }
102 
103     ~SkPdfAllocator();
104 
105     // Allocates an object. It will be reset automatically when ~SkPdfAllocator() is called.
106     SkPdfNativeObject* allocObject();
107 
108     // Allocates a buffer. It will be freed automatically when ~SkPdfAllocator() is called.
alloc(size_t bytes)109     void* alloc(size_t bytes) {
110         void* data = malloc(bytes);
111         fHandles.push(data);
112         fSizeInBytes += bytes;
113         return data;
114     }
115 
116     // Returns the number of bytes used in this allocator.
bytesUsed()117     size_t bytesUsed() const {
118         return fSizeInBytes;
119     }
120 
121 private:
122     SkTDArray<SkPdfNativeObject*> fHistory;
123     SkTDArray<void*> fHandles;
124     SkPdfNativeObject* fCurrent;
125     int fCurrentUsed;
126 
127     SkPdfNativeObject* allocBlock();
128     size_t fSizeInBytes;
129 };
130 
// Kind of token produced by the tokenizer: a keyword or an object.
enum SkPdfTokenType {
    kKeyword_TokenType = 0,  // The token is a keyword.
    kObject_TokenType  = 1   // The token is an object.
};
136 
137 
138 /** \struct PdfToken
139  *
140  *   Stores the result of the parsing - a keyword or an object.
141  *
142  */
143 struct PdfToken {
144     const char*             fKeyword;
145     size_t                  fKeywordLength;
146     SkPdfNativeObject*      fObject;
147     SkPdfTokenType          fType;
148 
PdfTokenPdfToken149     PdfToken() : fKeyword(NULL), fKeywordLength(0), fObject(NULL) {}
150 };
151 
152 /** \class SkPdfNativeTokenizer
153  *
154  *   Responsible to tokenize a stream in small tokens, eityh a keyword or an object.
155  *   A renderer can feed on the tokens and render a pdf.
156  *
157  */
158 class SkPdfNativeTokenizer {
159 public:
160     SkPdfNativeTokenizer(SkPdfNativeObject* objWithStream,
161                          SkPdfAllocator* allocator, SkPdfNativeDoc* doc);
162     SkPdfNativeTokenizer(const unsigned char* buffer, int len,
163                          SkPdfAllocator* allocator, SkPdfNativeDoc* doc);
164 
165     virtual ~SkPdfNativeTokenizer();
166 
167     // Reads one token. Returns false if there are no more tokens.
168     // If writeDiff is true, and a token was read, create a PNG highlighting
169     // the difference caused by this command in /tmp/log_step_by_step.
170     // If PDF_TRACE_DIFF_IN_PNG is not defined, writeDiff does nothing.
171     bool readToken(PdfToken* token, bool writeDiff = false);
172 
173     // Put back a token to be read in the nextToken read. Only one token is allowed to be put
174     // back. Must not necesaarely be the last token read.
175     void PutBack(PdfToken token);
176 
177     // Reads the inline image that is present in the stream. At this point we just consumed the ID
178     // token already.
179     SkPdfImageDictionary* readInlineImage();
180 
181 private:
182     bool readTokenCore(PdfToken* token);
183 
184     SkPdfNativeDoc* fDoc;
185     SkPdfAllocator* fAllocator;
186 
187     const unsigned char* fUncompressedStreamStart;
188     const unsigned char* fUncompressedStream;
189     const unsigned char* fUncompressedStreamEnd;
190 
191     bool fEmpty;
192     bool fHasPutBack;
193     PdfToken fPutBack;
194 };
195 
196 const unsigned char* nextObject(const unsigned char* start, const unsigned char* end,
197                                 SkPdfNativeObject* token,
198                                 SkPdfAllocator* allocator,
199                                 SkPdfNativeDoc* doc);
200 
201 #endif  // SkPdfNativeTokenizer_DEFINED
202