1 // Copyright 2015 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
#include <cstdint>
#include <cstring>
#include <limits>
#include <memory>
#include <string>

#include "core/include/fpdfapi/fpdf_parser.h"
#include "core/include/fxcrt/fx_stream.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "testing/utils/path_service.h"
9
10 // Functions to help test an array's content against expected results.
11 template <class TYPE>
CompareArray(const CFX_ArrayTemplate<TYPE> & array1,const TYPE * array2,size_t size)12 bool CompareArray(const CFX_ArrayTemplate<TYPE>& array1,
13 const TYPE* array2,
14 size_t size) {
15 if (array1.GetSize() != size)
16 return false;
17
18 for (int i = 0; i < size; ++i)
19 if (array1.GetAt(i) != array2[i])
20 return false;
21 return true;
22 }
23
24 // Provide a way to read test data from a buffer instead of a file.
25 class CFX_TestBufferRead : public IFX_FileRead {
26 public:
CFX_TestBufferRead(const unsigned char * buffer_in,size_t buf_size)27 CFX_TestBufferRead(const unsigned char* buffer_in, size_t buf_size)
28 : buffer_(buffer_in), total_size_(buf_size) {}
29
30 // IFX_Stream
Release()31 void Release() override { delete this; }
32
33 // IFX_FileRead
ReadBlock(void * buffer,FX_FILESIZE offset,size_t size)34 FX_BOOL ReadBlock(void* buffer, FX_FILESIZE offset, size_t size) override {
35 if (offset < 0 || offset + size > total_size_) {
36 return FALSE;
37 }
38
39 memcpy(buffer, buffer_ + offset, size);
40 return TRUE;
41 }
GetSize()42 FX_FILESIZE GetSize() override { return (FX_FILESIZE)total_size_; };
43
44 protected:
45 const unsigned char* buffer_;
46 size_t total_size_;
47 };
48
49 // A wrapper class to help test member functions of CPDF_Parser.
50 class CPDF_TestParser : public CPDF_Parser {
51 public:
CPDF_TestParser()52 CPDF_TestParser() {}
~CPDF_TestParser()53 ~CPDF_TestParser() {}
54
55 // Setup reading from a file and initial states.
InitTestFromFile(const FX_CHAR * path)56 bool InitTestFromFile(const FX_CHAR* path) {
57 IFX_FileRead* pFileAccess = FX_CreateFileRead(path);
58 if (!pFileAccess)
59 return false;
60
61 // For the test file, the header is set at the beginning.
62 m_Syntax.InitParser(pFileAccess, 0);
63 return true;
64 }
65
66 // Setup reading from a buffer and initial states.
InitTestFromBuffer(const unsigned char * buffer,size_t len)67 bool InitTestFromBuffer(const unsigned char* buffer, size_t len) {
68 CFX_TestBufferRead* buffer_reader = new CFX_TestBufferRead(buffer, len);
69
70 // For the test file, the header is set at the beginning.
71 m_Syntax.InitParser(buffer_reader, 0);
72 return true;
73 }
74
75 private:
76 // Add test cases here as private friend so that protected members in
77 // CPDF_Parser can be accessed by test cases.
78 // Need to access RebuildCrossRef.
79 FRIEND_TEST(fpdf_parser_parser, RebuildCrossRefCorrectly);
80 FRIEND_TEST(fpdf_parser_parser, RebuildCrossRefFailed);
81 // Need to access LoadCrossRefV4.
82 FRIEND_TEST(fpdf_parser_parser, LoadCrossRefV4);
83 };
84
85 // TODO(thestig) Using unique_ptr with ReleaseDeleter is still not ideal.
86 // Come up or wait for something better.
87 using ScopedFileStream =
88 std::unique_ptr<IFX_FileStream, ReleaseDeleter<IFX_FileStream>>;
89
// Exercises CPDF_SyntaxParser::ReadHexString() on in-memory input.
//
// Most cases follow the same recipe: wrap the first |size| bytes of |data| in
// a memory stream, parse from the start, then check both the decoded string
// and the position the parser stopped at. Those cases live in a table; the
// position-out-of-bounds case needs extra steps and is kept separate below.
TEST(fpdf_parser_parser, ReadHexString) {
  struct HexStringCase {
    const char* description;   // Shown via SCOPED_TRACE when a check fails.
    uint8_t data[16];          // Input bytes; only the first |size| are used.
    size_t size;               // Number of bytes handed to the stream.
    const char* expected;      // Expected result of ReadHexString().
    FX_FILESIZE expected_pos;  // Expected SavePos() after the read.
  };

  // Intentionally non-const: the stream is created over the table's own
  // storage, just as each case previously used its own mutable array.
  HexStringCase cases[] = {
      {"Empty string", "", 0, "", 0},
      {"Blank string", " ", 2, "", 2},
      {"Skips unknown characters: leading letter", "z12b", 4, "\x12\xb0", 4},
      {"Skips unknown characters: punctuation", "*<&*#$^&@1", 10, "\x10", 10},
      {"Skips unknown characters: leading 0x80 byte", "\x80zab", 4, "\xab", 4},
      {"Skips unknown characters: leading 0xff byte", "\xffzab", 4, "\xab", 4},
      {"Regular conversion", "1A2b>abcd", 9, "\x1a\x2b", 5},
      {"Missing ending >: input ends after digits", "1A2b", 4, "\x1a\x2b", 4},
      {"Missing ending >: trailing unknown character", "12abz", 5, "\x12\xab",
       5},
      {"Uneven number of bytes: terminated by >", "1A2>asdf", 8, "\x1a\x20", 4},
      {"Uneven number of bytes: no terminator", "1A2zasdf", 8, "\x1a\x2a\xdf",
       8},
      {"Just ending character", ">", 1, "", 1},
  };

  for (HexStringCase& test_case : cases) {
    SCOPED_TRACE(test_case.description);
    ScopedFileStream stream(
        FX_CreateMemoryStream(test_case.data, test_case.size, FALSE));

    CPDF_SyntaxParser parser;
    parser.InitParser(stream.get(), 0);
    EXPECT_EQ(test_case.expected, parser.ReadHexString());
    EXPECT_EQ(test_case.expected_pos, parser.SavePos());
  }

  {
    // Position out of bounds.
    uint8_t data[] = "12ab>";
    ScopedFileStream stream(FX_CreateMemoryStream(data, 5, FALSE));

    CPDF_SyntaxParser parser;
    parser.InitParser(stream.get(), 0);

    // Reading from the end of the data, past it, from a negative position, or
    // from the largest representable position must all yield an empty string.
    parser.RestorePos(5);
    EXPECT_EQ("", parser.ReadHexString());

    parser.RestorePos(6);
    EXPECT_EQ("", parser.ReadHexString());

    parser.RestorePos(-1);
    EXPECT_EQ("", parser.ReadHexString());

    parser.RestorePos(std::numeric_limits<FX_FILESIZE>::max());
    EXPECT_EQ("", parser.ReadHexString());

    // Check string still parses when set to 0.
    parser.RestorePos(0);
    EXPECT_EQ("\x12\xab", parser.ReadHexString());
  }
}
247
// Rebuilds the cross reference table of a well-formed test PDF and checks the
// recovered object offsets and versions.
TEST(fpdf_parser_parser, RebuildCrossRefCorrectly) {
  CPDF_TestParser parser;
  std::string pdf_path;
  ASSERT_TRUE(PathService::GetTestFilePath("parser_rebuildxref_correct.pdf",
                                           &pdf_path));
  ASSERT_TRUE(parser.InitTestFromFile(pdf_path.c_str())) << pdf_path;

  ASSERT_TRUE(parser.RebuildCrossRef());

  // Expected values, indexed by object number.
  const FX_FILESIZE expected_offsets[] = {0, 15, 61, 154, 296, 374, 450};
  const FX_WORD expected_versions[] = {0, 0, 2, 4, 6, 8, 0};

  size_t obj_num = 0;
  for (const FX_FILESIZE expected_offset : expected_offsets) {
    EXPECT_EQ(expected_offset, parser.m_ObjectInfo[obj_num].pos);
    ++obj_num;
  }
  ASSERT_TRUE(CompareArray(parser.m_ObjVersion, expected_versions,
                           FX_ArraySize(expected_versions)));
}
263
// Rebuilding the cross reference table must report failure for the
// "parser_rebuildxref_error_notrailer.pdf" test file.
TEST(fpdf_parser_parser, RebuildCrossRefFailed) {
  CPDF_TestParser parser;
  std::string pdf_path;
  ASSERT_TRUE(PathService::GetTestFilePath(
      "parser_rebuildxref_error_notrailer.pdf", &pdf_path));
  ASSERT_TRUE(parser.InitTestFromFile(pdf_path.c_str())) << pdf_path;

  ASSERT_FALSE(parser.RebuildCrossRef());
}
273
// Feeds hand-written cross reference tables to LoadCrossRefV4() and checks the
// resulting per-object offsets and types. In every expectation below, in-use
// ("n") entries carry their listed offset and type 1, while free ("f") entries
// and object numbers absent from the table carry offset 0 and type 0.
TEST(fpdf_parser_parser, LoadCrossRefV4) {
  {
    // One subsection covering objects 0-5.
    const unsigned char xref_data[] =
        "xref \n"
        "0 6 \n"
        "0000000003 65535 f \n"
        "0000000017 00000 n \n"
        "0000000081 00000 n \n"
        "0000000000 00007 f \n"
        "0000000331 00000 n \n"
        "0000000409 00000 n \n"
        "trail";  // Needed to end cross ref table reading.
    CPDF_TestParser parser;
    ASSERT_TRUE(parser.InitTestFromBuffer(xref_data, FX_ArraySize(xref_data)));

    ASSERT_TRUE(parser.LoadCrossRefV4(0, 0, FALSE));
    const FX_FILESIZE expected_offsets[] = {0, 17, 81, 0, 331, 409};
    const uint8_t expected_types[] = {0, 1, 1, 0, 1, 1};
    size_t obj_num = 0;
    for (const FX_FILESIZE expected_offset : expected_offsets) {
      EXPECT_EQ(expected_offset, parser.m_ObjectInfo[obj_num].pos);
      ++obj_num;
    }
    ASSERT_TRUE(CompareArray(parser.m_V5Type, expected_types,
                             FX_ArraySize(expected_types)));
  }
  {
    // Several subsections with gaps between the object numbers they cover.
    const unsigned char xref_data[] =
        "xref \n"
        "0 1 \n"
        "0000000000 65535 f \n"
        "3 1 \n"
        "0000025325 00000 n \n"
        "8 2 \n"
        "0000025518 00002 n \n"
        "0000025635 00000 n \n"
        "12 1 \n"
        "0000025777 00000 n \n"
        "trail";  // Needed to end cross ref table reading.
    CPDF_TestParser parser;
    ASSERT_TRUE(parser.InitTestFromBuffer(xref_data, FX_ArraySize(xref_data)));

    ASSERT_TRUE(parser.LoadCrossRefV4(0, 0, FALSE));
    const FX_FILESIZE expected_offsets[] = {0, 0, 0,     25325, 0, 0,    0,
                                            0, 25518, 25635, 0, 0, 25777};
    const uint8_t expected_types[] = {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1};
    size_t obj_num = 0;
    for (const FX_FILESIZE expected_offset : expected_offsets) {
      EXPECT_EQ(expected_offset, parser.m_ObjectInfo[obj_num].pos);
      ++obj_num;
    }
    ASSERT_TRUE(CompareArray(parser.m_V5Type, expected_types,
                             FX_ArraySize(expected_types)));
  }
  {
    // Same layout as above, but object 8 is listed as a free entry.
    const unsigned char xref_data[] =
        "xref \n"
        "0 1 \n"
        "0000000000 65535 f \n"
        "3 1 \n"
        "0000025325 00000 n \n"
        "8 2 \n"
        "0000000000 65535 f \n"
        "0000025635 00000 n \n"
        "12 1 \n"
        "0000025777 00000 n \n"
        "trail";  // Needed to end cross ref table reading.
    CPDF_TestParser parser;
    ASSERT_TRUE(parser.InitTestFromBuffer(xref_data, FX_ArraySize(xref_data)));

    ASSERT_TRUE(parser.LoadCrossRefV4(0, 0, FALSE));
    const FX_FILESIZE expected_offsets[] = {0, 0, 0, 25325, 0, 0,    0,
                                            0, 0, 25635, 0, 0, 25777};
    const uint8_t expected_types[] = {0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1};
    size_t obj_num = 0;
    for (const FX_FILESIZE expected_offset : expected_offsets) {
      EXPECT_EQ(expected_offset, parser.m_ObjectInfo[obj_num].pos);
      ++obj_num;
    }
    ASSERT_TRUE(CompareArray(parser.m_V5Type, expected_types,
                             FX_ArraySize(expected_types)));
  }
  {
    // One subsection in which several free entries have non-zero first fields.
    const unsigned char xref_data[] =
        "xref \n"
        "0 7 \n"
        "0000000002 65535 f \n"
        "0000000023 00000 n \n"
        "0000000003 65535 f \n"
        "0000000004 65535 f \n"
        "0000000000 65535 f \n"
        "0000000045 00000 n \n"
        "0000000179 00000 n \n"
        "trail";  // Needed to end cross ref table reading.
    CPDF_TestParser parser;
    ASSERT_TRUE(parser.InitTestFromBuffer(xref_data, FX_ArraySize(xref_data)));

    ASSERT_TRUE(parser.LoadCrossRefV4(0, 0, FALSE));
    const FX_FILESIZE expected_offsets[] = {0, 23, 0, 0, 0, 45, 179};
    const uint8_t expected_types[] = {0, 1, 0, 0, 0, 1, 1};
    size_t obj_num = 0;
    for (const FX_FILESIZE expected_offset : expected_offsets) {
      EXPECT_EQ(expected_offset, parser.m_ObjectInfo[obj_num].pos);
      ++obj_num;
    }
    ASSERT_TRUE(CompareArray(parser.m_V5Type, expected_types,
                             FX_ArraySize(expected_types)));
  }
}
371