1 // Copyright 2015 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "core/include/fpdfapi/fpdf_parser.h"
6 #include "core/include/fxcrt/fx_stream.h"
7 #include "testing/gtest/include/gtest/gtest.h"
8 #include "testing/utils/path_service.h"
9 
10 // Functions to help test an array's content against expected results.
11 template <class TYPE>
CompareArray(const CFX_ArrayTemplate<TYPE> & array1,const TYPE * array2,size_t size)12 bool CompareArray(const CFX_ArrayTemplate<TYPE>& array1,
13                   const TYPE* array2,
14                   size_t size) {
15   if (array1.GetSize() != size)
16     return false;
17 
18   for (int i = 0; i < size; ++i)
19     if (array1.GetAt(i) != array2[i])
20       return false;
21   return true;
22 }
23 
24 // Provide a way to read test data from a buffer instead of a file.
25 class CFX_TestBufferRead : public IFX_FileRead {
26  public:
CFX_TestBufferRead(const unsigned char * buffer_in,size_t buf_size)27   CFX_TestBufferRead(const unsigned char* buffer_in, size_t buf_size)
28       : buffer_(buffer_in), total_size_(buf_size) {}
29 
30   // IFX_Stream
Release()31   void Release() override { delete this; }
32 
33   // IFX_FileRead
ReadBlock(void * buffer,FX_FILESIZE offset,size_t size)34   FX_BOOL ReadBlock(void* buffer, FX_FILESIZE offset, size_t size) override {
35     if (offset < 0 || offset + size > total_size_) {
36       return FALSE;
37     }
38 
39     memcpy(buffer, buffer_ + offset, size);
40     return TRUE;
41   }
GetSize()42   FX_FILESIZE GetSize() override { return (FX_FILESIZE)total_size_; };
43 
44  protected:
45   const unsigned char* buffer_;
46   size_t total_size_;
47 };
48 
49 // A wrapper class to help test member functions of CPDF_Parser.
50 class CPDF_TestParser : public CPDF_Parser {
51  public:
CPDF_TestParser()52   CPDF_TestParser() {}
~CPDF_TestParser()53   ~CPDF_TestParser() {}
54 
55   // Setup reading from a file and initial states.
InitTestFromFile(const FX_CHAR * path)56   bool InitTestFromFile(const FX_CHAR* path) {
57     IFX_FileRead* pFileAccess = FX_CreateFileRead(path);
58     if (!pFileAccess)
59       return false;
60 
61     // For the test file, the header is set at the beginning.
62     m_Syntax.InitParser(pFileAccess, 0);
63     return true;
64   }
65 
66   // Setup reading from a buffer and initial states.
InitTestFromBuffer(const unsigned char * buffer,size_t len)67   bool InitTestFromBuffer(const unsigned char* buffer, size_t len) {
68     CFX_TestBufferRead* buffer_reader = new CFX_TestBufferRead(buffer, len);
69 
70     // For the test file, the header is set at the beginning.
71     m_Syntax.InitParser(buffer_reader, 0);
72     return true;
73   }
74 
75  private:
76   // Add test cases here as private friend so that protected members in
77   // CPDF_Parser can be accessed by test cases.
78   // Need to access RebuildCrossRef.
79   FRIEND_TEST(fpdf_parser_parser, RebuildCrossRefCorrectly);
80   FRIEND_TEST(fpdf_parser_parser, RebuildCrossRefFailed);
81   // Need to access LoadCrossRefV4.
82   FRIEND_TEST(fpdf_parser_parser, LoadCrossRefV4);
83 };
84 
85 // TODO(thestig) Using unique_ptr with ReleaseDeleter is still not ideal.
86 // Come up or wait for something better.
87 using ScopedFileStream =
88     std::unique_ptr<IFX_FileStream, ReleaseDeleter<IFX_FileStream>>;
89 
TEST(fpdf_parser_parser,ReadHexString)90 TEST(fpdf_parser_parser, ReadHexString) {
91   {
92     // Empty string.
93     uint8_t data[] = "";
94     ScopedFileStream stream(FX_CreateMemoryStream(data, 0, FALSE));
95 
96     CPDF_SyntaxParser parser;
97     parser.InitParser(stream.get(), 0);
98     EXPECT_EQ("", parser.ReadHexString());
99     EXPECT_EQ(0, parser.SavePos());
100   }
101 
102   {
103     // Blank string.
104     uint8_t data[] = "  ";
105     ScopedFileStream stream(FX_CreateMemoryStream(data, 2, FALSE));
106 
107     CPDF_SyntaxParser parser;
108     parser.InitParser(stream.get(), 0);
109     EXPECT_EQ("", parser.ReadHexString());
110     EXPECT_EQ(2, parser.SavePos());
111   }
112 
113   {
114     // Skips unknown characters.
115     uint8_t data[] = "z12b";
116     ScopedFileStream stream(FX_CreateMemoryStream(data, 4, FALSE));
117 
118     CPDF_SyntaxParser parser;
119     parser.InitParser(stream.get(), 0);
120     EXPECT_EQ("\x12\xb0", parser.ReadHexString());
121     EXPECT_EQ(4, parser.SavePos());
122   }
123 
124   {
125     // Skips unknown characters.
126     uint8_t data[] = "*<&*#$^&@1";
127     ScopedFileStream stream(FX_CreateMemoryStream(data, 10, FALSE));
128 
129     CPDF_SyntaxParser parser;
130     parser.InitParser(stream.get(), 0);
131     EXPECT_EQ("\x10", parser.ReadHexString());
132     EXPECT_EQ(10, parser.SavePos());
133   }
134 
135   {
136     // Skips unknown characters.
137     uint8_t data[] = "\x80zab";
138     ScopedFileStream stream(FX_CreateMemoryStream(data, 4, FALSE));
139 
140     CPDF_SyntaxParser parser;
141     parser.InitParser(stream.get(), 0);
142     EXPECT_EQ("\xab", parser.ReadHexString());
143     EXPECT_EQ(4, parser.SavePos());
144   }
145 
146   {
147     // Skips unknown characters.
148     uint8_t data[] = "\xffzab";
149     ScopedFileStream stream(FX_CreateMemoryStream(data, 4, FALSE));
150 
151     CPDF_SyntaxParser parser;
152     parser.InitParser(stream.get(), 0);
153     EXPECT_EQ("\xab", parser.ReadHexString());
154     EXPECT_EQ(4, parser.SavePos());
155   }
156 
157   {
158     // Regular conversion.
159     uint8_t data[] = "1A2b>abcd";
160     ScopedFileStream stream(FX_CreateMemoryStream(data, 9, FALSE));
161 
162     CPDF_SyntaxParser parser;
163     parser.InitParser(stream.get(), 0);
164     EXPECT_EQ("\x1a\x2b", parser.ReadHexString());
165     EXPECT_EQ(5, parser.SavePos());
166   }
167 
168   {
169     // Position out of bounds.
170     uint8_t data[] = "12ab>";
171     ScopedFileStream stream(FX_CreateMemoryStream(data, 5, FALSE));
172 
173     CPDF_SyntaxParser parser;
174     parser.InitParser(stream.get(), 0);
175     parser.RestorePos(5);
176     EXPECT_EQ("", parser.ReadHexString());
177 
178     parser.RestorePos(6);
179     EXPECT_EQ("", parser.ReadHexString());
180 
181     parser.RestorePos(-1);
182     EXPECT_EQ("", parser.ReadHexString());
183 
184     parser.RestorePos(std::numeric_limits<FX_FILESIZE>::max());
185     EXPECT_EQ("", parser.ReadHexString());
186 
187     // Check string still parses when set to 0.
188     parser.RestorePos(0);
189     EXPECT_EQ("\x12\xab", parser.ReadHexString());
190   }
191 
192   {
193     // Missing ending >.
194     uint8_t data[] = "1A2b";
195     ScopedFileStream stream(FX_CreateMemoryStream(data, 4, FALSE));
196 
197     CPDF_SyntaxParser parser;
198     parser.InitParser(stream.get(), 0);
199     EXPECT_EQ("\x1a\x2b", parser.ReadHexString());
200     EXPECT_EQ(4, parser.SavePos());
201   }
202 
203   {
204     // Missing ending >.
205     uint8_t data[] = "12abz";
206     ScopedFileStream stream(FX_CreateMemoryStream(data, 5, FALSE));
207 
208     CPDF_SyntaxParser parser;
209     parser.InitParser(stream.get(), 0);
210     EXPECT_EQ("\x12\xab", parser.ReadHexString());
211     EXPECT_EQ(5, parser.SavePos());
212   }
213 
214   {
215     // Uneven number of bytes.
216     uint8_t data[] = "1A2>asdf";
217     ScopedFileStream stream(FX_CreateMemoryStream(data, 8, FALSE));
218 
219     CPDF_SyntaxParser parser;
220     parser.InitParser(stream.get(), 0);
221     EXPECT_EQ("\x1a\x20", parser.ReadHexString());
222     EXPECT_EQ(4, parser.SavePos());
223   }
224 
225   {
226     // Uneven number of bytes.
227     uint8_t data[] = "1A2zasdf";
228     ScopedFileStream stream(FX_CreateMemoryStream(data, 8, FALSE));
229 
230     CPDF_SyntaxParser parser;
231     parser.InitParser(stream.get(), 0);
232     EXPECT_EQ("\x1a\x2a\xdf", parser.ReadHexString());
233     EXPECT_EQ(8, parser.SavePos());
234   }
235 
236   {
237     // Just ending character.
238     uint8_t data[] = ">";
239     ScopedFileStream stream(FX_CreateMemoryStream(data, 1, FALSE));
240 
241     CPDF_SyntaxParser parser;
242     parser.InitParser(stream.get(), 0);
243     EXPECT_EQ("", parser.ReadHexString());
244     EXPECT_EQ(1, parser.SavePos());
245   }
246 }
247 
// Rebuilds the cross reference table of parser_rebuildxref_correct.pdf and
// compares the recovered object offsets and versions with the expected ones.
TEST(fpdf_parser_parser, RebuildCrossRefCorrectly) {
  std::string pdf_path;
  ASSERT_TRUE(PathService::GetTestFilePath("parser_rebuildxref_correct.pdf",
                                           &pdf_path));

  CPDF_TestParser parser;
  ASSERT_TRUE(parser.InitTestFromFile(pdf_path.c_str())) << pdf_path;
  ASSERT_TRUE(parser.RebuildCrossRef());

  // The array index doubles as the object number being looked up.
  const FX_FILESIZE expected_offsets[] = {0, 15, 61, 154, 296, 374, 450};
  size_t obj_num = 0;
  for (const FX_FILESIZE expected_pos : expected_offsets) {
    EXPECT_EQ(expected_pos, parser.m_ObjectInfo[obj_num].pos);
    ++obj_num;
  }

  const FX_WORD expected_versions[] = {0, 0, 2, 4, 6, 8, 0};
  ASSERT_TRUE(CompareArray(parser.m_ObjVersion, expected_versions,
                           FX_ArraySize(expected_versions)));
}
263 
// RebuildCrossRef() is expected to report failure for
// parser_rebuildxref_error_notrailer.pdf.
TEST(fpdf_parser_parser, RebuildCrossRefFailed) {
  std::string pdf_path;
  ASSERT_TRUE(PathService::GetTestFilePath(
      "parser_rebuildxref_error_notrailer.pdf", &pdf_path));

  CPDF_TestParser parser;
  ASSERT_TRUE(parser.InitTestFromFile(pdf_path.c_str())) << pdf_path;
  ASSERT_FALSE(parser.RebuildCrossRef());
}
273 
TEST(fpdf_parser_parser,LoadCrossRefV4)274 TEST(fpdf_parser_parser, LoadCrossRefV4) {
275   {
276     const unsigned char xref_table[] =
277         "xref \n"
278         "0 6 \n"
279         "0000000003 65535 f \n"
280         "0000000017 00000 n \n"
281         "0000000081 00000 n \n"
282         "0000000000 00007 f \n"
283         "0000000331 00000 n \n"
284         "0000000409 00000 n \n"
285         "trail";  // Needed to end cross ref table reading.
286     CPDF_TestParser parser;
287     ASSERT_TRUE(
288         parser.InitTestFromBuffer(xref_table, FX_ArraySize(xref_table)));
289 
290     ASSERT_TRUE(parser.LoadCrossRefV4(0, 0, FALSE));
291     const FX_FILESIZE offsets[] = {0, 17, 81, 0, 331, 409};
292     const uint8_t types[] = {0, 1, 1, 0, 1, 1};
293     for (size_t i = 0; i < FX_ArraySize(offsets); ++i)
294       EXPECT_EQ(offsets[i], parser.m_ObjectInfo[i].pos);
295     ASSERT_TRUE(CompareArray(parser.m_V5Type, types, FX_ArraySize(types)));
296   }
297   {
298     const unsigned char xref_table[] =
299         "xref \n"
300         "0 1 \n"
301         "0000000000 65535 f \n"
302         "3 1 \n"
303         "0000025325 00000 n \n"
304         "8 2 \n"
305         "0000025518 00002 n \n"
306         "0000025635 00000 n \n"
307         "12 1 \n"
308         "0000025777 00000 n \n"
309         "trail";  // Needed to end cross ref table reading.
310     CPDF_TestParser parser;
311     ASSERT_TRUE(
312         parser.InitTestFromBuffer(xref_table, FX_ArraySize(xref_table)));
313 
314     ASSERT_TRUE(parser.LoadCrossRefV4(0, 0, FALSE));
315     const FX_FILESIZE offsets[] = {0, 0,     0,     25325, 0, 0,    0,
316                                    0, 25518, 25635, 0,     0, 25777};
317     const uint8_t types[] = {0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1};
318     for (size_t i = 0; i < FX_ArraySize(offsets); ++i)
319       EXPECT_EQ(offsets[i], parser.m_ObjectInfo[i].pos);
320     ASSERT_TRUE(CompareArray(parser.m_V5Type, types, FX_ArraySize(types)));
321   }
322   {
323     const unsigned char xref_table[] =
324         "xref \n"
325         "0 1 \n"
326         "0000000000 65535 f \n"
327         "3 1 \n"
328         "0000025325 00000 n \n"
329         "8 2 \n"
330         "0000000000 65535 f \n"
331         "0000025635 00000 n \n"
332         "12 1 \n"
333         "0000025777 00000 n \n"
334         "trail";  // Needed to end cross ref table reading.
335     CPDF_TestParser parser;
336     ASSERT_TRUE(
337         parser.InitTestFromBuffer(xref_table, FX_ArraySize(xref_table)));
338 
339     ASSERT_TRUE(parser.LoadCrossRefV4(0, 0, FALSE));
340     const FX_FILESIZE offsets[] = {0, 0, 0,     25325, 0, 0,    0,
341                                    0, 0, 25635, 0,     0, 25777};
342     const uint8_t types[] = {0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1};
343     for (size_t i = 0; i < FX_ArraySize(offsets); ++i)
344       EXPECT_EQ(offsets[i], parser.m_ObjectInfo[i].pos);
345     ASSERT_TRUE(CompareArray(parser.m_V5Type, types, FX_ArraySize(types)));
346   }
347   {
348     const unsigned char xref_table[] =
349         "xref \n"
350         "0 7 \n"
351         "0000000002 65535 f \n"
352         "0000000023 00000 n \n"
353         "0000000003 65535 f \n"
354         "0000000004 65535 f \n"
355         "0000000000 65535 f \n"
356         "0000000045 00000 n \n"
357         "0000000179 00000 n \n"
358         "trail";  // Needed to end cross ref table reading.
359     CPDF_TestParser parser;
360     ASSERT_TRUE(
361         parser.InitTestFromBuffer(xref_table, FX_ArraySize(xref_table)));
362 
363     ASSERT_TRUE(parser.LoadCrossRefV4(0, 0, FALSE));
364     const FX_FILESIZE offsets[] = {0, 23, 0, 0, 0, 45, 179};
365     const uint8_t types[] = {0, 1, 0, 0, 0, 1, 1};
366     for (size_t i = 0; i < FX_ArraySize(offsets); ++i)
367       EXPECT_EQ(offsets[i], parser.m_ObjectInfo[i].pos);
368     ASSERT_TRUE(CompareArray(parser.m_V5Type, types, FX_ArraySize(types)));
369   }
370 }
371