• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  // Copyright 2015 PDFium Authors. All rights reserved.
2  // Use of this source code is governed by a BSD-style license that can be
3  // found in the LICENSE file.
4  
5  #include <algorithm>
6  #include <memory>
7  #include <string>
8  #include <utility>
9  #include <vector>
10  
11  #include "core/fxcrt/bytestring.h"
12  #include "core/fxcrt/widestring.h"
13  #include "public/fpdfview.h"
14  #include "testing/embedder_test.h"
15  #include "testing/gtest/include/gtest/gtest.h"
16  #include "testing/range_set.h"
17  #include "testing/utils/file_util.h"
18  #include "testing/utils/path_service.h"
19  
20  namespace {
21  
22  class MockDownloadHints final : public FX_DOWNLOADHINTS {
23   public:
SAddSegment(FX_DOWNLOADHINTS * pThis,size_t offset,size_t size)24    static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) {
25    }
26  
MockDownloadHints()27    MockDownloadHints() {
28      FX_DOWNLOADHINTS::version = 1;
29      FX_DOWNLOADHINTS::AddSegment = SAddSegment;
30    }
31  
32    ~MockDownloadHints() = default;
33  };
34  
35  class TestAsyncLoader final : public FX_DOWNLOADHINTS, FX_FILEAVAIL {
36   public:
TestAsyncLoader(const std::string & file_name)37    explicit TestAsyncLoader(const std::string& file_name) {
38      std::string file_path;
39      if (!PathService::GetTestFilePath(file_name, &file_path))
40        return;
41      file_contents_ = GetFileContents(file_path.c_str(), &file_length_);
42      if (!file_contents_)
43        return;
44  
45      file_access_.m_FileLen = static_cast<unsigned long>(file_length_);
46      file_access_.m_GetBlock = SGetBlock;
47      file_access_.m_Param = this;
48  
49      FX_DOWNLOADHINTS::version = 1;
50      FX_DOWNLOADHINTS::AddSegment = SAddSegment;
51  
52      FX_FILEAVAIL::version = 1;
53      FX_FILEAVAIL::IsDataAvail = SIsDataAvail;
54    }
55  
IsOpened() const56    bool IsOpened() const { return !!file_contents_; }
57  
file_access()58    FPDF_FILEACCESS* file_access() { return &file_access_; }
hints()59    FX_DOWNLOADHINTS* hints() { return this; }
file_avail()60    FX_FILEAVAIL* file_avail() { return this; }
61  
requested_segments() const62    const std::vector<std::pair<size_t, size_t>>& requested_segments() const {
63      return requested_segments_;
64    }
65  
max_requested_bound() const66    size_t max_requested_bound() const { return max_requested_bound_; }
67  
ClearRequestedSegments()68    void ClearRequestedSegments() {
69      requested_segments_.clear();
70      max_requested_bound_ = 0;
71    }
72  
is_new_data_available() const73    bool is_new_data_available() const { return is_new_data_available_; }
set_is_new_data_available(bool is_new_data_available)74    void set_is_new_data_available(bool is_new_data_available) {
75      is_new_data_available_ = is_new_data_available;
76    }
77  
max_already_available_bound() const78    size_t max_already_available_bound() const {
79      return available_ranges_.IsEmpty()
80                 ? 0
81                 : available_ranges_.ranges().rbegin()->second;
82    }
83  
FlushRequestedData()84    void FlushRequestedData() {
85      for (const auto& it : requested_segments_) {
86        SetDataAvailable(it.first, it.second);
87      }
88      ClearRequestedSegments();
89    }
90  
file_contents()91    char* file_contents() { return file_contents_.get(); }
file_length() const92    size_t file_length() const { return file_length_; }
93  
94   private:
SetDataAvailable(size_t start,size_t size)95    void SetDataAvailable(size_t start, size_t size) {
96      available_ranges_.Union(RangeSet::Range(start, start + size));
97    }
98  
CheckDataAlreadyAvailable(size_t start,size_t size) const99    bool CheckDataAlreadyAvailable(size_t start, size_t size) const {
100      return available_ranges_.Contains(RangeSet::Range(start, start + size));
101    }
102  
GetBlockImpl(unsigned long pos,unsigned char * pBuf,unsigned long size)103    int GetBlockImpl(unsigned long pos, unsigned char* pBuf, unsigned long size) {
104      if (!IsDataAvailImpl(pos, size))
105        return 0;
106      const unsigned long end =
107          std::min(static_cast<unsigned long>(file_length_), pos + size);
108      if (end <= pos)
109        return 0;
110      memcpy(pBuf, file_contents_.get() + pos, end - pos);
111      SetDataAvailable(pos, end - pos);
112      return static_cast<int>(end - pos);
113    }
114  
AddSegmentImpl(size_t offset,size_t size)115    void AddSegmentImpl(size_t offset, size_t size) {
116      requested_segments_.push_back(std::make_pair(offset, size));
117      max_requested_bound_ = std::max(max_requested_bound_, offset + size);
118    }
119  
IsDataAvailImpl(size_t offset,size_t size)120    bool IsDataAvailImpl(size_t offset, size_t size) {
121      if (offset + size > file_length_)
122        return false;
123      if (is_new_data_available_) {
124        SetDataAvailable(offset, size);
125        return true;
126      }
127      return CheckDataAlreadyAvailable(offset, size);
128    }
129  
SGetBlock(void * param,unsigned long pos,unsigned char * pBuf,unsigned long size)130    static int SGetBlock(void* param,
131                         unsigned long pos,
132                         unsigned char* pBuf,
133                         unsigned long size) {
134      return static_cast<TestAsyncLoader*>(param)->GetBlockImpl(pos, pBuf, size);
135    }
136  
SAddSegment(FX_DOWNLOADHINTS * pThis,size_t offset,size_t size)137    static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) {
138      return static_cast<TestAsyncLoader*>(pThis)->AddSegmentImpl(offset, size);
139    }
140  
SIsDataAvail(FX_FILEAVAIL * pThis,size_t offset,size_t size)141    static FPDF_BOOL SIsDataAvail(FX_FILEAVAIL* pThis,
142                                  size_t offset,
143                                  size_t size) {
144      return static_cast<TestAsyncLoader*>(pThis)->IsDataAvailImpl(offset, size);
145    }
146  
147    FPDF_FILEACCESS file_access_;
148  
149    std::unique_ptr<char, pdfium::FreeDeleter> file_contents_;
150    size_t file_length_ = 0;
151    std::vector<std::pair<size_t, size_t>> requested_segments_;
152    size_t max_requested_bound_ = 0;
153    bool is_new_data_available_ = true;
154  
155    RangeSet available_ranges_;
156  };
157  
158  }  // namespace
159  
160  class FPDFDataAvailEmbedderTest : public EmbedderTest {};
161  
TEST_F(FPDFDataAvailEmbedderTest, TrailerUnterminated) {
  // The file is too malformed to load; the point of the test is that neither
  // the open attempt nor the availability query crashes.
  const bool opened = OpenDocument("trailer_unterminated.pdf");
  EXPECT_FALSE(opened);

  MockDownloadHints download_hints;
  const int doc_status = FPDFAvail_IsDocAvail(avail_, &download_hints);
  EXPECT_FALSE(doc_status);
}
168  
TEST_F(FPDFDataAvailEmbedderTest, TrailerAsHexstring) {
  // The file is too malformed to load; the point of the test is that neither
  // the open attempt nor the availability query crashes.
  const bool opened = OpenDocument("trailer_as_hexstring.pdf");
  EXPECT_FALSE(opened);

  MockDownloadHints download_hints;
  const int doc_status = FPDFAvail_IsDocAvail(avail_, &download_hints);
  EXPECT_FALSE(doc_status);
}
175  
// Checks that the second page can be loaded after its availability was
// confirmed, even when the loader stops granting access to new data.
TEST_F(FPDFDataAvailEmbedderTest, LoadUsingHintTables) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Without the fixture file the loader has no data to serve, so nothing below
  // would be meaningful; stop with a clear failure instead.
  ASSERT_TRUE(loader.IsOpened());

  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
  document_ = FPDFAvail_GetDocument(avail_, nullptr);
  ASSERT_TRUE(document_);
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 1, loader.hints()));

  // No new data available, to prevent load "Pages" node.
  loader.set_is_new_data_available(false);
  ScopedFPDFPage page(FPDF_LoadPage(document(), 1));
  EXPECT_TRUE(page);
}
189  
// Drives FPDFAvail_IsFormAvail() to completion by repeatedly delivering the
// segments it requested, and checks that it does not end in an error.
TEST_F(FPDFDataAvailEmbedderTest, CheckFormAvailIfLinearized) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Without the fixture file the loader has no data to serve, so nothing below
  // would be meaningful; stop with a clear failure instead.
  ASSERT_TRUE(loader.IsOpened());

  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
  document_ = FPDFAvail_GetDocument(avail_, nullptr);
  ASSERT_TRUE(document_);

  // Prevent access to non-requested data to coerce the parser to send new
  // request for non available (non-requested before) data.
  loader.set_is_new_data_available(false);
  loader.ClearRequestedSegments();

  // Each pass makes the previously requested segments available and asks
  // again; the loop ends on any status other than "not available".
  int status = PDF_FORM_NOTAVAIL;
  while (status == PDF_FORM_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsFormAvail(avail_, loader.hints());
  }
  EXPECT_NE(PDF_FORM_ERROR, status);
}
209  
// Checks that neither reading nor requesting data for the first page of a
// linearized document reaches the end of the file, where the main cross
// reference table lives.
TEST_F(FPDFDataAvailEmbedderTest,
       DoNotLoadMainCrossRefForFirstPageIfLinearized) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Without the fixture file the loader has no data to serve, so nothing below
  // would be meaningful; stop with a clear failure instead.
  ASSERT_TRUE(loader.IsOpened());

  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
  document_ = FPDFAvail_GetDocument(avail_, nullptr);
  ASSERT_TRUE(document_);
  const int first_page_num = FPDFAvail_GetFirstPageNum(document_);

  // The main cross ref table should not be processed.
  // (It is always at file end)
  EXPECT_GT(loader.file_access()->m_FileLen,
            loader.max_already_available_bound());

  // Prevent access to non-requested data to coerce the parser to send new
  // request for non available (non-requested before) data.
  loader.set_is_new_data_available(false);
  // Only the requests recorded by the loader matter here, so the returned
  // status is deliberately ignored.
  FPDFAvail_IsPageAvail(avail_, first_page_num, loader.hints());

  // The main cross ref table should not be requested.
  // (It is always at file end)
  EXPECT_GT(loader.file_access()->m_FileLen, loader.max_requested_bound());

  // Allow parse page.
  loader.set_is_new_data_available(true);
  ASSERT_EQ(PDF_DATA_AVAIL,
            FPDFAvail_IsPageAvail(avail_, first_page_num, loader.hints()));

  // The main cross ref table should not be processed.
  // (It is always at file end)
  EXPECT_GT(loader.file_access()->m_FileLen,
            loader.max_already_available_bound());

  // Prevent loading data, while page loading.
  loader.set_is_new_data_available(false);
  ScopedFPDFPage page(FPDF_LoadPage(document(), first_page_num));
  EXPECT_TRUE(page);
}
248  
// Delivers exactly the segments requested for the second page and checks that
// the page then becomes available and loads without further data.
TEST_F(FPDFDataAvailEmbedderTest, LoadSecondPageIfLinearizedWithHints) {
  TestAsyncLoader loader("feature_linearized_loading.pdf");
  // Without the fixture file the loader has no data to serve, so nothing below
  // would be meaningful; stop with a clear failure instead.
  ASSERT_TRUE(loader.IsOpened());

  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
  document_ = FPDFAvail_GetDocument(avail_, nullptr);
  ASSERT_TRUE(document_);

  static constexpr uint32_t kSecondPageNum = 1;

  // Prevent access to non-requested data to coerce the parser to send new
  // request for non available (non-requested before) data.
  loader.set_is_new_data_available(false);
  loader.ClearRequestedSegments();

  // Each pass makes the previously requested segments available and asks
  // again; the loop ends on any status other than "not available".
  int status = PDF_DATA_NOTAVAIL;
  while (status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    status = FPDFAvail_IsPageAvail(avail_, kSecondPageNum, loader.hints());
  }
  EXPECT_EQ(PDF_DATA_AVAIL, status);

  // Prevent loading data, while page loading.
  loader.set_is_new_data_available(false);
  ScopedFPDFPage page(FPDF_LoadPage(document(), kSecondPageNum));
  EXPECT_TRUE(page);
}
275  
// Checks that document metadata is not readable right after the document
// becomes available from requested segments only, and becomes readable once
// the whole file is made available and the second page has been checked.
TEST_F(FPDFDataAvailEmbedderTest, LoadInfoAfterReceivingWholeDocument) {
  TestAsyncLoader loader("linearized.pdf");
  // Without the fixture file the loader has no data to serve, so nothing below
  // would be meaningful; stop with a clear failure instead.
  ASSERT_TRUE(loader.IsOpened());

  loader.set_is_new_data_available(false);
  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
  // Deliver requested segments until the status settles. Looping only while
  // the status is "not available" lets an error status fail the test instead
  // of spinning forever.
  int doc_status = FPDFAvail_IsDocAvail(avail_, loader.hints());
  while (doc_status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    doc_status = FPDFAvail_IsDocAvail(avail_, loader.hints());
  }
  ASSERT_EQ(PDF_DATA_AVAIL, doc_status);

  document_ = FPDFAvail_GetDocument(avail_, nullptr);
  ASSERT_TRUE(document_);

  // The "info" dictionary should still be unavailable.
  EXPECT_FALSE(FPDF_GetMetaText(document_, "CreationDate", nullptr, 0));

  // Simulate receiving whole file.
  loader.set_is_new_data_available(true);
  // Load second page, to parse additional crossref sections.
  EXPECT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 1, loader.hints()));

  EXPECT_TRUE(FPDF_GetMetaText(document_, "CreationDate", nullptr, 0));
}
297  
// Patches the file so that "Info" refers to an object in the first-page
// section and checks that its contents are readable as soon as the document
// is available.
TEST_F(FPDFDataAvailEmbedderTest, LoadInfoAfterReceivingFirstPage) {
  TestAsyncLoader loader("linearized.pdf");
  // The in-place patch below needs real file contents; stop with a clear
  // failure if the fixture file could not be loaded.
  ASSERT_TRUE(loader.IsOpened());

  // Map "Info" to an object within the first section without breaking
  // linearization.
  constexpr char kOriginalInfo[] = "/Info 27 0 R";
  constexpr char kPatchedInfo[] = "/Info 29 0 R";
  static_assert(sizeof(kOriginalInfo) == sizeof(kPatchedInfo),
                "The patch must keep every byte offset in the file unchanged");
  ByteString data(loader.file_contents(), loader.file_length());
  Optional<size_t> index = data.Find(kOriginalInfo);
  ASSERT_TRUE(index);
  // Overwrite the text only; the terminating NUL is not copied.
  memcpy(loader.file_contents() + *index, kPatchedInfo,
         sizeof(kPatchedInfo) - 1);

  loader.set_is_new_data_available(false);
  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
  // Deliver requested segments until the status settles. Looping only while
  // the status is "not available" lets an error status fail the test instead
  // of spinning forever.
  int doc_status = FPDFAvail_IsDocAvail(avail_, loader.hints());
  while (doc_status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    doc_status = FPDFAvail_IsDocAvail(avail_, loader.hints());
  }
  ASSERT_EQ(PDF_DATA_AVAIL, doc_status);

  document_ = FPDFAvail_GetDocument(avail_, nullptr);
  ASSERT_TRUE(document_);

  // The "Info" dictionary should be available for the linearized document, if
  // it is located in the first page section.
  // Info was remapped to a dictionary with Type "Catalog"
  unsigned short buffer[100] = {0};
  EXPECT_TRUE(FPDF_GetMetaText(document_, "Type", buffer, sizeof(buffer)));
  constexpr wchar_t kExpectedValue[] = L"Catalog";
  EXPECT_EQ(WideString(kExpectedValue),
            WideString::FromUTF16LE(buffer, FXSYS_len(kExpectedValue)));
}
325  
// Patches the file so that "Info" refers to object 99 and checks that the
// metadata query fails gracefully.
TEST_F(FPDFDataAvailEmbedderTest, TryLoadInvalidInfo) {
  TestAsyncLoader loader("linearized.pdf");
  // The in-place patch below needs real file contents; stop with a clear
  // failure if the fixture file could not be loaded.
  ASSERT_TRUE(loader.IsOpened());

  // Map "Info" to an invalid object without breaking linearization.
  constexpr char kOriginalInfo[] = "/Info 27 0 R";
  constexpr char kPatchedInfo[] = "/Info 99 0 R";
  static_assert(sizeof(kOriginalInfo) == sizeof(kPatchedInfo),
                "The patch must keep every byte offset in the file unchanged");
  ByteString data(loader.file_contents(), loader.file_length());
  Optional<size_t> index = data.Find(kOriginalInfo);
  ASSERT_TRUE(index);
  // Overwrite the text only; the terminating NUL is not copied.
  memcpy(loader.file_contents() + *index, kPatchedInfo,
         sizeof(kPatchedInfo) - 1);

  loader.set_is_new_data_available(false);
  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
  // Deliver requested segments until the status settles. Looping only while
  // the status is "not available" lets an error status fail the test instead
  // of spinning forever.
  int doc_status = FPDFAvail_IsDocAvail(avail_, loader.hints());
  while (doc_status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    doc_status = FPDFAvail_IsDocAvail(avail_, loader.hints());
  }
  ASSERT_EQ(PDF_DATA_AVAIL, doc_status);

  document_ = FPDFAvail_GetDocument(avail_, nullptr);
  ASSERT_TRUE(document_);

  // Set all data available.
  loader.set_is_new_data_available(true);
  // Check page availability, to load additional crossrefs.
  // NOTE(review): this comment used to say "second page", but page index 0 is
  // what is queried here - confirm which one was intended.
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 0, loader.hints()));

  // Test that api is robust enough to handle the bad case.
  EXPECT_FALSE(FPDF_GetMetaText(document_, "Type", nullptr, 0));
}
351  
// Patches the file so that the "Info" key is no longer present and checks
// that the metadata query fails gracefully.
TEST_F(FPDFDataAvailEmbedderTest, TryLoadNonExistsInfo) {
  TestAsyncLoader loader("linearized.pdf");
  // The in-place patch below needs real file contents; stop with a clear
  // failure if the fixture file could not be loaded.
  ASSERT_TRUE(loader.IsOpened());

  // Break the "Info" parameter without breaking linearization.
  constexpr char kOriginalInfo[] = "/Info 27 0 R";
  constexpr char kPatchedInfo[] = "/I_fo 27 0 R";
  static_assert(sizeof(kOriginalInfo) == sizeof(kPatchedInfo),
                "The patch must keep every byte offset in the file unchanged");
  ByteString data(loader.file_contents(), loader.file_length());
  Optional<size_t> index = data.Find(kOriginalInfo);
  ASSERT_TRUE(index);
  // Overwrite the text only; the terminating NUL is not copied.
  memcpy(loader.file_contents() + *index, kPatchedInfo,
         sizeof(kPatchedInfo) - 1);

  loader.set_is_new_data_available(false);
  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
  // Deliver requested segments until the status settles. Looping only while
  // the status is "not available" lets an error status fail the test instead
  // of spinning forever.
  int doc_status = FPDFAvail_IsDocAvail(avail_, loader.hints());
  while (doc_status == PDF_DATA_NOTAVAIL) {
    loader.FlushRequestedData();
    doc_status = FPDFAvail_IsDocAvail(avail_, loader.hints());
  }
  ASSERT_EQ(PDF_DATA_AVAIL, doc_status);

  document_ = FPDFAvail_GetDocument(avail_, nullptr);
  ASSERT_TRUE(document_);

  // Set all data available.
  loader.set_is_new_data_available(true);
  // Check page availability, to load additional crossrefs.
  // NOTE(review): this comment used to say "second page", but page index 0 is
  // what is queried here - confirm which one was intended.
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 0, loader.hints()));

  // Test that api is robust enough to handle the bad case.
  EXPECT_FALSE(FPDF_GetMetaText(document_, "Type", nullptr, 0));
}
377  
TEST_F(FPDFDataAvailEmbedderTest, BadInputsToAPIs) {
  // Every FPDFAvail_* entry point is called with null handles only; each one
  // must report its error / empty / unknown value.
  const FPDF_AVAIL no_avail = nullptr;
  const FPDF_DOCUMENT no_document = nullptr;
  FX_DOWNLOADHINTS* const no_hints = nullptr;

  EXPECT_EQ(PDF_DATA_ERROR, FPDFAvail_IsDocAvail(no_avail, no_hints));
  EXPECT_FALSE(FPDFAvail_GetDocument(no_avail, nullptr));
  EXPECT_EQ(0, FPDFAvail_GetFirstPageNum(no_document));
  EXPECT_EQ(PDF_DATA_ERROR, FPDFAvail_IsPageAvail(no_avail, 0, no_hints));
  EXPECT_EQ(PDF_FORM_ERROR, FPDFAvail_IsFormAvail(no_avail, no_hints));
  EXPECT_EQ(PDF_LINEARIZATION_UNKNOWN, FPDFAvail_IsLinearized(no_avail));
}
386  
// Checks the status reported for a negative page index on an otherwise
// available document.
TEST_F(FPDFDataAvailEmbedderTest, NegativePageIndex) {
  TestAsyncLoader loader("linearized.pdf");
  // Without the fixture file the loader has no data to serve, so nothing below
  // would be meaningful; stop with a clear failure instead.
  ASSERT_TRUE(loader.IsOpened());

  avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access());
  ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints()));
  EXPECT_EQ(PDF_DATA_NOTAVAIL,
            FPDFAvail_IsPageAvail(avail_, -1, loader.hints()));
}
394