1 // Copyright 2015 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include <algorithm> 6 #include <memory> 7 #include <string> 8 #include <utility> 9 #include <vector> 10 11 #include "core/fxcrt/bytestring.h" 12 #include "core/fxcrt/widestring.h" 13 #include "public/fpdfview.h" 14 #include "testing/embedder_test.h" 15 #include "testing/gtest/include/gtest/gtest.h" 16 #include "testing/range_set.h" 17 #include "testing/utils/file_util.h" 18 #include "testing/utils/path_service.h" 19 20 namespace { 21 22 class MockDownloadHints final : public FX_DOWNLOADHINTS { 23 public: SAddSegment(FX_DOWNLOADHINTS * pThis,size_t offset,size_t size)24 static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) { 25 } 26 MockDownloadHints()27 MockDownloadHints() { 28 FX_DOWNLOADHINTS::version = 1; 29 FX_DOWNLOADHINTS::AddSegment = SAddSegment; 30 } 31 32 ~MockDownloadHints() = default; 33 }; 34 35 class TestAsyncLoader final : public FX_DOWNLOADHINTS, FX_FILEAVAIL { 36 public: TestAsyncLoader(const std::string & file_name)37 explicit TestAsyncLoader(const std::string& file_name) { 38 std::string file_path; 39 if (!PathService::GetTestFilePath(file_name, &file_path)) 40 return; 41 file_contents_ = GetFileContents(file_path.c_str(), &file_length_); 42 if (!file_contents_) 43 return; 44 45 file_access_.m_FileLen = static_cast<unsigned long>(file_length_); 46 file_access_.m_GetBlock = SGetBlock; 47 file_access_.m_Param = this; 48 49 FX_DOWNLOADHINTS::version = 1; 50 FX_DOWNLOADHINTS::AddSegment = SAddSegment; 51 52 FX_FILEAVAIL::version = 1; 53 FX_FILEAVAIL::IsDataAvail = SIsDataAvail; 54 } 55 IsOpened() const56 bool IsOpened() const { return !!file_contents_; } 57 file_access()58 FPDF_FILEACCESS* file_access() { return &file_access_; } hints()59 FX_DOWNLOADHINTS* hints() { return this; } file_avail()60 FX_FILEAVAIL* file_avail() { return this; } 61 requested_segments() const62 const std::vector<std::pair<size_t, size_t>>& requested_segments() const { 63 return requested_segments_; 64 } 65 max_requested_bound() const66 size_t max_requested_bound() const { return max_requested_bound_; } 67 ClearRequestedSegments()68 void ClearRequestedSegments() { 69 requested_segments_.clear(); 70 max_requested_bound_ = 0; 71 } 72 is_new_data_available() const73 bool is_new_data_available() const { return is_new_data_available_; } set_is_new_data_available(bool is_new_data_available)74 void set_is_new_data_available(bool is_new_data_available) { 75 is_new_data_available_ = is_new_data_available; 76 } 77 max_already_available_bound() const78 size_t max_already_available_bound() const { 79 return available_ranges_.IsEmpty() 80 ? 0 81 : available_ranges_.ranges().rbegin()->second; 82 } 83 FlushRequestedData()84 void FlushRequestedData() { 85 for (const auto& it : requested_segments_) { 86 SetDataAvailable(it.first, it.second); 87 } 88 ClearRequestedSegments(); 89 } 90 file_contents()91 char* file_contents() { return file_contents_.get(); } file_length() const92 size_t file_length() const { return file_length_; } 93 94 private: SetDataAvailable(size_t start,size_t size)95 void SetDataAvailable(size_t start, size_t size) { 96 available_ranges_.Union(RangeSet::Range(start, start + size)); 97 } 98 CheckDataAlreadyAvailable(size_t start,size_t size) const99 bool CheckDataAlreadyAvailable(size_t start, size_t size) const { 100 return available_ranges_.Contains(RangeSet::Range(start, start + size)); 101 } 102 GetBlockImpl(unsigned long pos,unsigned char * pBuf,unsigned long size)103 int GetBlockImpl(unsigned long pos, unsigned char* pBuf, unsigned long size) { 104 if (!IsDataAvailImpl(pos, size)) 105 return 0; 106 const unsigned long end = 107 std::min(static_cast<unsigned long>(file_length_), pos + size); 108 if (end <= pos) 109 return 0; 110 memcpy(pBuf, file_contents_.get() + pos, end - pos); 111 SetDataAvailable(pos, end - pos); 112 return static_cast<int>(end - pos); 113 } 114 AddSegmentImpl(size_t offset,size_t size)115 void AddSegmentImpl(size_t offset, size_t size) { 116 requested_segments_.push_back(std::make_pair(offset, size)); 117 max_requested_bound_ = std::max(max_requested_bound_, offset + size); 118 } 119 IsDataAvailImpl(size_t offset,size_t size)120 bool IsDataAvailImpl(size_t offset, size_t size) { 121 if (offset + size > file_length_) 122 return false; 123 if (is_new_data_available_) { 124 SetDataAvailable(offset, size); 125 return true; 126 } 127 return CheckDataAlreadyAvailable(offset, size); 128 } 129 SGetBlock(void * param,unsigned long pos,unsigned char * pBuf,unsigned long size)130 static int SGetBlock(void* param, 131 unsigned long pos, 132 unsigned char* pBuf, 133 unsigned long size) { 134 return static_cast<TestAsyncLoader*>(param)->GetBlockImpl(pos, pBuf, size); 135 } 136 SAddSegment(FX_DOWNLOADHINTS * pThis,size_t offset,size_t size)137 static void SAddSegment(FX_DOWNLOADHINTS* pThis, size_t offset, size_t size) { 138 return static_cast<TestAsyncLoader*>(pThis)->AddSegmentImpl(offset, size); 139 } 140 SIsDataAvail(FX_FILEAVAIL * pThis,size_t offset,size_t size)141 static FPDF_BOOL SIsDataAvail(FX_FILEAVAIL* pThis, 142 size_t offset, 143 size_t size) { 144 return static_cast<TestAsyncLoader*>(pThis)->IsDataAvailImpl(offset, size); 145 } 146 147 FPDF_FILEACCESS file_access_; 148 149 std::unique_ptr<char, pdfium::FreeDeleter> file_contents_; 150 size_t file_length_ = 0; 151 std::vector<std::pair<size_t, size_t>> requested_segments_; 152 size_t max_requested_bound_ = 0; 153 bool is_new_data_available_ = true; 154 155 RangeSet available_ranges_; 156 }; 157 158 } // namespace 159 160 class FPDFDataAvailEmbedderTest : public EmbedderTest {}; 161 TEST_F(FPDFDataAvailEmbedderTest,TrailerUnterminated)162 TEST_F(FPDFDataAvailEmbedderTest, TrailerUnterminated) { 163 // Document must load without crashing but is too malformed to be available. 164 EXPECT_FALSE(OpenDocument("trailer_unterminated.pdf")); 165 MockDownloadHints hints; 166 EXPECT_FALSE(FPDFAvail_IsDocAvail(avail_, &hints)); 167 } 168 TEST_F(FPDFDataAvailEmbedderTest,TrailerAsHexstring)169 TEST_F(FPDFDataAvailEmbedderTest, TrailerAsHexstring) { 170 // Document must load without crashing but is too malformed to be available. 171 EXPECT_FALSE(OpenDocument("trailer_as_hexstring.pdf")); 172 MockDownloadHints hints; 173 EXPECT_FALSE(FPDFAvail_IsDocAvail(avail_, &hints)); 174 } 175 TEST_F(FPDFDataAvailEmbedderTest,LoadUsingHintTables)176 TEST_F(FPDFDataAvailEmbedderTest, LoadUsingHintTables) { 177 TestAsyncLoader loader("feature_linearized_loading.pdf"); 178 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access()); 179 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints())); 180 document_ = FPDFAvail_GetDocument(avail_, nullptr); 181 ASSERT_TRUE(document_); 182 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 1, loader.hints())); 183 184 // No new data available, to prevent load "Pages" node. 185 loader.set_is_new_data_available(false); 186 ScopedFPDFPage page(FPDF_LoadPage(document(), 1)); 187 EXPECT_TRUE(page); 188 } 189 TEST_F(FPDFDataAvailEmbedderTest,CheckFormAvailIfLinearized)190 TEST_F(FPDFDataAvailEmbedderTest, CheckFormAvailIfLinearized) { 191 TestAsyncLoader loader("feature_linearized_loading.pdf"); 192 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access()); 193 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints())); 194 document_ = FPDFAvail_GetDocument(avail_, nullptr); 195 ASSERT_TRUE(document_); 196 197 // Prevent access to non-requested data to coerce the parser to send new 198 // request for non available (non-requested before) data. 199 loader.set_is_new_data_available(false); 200 loader.ClearRequestedSegments(); 201 202 int status = PDF_FORM_NOTAVAIL; 203 while (status == PDF_FORM_NOTAVAIL) { 204 loader.FlushRequestedData(); 205 status = FPDFAvail_IsFormAvail(avail_, loader.hints()); 206 } 207 EXPECT_NE(PDF_FORM_ERROR, status); 208 } 209 TEST_F(FPDFDataAvailEmbedderTest,DoNotLoadMainCrossRefForFirstPageIfLinearized)210 TEST_F(FPDFDataAvailEmbedderTest, 211 DoNotLoadMainCrossRefForFirstPageIfLinearized) { 212 TestAsyncLoader loader("feature_linearized_loading.pdf"); 213 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access()); 214 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints())); 215 document_ = FPDFAvail_GetDocument(avail_, nullptr); 216 ASSERT_TRUE(document_); 217 const int first_page_num = FPDFAvail_GetFirstPageNum(document_); 218 219 // The main cross ref table should not be processed. 220 // (It is always at file end) 221 EXPECT_GT(loader.file_access()->m_FileLen, 222 loader.max_already_available_bound()); 223 224 // Prevent access to non-requested data to coerce the parser to send new 225 // request for non available (non-requested before) data. 226 loader.set_is_new_data_available(false); 227 FPDFAvail_IsPageAvail(avail_, first_page_num, loader.hints()); 228 229 // The main cross ref table should not be requested. 230 // (It is always at file end) 231 EXPECT_GT(loader.file_access()->m_FileLen, loader.max_requested_bound()); 232 233 // Allow parse page. 234 loader.set_is_new_data_available(true); 235 ASSERT_EQ(PDF_DATA_AVAIL, 236 FPDFAvail_IsPageAvail(avail_, first_page_num, loader.hints())); 237 238 // The main cross ref table should not be processed. 239 // (It is always at file end) 240 EXPECT_GT(loader.file_access()->m_FileLen, 241 loader.max_already_available_bound()); 242 243 // Prevent loading data, while page loading. 244 loader.set_is_new_data_available(false); 245 ScopedFPDFPage page(FPDF_LoadPage(document(), first_page_num)); 246 EXPECT_TRUE(page); 247 } 248 TEST_F(FPDFDataAvailEmbedderTest,LoadSecondPageIfLinearizedWithHints)249 TEST_F(FPDFDataAvailEmbedderTest, LoadSecondPageIfLinearizedWithHints) { 250 TestAsyncLoader loader("feature_linearized_loading.pdf"); 251 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access()); 252 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints())); 253 document_ = FPDFAvail_GetDocument(avail_, nullptr); 254 ASSERT_TRUE(document_); 255 256 static constexpr uint32_t kSecondPageNum = 1; 257 258 // Prevent access to non-requested data to coerce the parser to send new 259 // request for non available (non-requested before) data. 260 loader.set_is_new_data_available(false); 261 loader.ClearRequestedSegments(); 262 263 int status = PDF_DATA_NOTAVAIL; 264 while (status == PDF_DATA_NOTAVAIL) { 265 loader.FlushRequestedData(); 266 status = FPDFAvail_IsPageAvail(avail_, kSecondPageNum, loader.hints()); 267 } 268 EXPECT_EQ(PDF_DATA_AVAIL, status); 269 270 // Prevent loading data, while page loading. 271 loader.set_is_new_data_available(false); 272 ScopedFPDFPage page(FPDF_LoadPage(document(), kSecondPageNum)); 273 EXPECT_TRUE(page); 274 } 275 TEST_F(FPDFDataAvailEmbedderTest,LoadInfoAfterReceivingWholeDocument)276 TEST_F(FPDFDataAvailEmbedderTest, LoadInfoAfterReceivingWholeDocument) { 277 TestAsyncLoader loader("linearized.pdf"); 278 loader.set_is_new_data_available(false); 279 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access()); 280 while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) { 281 loader.FlushRequestedData(); 282 } 283 284 document_ = FPDFAvail_GetDocument(avail_, nullptr); 285 ASSERT_TRUE(document_); 286 287 // The "info" dictionary should still be unavailable. 288 EXPECT_FALSE(FPDF_GetMetaText(document_, "CreationDate", nullptr, 0)); 289 290 // Simulate receiving whole file. 291 loader.set_is_new_data_available(true); 292 // Load second page, to parse additional crossref sections. 293 EXPECT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 1, loader.hints())); 294 295 EXPECT_TRUE(FPDF_GetMetaText(document_, "CreationDate", nullptr, 0)); 296 } 297 TEST_F(FPDFDataAvailEmbedderTest,LoadInfoAfterReceivingFirstPage)298 TEST_F(FPDFDataAvailEmbedderTest, LoadInfoAfterReceivingFirstPage) { 299 TestAsyncLoader loader("linearized.pdf"); 300 // Map "Info" to an object within the first section without breaking 301 // linearization. 302 ByteString data(loader.file_contents(), loader.file_length()); 303 Optional<size_t> index = data.Find("/Info 27 0 R"); 304 ASSERT_TRUE(index); 305 memcpy(loader.file_contents() + *index, "/Info 29 0 R", 12); 306 307 loader.set_is_new_data_available(false); 308 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access()); 309 while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) { 310 loader.FlushRequestedData(); 311 } 312 313 document_ = FPDFAvail_GetDocument(avail_, nullptr); 314 ASSERT_TRUE(document_); 315 316 // The "Info" dictionary should be available for the linearized document, if 317 // it is located in the first page section. 318 // Info was remapped to a dictionary with Type "Catalog" 319 unsigned short buffer[100] = {0}; 320 EXPECT_TRUE(FPDF_GetMetaText(document_, "Type", buffer, sizeof(buffer))); 321 constexpr wchar_t kExpectedValue[] = L"Catalog"; 322 EXPECT_EQ(WideString(kExpectedValue), 323 WideString::FromUTF16LE(buffer, FXSYS_len(kExpectedValue))); 324 } 325 TEST_F(FPDFDataAvailEmbedderTest,TryLoadInvalidInfo)326 TEST_F(FPDFDataAvailEmbedderTest, TryLoadInvalidInfo) { 327 TestAsyncLoader loader("linearized.pdf"); 328 // Map "Info" to an invalid object without breaking linearization. 329 ByteString data(loader.file_contents(), loader.file_length()); 330 Optional<size_t> index = data.Find("/Info 27 0 R"); 331 ASSERT_TRUE(index); 332 memcpy(loader.file_contents() + *index, "/Info 99 0 R", 12); 333 334 loader.set_is_new_data_available(false); 335 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access()); 336 while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) { 337 loader.FlushRequestedData(); 338 } 339 340 document_ = FPDFAvail_GetDocument(avail_, nullptr); 341 ASSERT_TRUE(document_); 342 343 // Set all data available. 344 loader.set_is_new_data_available(true); 345 // Check second page, to load additional crossrefs. 346 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 0, loader.hints())); 347 348 // Test that api is robust enough to handle the bad case. 349 EXPECT_FALSE(FPDF_GetMetaText(document_, "Type", nullptr, 0)); 350 } 351 TEST_F(FPDFDataAvailEmbedderTest,TryLoadNonExistsInfo)352 TEST_F(FPDFDataAvailEmbedderTest, TryLoadNonExistsInfo) { 353 TestAsyncLoader loader("linearized.pdf"); 354 // Break the "Info" parameter without breaking linearization. 355 ByteString data(loader.file_contents(), loader.file_length()); 356 Optional<size_t> index = data.Find("/Info 27 0 R"); 357 ASSERT_TRUE(index); 358 memcpy(loader.file_contents() + *index, "/I_fo 27 0 R", 12); 359 360 loader.set_is_new_data_available(false); 361 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access()); 362 while (PDF_DATA_AVAIL != FPDFAvail_IsDocAvail(avail_, loader.hints())) { 363 loader.FlushRequestedData(); 364 } 365 366 document_ = FPDFAvail_GetDocument(avail_, nullptr); 367 ASSERT_TRUE(document_); 368 369 // Set all data available. 370 loader.set_is_new_data_available(true); 371 // Check second page, to load additional crossrefs. 372 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsPageAvail(avail_, 0, loader.hints())); 373 374 // Test that api is robust enough to handle the bad case. 375 EXPECT_FALSE(FPDF_GetMetaText(document_, "Type", nullptr, 0)); 376 } 377 TEST_F(FPDFDataAvailEmbedderTest,BadInputsToAPIs)378 TEST_F(FPDFDataAvailEmbedderTest, BadInputsToAPIs) { 379 EXPECT_EQ(PDF_DATA_ERROR, FPDFAvail_IsDocAvail(nullptr, nullptr)); 380 EXPECT_FALSE(FPDFAvail_GetDocument(nullptr, nullptr)); 381 EXPECT_EQ(0, FPDFAvail_GetFirstPageNum(nullptr)); 382 EXPECT_EQ(PDF_DATA_ERROR, FPDFAvail_IsPageAvail(nullptr, 0, nullptr)); 383 EXPECT_EQ(PDF_FORM_ERROR, FPDFAvail_IsFormAvail(nullptr, nullptr)); 384 EXPECT_EQ(PDF_LINEARIZATION_UNKNOWN, FPDFAvail_IsLinearized(nullptr)); 385 } 386 TEST_F(FPDFDataAvailEmbedderTest,NegativePageIndex)387 TEST_F(FPDFDataAvailEmbedderTest, NegativePageIndex) { 388 TestAsyncLoader loader("linearized.pdf"); 389 avail_ = FPDFAvail_Create(loader.file_avail(), loader.file_access()); 390 ASSERT_EQ(PDF_DATA_AVAIL, FPDFAvail_IsDocAvail(avail_, loader.hints())); 391 EXPECT_EQ(PDF_DATA_NOTAVAIL, 392 FPDFAvail_IsPageAvail(avail_, -1, loader.hints())); 393 } 394