1 // Copyright 2016 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6 
7 #include "core/fpdfapi/parser/cpdf_hint_tables.h"
8 
9 #include <limits>
10 
11 #include "core/fpdfapi/parser/cpdf_array.h"
12 #include "core/fpdfapi/parser/cpdf_data_avail.h"
13 #include "core/fpdfapi/parser/cpdf_dictionary.h"
14 #include "core/fpdfapi/parser/cpdf_document.h"
15 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
16 #include "core/fpdfapi/parser/cpdf_read_validator.h"
17 #include "core/fpdfapi/parser/cpdf_stream.h"
18 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
19 #include "core/fxcrt/cfx_bitstream.h"
20 #include "core/fxcrt/fx_safe_types.h"
21 #include "third_party/base/numerics/safe_conversions.h"
22 
23 namespace {
24 
CanReadFromBitStream(const CFX_BitStream * hStream,const FX_SAFE_UINT32 & bits)25 bool CanReadFromBitStream(const CFX_BitStream* hStream,
26                           const FX_SAFE_UINT32& bits) {
27   return bits.IsValid() && hStream->BitsRemaining() >= bits.ValueOrDie();
28 }
29 
// Validates a bit-width field read from the page offset hint table header.
// The note for Table F.3 in the PDF 1.7 reference limits such widths to the
// range 0 through 32; a width of 0 carries no information, so it is rejected
// here as well.
bool IsValidPageOffsetHintTableBitCount(uint32_t bits) {
  const uint32_t kMaxBitCount = 32;
  if (bits == 0)
    return false;
  return bits <= kMaxBitCount;
}
36 
37 }  // namespace
38 
CPDF_HintTables(CPDF_ReadValidator * pValidator,CPDF_LinearizedHeader * pLinearized)39 CPDF_HintTables::CPDF_HintTables(CPDF_ReadValidator* pValidator,
40                                  CPDF_LinearizedHeader* pLinearized)
41     : m_pValidator(pValidator),
42       m_pLinearized(pLinearized),
43       m_nFirstPageSharedObjs(0),
44       m_szFirstPageObjOffset(0) {
45   ASSERT(m_pLinearized);
46 }
47 
~CPDF_HintTables()48 CPDF_HintTables::~CPDF_HintTables() {}
49 
GetItemLength(uint32_t index,const std::vector<FX_FILESIZE> & szArray) const50 uint32_t CPDF_HintTables::GetItemLength(
51     uint32_t index,
52     const std::vector<FX_FILESIZE>& szArray) const {
53   if (szArray.size() < 2 || index > szArray.size() - 2 ||
54       szArray[index] > szArray[index + 1]) {
55     return 0;
56   }
57   return szArray[index + 1] - szArray[index];
58 }
59 
ReadPageHintTable(CFX_BitStream * hStream)60 bool CPDF_HintTables::ReadPageHintTable(CFX_BitStream* hStream) {
61   if (!hStream || hStream->IsEOF())
62     return false;
63 
64   int nStreamOffset = ReadPrimaryHintStreamOffset();
65   if (nStreamOffset < 0)
66     return false;
67 
68   int nStreamLen = ReadPrimaryHintStreamLength();
69   if (nStreamLen < 1 ||
70       !pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(nStreamLen)) {
71     return false;
72   }
73 
74   const uint32_t kHeaderSize = 288;
75   if (hStream->BitsRemaining() < kHeaderSize)
76     return false;
77 
78   // Item 1: The least number of objects in a page.
79   const uint32_t dwObjLeastNum = hStream->GetBits(32);
80   if (!dwObjLeastNum)
81     return false;
82 
83   // Item 2: The location of the first page's page object.
84   const uint32_t dwFirstObjLoc = hStream->GetBits(32);
85   if (dwFirstObjLoc > static_cast<uint32_t>(nStreamOffset)) {
86     FX_SAFE_FILESIZE safeLoc = nStreamLen;
87     safeLoc += dwFirstObjLoc;
88     if (!safeLoc.IsValid())
89       return false;
90     m_szFirstPageObjOffset = safeLoc.ValueOrDie();
91   } else {
92     if (!pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(dwFirstObjLoc))
93       return false;
94     m_szFirstPageObjOffset = dwFirstObjLoc;
95   }
96 
97   // Item 3: The number of bits needed to represent the difference
98   // between the greatest and least number of objects in a page.
99   const uint32_t dwDeltaObjectsBits = hStream->GetBits(16);
100   if (!IsValidPageOffsetHintTableBitCount(dwDeltaObjectsBits))
101     return false;
102 
103   // Item 4: The least length of a page in bytes.
104   const uint32_t dwPageLeastLen = hStream->GetBits(32);
105   if (!dwPageLeastLen)
106     return false;
107 
108   // Item 5: The number of bits needed to represent the difference
109   // between the greatest and least length of a page, in bytes.
110   const uint32_t dwDeltaPageLenBits = hStream->GetBits(16);
111   if (!IsValidPageOffsetHintTableBitCount(dwDeltaPageLenBits))
112     return false;
113 
114   // Skip Item 6, 7, 8, 9 total 96 bits.
115   hStream->SkipBits(96);
116 
117   // Item 10: The number of bits needed to represent the greatest
118   // number of shared object references.
119   const uint32_t dwSharedObjBits = hStream->GetBits(16);
120   if (!IsValidPageOffsetHintTableBitCount(dwSharedObjBits))
121     return false;
122 
123   // Item 11: The number of bits needed to represent the numerically
124   // greatest shared object identifier used by the pages.
125   const uint32_t dwSharedIdBits = hStream->GetBits(16);
126   if (!IsValidPageOffsetHintTableBitCount(dwSharedIdBits))
127     return false;
128 
129   // Item 12: The number of bits needed to represent the numerator of
130   // the fractional position for each shared object reference. For each
131   // shared object referenced from a page, there is an indication of
132   // where in the page's content stream the object is first referenced.
133   const uint32_t dwSharedNumeratorBits = hStream->GetBits(16);
134   if (!IsValidPageOffsetHintTableBitCount(dwSharedNumeratorBits))
135     return false;
136 
137   // Item 13: Skip Item 13 which has 16 bits.
138   hStream->SkipBits(16);
139 
140   const int nPages = GetNumberOfPages();
141   if (nPages < 1 || nPages >= FPDF_PAGE_MAX_NUM)
142     return false;
143 
144   const uint32_t dwPages = pdfium::base::checked_cast<uint32_t>(nPages);
145   FX_SAFE_UINT32 required_bits = dwDeltaObjectsBits;
146   required_bits *= dwPages;
147   if (!CanReadFromBitStream(hStream, required_bits))
148     return false;
149 
150   for (int i = 0; i < nPages; ++i) {
151     FX_SAFE_UINT32 safeDeltaObj = hStream->GetBits(dwDeltaObjectsBits);
152     safeDeltaObj += dwObjLeastNum;
153     if (!safeDeltaObj.IsValid())
154       return false;
155     m_dwDeltaNObjsArray.push_back(safeDeltaObj.ValueOrDie());
156   }
157   hStream->ByteAlign();
158 
159   required_bits = dwDeltaPageLenBits;
160   required_bits *= dwPages;
161   if (!CanReadFromBitStream(hStream, required_bits))
162     return false;
163 
164   std::vector<uint32_t> dwPageLenArray;
165   for (int i = 0; i < nPages; ++i) {
166     FX_SAFE_UINT32 safePageLen = hStream->GetBits(dwDeltaPageLenBits);
167     safePageLen += dwPageLeastLen;
168     if (!safePageLen.IsValid())
169       return false;
170 
171     dwPageLenArray.push_back(safePageLen.ValueOrDie());
172   }
173 
174   int nOffsetE = GetEndOfFirstPageOffset();
175   if (nOffsetE < 0)
176     return false;
177 
178   int nFirstPageNum = GetFirstPageNumber();
179   if (nFirstPageNum < 0 || nFirstPageNum > std::numeric_limits<int>::max() - 1)
180     return false;
181 
182   for (int i = 0; i < nPages; ++i) {
183     if (i == nFirstPageNum) {
184       m_szPageOffsetArray.push_back(m_szFirstPageObjOffset);
185     } else if (i == nFirstPageNum + 1) {
186       if (i == 1) {
187         m_szPageOffsetArray.push_back(nOffsetE);
188       } else {
189         m_szPageOffsetArray.push_back(m_szPageOffsetArray[i - 2] +
190                                       dwPageLenArray[i - 2]);
191       }
192     } else {
193       if (i == 0) {
194         m_szPageOffsetArray.push_back(nOffsetE);
195       } else {
196         m_szPageOffsetArray.push_back(m_szPageOffsetArray[i - 1] +
197                                       dwPageLenArray[i - 1]);
198       }
199     }
200   }
201 
202   m_szPageOffsetArray.push_back(m_szPageOffsetArray[nPages - 1] +
203                                 dwPageLenArray[nPages - 1]);
204   hStream->ByteAlign();
205 
206   // Number of shared objects.
207   required_bits = dwSharedObjBits;
208   required_bits *= dwPages;
209   if (!CanReadFromBitStream(hStream, required_bits))
210     return false;
211 
212   for (int i = 0; i < nPages; i++)
213     m_dwNSharedObjsArray.push_back(hStream->GetBits(dwSharedObjBits));
214   hStream->ByteAlign();
215 
216   // Array of identifiers, size = nshared_objects.
217   for (int i = 0; i < nPages; i++) {
218     required_bits = dwSharedIdBits;
219     required_bits *= m_dwNSharedObjsArray[i];
220     if (!CanReadFromBitStream(hStream, required_bits))
221       return false;
222 
223     for (uint32_t j = 0; j < m_dwNSharedObjsArray[i]; j++)
224       m_dwIdentifierArray.push_back(hStream->GetBits(dwSharedIdBits));
225   }
226   hStream->ByteAlign();
227 
228   for (int i = 0; i < nPages; i++) {
229     FX_SAFE_UINT32 safeSize = m_dwNSharedObjsArray[i];
230     safeSize *= dwSharedNumeratorBits;
231     if (!CanReadFromBitStream(hStream, safeSize))
232       return false;
233 
234     hStream->SkipBits(safeSize.ValueOrDie());
235   }
236   hStream->ByteAlign();
237 
238   FX_SAFE_UINT32 safeTotalPageLen = dwPages;
239   safeTotalPageLen *= dwDeltaPageLenBits;
240   if (!CanReadFromBitStream(hStream, safeTotalPageLen))
241     return false;
242 
243   hStream->SkipBits(safeTotalPageLen.ValueOrDie());
244   hStream->ByteAlign();
245   return true;
246 }
247 
ReadSharedObjHintTable(CFX_BitStream * hStream,uint32_t offset)248 bool CPDF_HintTables::ReadSharedObjHintTable(CFX_BitStream* hStream,
249                                              uint32_t offset) {
250   if (!hStream || hStream->IsEOF())
251     return false;
252 
253   int nStreamOffset = ReadPrimaryHintStreamOffset();
254   int nStreamLen = ReadPrimaryHintStreamLength();
255   if (nStreamOffset < 0 || nStreamLen < 1)
256     return false;
257 
258   FX_SAFE_UINT32 bit_offset = offset;
259   bit_offset *= 8;
260   if (!bit_offset.IsValid() || hStream->GetPos() > bit_offset.ValueOrDie())
261     return false;
262   hStream->SkipBits((bit_offset - hStream->GetPos()).ValueOrDie());
263 
264   const uint32_t kHeaderSize = 192;
265   if (hStream->BitsRemaining() < kHeaderSize)
266     return false;
267 
268   // Item 1: The object number of the first object in the shared objects
269   // section.
270   uint32_t dwFirstSharedObjNum = hStream->GetBits(32);
271 
272   // Item 2: The location of the first object in the shared objects section.
273   uint32_t dwFirstSharedObjLoc = hStream->GetBits(32);
274   if (dwFirstSharedObjLoc > static_cast<uint32_t>(nStreamOffset))
275     dwFirstSharedObjLoc += nStreamLen;
276 
277   // Item 3: The number of shared object entries for the first page.
278   m_nFirstPageSharedObjs = hStream->GetBits(32);
279 
280   // Item 4: The number of shared object entries for the shared objects
281   // section, including the number of shared object entries for the first page.
282   uint32_t dwSharedObjTotal = hStream->GetBits(32);
283 
284   // Item 5: The number of bits needed to represent the greatest number of
285   // objects in a shared object group. Skipped.
286   hStream->SkipBits(16);
287 
288   // Item 6: The least length of a shared object group in bytes.
289   uint32_t dwGroupLeastLen = hStream->GetBits(32);
290 
291   // Item 7: The number of bits needed to represent the difference between the
292   // greatest and least length of a shared object group, in bytes.
293   uint32_t dwDeltaGroupLen = hStream->GetBits(16);
294 
295   // Trying to decode more than 32 bits isn't going to work when we write into
296   // a uint32_t.
297   if (dwDeltaGroupLen > 31)
298     return false;
299 
300   if (dwFirstSharedObjNum >= CPDF_Parser::kMaxObjectNumber ||
301       m_nFirstPageSharedObjs >= CPDF_Parser::kMaxObjectNumber ||
302       dwSharedObjTotal >= CPDF_Parser::kMaxObjectNumber) {
303     return false;
304   }
305 
306   int nFirstPageObjNum = GetFirstPageObjectNumber();
307   if (nFirstPageObjNum < 0)
308     return false;
309 
310   uint32_t dwPrevObjLen = 0;
311   uint32_t dwCurObjLen = 0;
312   FX_SAFE_UINT32 required_bits = dwSharedObjTotal;
313   required_bits *= dwDeltaGroupLen;
314   if (!CanReadFromBitStream(hStream, required_bits))
315     return false;
316 
317   for (uint32_t i = 0; i < dwSharedObjTotal; ++i) {
318     dwPrevObjLen = dwCurObjLen;
319     FX_SAFE_UINT32 safeObjLen = hStream->GetBits(dwDeltaGroupLen);
320     safeObjLen += dwGroupLeastLen;
321     if (!safeObjLen.IsValid())
322       return false;
323 
324     dwCurObjLen = safeObjLen.ValueOrDie();
325     if (i < m_nFirstPageSharedObjs) {
326       m_dwSharedObjNumArray.push_back(nFirstPageObjNum + i);
327       if (i == 0)
328         m_szSharedObjOffsetArray.push_back(m_szFirstPageObjOffset);
329     } else {
330       FX_SAFE_UINT32 safeObjNum = dwFirstSharedObjNum;
331       safeObjNum += i - m_nFirstPageSharedObjs;
332       if (!safeObjNum.IsValid())
333         return false;
334 
335       m_dwSharedObjNumArray.push_back(safeObjNum.ValueOrDie());
336       if (i == m_nFirstPageSharedObjs) {
337         FX_SAFE_FILESIZE safeLoc = dwFirstSharedObjLoc;
338         if (!safeLoc.IsValid())
339           return false;
340 
341         m_szSharedObjOffsetArray.push_back(safeLoc.ValueOrDie());
342       }
343     }
344 
345     if (i != 0 && i != m_nFirstPageSharedObjs) {
346       FX_SAFE_FILESIZE safeLoc = dwPrevObjLen;
347       safeLoc += m_szSharedObjOffsetArray[i - 1];
348       if (!safeLoc.IsValid())
349         return false;
350 
351       m_szSharedObjOffsetArray.push_back(safeLoc.ValueOrDie());
352     }
353   }
354 
355   if (dwSharedObjTotal > 0) {
356     FX_SAFE_FILESIZE safeLoc = dwCurObjLen;
357     safeLoc += m_szSharedObjOffsetArray[dwSharedObjTotal - 1];
358     if (!safeLoc.IsValid())
359       return false;
360 
361     m_szSharedObjOffsetArray.push_back(safeLoc.ValueOrDie());
362   }
363 
364   hStream->ByteAlign();
365   if (hStream->BitsRemaining() < dwSharedObjTotal)
366     return false;
367 
368   hStream->SkipBits(dwSharedObjTotal);
369   hStream->ByteAlign();
370   return true;
371 }
372 
GetPagePos(uint32_t index,FX_FILESIZE * szPageStartPos,FX_FILESIZE * szPageLength,uint32_t * dwObjNum) const373 bool CPDF_HintTables::GetPagePos(uint32_t index,
374                                  FX_FILESIZE* szPageStartPos,
375                                  FX_FILESIZE* szPageLength,
376                                  uint32_t* dwObjNum) const {
377   if (index >= m_pLinearized->GetPageCount())
378     return false;
379 
380   *szPageStartPos = m_szPageOffsetArray[index];
381   *szPageLength = GetItemLength(index, m_szPageOffsetArray);
382 
383   int nFirstPageObjNum = GetFirstPageObjectNumber();
384   if (nFirstPageObjNum < 0)
385     return false;
386 
387   int nFirstPageNum = GetFirstPageNumber();
388   if (!pdfium::base::IsValueInRangeForNumericType<uint32_t>(nFirstPageNum))
389     return false;
390 
391   uint32_t dwFirstPageNum = static_cast<uint32_t>(nFirstPageNum);
392   if (index == dwFirstPageNum) {
393     *dwObjNum = nFirstPageObjNum;
394     return true;
395   }
396 
397   // The object number of remaining pages starts from 1.
398   *dwObjNum = 1;
399   for (uint32_t i = 0; i < index; ++i) {
400     if (i == dwFirstPageNum)
401       continue;
402     *dwObjNum += m_dwDeltaNObjsArray[i];
403   }
404   return true;
405 }
406 
CheckPage(uint32_t index)407 CPDF_DataAvail::DocAvailStatus CPDF_HintTables::CheckPage(uint32_t index) {
408   int nFirstPageNum = GetFirstPageNumber();
409   if (!pdfium::base::IsValueInRangeForNumericType<uint32_t>(nFirstPageNum))
410     return CPDF_DataAvail::DataError;
411 
412   if (index == static_cast<uint32_t>(nFirstPageNum))
413     return CPDF_DataAvail::DataAvailable;
414 
415   uint32_t dwLength = GetItemLength(index, m_szPageOffsetArray);
416   // If two pages have the same offset, it should be treated as an error.
417   if (!dwLength)
418     return CPDF_DataAvail::DataError;
419 
420   if (!m_pValidator->CheckDataRangeAndRequestIfUnavailable(
421           m_szPageOffsetArray[index], dwLength))
422     return CPDF_DataAvail::DataNotAvailable;
423 
424   // Download data of shared objects in the page.
425   uint32_t offset = 0;
426   for (uint32_t i = 0; i < index; ++i)
427     offset += m_dwNSharedObjsArray[i];
428 
429   int nFirstPageObjNum = GetFirstPageObjectNumber();
430   if (nFirstPageObjNum < 0)
431     return CPDF_DataAvail::DataError;
432 
433   uint32_t dwIndex = 0;
434   uint32_t dwObjNum = 0;
435   for (uint32_t j = 0; j < m_dwNSharedObjsArray[index]; ++j) {
436     dwIndex = m_dwIdentifierArray[offset + j];
437     if (dwIndex >= m_dwSharedObjNumArray.size())
438       return CPDF_DataAvail::DataNotAvailable;
439 
440     dwObjNum = m_dwSharedObjNumArray[dwIndex];
441     if (dwObjNum >= static_cast<uint32_t>(nFirstPageObjNum) &&
442         dwObjNum <
443             static_cast<uint32_t>(nFirstPageObjNum) + m_nFirstPageSharedObjs) {
444       continue;
445     }
446 
447     dwLength = GetItemLength(dwIndex, m_szSharedObjOffsetArray);
448     // If two objects have the same offset, it should be treated as an error.
449     if (!dwLength)
450       return CPDF_DataAvail::DataError;
451 
452     if (!m_pValidator->CheckDataRangeAndRequestIfUnavailable(
453             m_szSharedObjOffsetArray[dwIndex], dwLength)) {
454       return CPDF_DataAvail::DataNotAvailable;
455     }
456   }
457   return CPDF_DataAvail::DataAvailable;
458 }
459 
LoadHintStream(CPDF_Stream * pHintStream)460 bool CPDF_HintTables::LoadHintStream(CPDF_Stream* pHintStream) {
461   if (!pHintStream)
462     return false;
463 
464   CPDF_Dictionary* pDict = pHintStream->GetDict();
465   CPDF_Object* pOffset = pDict ? pDict->GetObjectFor("S") : nullptr;
466   if (!pOffset || !pOffset->IsNumber())
467     return false;
468 
469   int shared_hint_table_offset = pOffset->GetInteger();
470   if (shared_hint_table_offset <= 0)
471     return false;
472 
473   auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pHintStream);
474   pAcc->LoadAllDataFiltered();
475 
476   uint32_t size = pAcc->GetSize();
477   // The header section of page offset hint table is 36 bytes.
478   // The header section of shared object hint table is 24 bytes.
479   // Hint table has at least 60 bytes.
480   const uint32_t kMinStreamLength = 60;
481   if (size < kMinStreamLength)
482     return false;
483 
484   FX_SAFE_UINT32 safe_shared_hint_table_offset = shared_hint_table_offset;
485   if (!safe_shared_hint_table_offset.IsValid() ||
486       size < safe_shared_hint_table_offset.ValueOrDie()) {
487     return false;
488   }
489 
490   CFX_BitStream bs(pAcc->GetData(), size);
491   return ReadPageHintTable(&bs) &&
492          ReadSharedObjHintTable(&bs, shared_hint_table_offset);
493 }
494 
GetEndOfFirstPageOffset() const495 int CPDF_HintTables::GetEndOfFirstPageOffset() const {
496   return static_cast<int>(m_pLinearized->GetFirstPageEndOffset());
497 }
498 
GetNumberOfPages() const499 int CPDF_HintTables::GetNumberOfPages() const {
500   return static_cast<int>(m_pLinearized->GetPageCount());
501 }
502 
GetFirstPageObjectNumber() const503 int CPDF_HintTables::GetFirstPageObjectNumber() const {
504   return static_cast<int>(m_pLinearized->GetFirstPageObjNum());
505 }
506 
GetFirstPageNumber() const507 int CPDF_HintTables::GetFirstPageNumber() const {
508   return static_cast<int>(m_pLinearized->GetFirstPageNo());
509 }
510 
ReadPrimaryHintStreamOffset() const511 int CPDF_HintTables::ReadPrimaryHintStreamOffset() const {
512   return static_cast<int>(m_pLinearized->GetHintStart());
513 }
514 
ReadPrimaryHintStreamLength() const515 int CPDF_HintTables::ReadPrimaryHintStreamLength() const {
516   return static_cast<int>(m_pLinearized->GetHintLength());
517 }
518