1 // Copyright 2015 Google Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 ////////////////////////////////////////////////////////////////////////////////
16 
17 #include "src/binary_parse/range_checked_byte_ptr.h"
18 
19 #include <assert.h>
20 #include <cstddef>
21 #include <cstring>
22 
23 namespace piex {
24 namespace binary_parse {
25 
26 #ifdef BREAK_IF_DEBUGGING_AND_OUT_OF_RANGE
27 #define BREAK_IF_DEBUGGING() assert(false)
28 #else
29 #define BREAK_IF_DEBUGGING() assert(true)
30 #endif
31 
32 namespace {
33 class MemoryPagedByteArray : public PagedByteArray {
34  public:
35   MemoryPagedByteArray(const unsigned char *buffer, const size_t len);
36 
37   virtual size_t length() const;
38   virtual size_t pageSize() const;
39   virtual void getPage(size_t page_index, const unsigned char **begin,
40                        const unsigned char **end, PagePtr *page) const;
41 
42  private:
43   const unsigned char *buffer_;
44   const size_t len_;
45 };
46 
MemoryPagedByteArray(const unsigned char * buffer,const size_t len)47 MemoryPagedByteArray::MemoryPagedByteArray(const unsigned char *buffer,
48                                            const size_t len)
49     : buffer_(buffer), len_(len) {}
50 
length() const51 size_t MemoryPagedByteArray::length() const { return len_; }
52 
pageSize() const53 size_t MemoryPagedByteArray::pageSize() const { return len_; }
54 
getPage(size_t,const unsigned char ** begin,const unsigned char ** end,PagePtr * page) const55 void MemoryPagedByteArray::getPage(size_t /* page_index */,
56                                    const unsigned char **begin,
57                                    const unsigned char **end,
58                                    PagePtr *page) const {
59   *begin = buffer_;
60   *end = buffer_ + len_;
61   *page = PagePtr();
62 }
63 
64 // A functor that does nothing. This is used as a no-op shared pointer
65 // deallocator below.
66 class NullFunctor {
67  public:
operator ()()68   void operator()() {}
operator ()(PagedByteArray *) const69   void operator()(PagedByteArray * /* p */) const {}
70 };
71 }  // namespace
72 
~PagedByteArray()73 PagedByteArray::~PagedByteArray() {}
74 
RangeCheckedBytePtr()75 RangeCheckedBytePtr::RangeCheckedBytePtr()
76     : array_(),
77       page_data_(NULL),
78       current_pos_(0),
79       sub_array_begin_(0),
80       sub_array_end_(0),
81       page_begin_offset_(0),
82       current_page_len_(0),
83       error_flag_(RANGE_CHECKED_BYTE_ERROR) {}
84 
RangeCheckedBytePtr(const unsigned char * array,const size_t len)85 RangeCheckedBytePtr::RangeCheckedBytePtr(const unsigned char *array,
86                                          const size_t len)
87     : array_(new MemoryPagedByteArray(array, len)),
88       page_data_(NULL),
89       current_pos_(0),
90       sub_array_begin_(0),
91       sub_array_end_(len),
92       page_begin_offset_(0),
93       current_page_len_(0),
94       error_flag_(RANGE_CHECKED_BYTE_SUCCESS) {
95   assert(array);
96   if (array == NULL) {
97     error_flag_ = RANGE_CHECKED_BYTE_ERROR;
98   }
99 }
100 
RangeCheckedBytePtr(PagedByteArray * array)101 RangeCheckedBytePtr::RangeCheckedBytePtr(PagedByteArray *array)
102     : array_(array, NullFunctor()),
103       page_data_(NULL),
104       current_pos_(0),
105       sub_array_begin_(0),
106       sub_array_end_(array->length()),
107       page_begin_offset_(0),
108       current_page_len_(0),
109       error_flag_(RANGE_CHECKED_BYTE_SUCCESS) {}
110 
invalidPointer()111 RangeCheckedBytePtr RangeCheckedBytePtr::invalidPointer() {
112   return RangeCheckedBytePtr();
113 }
114 
pointerToSubArray(size_t pos,size_t length) const115 RangeCheckedBytePtr RangeCheckedBytePtr::pointerToSubArray(
116     size_t pos, size_t length) const {
117   RangeCheckedBytePtr sub_result = (*this) + pos;
118   if (!sub_result.errorOccurred() && length <= sub_result.remainingLength()) {
119     sub_result.sub_array_begin_ = sub_result.current_pos_;
120     sub_result.sub_array_end_ = sub_result.sub_array_begin_ + length;
121 
122     // Restrict the boundaries of the current page to the newly set sub-array.
123     sub_result.restrictPageToSubArray();
124 
125     return sub_result;
126   } else {
127     error_flag_ = RANGE_CHECKED_BYTE_ERROR;
128     return invalidPointer();
129   }
130 }
131 
offsetInArray() const132 size_t RangeCheckedBytePtr::offsetInArray() const {
133   // sub_array_begin_ <= current_pos_ is a class invariant, but protect
134   // against violations of this invariant.
135   if (sub_array_begin_ <= current_pos_) {
136     return current_pos_ - sub_array_begin_;
137   } else {
138     assert(false);
139     return 0;
140   }
141 }
142 
substr(size_t pos,size_t length) const143 std::string RangeCheckedBytePtr::substr(size_t pos, size_t length) const {
144   std::vector<unsigned char> bytes = extractBytes(pos, length);
145   std::string result;
146   result.reserve(bytes.size());
147   for (size_t i = 0; i < bytes.size(); ++i) {
148     result.push_back(static_cast<char>(bytes[i]));
149   }
150   return result;
151 }
152 
extractBytes(size_t pos,size_t length) const153 std::vector<unsigned char> RangeCheckedBytePtr::extractBytes(
154     size_t pos, size_t length) const {
155   std::vector<unsigned char> result;
156   if (pos + length < pos /* overflow */ || remainingLength() < pos + length) {
157     BREAK_IF_DEBUGGING();
158     error_flag_ = RANGE_CHECKED_BYTE_ERROR_OVERFLOW;
159     return result;
160   }
161   result.reserve(length);
162   for (size_t i = 0; i < length; ++i) {
163     result.push_back((*this)[pos + i]);
164   }
165   return result;
166 }
167 
operator ==(const RangeCheckedBytePtr & x,const RangeCheckedBytePtr & y)168 bool operator==(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) {
169   if (x.array_ != y.array_) {
170     assert(false);
171     return false;
172   }
173 
174   return x.current_pos_ == y.current_pos_;
175 }
176 
operator !=(const RangeCheckedBytePtr & x,const RangeCheckedBytePtr & y)177 bool operator!=(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y) {
178   return !(x == y);
179 }
180 
loadPageForOffset(size_t offset) const181 void RangeCheckedBytePtr::loadPageForOffset(size_t offset) const {
182   // The offset should always lie within the bounds of the sub-array (this
183   // condition is enforced at the callsite). However, even if the offset lies
184   // outside the sub-array, the restrictPageToSubArray() call at the end
185   // ensures that the object is left in a consistent state that maintains the
186   // class invariants.
187   assert(offset >= sub_array_begin_ && offset < sub_array_end_);
188 
189   // Ensure that offset lies within the array.
190   if (offset >= array_->length()) {
191     assert(false);
192     return;
193   }
194 
195   // Determine the index of the page to request.
196   size_t page_index = offset / array_->pageSize();
197 
198   // Get the page.
199   const unsigned char *page_begin;
200   const unsigned char *page_end;
201   array_->getPage(page_index, &page_begin, &page_end, &page_);
202 
203   // Ensure that the page has the expected length (as specified in the
204   // PagedByteArray interface).
205   size_t expected_page_size = array_->pageSize();
206   if (page_index == (array_->length() - 1) / array_->pageSize()) {
207     expected_page_size = array_->length() - array_->pageSize() * page_index;
208   }
209   if ((page_end < page_begin) ||
210       (static_cast<size_t>(page_end - page_begin) != expected_page_size)) {
211     assert(false);
212     return;
213   }
214 
215   // Remember information about page.
216   page_data_ = page_begin;
217   page_begin_offset_ = page_index * array_->pageSize();
218   current_page_len_ = static_cast<size_t>(page_end - page_begin);
219 
220   // Restrict the boundaries of the page to lie within the sub-array.
221   restrictPageToSubArray();
222 }
223 
restrictPageToSubArray() const224 void RangeCheckedBytePtr::restrictPageToSubArray() const {
225   // Restrict the current page's boundaries so that it is always contained
226   // completely within the extent of the sub-array.
227   // This function is purposely designed to work correctly in the following
228   // two special cases:
229   // a) The current page lies entirely outside the sub-array. In this case,
230   //    current_page_len_ will be set to zero. page_data_ may either remain
231   //    unchanged or may be changed to point one element beyond the end of the
232   //    page, depending on whether the current page lies before or after the
233   //    sub-array.
234   // b) The current page is in the state as initialized by the constructor
235   //    (i.e. page_data_ is NULL and current_page_len_ is zero). In this case,
236   //    page_data_ and current_page_len_ will remain unchanged.
237 
238   // Does the beginning of the page lie before the beginning of the sub-array?
239   if (page_begin_offset_ < sub_array_begin_) {
240     // Compute amount by which to shorten page.
241     size_t amount_to_shorten = sub_array_begin_ - page_begin_offset_;
242     if (amount_to_shorten > current_page_len_) {
243       amount_to_shorten = current_page_len_;
244     }
245 
246     // Adjust beginning of page accordingly.
247     page_begin_offset_ += amount_to_shorten;
248     page_data_ += amount_to_shorten;
249     current_page_len_ -= amount_to_shorten;
250   }
251 
252   // Does the end of the page lie beyond the end of the sub-array?
253   if (page_begin_offset_ + current_page_len_ > sub_array_end_) {
254     // Reduce length of page accordingly.
255     size_t new_len = sub_array_end_ - page_begin_offset_;
256     if (new_len > current_page_len_) {
257       new_len = current_page_len_;
258     }
259     current_page_len_ = new_len;
260   }
261 }
262 
memcmp(const RangeCheckedBytePtr & x,const RangeCheckedBytePtr & y,size_t num)263 int memcmp(const RangeCheckedBytePtr &x, const RangeCheckedBytePtr &y,
264            size_t num) {
265   std::vector<unsigned char> x_vec = x.extractBytes(0, num);
266   std::vector<unsigned char> y_vec = y.extractBytes(0, num);
267 
268   if (!x.errorOccurred() && !y.errorOccurred()) {
269     return ::memcmp(&x_vec[0], &y_vec[0], num);
270   } else {
271     // return an arbitrary value
272     return -1;
273   }
274 }
275 
strcmp(const RangeCheckedBytePtr & x,const std::string & y)276 int strcmp(const RangeCheckedBytePtr &x, const std::string &y) {
277   std::vector<unsigned char> x_vec = x.extractBytes(0, y.length());
278 
279   if (!x.errorOccurred()) {
280     return ::memcmp(&x_vec[0], y.c_str(), y.length());
281   } else {
282     // return an arbitrary value
283     return -1;
284   }
285 }
286 
strlen(const RangeCheckedBytePtr & src)287 size_t strlen(const RangeCheckedBytePtr &src) {
288   size_t len = 0;
289   RangeCheckedBytePtr str = src;
290   while (!str.errorOccurred() && (str[0] != '\0')) {
291     str++;
292     len++;
293   }
294   return len;
295 }
296 
Get16s(const RangeCheckedBytePtr & input,const bool big_endian,MemoryStatus * status)297 int16 Get16s(const RangeCheckedBytePtr &input, const bool big_endian,
298              MemoryStatus *status) {
299   const uint16 unsigned_value = Get16u(input, big_endian, status);
300   if (*status != RANGE_CHECKED_BYTE_SUCCESS) {
301     // Return an arbitrary value.
302     return 0;
303   }
304 
305   // Convert the two's-complement signed integer encoded in 'unsigned_value'
306   // into a signed representation in the implementation's native representation
307   // for signed integers. An optimized Blaze build (x64) compiles all of the
308   // following code to a no-op (as of this writing).
309   // For further details, see the corresponding comment in Get32s().
310   if (unsigned_value == 0x8000u) {
311     return static_cast<int16>(-0x8000);
312   } else if (unsigned_value > 0x8000u) {
313     return -static_cast<int16>(0x10000u - unsigned_value);
314   } else {
315     return static_cast<int16>(unsigned_value);
316   }
317 }
318 
Get16u(const RangeCheckedBytePtr & input,const bool big_endian,MemoryStatus * status)319 uint16 Get16u(const RangeCheckedBytePtr &input, const bool big_endian,
320               MemoryStatus *status) {
321   if (input.remainingLength() < 2) {
322     if (status && *status == RANGE_CHECKED_BYTE_SUCCESS) {
323       *status = RANGE_CHECKED_BYTE_ERROR;
324     }
325     // Return an arbitrary value.
326     return 0;
327   }
328   if (big_endian) {
329     return (static_cast<uint16>(input[0]) << 8) | static_cast<uint16>(input[1]);
330   } else {
331     return (static_cast<uint16>(input[1]) << 8) | static_cast<uint16>(input[0]);
332   }
333 }
334 
Get32s(const RangeCheckedBytePtr & input,const bool big_endian,MemoryStatus * status)335 int32 Get32s(const RangeCheckedBytePtr &input, const bool big_endian,
336              MemoryStatus *status) {
337   const uint32 unsigned_value = Get32u(input, big_endian, status);
338   if (*status != RANGE_CHECKED_BYTE_SUCCESS) {
339     // Return an arbitrary value.
340     return 0;
341   }
342 
343   // Convert the two's-complement signed integer encoded in 'unsigned_value'
344   // into a signed representation in the implementation's native representation
345   // for signed integers.
346   // For all practical purposes, the same result could be obtained simply by
347   // casting unsigned_value to int32; the result of this is
348   // implementation-defined, but on all of the platforms we care about, it does
349   // what we want.
350   // The code below, however, arguably has the aesthetic advantage of being
351   // independent of the representation for signed integers chosen by the
352   // implementation, as long as 'int' and 'unsigned' have the required range to
353   // represent all of the required values.
354   // An optimized Blaze build (x64) compiles all of the following code to a
355   // no-op (as of this writing); i.e. the value that Get32u() returned in %eax
356   // is left unchanged.
357   if (unsigned_value == 0x80000000u) {
358     // Read here on why the constant expression is written this way:
359     // http://stackoverflow.com/questions/14695118
360     return -0x7fffffff - 1;
361   } else if (unsigned_value > 0x80000000u) {
362     // The expression
363     //   0xffffffffu - unsigned_value + 1
364     // is a portable way of flipping the sign of a twos-complement signed
365     // integer whose binary representation is stored in an unsigned integer.
366     // '0xffffffffu + 1' is used in preference to simply '0' because it makes
367     // it clearer that the correct result will be obtained even if an int is
368     // greater than 32 bits. The '0xffffffffu + 1' is "spread out" around
369     // 'unsigned_value' to prevent the compiler from warning about an
370     // integral constant overflow. ('0' would produce the correct result in
371     // this case too but would rely in a more subtle way on the rules for
372     // unsigned wraparound.)
373     return -static_cast<int32>(0xffffffffu - unsigned_value + 1);
374   } else {
375     return static_cast<int32>(unsigned_value);
376   }
377 }
378 
Get32u(const RangeCheckedBytePtr & input,const bool big_endian,MemoryStatus * status)379 uint32 Get32u(const RangeCheckedBytePtr &input, const bool big_endian,
380               MemoryStatus *status) {
381   if (input.remainingLength() < 4) {
382     if (status && *status == RANGE_CHECKED_BYTE_SUCCESS) {
383       *status = RANGE_CHECKED_BYTE_ERROR;
384     }
385     // Return an arbitrary value.
386     return 0;
387   }
388   if (big_endian) {
389     return (static_cast<uint32>(input[0]) << 24) |
390            (static_cast<uint32>(input[1]) << 16) |
391            (static_cast<uint32>(input[2]) << 8) |
392            (static_cast<uint32>(input[3]) << 0);
393   } else {
394     return (static_cast<uint32>(input[3]) << 24) |
395            (static_cast<uint32>(input[2]) << 16) |
396            (static_cast<uint32>(input[1]) << 8) |
397            (static_cast<uint32>(input[0]) << 0);
398   }
399 }
400 
401 }  // namespace binary_parse
402 }  // namespace piex
403