1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_LIBARTBASE_BASE_LEB128_H_
18 #define ART_LIBARTBASE_BASE_LEB128_H_
19 
20 #include <vector>
21 
22 #include <android-base/logging.h>
23 
24 #include "bit_utils.h"
25 #include "globals.h"
26 #include "macros.h"
27 
28 namespace art {
29 
30 // Reads an unsigned LEB128 value, updating the given pointer to point
31 // just past the end of the read value. This function tolerates
32 // non-zero high-order bits in the fifth encoded byte.
DecodeUnsignedLeb128(const uint8_t ** data)33 static inline uint32_t DecodeUnsignedLeb128(const uint8_t** data) {
34   const uint8_t* ptr = *data;
35   int result = *(ptr++);
36   if (UNLIKELY(result > 0x7f)) {
37     int cur = *(ptr++);
38     result = (result & 0x7f) | ((cur & 0x7f) << 7);
39     if (cur > 0x7f) {
40       cur = *(ptr++);
41       result |= (cur & 0x7f) << 14;
42       if (cur > 0x7f) {
43         cur = *(ptr++);
44         result |= (cur & 0x7f) << 21;
45         if (cur > 0x7f) {
46           // Note: We don't check to see if cur is out of range here,
47           // meaning we tolerate garbage in the four high-order bits.
48           cur = *(ptr++);
49           result |= cur << 28;
50         }
51       }
52     }
53   }
54   *data = ptr;
55   return static_cast<uint32_t>(result);
56 }
57 
DecodeUnsignedLeb128WithoutMovingCursor(const uint8_t * data)58 static inline uint32_t DecodeUnsignedLeb128WithoutMovingCursor(const uint8_t* data) {
59   return DecodeUnsignedLeb128(&data);
60 }
61 
DecodeUnsignedLeb128Checked(const uint8_t ** data,const void * end,uint32_t * out)62 static inline bool DecodeUnsignedLeb128Checked(const uint8_t** data,
63                                                const void* end,
64                                                uint32_t* out) {
65   const uint8_t* ptr = *data;
66   if (ptr >= end) {
67     return false;
68   }
69   int result = *(ptr++);
70   if (UNLIKELY(result > 0x7f)) {
71     if (ptr >= end) {
72       return false;
73     }
74     int cur = *(ptr++);
75     result = (result & 0x7f) | ((cur & 0x7f) << 7);
76     if (cur > 0x7f) {
77       if (ptr >= end) {
78         return false;
79       }
80       cur = *(ptr++);
81       result |= (cur & 0x7f) << 14;
82       if (cur > 0x7f) {
83         if (ptr >= end) {
84           return false;
85         }
86         cur = *(ptr++);
87         result |= (cur & 0x7f) << 21;
88         if (cur > 0x7f) {
89           if (ptr >= end) {
90             return false;
91           }
92           // Note: We don't check to see if cur is out of range here,
93           // meaning we tolerate garbage in the four high-order bits.
94           cur = *(ptr++);
95           result |= cur << 28;
96         }
97       }
98     }
99   }
100   *data = ptr;
101   *out = static_cast<uint32_t>(result);
102   return true;
103 }
104 
105 // Reads an unsigned LEB128 + 1 value. updating the given pointer to point
106 // just past the end of the read value. This function tolerates
107 // non-zero high-order bits in the fifth encoded byte.
108 // It is possible for this function to return -1.
DecodeUnsignedLeb128P1(const uint8_t ** data)109 static inline int32_t DecodeUnsignedLeb128P1(const uint8_t** data) {
110   return DecodeUnsignedLeb128(data) - 1;
111 }
112 
113 // Reads a signed LEB128 value, updating the given pointer to point
114 // just past the end of the read value. This function tolerates
115 // non-zero high-order bits in the fifth encoded byte.
DecodeSignedLeb128(const uint8_t ** data)116 static inline int32_t DecodeSignedLeb128(const uint8_t** data) {
117   const uint8_t* ptr = *data;
118   int32_t result = *(ptr++);
119   if (result <= 0x7f) {
120     result = (result << 25) >> 25;
121   } else {
122     int cur = *(ptr++);
123     result = (result & 0x7f) | ((cur & 0x7f) << 7);
124     if (cur <= 0x7f) {
125       result = (result << 18) >> 18;
126     } else {
127       cur = *(ptr++);
128       result |= (cur & 0x7f) << 14;
129       if (cur <= 0x7f) {
130         result = (result << 11) >> 11;
131       } else {
132         cur = *(ptr++);
133         result |= (cur & 0x7f) << 21;
134         if (cur <= 0x7f) {
135           result = (result << 4) >> 4;
136         } else {
137           // Note: We don't check to see if cur is out of range here,
138           // meaning we tolerate garbage in the four high-order bits.
139           cur = *(ptr++);
140           result |= cur << 28;
141         }
142       }
143     }
144   }
145   *data = ptr;
146   return result;
147 }
148 
DecodeSignedLeb128Checked(const uint8_t ** data,const void * end,int32_t * out)149 static inline bool DecodeSignedLeb128Checked(const uint8_t** data,
150                                              const void* end,
151                                              int32_t* out) {
152   const uint8_t* ptr = *data;
153   if (ptr >= end) {
154     return false;
155   }
156   int32_t result = *(ptr++);
157   if (result <= 0x7f) {
158     result = (result << 25) >> 25;
159   } else {
160     if (ptr >= end) {
161       return false;
162     }
163     int cur = *(ptr++);
164     result = (result & 0x7f) | ((cur & 0x7f) << 7);
165     if (cur <= 0x7f) {
166       result = (result << 18) >> 18;
167     } else {
168       if (ptr >= end) {
169         return false;
170       }
171       cur = *(ptr++);
172       result |= (cur & 0x7f) << 14;
173       if (cur <= 0x7f) {
174         result = (result << 11) >> 11;
175       } else {
176         if (ptr >= end) {
177           return false;
178         }
179         cur = *(ptr++);
180         result |= (cur & 0x7f) << 21;
181         if (cur <= 0x7f) {
182           result = (result << 4) >> 4;
183         } else {
184           if (ptr >= end) {
185             return false;
186           }
187           // Note: We don't check to see if cur is out of range here,
188           // meaning we tolerate garbage in the four high-order bits.
189           cur = *(ptr++);
190           result |= cur << 28;
191         }
192       }
193     }
194   }
195   *data = ptr;
196   *out = static_cast<uint32_t>(result);
197   return true;
198 }
199 
200 // Returns the number of bytes needed to encode the value in unsigned LEB128.
UnsignedLeb128Size(uint32_t data)201 static inline uint32_t UnsignedLeb128Size(uint32_t data) {
202   // bits_to_encode = (data != 0) ? 32 - CLZ(x) : 1  // 32 - CLZ(data | 1)
203   // bytes = ceil(bits_to_encode / 7.0);             // (6 + bits_to_encode) / 7
204   uint32_t x = 6 + 32 - CLZ(data | 1U);
205   // Division by 7 is done by (x * 37) >> 8 where 37 = ceil(256 / 7).
206   // This works for 0 <= x < 256 / (7 * 37 - 256), i.e. 0 <= x <= 85.
207   return (x * 37) >> 8;
208 }
209 
IsLeb128Terminator(const uint8_t * ptr)210 static inline bool IsLeb128Terminator(const uint8_t* ptr) {
211   return *ptr <= 0x7f;
212 }
213 
214 // Returns the first byte of a Leb128 value assuming that:
215 // (1) `end_ptr` points to the first byte after the Leb128 value, and
216 // (2) there is another Leb128 value before this one.
217 template <typename T>
ReverseSearchUnsignedLeb128(T * end_ptr)218 static inline T* ReverseSearchUnsignedLeb128(T* end_ptr) {
219   static_assert(std::is_same<typename std::remove_const<T>::type, uint8_t>::value,
220                 "T must be a uint8_t");
221   T* ptr = end_ptr;
222 
223   // Move one byte back, check that this is the terminating byte.
224   ptr--;
225   DCHECK(IsLeb128Terminator(ptr));
226 
227   // Keep moving back while the previous byte is not a terminating byte.
228   // Fail after reading five bytes in case there isn't another Leb128 value
229   // before this one.
230   while (!IsLeb128Terminator(ptr - 1)) {
231     ptr--;
232     DCHECK_LE(static_cast<ptrdiff_t>(end_ptr - ptr), 5);
233   }
234 
235   return ptr;
236 }
237 
238 // Returns the number of bytes needed to encode the value in unsigned LEB128.
SignedLeb128Size(int32_t data)239 static inline uint32_t SignedLeb128Size(int32_t data) {
240   // Like UnsignedLeb128Size(), but we need one bit beyond the highest bit that differs from sign.
241   data = data ^ (data >> 31);
242   uint32_t x = 1 /* we need to encode the sign bit */ + 6 + 32 - CLZ(data | 1U);
243   return (x * 37) >> 8;
244 }
245 
EncodeUnsignedLeb128(uint8_t * dest,uint32_t value)246 static inline uint8_t* EncodeUnsignedLeb128(uint8_t* dest, uint32_t value) {
247   uint8_t out = value & 0x7f;
248   value >>= 7;
249   while (value != 0) {
250     *dest++ = out | 0x80;
251     out = value & 0x7f;
252     value >>= 7;
253   }
254   *dest++ = out;
255   return dest;
256 }
257 
258 template <typename Vector>
EncodeUnsignedLeb128(Vector * dest,uint32_t value)259 static inline void EncodeUnsignedLeb128(Vector* dest, uint32_t value) {
260   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
261   uint8_t out = value & 0x7f;
262   value >>= 7;
263   while (value != 0) {
264     dest->push_back(out | 0x80);
265     out = value & 0x7f;
266     value >>= 7;
267   }
268   dest->push_back(out);
269 }
270 
271 // Overwrite encoded Leb128 with a new value. The new value must be less than
272 // or equal to the old value to ensure that it fits the allocated space.
UpdateUnsignedLeb128(uint8_t * dest,uint32_t value)273 static inline void UpdateUnsignedLeb128(uint8_t* dest, uint32_t value) {
274   const uint8_t* old_end = dest;
275   uint32_t old_value = DecodeUnsignedLeb128(&old_end);
276   DCHECK_LE(UnsignedLeb128Size(value), UnsignedLeb128Size(old_value));
277   for (uint8_t* end = EncodeUnsignedLeb128(dest, value); end < old_end; end++) {
278     // Use longer encoding than necessary to fill the allocated space.
279     end[-1] |= 0x80;
280     end[0] = 0;
281   }
282 }
283 
EncodeSignedLeb128(uint8_t * dest,int32_t value)284 static inline uint8_t* EncodeSignedLeb128(uint8_t* dest, int32_t value) {
285   uint32_t extra_bits = static_cast<uint32_t>(value ^ (value >> 31)) >> 6;
286   uint8_t out = value & 0x7f;
287   while (extra_bits != 0u) {
288     *dest++ = out | 0x80;
289     value >>= 7;
290     out = value & 0x7f;
291     extra_bits >>= 7;
292   }
293   *dest++ = out;
294   return dest;
295 }
296 
297 template<typename Vector>
EncodeSignedLeb128(Vector * dest,int32_t value)298 static inline void EncodeSignedLeb128(Vector* dest, int32_t value) {
299   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
300   uint32_t extra_bits = static_cast<uint32_t>(value ^ (value >> 31)) >> 6;
301   uint8_t out = value & 0x7f;
302   while (extra_bits != 0u) {
303     dest->push_back(out | 0x80);
304     value >>= 7;
305     out = value & 0x7f;
306     extra_bits >>= 7;
307   }
308   dest->push_back(out);
309 }
310 
311 // An encoder that pushes int32_t/uint32_t data onto the given std::vector.
312 template <typename Vector = std::vector<uint8_t>>
313 class Leb128Encoder {
314   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
315 
316  public:
Leb128Encoder(Vector * data)317   explicit Leb128Encoder(Vector* data) : data_(data) {
318     DCHECK(data != nullptr);
319   }
320 
Reserve(uint32_t size)321   void Reserve(uint32_t size) {
322     data_->reserve(size);
323   }
324 
PushBackUnsigned(uint32_t value)325   void PushBackUnsigned(uint32_t value) {
326     EncodeUnsignedLeb128(data_, value);
327   }
328 
329   template<typename It>
InsertBackUnsigned(It cur,It end)330   void InsertBackUnsigned(It cur, It end) {
331     for (; cur != end; ++cur) {
332       PushBackUnsigned(*cur);
333     }
334   }
335 
PushBackSigned(int32_t value)336   void PushBackSigned(int32_t value) {
337     EncodeSignedLeb128(data_, value);
338   }
339 
340   template<typename It>
InsertBackSigned(It cur,It end)341   void InsertBackSigned(It cur, It end) {
342     for (; cur != end; ++cur) {
343       PushBackSigned(*cur);
344     }
345   }
346 
GetData()347   const Vector& GetData() const {
348     return *data_;
349   }
350 
351  protected:
352   Vector* const data_;
353 
354  private:
355   DISALLOW_COPY_AND_ASSIGN(Leb128Encoder);
356 };
357 
358 // An encoder with an API similar to vector<uint32_t> where the data is captured in ULEB128 format.
359 template <typename Vector = std::vector<uint8_t>>
360 class Leb128EncodingVector final : private Vector,
361                                    public Leb128Encoder<Vector> {
362   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
363 
364  public:
Leb128EncodingVector()365   Leb128EncodingVector() : Leb128Encoder<Vector>(this) { }
366 
Leb128EncodingVector(const typename Vector::allocator_type & alloc)367   explicit Leb128EncodingVector(const typename Vector::allocator_type& alloc)
368     : Vector(alloc),
369       Leb128Encoder<Vector>(this) { }
370 
371  private:
372   DISALLOW_COPY_AND_ASSIGN(Leb128EncodingVector);
373 };
374 
375 }  // namespace art
376 
377 #endif  // ART_LIBARTBASE_BASE_LEB128_H_
378