1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_RUNTIME_LEB128_H_
18 #define ART_RUNTIME_LEB128_H_
19 
20 #include <vector>
21 
22 #include "base/bit_utils.h"
23 #include "base/logging.h"
24 #include "globals.h"
25 
26 namespace art {
27 
28 // Reads an unsigned LEB128 value, updating the given pointer to point
29 // just past the end of the read value. This function tolerates
30 // non-zero high-order bits in the fifth encoded byte.
DecodeUnsignedLeb128(const uint8_t ** data)31 static inline uint32_t DecodeUnsignedLeb128(const uint8_t** data) {
32   const uint8_t* ptr = *data;
33   int result = *(ptr++);
34   if (UNLIKELY(result > 0x7f)) {
35     int cur = *(ptr++);
36     result = (result & 0x7f) | ((cur & 0x7f) << 7);
37     if (cur > 0x7f) {
38       cur = *(ptr++);
39       result |= (cur & 0x7f) << 14;
40       if (cur > 0x7f) {
41         cur = *(ptr++);
42         result |= (cur & 0x7f) << 21;
43         if (cur > 0x7f) {
44           // Note: We don't check to see if cur is out of range here,
45           // meaning we tolerate garbage in the four high-order bits.
46           cur = *(ptr++);
47           result |= cur << 28;
48         }
49       }
50     }
51   }
52   *data = ptr;
53   return static_cast<uint32_t>(result);
54 }
55 
DecodeUnsignedLeb128Checked(const uint8_t ** data,const void * end,uint32_t * out)56 static inline bool DecodeUnsignedLeb128Checked(const uint8_t** data,
57                                                const void* end,
58                                                uint32_t* out) {
59   const uint8_t* ptr = *data;
60   if (ptr >= end) {
61     return false;
62   }
63   int result = *(ptr++);
64   if (UNLIKELY(result > 0x7f)) {
65     if (ptr >= end) {
66       return false;
67     }
68     int cur = *(ptr++);
69     result = (result & 0x7f) | ((cur & 0x7f) << 7);
70     if (cur > 0x7f) {
71       if (ptr >= end) {
72         return false;
73       }
74       cur = *(ptr++);
75       result |= (cur & 0x7f) << 14;
76       if (cur > 0x7f) {
77         if (ptr >= end) {
78           return false;
79         }
80         cur = *(ptr++);
81         result |= (cur & 0x7f) << 21;
82         if (cur > 0x7f) {
83           if (ptr >= end) {
84             return false;
85           }
86           // Note: We don't check to see if cur is out of range here,
87           // meaning we tolerate garbage in the four high-order bits.
88           cur = *(ptr++);
89           result |= cur << 28;
90         }
91       }
92     }
93   }
94   *data = ptr;
95   *out = static_cast<uint32_t>(result);
96   return true;
97 }
98 
99 // Reads an unsigned LEB128 + 1 value. updating the given pointer to point
100 // just past the end of the read value. This function tolerates
101 // non-zero high-order bits in the fifth encoded byte.
102 // It is possible for this function to return -1.
DecodeUnsignedLeb128P1(const uint8_t ** data)103 static inline int32_t DecodeUnsignedLeb128P1(const uint8_t** data) {
104   return DecodeUnsignedLeb128(data) - 1;
105 }
106 
107 // Reads a signed LEB128 value, updating the given pointer to point
108 // just past the end of the read value. This function tolerates
109 // non-zero high-order bits in the fifth encoded byte.
DecodeSignedLeb128(const uint8_t ** data)110 static inline int32_t DecodeSignedLeb128(const uint8_t** data) {
111   const uint8_t* ptr = *data;
112   int32_t result = *(ptr++);
113   if (result <= 0x7f) {
114     result = (result << 25) >> 25;
115   } else {
116     int cur = *(ptr++);
117     result = (result & 0x7f) | ((cur & 0x7f) << 7);
118     if (cur <= 0x7f) {
119       result = (result << 18) >> 18;
120     } else {
121       cur = *(ptr++);
122       result |= (cur & 0x7f) << 14;
123       if (cur <= 0x7f) {
124         result = (result << 11) >> 11;
125       } else {
126         cur = *(ptr++);
127         result |= (cur & 0x7f) << 21;
128         if (cur <= 0x7f) {
129           result = (result << 4) >> 4;
130         } else {
131           // Note: We don't check to see if cur is out of range here,
132           // meaning we tolerate garbage in the four high-order bits.
133           cur = *(ptr++);
134           result |= cur << 28;
135         }
136       }
137     }
138   }
139   *data = ptr;
140   return result;
141 }
142 
DecodeSignedLeb128Checked(const uint8_t ** data,const void * end,int32_t * out)143 static inline bool DecodeSignedLeb128Checked(const uint8_t** data,
144                                              const void* end,
145                                              int32_t* out) {
146   const uint8_t* ptr = *data;
147   if (ptr >= end) {
148     return false;
149   }
150   int32_t result = *(ptr++);
151   if (result <= 0x7f) {
152     result = (result << 25) >> 25;
153   } else {
154     if (ptr >= end) {
155       return false;
156     }
157     int cur = *(ptr++);
158     result = (result & 0x7f) | ((cur & 0x7f) << 7);
159     if (cur <= 0x7f) {
160       result = (result << 18) >> 18;
161     } else {
162       if (ptr >= end) {
163         return false;
164       }
165       cur = *(ptr++);
166       result |= (cur & 0x7f) << 14;
167       if (cur <= 0x7f) {
168         result = (result << 11) >> 11;
169       } else {
170         if (ptr >= end) {
171           return false;
172         }
173         cur = *(ptr++);
174         result |= (cur & 0x7f) << 21;
175         if (cur <= 0x7f) {
176           result = (result << 4) >> 4;
177         } else {
178           if (ptr >= end) {
179             return false;
180           }
181           // Note: We don't check to see if cur is out of range here,
182           // meaning we tolerate garbage in the four high-order bits.
183           cur = *(ptr++);
184           result |= cur << 28;
185         }
186       }
187     }
188   }
189   *data = ptr;
190   *out = static_cast<uint32_t>(result);
191   return true;
192 }
193 
194 // Returns the number of bytes needed to encode the value in unsigned LEB128.
UnsignedLeb128Size(uint32_t data)195 static inline uint32_t UnsignedLeb128Size(uint32_t data) {
196   // bits_to_encode = (data != 0) ? 32 - CLZ(x) : 1  // 32 - CLZ(data | 1)
197   // bytes = ceil(bits_to_encode / 7.0);             // (6 + bits_to_encode) / 7
198   uint32_t x = 6 + 32 - CLZ(data | 1U);
199   // Division by 7 is done by (x * 37) >> 8 where 37 = ceil(256 / 7).
200   // This works for 0 <= x < 256 / (7 * 37 - 256), i.e. 0 <= x <= 85.
201   return (x * 37) >> 8;
202 }
203 
204 // Returns the number of bytes needed to encode the value in unsigned LEB128.
SignedLeb128Size(int32_t data)205 static inline uint32_t SignedLeb128Size(int32_t data) {
206   // Like UnsignedLeb128Size(), but we need one bit beyond the highest bit that differs from sign.
207   data = data ^ (data >> 31);
208   uint32_t x = 1 /* we need to encode the sign bit */ + 6 + 32 - CLZ(data | 1U);
209   return (x * 37) >> 8;
210 }
211 
EncodeUnsignedLeb128(uint8_t * dest,uint32_t value)212 static inline uint8_t* EncodeUnsignedLeb128(uint8_t* dest, uint32_t value) {
213   uint8_t out = value & 0x7f;
214   value >>= 7;
215   while (value != 0) {
216     *dest++ = out | 0x80;
217     out = value & 0x7f;
218     value >>= 7;
219   }
220   *dest++ = out;
221   return dest;
222 }
223 
224 template <typename Vector>
EncodeUnsignedLeb128(Vector * dest,uint32_t value)225 static inline void EncodeUnsignedLeb128(Vector* dest, uint32_t value) {
226   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
227   uint8_t out = value & 0x7f;
228   value >>= 7;
229   while (value != 0) {
230     dest->push_back(out | 0x80);
231     out = value & 0x7f;
232     value >>= 7;
233   }
234   dest->push_back(out);
235 }
236 
237 // Overwrite encoded Leb128 with a new value. The new value must be less than
238 // or equal to the old value to ensure that it fits the allocated space.
UpdateUnsignedLeb128(uint8_t * dest,uint32_t value)239 static inline void UpdateUnsignedLeb128(uint8_t* dest, uint32_t value) {
240   const uint8_t* old_end = dest;
241   uint32_t old_value = DecodeUnsignedLeb128(&old_end);
242   DCHECK_LE(value, old_value);
243   for (uint8_t* end = EncodeUnsignedLeb128(dest, value); end < old_end; end++) {
244     // Use longer encoding than necessary to fill the allocated space.
245     end[-1] |= 0x80;
246     end[0] = 0;
247   }
248 }
249 
EncodeSignedLeb128(uint8_t * dest,int32_t value)250 static inline uint8_t* EncodeSignedLeb128(uint8_t* dest, int32_t value) {
251   uint32_t extra_bits = static_cast<uint32_t>(value ^ (value >> 31)) >> 6;
252   uint8_t out = value & 0x7f;
253   while (extra_bits != 0u) {
254     *dest++ = out | 0x80;
255     value >>= 7;
256     out = value & 0x7f;
257     extra_bits >>= 7;
258   }
259   *dest++ = out;
260   return dest;
261 }
262 
263 template<typename Vector>
EncodeSignedLeb128(Vector * dest,int32_t value)264 static inline void EncodeSignedLeb128(Vector* dest, int32_t value) {
265   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
266   uint32_t extra_bits = static_cast<uint32_t>(value ^ (value >> 31)) >> 6;
267   uint8_t out = value & 0x7f;
268   while (extra_bits != 0u) {
269     dest->push_back(out | 0x80);
270     value >>= 7;
271     out = value & 0x7f;
272     extra_bits >>= 7;
273   }
274   dest->push_back(out);
275 }
276 
277 // An encoder that pushes int32_t/uint32_t data onto the given std::vector.
278 template <typename Vector = std::vector<uint8_t>>
279 class Leb128Encoder {
280   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
281 
282  public:
Leb128Encoder(Vector * data)283   explicit Leb128Encoder(Vector* data) : data_(data) {
284     DCHECK(data != nullptr);
285   }
286 
Reserve(uint32_t size)287   void Reserve(uint32_t size) {
288     data_->reserve(size);
289   }
290 
PushBackUnsigned(uint32_t value)291   void PushBackUnsigned(uint32_t value) {
292     EncodeUnsignedLeb128(data_, value);
293   }
294 
295   template<typename It>
InsertBackUnsigned(It cur,It end)296   void InsertBackUnsigned(It cur, It end) {
297     for (; cur != end; ++cur) {
298       PushBackUnsigned(*cur);
299     }
300   }
301 
PushBackSigned(int32_t value)302   void PushBackSigned(int32_t value) {
303     EncodeSignedLeb128(data_, value);
304   }
305 
306   template<typename It>
InsertBackSigned(It cur,It end)307   void InsertBackSigned(It cur, It end) {
308     for (; cur != end; ++cur) {
309       PushBackSigned(*cur);
310     }
311   }
312 
GetData()313   const Vector& GetData() const {
314     return *data_;
315   }
316 
317  protected:
318   Vector* const data_;
319 
320  private:
321   DISALLOW_COPY_AND_ASSIGN(Leb128Encoder);
322 };
323 
324 // An encoder with an API similar to vector<uint32_t> where the data is captured in ULEB128 format.
325 template <typename Vector = std::vector<uint8_t>>
326 class Leb128EncodingVector FINAL : private Vector,
327                                    public Leb128Encoder<Vector> {
328   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
329 
330  public:
Leb128EncodingVector()331   Leb128EncodingVector() : Leb128Encoder<Vector>(this) { }
332 
Leb128EncodingVector(const typename Vector::allocator_type & alloc)333   explicit Leb128EncodingVector(const typename Vector::allocator_type& alloc)
334     : Vector(alloc),
335       Leb128Encoder<Vector>(this) { }
336 
337  private:
338   DISALLOW_COPY_AND_ASSIGN(Leb128EncodingVector);
339 };
340 
341 }  // namespace art
342 
343 #endif  // ART_RUNTIME_LEB128_H_
344