1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_RUNTIME_LEB128_H_
18 #define ART_RUNTIME_LEB128_H_
19 
20 #include <vector>
21 
22 #include "base/bit_utils.h"
23 #include "base/logging.h"
24 #include "globals.h"
25 
26 namespace art {
27 
28 // Reads an unsigned LEB128 value, updating the given pointer to point
29 // just past the end of the read value. This function tolerates
30 // non-zero high-order bits in the fifth encoded byte.
DecodeUnsignedLeb128(const uint8_t ** data)31 static inline uint32_t DecodeUnsignedLeb128(const uint8_t** data) {
32   const uint8_t* ptr = *data;
33   int result = *(ptr++);
34   if (UNLIKELY(result > 0x7f)) {
35     int cur = *(ptr++);
36     result = (result & 0x7f) | ((cur & 0x7f) << 7);
37     if (cur > 0x7f) {
38       cur = *(ptr++);
39       result |= (cur & 0x7f) << 14;
40       if (cur > 0x7f) {
41         cur = *(ptr++);
42         result |= (cur & 0x7f) << 21;
43         if (cur > 0x7f) {
44           // Note: We don't check to see if cur is out of range here,
45           // meaning we tolerate garbage in the four high-order bits.
46           cur = *(ptr++);
47           result |= cur << 28;
48         }
49       }
50     }
51   }
52   *data = ptr;
53   return static_cast<uint32_t>(result);
54 }
55 
56 // Reads an unsigned LEB128 + 1 value. updating the given pointer to point
57 // just past the end of the read value. This function tolerates
58 // non-zero high-order bits in the fifth encoded byte.
59 // It is possible for this function to return -1.
DecodeUnsignedLeb128P1(const uint8_t ** data)60 static inline int32_t DecodeUnsignedLeb128P1(const uint8_t** data) {
61   return DecodeUnsignedLeb128(data) - 1;
62 }
63 
64 // Reads a signed LEB128 value, updating the given pointer to point
65 // just past the end of the read value. This function tolerates
66 // non-zero high-order bits in the fifth encoded byte.
DecodeSignedLeb128(const uint8_t ** data)67 static inline int32_t DecodeSignedLeb128(const uint8_t** data) {
68   const uint8_t* ptr = *data;
69   int32_t result = *(ptr++);
70   if (result <= 0x7f) {
71     result = (result << 25) >> 25;
72   } else {
73     int cur = *(ptr++);
74     result = (result & 0x7f) | ((cur & 0x7f) << 7);
75     if (cur <= 0x7f) {
76       result = (result << 18) >> 18;
77     } else {
78       cur = *(ptr++);
79       result |= (cur & 0x7f) << 14;
80       if (cur <= 0x7f) {
81         result = (result << 11) >> 11;
82       } else {
83         cur = *(ptr++);
84         result |= (cur & 0x7f) << 21;
85         if (cur <= 0x7f) {
86           result = (result << 4) >> 4;
87         } else {
88           // Note: We don't check to see if cur is out of range here,
89           // meaning we tolerate garbage in the four high-order bits.
90           cur = *(ptr++);
91           result |= cur << 28;
92         }
93       }
94     }
95   }
96   *data = ptr;
97   return result;
98 }
99 
100 // Returns the number of bytes needed to encode the value in unsigned LEB128.
UnsignedLeb128Size(uint32_t data)101 static inline uint32_t UnsignedLeb128Size(uint32_t data) {
102   // bits_to_encode = (data != 0) ? 32 - CLZ(x) : 1  // 32 - CLZ(data | 1)
103   // bytes = ceil(bits_to_encode / 7.0);             // (6 + bits_to_encode) / 7
104   uint32_t x = 6 + 32 - CLZ(data | 1U);
105   // Division by 7 is done by (x * 37) >> 8 where 37 = ceil(256 / 7).
106   // This works for 0 <= x < 256 / (7 * 37 - 256), i.e. 0 <= x <= 85.
107   return (x * 37) >> 8;
108 }
109 
110 // Returns the number of bytes needed to encode the value in unsigned LEB128.
SignedLeb128Size(int32_t data)111 static inline uint32_t SignedLeb128Size(int32_t data) {
112   // Like UnsignedLeb128Size(), but we need one bit beyond the highest bit that differs from sign.
113   data = data ^ (data >> 31);
114   uint32_t x = 1 /* we need to encode the sign bit */ + 6 + 32 - CLZ(data | 1U);
115   return (x * 37) >> 8;
116 }
117 
EncodeUnsignedLeb128(uint8_t * dest,uint32_t value)118 static inline uint8_t* EncodeUnsignedLeb128(uint8_t* dest, uint32_t value) {
119   uint8_t out = value & 0x7f;
120   value >>= 7;
121   while (value != 0) {
122     *dest++ = out | 0x80;
123     out = value & 0x7f;
124     value >>= 7;
125   }
126   *dest++ = out;
127   return dest;
128 }
129 
130 template <typename Vector>
EncodeUnsignedLeb128(Vector * dest,uint32_t value)131 static inline void EncodeUnsignedLeb128(Vector* dest, uint32_t value) {
132   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
133   uint8_t out = value & 0x7f;
134   value >>= 7;
135   while (value != 0) {
136     dest->push_back(out | 0x80);
137     out = value & 0x7f;
138     value >>= 7;
139   }
140   dest->push_back(out);
141 }
142 
143 // Overwrite encoded Leb128 with a new value. The new value must be less than
144 // or equal to the old value to ensure that it fits the allocated space.
UpdateUnsignedLeb128(uint8_t * dest,uint32_t value)145 static inline void UpdateUnsignedLeb128(uint8_t* dest, uint32_t value) {
146   const uint8_t* old_end = dest;
147   uint32_t old_value = DecodeUnsignedLeb128(&old_end);
148   DCHECK_LE(value, old_value);
149   for (uint8_t* end = EncodeUnsignedLeb128(dest, value); end < old_end; end++) {
150     // Use longer encoding than necessary to fill the allocated space.
151     end[-1] |= 0x80;
152     end[0] = 0;
153   }
154 }
155 
EncodeSignedLeb128(uint8_t * dest,int32_t value)156 static inline uint8_t* EncodeSignedLeb128(uint8_t* dest, int32_t value) {
157   uint32_t extra_bits = static_cast<uint32_t>(value ^ (value >> 31)) >> 6;
158   uint8_t out = value & 0x7f;
159   while (extra_bits != 0u) {
160     *dest++ = out | 0x80;
161     value >>= 7;
162     out = value & 0x7f;
163     extra_bits >>= 7;
164   }
165   *dest++ = out;
166   return dest;
167 }
168 
169 template<typename Vector>
EncodeSignedLeb128(Vector * dest,int32_t value)170 static inline void EncodeSignedLeb128(Vector* dest, int32_t value) {
171   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
172   uint32_t extra_bits = static_cast<uint32_t>(value ^ (value >> 31)) >> 6;
173   uint8_t out = value & 0x7f;
174   while (extra_bits != 0u) {
175     dest->push_back(out | 0x80);
176     value >>= 7;
177     out = value & 0x7f;
178     extra_bits >>= 7;
179   }
180   dest->push_back(out);
181 }
182 
183 // An encoder that pushes int32_t/uint32_t data onto the given std::vector.
184 template <typename Vector = std::vector<uint8_t>>
185 class Leb128Encoder {
186   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
187 
188  public:
Leb128Encoder(Vector * data)189   explicit Leb128Encoder(Vector* data) : data_(data) {
190     DCHECK(data != nullptr);
191   }
192 
Reserve(uint32_t size)193   void Reserve(uint32_t size) {
194     data_->reserve(size);
195   }
196 
PushBackUnsigned(uint32_t value)197   void PushBackUnsigned(uint32_t value) {
198     EncodeUnsignedLeb128(data_, value);
199   }
200 
201   template<typename It>
InsertBackUnsigned(It cur,It end)202   void InsertBackUnsigned(It cur, It end) {
203     for (; cur != end; ++cur) {
204       PushBackUnsigned(*cur);
205     }
206   }
207 
PushBackSigned(int32_t value)208   void PushBackSigned(int32_t value) {
209     EncodeSignedLeb128(data_, value);
210   }
211 
212   template<typename It>
InsertBackSigned(It cur,It end)213   void InsertBackSigned(It cur, It end) {
214     for (; cur != end; ++cur) {
215       PushBackSigned(*cur);
216     }
217   }
218 
GetData()219   const Vector& GetData() const {
220     return *data_;
221   }
222 
223  protected:
224   Vector* const data_;
225 
226  private:
227   DISALLOW_COPY_AND_ASSIGN(Leb128Encoder);
228 };
229 
230 // An encoder with an API similar to vector<uint32_t> where the data is captured in ULEB128 format.
231 template <typename Vector = std::vector<uint8_t>>
232 class Leb128EncodingVector FINAL : private Vector,
233                                    public Leb128Encoder<Vector> {
234   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
235 
236  public:
Leb128EncodingVector()237   Leb128EncodingVector() : Leb128Encoder<Vector>(this) { }
238 
Leb128EncodingVector(const typename Vector::allocator_type & alloc)239   explicit Leb128EncodingVector(const typename Vector::allocator_type& alloc)
240     : Vector(alloc),
241       Leb128Encoder<Vector>(this) { }
242 
243  private:
244   DISALLOW_COPY_AND_ASSIGN(Leb128EncodingVector);
245 };
246 
247 }  // namespace art
248 
249 #endif  // ART_RUNTIME_LEB128_H_
250