1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_LIBARTBASE_BASE_BIT_MEMORY_REGION_H_
18 #define ART_LIBARTBASE_BASE_BIT_MEMORY_REGION_H_
19 
20 #include "memory_region.h"
21 
22 #include "bit_utils.h"
23 #include "memory_tool.h"
24 
25 #include <array>
26 
27 namespace art {
28 
// Bit memory region is a bit-offset subregion of a normal MemoryRegion. This is useful for
// abstracting away the bit start offset to avoid needing to pass it as an argument everywhere.
31 class BitMemoryRegion final : public ValueObject {
32  public:
33   struct Less {
operatorLess34     bool operator()(const BitMemoryRegion& lhs, const BitMemoryRegion& rhs) const {
35       return Compare(lhs, rhs) < 0;
36     }
37   };
38 
39   BitMemoryRegion() = default;
BitMemoryRegion(uint8_t * data,ssize_t bit_start,size_t bit_size)40   ALWAYS_INLINE BitMemoryRegion(uint8_t* data, ssize_t bit_start, size_t bit_size) {
41     // Normalize the data pointer. Note that bit_start may be negative.
42     data_ = AlignDown(data + (bit_start >> kBitsPerByteLog2), kPageSize);
43     bit_start_ = bit_start + kBitsPerByte * (data - data_);
44     bit_size_ = bit_size;
45   }
BitMemoryRegion(MemoryRegion region)46   ALWAYS_INLINE explicit BitMemoryRegion(MemoryRegion region)
47     : BitMemoryRegion(region.begin(), /* bit_start */ 0, region.size_in_bits()) {
48   }
BitMemoryRegion(MemoryRegion region,size_t bit_offset,size_t bit_length)49   ALWAYS_INLINE BitMemoryRegion(MemoryRegion region, size_t bit_offset, size_t bit_length)
50     : BitMemoryRegion(region) {
51     *this = Subregion(bit_offset, bit_length);
52   }
53 
IsValid()54   ALWAYS_INLINE bool IsValid() const { return data_ != nullptr; }
55 
data()56   const uint8_t* data() const {
57     DCHECK_ALIGNED(bit_start_, kBitsPerByte);
58     return data_ + bit_start_ / kBitsPerByte;
59   }
60 
size_in_bits()61   size_t size_in_bits() const {
62     return bit_size_;
63   }
64 
Resize(size_t bit_size)65   void Resize(size_t bit_size) {
66     bit_size_ = bit_size;
67   }
68 
Subregion(size_t bit_offset,size_t bit_length)69   ALWAYS_INLINE BitMemoryRegion Subregion(size_t bit_offset, size_t bit_length) const {
70     DCHECK_LE(bit_offset, bit_size_);
71     DCHECK_LE(bit_length, bit_size_ - bit_offset);
72     BitMemoryRegion result = *this;
73     result.bit_start_ += bit_offset;
74     result.bit_size_ = bit_length;
75     return result;
76   }
77 
Subregion(size_t bit_offset)78   ALWAYS_INLINE BitMemoryRegion Subregion(size_t bit_offset) const {
79     DCHECK_LE(bit_offset, bit_size_);
80     BitMemoryRegion result = *this;
81     result.bit_start_ += bit_offset;
82     result.bit_size_ -= bit_offset;
83     return result;
84   }
85 
86   // Load a single bit in the region. The bit at offset 0 is the least
87   // significant bit in the first byte.
LoadBit(size_t bit_offset)88   ALWAYS_INLINE bool LoadBit(size_t bit_offset) const {
89     DCHECK_LT(bit_offset, bit_size_);
90     size_t index = (bit_start_ + bit_offset) / kBitsPerByte;
91     size_t shift = (bit_start_ + bit_offset) % kBitsPerByte;
92     return ((data_[index] >> shift) & 1) != 0;
93   }
94 
StoreBit(size_t bit_offset,bool value)95   ALWAYS_INLINE void StoreBit(size_t bit_offset, bool value) {
96     DCHECK_LT(bit_offset, bit_size_);
97     size_t index = (bit_start_ + bit_offset) / kBitsPerByte;
98     size_t shift = (bit_start_ + bit_offset) % kBitsPerByte;
99     data_[index] &= ~(1 << shift);  // Clear bit.
100     data_[index] |= (value ? 1 : 0) << shift;  // Set bit.
101     DCHECK_EQ(value, LoadBit(bit_offset));
102   }
103 
104   // Load `bit_length` bits from `data` starting at given `bit_offset`.
105   // The least significant bit is stored in the smallest memory offset.
106   template<typename Result = size_t>
107   ATTRIBUTE_NO_SANITIZE_ADDRESS  // We might touch extra bytes due to the alignment.
108   ATTRIBUTE_NO_SANITIZE_HWADDRESS  // The hwasan uses different attribute.
LoadBits(size_t bit_offset,size_t bit_length)109   ALWAYS_INLINE Result LoadBits(size_t bit_offset, size_t bit_length) const {
110     static_assert(std::is_integral<Result>::value, "Result must be integral");
111     static_assert(std::is_unsigned<Result>::value, "Result must be unsigned");
112     DCHECK(IsAligned<sizeof(Result)>(data_));
113     DCHECK_LE(bit_offset, bit_size_);
114     DCHECK_LE(bit_length, bit_size_ - bit_offset);
115     DCHECK_LE(bit_length, BitSizeOf<Result>());
116     if (bit_length == 0) {
117       return 0;
118     }
119     // Load naturally-aligned value which contains the least significant bit.
120     Result* data = reinterpret_cast<Result*>(data_);
121     size_t width = BitSizeOf<Result>();
122     size_t index = (bit_start_ + bit_offset) / width;
123     size_t shift = (bit_start_ + bit_offset) % width;
124     Result value = data[index] >> shift;
125     // Load extra value containing the most significant bit (it might be the same one).
126     // We can not just load the following value as that could potentially cause SIGSEGV.
127     Result extra = data[index + (shift + (bit_length - 1)) / width];
128     // Mask to clear unwanted bits (the 1s are needed to avoid avoid undefined shift).
129     Result clear = (std::numeric_limits<Result>::max() << 1) << (bit_length - 1);
130     // Prepend the extra value.  We add explicit '& (width - 1)' so that the shift is defined.
131     // It is a no-op for `shift != 0` and if `shift == 0` then `value == extra` because of
132     // bit_length <= width causing the `value` and `extra` to be read from the same location.
133     // The '& (width - 1)' is implied by the shift instruction on ARM and removed by compiler.
134     return (value | (extra << ((width - shift) & (width - 1)))) & ~clear;
135   }
136 
137   // Store `bit_length` bits in `data` starting at given `bit_offset`.
138   // The least significant bit is stored in the smallest memory offset.
StoreBits(size_t bit_offset,uint32_t value,size_t bit_length)139   ALWAYS_INLINE void StoreBits(size_t bit_offset, uint32_t value, size_t bit_length) {
140     DCHECK_LE(bit_offset, bit_size_);
141     DCHECK_LE(bit_length, bit_size_ - bit_offset);
142     DCHECK_LE(bit_length, BitSizeOf<uint32_t>());
143     DCHECK_LE(value, MaxInt<uint32_t>(bit_length));
144     if (bit_length == 0) {
145       return;
146     }
147     // Write data byte by byte to avoid races with other threads
148     // on bytes that do not overlap with this region.
149     uint32_t mask = std::numeric_limits<uint32_t>::max() >> (BitSizeOf<uint32_t>() - bit_length);
150     size_t index = (bit_start_ + bit_offset) / kBitsPerByte;
151     size_t shift = (bit_start_ + bit_offset) % kBitsPerByte;
152     data_[index] &= ~(mask << shift);  // Clear bits.
153     data_[index] |= (value << shift);  // Set bits.
154     size_t finished_bits = kBitsPerByte - shift;
155     for (int i = 1; finished_bits < bit_length; i++, finished_bits += kBitsPerByte) {
156       data_[index + i] &= ~(mask >> finished_bits);  // Clear bits.
157       data_[index + i] |= (value >> finished_bits);  // Set bits.
158     }
159     DCHECK_EQ(value, LoadBits(bit_offset, bit_length));
160   }
161 
162   // Store bits from other bit region.
StoreBits(size_t bit_offset,const BitMemoryRegion & src,size_t bit_length)163   ALWAYS_INLINE void StoreBits(size_t bit_offset, const BitMemoryRegion& src, size_t bit_length) {
164     DCHECK_LE(bit_offset, bit_size_);
165     DCHECK_LE(bit_length, bit_size_ - bit_offset);
166     size_t bit = 0;
167     constexpr size_t kNumBits = BitSizeOf<uint32_t>();
168     for (; bit + kNumBits <= bit_length; bit += kNumBits) {
169       StoreBits(bit_offset + bit, src.LoadBits(bit, kNumBits), kNumBits);
170     }
171     size_t num_bits = bit_length - bit;
172     StoreBits(bit_offset + bit, src.LoadBits(bit, num_bits), num_bits);
173   }
174 
175   // Or bits from other bit region.
OrBits(size_t bit_offset,const BitMemoryRegion & src,size_t bit_length)176   ALWAYS_INLINE void OrBits(size_t bit_offset, const BitMemoryRegion& src, size_t bit_length) {
177     // TODO: Load `size_t` chunks (instead of `uint32_t`) from aligned
178     // addresses except for the leading and trailing bits. Refactor to
179     // share code with StoreBits() and maybe other functions.
180     DCHECK_LE(bit_offset, bit_size_);
181     DCHECK_LE(bit_length, bit_size_ - bit_offset);
182     size_t bit = 0;
183     constexpr size_t kNumBits = BitSizeOf<uint32_t>();
184     for (; bit + kNumBits <= bit_length; bit += kNumBits) {
185       size_t old_bits = LoadBits(bit_offset + bit, kNumBits);
186       StoreBits(bit_offset + bit, old_bits | src.LoadBits(bit, kNumBits), kNumBits);
187     }
188     size_t num_bits = bit_length - bit;
189     size_t old_bits = LoadBits(bit_offset + bit, num_bits);
190     StoreBits(bit_offset + bit, old_bits | src.LoadBits(bit, num_bits), num_bits);
191   }
192 
193   // Count the number of set bits within the given bit range.
PopCount(size_t bit_offset,size_t bit_length)194   ALWAYS_INLINE size_t PopCount(size_t bit_offset, size_t bit_length) const {
195     DCHECK_LE(bit_offset, bit_size_);
196     DCHECK_LE(bit_length, bit_size_ - bit_offset);
197     size_t count = 0;
198     size_t bit = 0;
199     constexpr size_t kNumBits = BitSizeOf<uint32_t>();
200     for (; bit + kNumBits <= bit_length; bit += kNumBits) {
201       count += POPCOUNT(LoadBits(bit_offset + bit, kNumBits));
202     }
203     count += POPCOUNT(LoadBits(bit_offset + bit, bit_length - bit));
204     return count;
205   }
206 
207   // Check if there is any bit set within the given bit range.
HasSomeBitSet(size_t bit_offset,size_t bit_length)208   ALWAYS_INLINE bool HasSomeBitSet(size_t bit_offset, size_t bit_length) const {
209     // TODO: Load `size_t` chunks (instead of `uint32_t`) from aligned
210     // addresses except for the leading and trailing bits. Refactor to
211     // share code with PopCount() and maybe also Compare().
212     DCHECK_LE(bit_offset, bit_size_);
213     DCHECK_LE(bit_length, bit_size_ - bit_offset);
214     size_t bit = 0;
215     constexpr size_t kNumBits = BitSizeOf<uint32_t>();
216     for (; bit + kNumBits <= bit_length; bit += kNumBits) {
217       if (LoadBits(bit_offset + bit, kNumBits) != 0u) {
218         return true;
219       }
220     }
221     return LoadBits(bit_offset + bit, bit_length - bit) != 0u;
222   }
223 
Compare(const BitMemoryRegion & lhs,const BitMemoryRegion & rhs)224   static int Compare(const BitMemoryRegion& lhs, const BitMemoryRegion& rhs) {
225     if (lhs.size_in_bits() != rhs.size_in_bits()) {
226       return (lhs.size_in_bits() < rhs.size_in_bits()) ? -1 : 1;
227     }
228     size_t bit = 0;
229     constexpr size_t kNumBits = BitSizeOf<uint32_t>();
230     for (; bit + kNumBits <= lhs.size_in_bits(); bit += kNumBits) {
231       uint32_t lhs_bits = lhs.LoadBits(bit, kNumBits);
232       uint32_t rhs_bits = rhs.LoadBits(bit, kNumBits);
233       if (lhs_bits != rhs_bits) {
234         return (lhs_bits < rhs_bits) ? -1 : 1;
235       }
236     }
237     size_t num_bits = lhs.size_in_bits() - bit;
238     uint32_t lhs_bits = lhs.LoadBits(bit, num_bits);
239     uint32_t rhs_bits = rhs.LoadBits(bit, num_bits);
240     if (lhs_bits != rhs_bits) {
241       return (lhs_bits < rhs_bits) ? -1 : 1;
242     }
243     return 0;
244   }
245 
246  private:
247   uint8_t* data_ = nullptr;  // The pointer is page aligned.
248   size_t bit_start_ = 0;
249   size_t bit_size_ = 0;
250 };
251 
// Parameters of the variable-length ("varint") encoding used by
// BitMemoryReader::ReadVarint() and BitMemoryWriter::WriteVarint().
constexpr uint32_t kVarintBits = 4;  // Minimum number of bits used for varint.
constexpr uint32_t kVarintMax = 11;  // Maximum value which is stored "inline".
254 
255 class BitMemoryReader {
256  public:
257   BitMemoryReader(BitMemoryReader&&) = default;
BitMemoryReader(BitMemoryRegion data)258   explicit BitMemoryReader(BitMemoryRegion data)
259       : finished_region_(data.Subregion(0, 0) /* set the length to zero */ ) {
260   }
261   explicit BitMemoryReader(const uint8_t* data, ssize_t bit_offset = 0)
262       : finished_region_(const_cast<uint8_t*>(data), bit_offset, /* bit_length */ 0) {
263   }
264 
data()265   const uint8_t* data() const { return finished_region_.data(); }
266 
GetReadRegion()267   BitMemoryRegion GetReadRegion() const { return finished_region_; }
268 
NumberOfReadBits()269   size_t NumberOfReadBits() const { return finished_region_.size_in_bits(); }
270 
ReadRegion(size_t bit_length)271   ALWAYS_INLINE BitMemoryRegion ReadRegion(size_t bit_length) {
272     size_t bit_offset = finished_region_.size_in_bits();
273     finished_region_.Resize(bit_offset + bit_length);
274     return finished_region_.Subregion(bit_offset, bit_length);
275   }
276 
277   template<typename Result = size_t>
ReadBits(size_t bit_length)278   ALWAYS_INLINE Result ReadBits(size_t bit_length) {
279     return ReadRegion(bit_length).LoadBits<Result>(/* bit_offset */ 0, bit_length);
280   }
281 
ReadBit()282   ALWAYS_INLINE bool ReadBit() {
283     return ReadRegion(/* bit_length */ 1).LoadBit(/* bit_offset */ 0);
284   }
285 
286   // Read variable-length bit-packed integer.
287   // The first four bits determine the variable length of the encoded integer:
288   //   Values 0..11 represent the result as-is, with no further following bits.
289   //   Values 12..15 mean the result is in the next 8/16/24/32-bits respectively.
ReadVarint()290   ALWAYS_INLINE uint32_t ReadVarint() {
291     uint32_t x = ReadBits(kVarintBits);
292     return (x <= kVarintMax) ? x : ReadBits((x - kVarintMax) * kBitsPerByte);
293   }
294 
295   // Read N 'interleaved' varints (different to just reading consecutive varints).
296   // All small values are stored first and the large values are stored after them.
297   // This requires fewer bit-reads compared to indidually storing the varints.
298   template<size_t N>
ReadInterleavedVarints()299   ALWAYS_INLINE std::array<uint32_t, N> ReadInterleavedVarints() {
300     static_assert(N * kVarintBits <= sizeof(uint64_t) * kBitsPerByte, "N too big");
301     std::array<uint32_t, N> values;
302     // StackMap BitTable uses over 8 varints in the header, so we need uint64_t.
303     uint64_t data = ReadBits<uint64_t>(N * kVarintBits);
304     for (size_t i = 0; i < N; i++) {
305       values[i] = BitFieldExtract(data, i * kVarintBits, kVarintBits);
306     }
307     // Do the second part in its own loop as that seems to produce better code in clang.
308     for (size_t i = 0; i < N; i++) {
309       if (UNLIKELY(values[i] > kVarintMax)) {
310         values[i] = ReadBits((values[i] - kVarintMax) * kBitsPerByte);
311       }
312     }
313     return values;
314   }
315 
316  private:
317   // Represents all of the bits which were read so far. There is no upper bound.
318   // Therefore, by definition, the "cursor" is always at the end of the region.
319   BitMemoryRegion finished_region_;
320 
321   DISALLOW_COPY_AND_ASSIGN(BitMemoryReader);
322 };
323 
324 template<typename Vector>
325 class BitMemoryWriter {
326  public:
327   explicit BitMemoryWriter(Vector* out, size_t bit_offset = 0)
out_(out)328       : out_(out), bit_start_(bit_offset), bit_offset_(bit_offset) {
329     DCHECK_EQ(NumberOfWrittenBits(), 0u);
330   }
331 
GetWrittenRegion()332   BitMemoryRegion GetWrittenRegion() const {
333     return BitMemoryRegion(out_->data(), bit_start_, bit_offset_ - bit_start_);
334   }
335 
data()336   const uint8_t* data() const { return out_->data(); }
337 
NumberOfWrittenBits()338   size_t NumberOfWrittenBits() const { return bit_offset_ - bit_start_; }
339 
Allocate(size_t bit_length)340   ALWAYS_INLINE BitMemoryRegion Allocate(size_t bit_length) {
341     out_->resize(BitsToBytesRoundUp(bit_offset_ + bit_length));
342     BitMemoryRegion region(out_->data(), bit_offset_, bit_length);
343     DCHECK_LE(bit_length, std::numeric_limits<size_t>::max() - bit_offset_) << "Overflow";
344     bit_offset_ += bit_length;
345     return region;
346   }
347 
WriteRegion(const BitMemoryRegion & region)348   ALWAYS_INLINE void WriteRegion(const BitMemoryRegion& region) {
349     Allocate(region.size_in_bits()).StoreBits(/* bit_offset */ 0, region, region.size_in_bits());
350   }
351 
WriteBits(uint32_t value,size_t bit_length)352   ALWAYS_INLINE void WriteBits(uint32_t value, size_t bit_length) {
353     Allocate(bit_length).StoreBits(/* bit_offset */ 0, value, bit_length);
354   }
355 
WriteBit(bool value)356   ALWAYS_INLINE void WriteBit(bool value) {
357     Allocate(1).StoreBit(/* bit_offset */ 0, value);
358   }
359 
360   template<size_t N>
WriteInterleavedVarints(std::array<uint32_t,N> values)361   ALWAYS_INLINE void WriteInterleavedVarints(std::array<uint32_t, N> values) {
362     // Write small values (or the number of bytes needed for the large values).
363     for (uint32_t value : values) {
364       if (value > kVarintMax) {
365         WriteBits(kVarintMax + BitsToBytesRoundUp(MinimumBitsToStore(value)), kVarintBits);
366       } else {
367         WriteBits(value, kVarintBits);
368       }
369     }
370     // Write large values.
371     for (uint32_t value : values) {
372       if (value > kVarintMax) {
373         WriteBits(value, BitsToBytesRoundUp(MinimumBitsToStore(value)) * kBitsPerByte);
374       }
375     }
376   }
377 
WriteVarint(uint32_t value)378   ALWAYS_INLINE void WriteVarint(uint32_t value) {
379     WriteInterleavedVarints<1>({value});
380   }
381 
ByteAlign()382   ALWAYS_INLINE void ByteAlign() {
383     size_t end = bit_start_ + bit_offset_;
384     bit_offset_ += RoundUp(end, kBitsPerByte) - end;
385   }
386 
387  private:
388   Vector* out_;
389   size_t bit_start_;
390   size_t bit_offset_;
391 
392   DISALLOW_COPY_AND_ASSIGN(BitMemoryWriter);
393 };
394 
395 }  // namespace art
396 
397 #endif  // ART_LIBARTBASE_BASE_BIT_MEMORY_REGION_H_
398