1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACE_PROCESSOR_CONTAINERS_BIT_VECTOR_H_
18 #define SRC_TRACE_PROCESSOR_CONTAINERS_BIT_VECTOR_H_
19 
20 #include <stddef.h>
21 #include <stdint.h>
22 #include <stdio.h>
23 
24 #include <algorithm>
25 #include <array>
26 #include <vector>
27 
28 #include "perfetto/base/logging.h"
29 
30 namespace perfetto {
31 namespace trace_processor {
32 
33 namespace internal {
34 
35 class BaseIterator;
36 class AllBitsIterator;
37 class SetBitsIterator;
38 
39 }  // namespace internal
40 
41 // A bitvector which compactly stores a vector of bools using a single bit
42 // for each bool.
43 class BitVector {
44  public:
45   using AllBitsIterator = internal::AllBitsIterator;
46   using SetBitsIterator = internal::SetBitsIterator;
47 
48   // Creates an empty bitvector.
49   BitVector();
50 
51   explicit BitVector(std::initializer_list<bool> init);
52 
53   // Creates a bitvector of |count| size filled with |value|.
54   BitVector(uint32_t count, bool value = false);
55 
56   // Enable moving bitvectors as they have no unmovable state.
57   BitVector(BitVector&&) noexcept = default;
58   BitVector& operator=(BitVector&&) = default;
59 
60   // Create a copy of the bitvector.
61   BitVector Copy() const;
62 
63   // Returns the size of the bitvector.
size()64   uint32_t size() const { return static_cast<uint32_t>(size_); }
65 
66   // Returns whether the bit at |idx| is set.
IsSet(uint32_t idx)67   bool IsSet(uint32_t idx) const {
68     PERFETTO_DCHECK(idx < size());
69 
70     Address a = IndexToAddress(idx);
71     return blocks_[a.block_idx].IsSet(a.block_offset);
72   }
73 
74   // Returns the number of set bits in the bitvector.
GetNumBitsSet()75   uint32_t GetNumBitsSet() const { return GetNumBitsSet(size()); }
76 
77   // Returns the number of set bits between the start of the bitvector
78   // (inclusive) and the index |end| (exclusive).
GetNumBitsSet(uint32_t end)79   uint32_t GetNumBitsSet(uint32_t end) const {
80     if (end == 0)
81       return 0;
82 
83     // Although the external interface we present uses an exclusive |end|,
84     // internally it's a lot nicer to work with an inclusive |end| (mainly
85     // because we get block rollovers on exclusive ends which means we need
86     // to have if checks to ensure we don't overflow the number of blocks).
87     Address addr = IndexToAddress(end - 1);
88     uint32_t idx = addr.block_idx;
89 
90     // Add the number of set bits until the start of the block to the number
91     // of set bits until the end address inside the block.
92     return counts_[idx] + blocks_[idx].GetNumBitsSet(addr.block_offset);
93   }
94 
95   // Returns the index of the |n|th set bit. Should only be called with |n| <
96   // GetNumBitsSet().
IndexOfNthSet(uint32_t n)97   uint32_t IndexOfNthSet(uint32_t n) const {
98     PERFETTO_DCHECK(n < GetNumBitsSet());
99 
100     // First search for the block which, up until the start of it, has more than
101     // n bits set. Note that this should never return |counts.begin()| as
102     // that should always be 0.
103     // TODO(lalitm): investigate whether we can make this faster with small
104     // binary search followed by a linear search instead of binary searching the
105     // full way.
106     auto it = std::upper_bound(counts_.begin(), counts_.end(), n);
107     PERFETTO_DCHECK(it != counts_.begin());
108 
109     // Go back one block to find the block which has the bit we are looking for.
110     uint32_t block_idx =
111         static_cast<uint32_t>(std::distance(counts_.begin(), it) - 1);
112 
113     // Figure out how many set bits forward we are looking inside the block
114     // by taking away the number of bits at the start of the block from n.
115     uint32_t set_in_block = n - counts_[block_idx];
116 
117     // Compute the address of the bit in the block then convert the full
118     // address back to an index.
119     BlockOffset block_offset = blocks_[block_idx].IndexOfNthSet(set_in_block);
120     return AddressToIndex(Address{block_idx, block_offset});
121   }
122 
123   // Sets the bit at index |idx| to true.
Set(uint32_t idx)124   void Set(uint32_t idx) {
125     // Set the bit to the correct value inside the block but store the old
126     // bit to help fix the counts.
127     auto addr = IndexToAddress(idx);
128     bool old_value = blocks_[addr.block_idx].IsSet(addr.block_offset);
129 
130     // If the old value was unset, set the bit and add one to the count.
131     if (PERFETTO_LIKELY(!old_value)) {
132       blocks_[addr.block_idx].Set(addr.block_offset);
133 
134       uint32_t size = static_cast<uint32_t>(counts_.size());
135       for (uint32_t i = addr.block_idx + 1; i < size; ++i) {
136         counts_[i]++;
137       }
138     }
139   }
140 
141   // Sets the bit at index |idx| to false.
Clear(uint32_t idx)142   void Clear(uint32_t idx) {
143     // Set the bit to the correct value inside the block but store the old
144     // bit to help fix the counts.
145     auto addr = IndexToAddress(idx);
146     bool old_value = blocks_[addr.block_idx].IsSet(addr.block_offset);
147 
148     // If the old value was set, clear the bit and subtract one from all the
149     // counts.
150     if (PERFETTO_LIKELY(old_value)) {
151       blocks_[addr.block_idx].Clear(addr.block_offset);
152 
153       uint32_t size = static_cast<uint32_t>(counts_.size());
154       for (uint32_t i = addr.block_idx + 1; i < size; ++i) {
155         counts_[i]--;
156       }
157     }
158   }
159 
160   // Appends true to the bitvector.
AppendTrue()161   void AppendTrue() {
162     Address addr = IndexToAddress(size_);
163     uint32_t old_blocks_size = static_cast<uint32_t>(blocks_.size());
164     uint32_t new_blocks_size = addr.block_idx + 1;
165 
166     if (PERFETTO_UNLIKELY(new_blocks_size > old_blocks_size)) {
167       uint32_t t = GetNumBitsSet();
168       blocks_.emplace_back();
169       counts_.emplace_back(t);
170     }
171 
172     size_++;
173     blocks_[addr.block_idx].Set(addr.block_offset);
174   }
175 
176   // Appends false to the bitvector.
AppendFalse()177   void AppendFalse() {
178     Address addr = IndexToAddress(size_);
179     uint32_t old_blocks_size = static_cast<uint32_t>(blocks_.size());
180     uint32_t new_blocks_size = addr.block_idx + 1;
181 
182     if (PERFETTO_UNLIKELY(new_blocks_size > old_blocks_size)) {
183       uint32_t t = GetNumBitsSet();
184       blocks_.emplace_back();
185       counts_.emplace_back(t);
186     }
187 
188     size_++;
189     // We don't need to clear the bit as we ensure that anything after
190     // size_ is always set to false.
191   }
192 
193   // Resizes the BitVector to the given |size|.
194   // Truncates the BitVector if |size| < |size()| or fills the new space with
195   // |value| if |size| > |size()|. Calling this method is a noop if |size| ==
196   // |size()|.
197   void Resize(uint32_t size, bool value = false) {
198     uint32_t old_size = size_;
199     if (size == old_size)
200       return;
201 
202     // Empty bitvectors should be memory efficient so we don't keep any data
203     // around in the bitvector.
204     if (size == 0) {
205       blocks_.clear();
206       counts_.clear();
207       size_ = 0;
208       return;
209     }
210 
211     // Compute the address of the new last bit in the bitvector.
212     Address last_addr = IndexToAddress(size - 1);
213     uint32_t old_blocks_size = static_cast<uint32_t>(counts_.size());
214     uint32_t new_blocks_size = last_addr.block_idx + 1;
215 
216     // Then, resize the block and count vectors to have the correct
217     // number of entries.
218     blocks_.resize(new_blocks_size);
219     counts_.resize(new_blocks_size);
220 
221     if (size > old_size) {
222       if (value) {
223         // If the new space should be filled with true, then set all the bits
224         // between the address of the old size and the new last address.
225         const Address& start = IndexToAddress(old_size);
226         Set(start, last_addr);
227 
228         // We then need to update the counts vector to match the changes we
229         // made to the blocks.
230 
231         // We start by adding the bits we set in the first block to the
232         // cummulative count before the range we changed.
233         Address end_of_block = {start.block_idx,
234                                 {Block::kWords - 1, BitWord::kBits - 1}};
235         uint32_t count_in_block_after_end =
236             AddressToIndex(end_of_block) - AddressToIndex(start) + 1;
237         uint32_t set_count = GetNumBitsSet() + count_in_block_after_end;
238 
239         for (uint32_t i = start.block_idx + 1; i <= last_addr.block_idx; ++i) {
240           // Set the count to the cummulative count so far.
241           counts_[i] = set_count;
242 
243           // Add a full block of set bits to the count.
244           set_count += Block::kBits;
245         }
246       } else {
247         // If the newly added bits are false, we just need to update the
248         // counts vector with the current size of the bitvector for all
249         // the newly added blocks.
250         if (new_blocks_size > old_blocks_size) {
251           uint32_t count = GetNumBitsSet();
252           for (uint32_t i = old_blocks_size; i < new_blocks_size; ++i) {
253             counts_[i] = count;
254           }
255         }
256       }
257     } else {
258       // Throw away all the bits after the new last bit. We do this to make
259       // future lookup, append and resize operations not have to worrying about
260       // trailing garbage bits in the last block.
261       blocks_[last_addr.block_idx].ClearAfter(last_addr.block_offset);
262     }
263 
264     // Actually update the size.
265     size_ = size;
266   }
267 
268   // Creates a BitVector of size |end| with the bits between |start| and |end|
269   // filled by calling the filler function |f(index of bit)|.
270   //
271   // As an example, suppose Range(3, 7, [](x) { return x < 5 }). This would
272   // result in the following bitvector:
273   // [0 0 0 1 1 0 0 0]
274   template <typename Filler = bool(uint32_t)>
Range(uint32_t start,uint32_t end,Filler f)275   static BitVector Range(uint32_t start, uint32_t end, Filler f) {
276     // Compute the block index and bitvector index where we start and end
277     // working one block at a time.
278     uint32_t start_fast_block = BlockCeil(start);
279     uint32_t start_fast_idx = BlockToIndex(start_fast_block);
280     uint32_t end_fast_block = BlockFloor(end);
281     uint32_t end_fast_idx = BlockToIndex(end_fast_block);
282 
283     // First, create the BitVector up to |start| then fill up to
284     // |start_fast_index| with values from the filler.
285     BitVector bv(start, false);
286     for (uint32_t i = start; i < start_fast_idx; ++i) {
287       bv.Append(f(i));
288     }
289 
290     // At this point we can work one block at a time.
291     for (uint32_t i = start_fast_block; i < end_fast_block; ++i) {
292       bv.counts_.emplace_back(bv.GetNumBitsSet());
293       bv.blocks_.emplace_back(Block::FromFiller(bv.size_, f));
294       bv.size_ += Block::kBits;
295     }
296 
297     // Add the last few elements to finish up to |end|.
298     for (uint32_t i = end_fast_idx; i < end; ++i) {
299       bv.Append(f(i));
300     }
301     return bv;
302   }
303 
304   // Updates the ith set bit of this bitvector with the value of
305   // |other.IsSet(i)|.
306   //
307   // This is the best way to batch update all the bits which are set; for
308   // example when filtering rows, we want to filter all rows which are currently
309   // included but ignore rows which have already been excluded.
310   //
311   // For example suppose the following:
312   // this:  1 1 0 0 1 0 1
313   // other: 0 1 1 0
314   // This will change this to the following:
315   // this:  0 1 0 0 1 0 0
316   // TODO(lalitm): investigate whether we should just change this to And.
317   void UpdateSetBits(const BitVector& other);
318 
319   // Iterate all the bits in the BitVector.
320   //
321   // Usage:
322   // for (auto it = bv.IterateAllBits(); it; it.Next()) {
323   //   ...
324   // }
325   AllBitsIterator IterateAllBits() const;
326 
327   // Iterate all the set bits in the BitVector.
328   //
329   // Usage:
330   // for (auto it = bv.IterateSetBits(); it; it.Next()) {
331   //   ...
332   // }
333   SetBitsIterator IterateSetBits() const;
334 
335   // Returns the approximate cost (in bytes) of storing a bitvector with size
336   // |n|. This can be used to make decisions about whether using a BitVector is
337   // worthwhile.
338   // This cost should not be treated as exact - it just gives an indication of
339   // the memory needed.
ApproxBytesCost(uint32_t n)340   static constexpr uint32_t ApproxBytesCost(uint32_t n) {
341     // The two main things making up a bitvector is the cost of the blocks of
342     // bits and the cost of the counts vector.
343     return BlockCeil(n) * Block::kBits + BlockCeil(n) * sizeof(uint32_t);
344   }
345 
346  private:
347   friend class internal::BaseIterator;
348   friend class internal::AllBitsIterator;
349   friend class internal::SetBitsIterator;
350 
351   // Represents the offset of a bit within a block.
352   struct BlockOffset {
353     uint16_t word_idx;
354     uint16_t bit_idx;
355   };
356 
357   // Represents the address of a bit within the bitvector.
358   struct Address {
359     uint32_t block_idx;
360     BlockOffset block_offset;
361   };
362 
363   // Represents the smallest collection of bits we can refer to as
364   // one unit.
365   //
366   // Currently, this is implemented as a 64 bit integer as this is the
367   // largest type which we can assume to be present on all platforms.
368   class BitWord {
369    public:
370     static constexpr uint32_t kBits = 64;
371 
372     // Returns whether the bit at the given index is set.
IsSet(uint32_t idx)373     bool IsSet(uint32_t idx) const {
374       PERFETTO_DCHECK(idx < kBits);
375       return (word_ >> idx) & 1ull;
376     }
377 
378     // Bitwise ors the given |mask| to the current value.
Or(uint64_t mask)379     void Or(uint64_t mask) { word_ |= mask; }
380 
381     // Sets the bit at the given index to true.
Set(uint32_t idx)382     void Set(uint32_t idx) {
383       PERFETTO_DCHECK(idx < kBits);
384 
385       // Or the value for the true shifted up to |idx| with the word.
386       Or(1ull << idx);
387     }
388 
389     // Sets the bit at the given index to false.
Clear(uint32_t idx)390     void Clear(uint32_t idx) {
391       PERFETTO_DCHECK(idx < kBits);
392 
393       // And the integer of all bits set apart from |idx| with the word.
394       word_ &= ~(1ull << idx);
395     }
396 
397     // Clears all the bits (i.e. sets the atom to zero).
ClearAll()398     void ClearAll() { word_ = 0; }
399 
400     // Returns the index of the nth set bit.
401     // Undefined if |n| >= |GetNumBitsSet()|.
IndexOfNthSet(uint32_t n)402     uint16_t IndexOfNthSet(uint32_t n) const {
403       PERFETTO_DCHECK(n < kBits);
404 
405       // The below code is very dense but essentially computes the nth set
406       // bit inside |atom| in the "broadword" style of programming (sometimes
407       // referred to as "SIMD within a register").
408       //
409       // Instead of treating a uint64 as an individual unit, broadword
410       // algorithms treat them as a packed vector of uint8. By doing this, they
411       // allow branchless algorithms when considering bits of a uint64.
412       //
413       // In benchmarks, this algorithm has found to be the fastest, portable
414       // way of computing the nth set bit (if we were only targetting new
415       // versions of x64, we could also use pdep + ctz but unfortunately
416       // this would fail on WASM - this about 2.5-3x faster on x64).
417       //
418       // The code below was taken from the paper
419       // http://vigna.di.unimi.it/ftp/papers/Broadword.pdf
420       uint64_t s = word_ - ((word_ & 0xAAAAAAAAAAAAAAAA) >> 1);
421       s = (s & 0x3333333333333333) + ((s >> 2) & 0x3333333333333333);
422       s = ((s + (s >> 4)) & 0x0F0F0F0F0F0F0F0F) * L8;
423 
424       uint64_t b = (BwLessThan(s, n * L8) >> 7) * L8 >> 53 & ~7ull;
425       uint64_t l = n - ((s << 8) >> b & 0xFF);
426       s = (BwGtZero(((word_ >> b & 0xFF) * L8) & 0x8040201008040201) >> 7) * L8;
427 
428       uint64_t ret = b + ((BwLessThan(s, l * L8) >> 7) * L8 >> 56);
429 
430       return static_cast<uint16_t>(ret);
431     }
432 
433     // Returns the number of set bits.
GetNumBitsSet()434     uint32_t GetNumBitsSet() const {
435       return static_cast<uint32_t>(PERFETTO_POPCOUNT(word_));
436     }
437 
438     // Returns the number of set bits up to and including the bit at |idx|.
GetNumBitsSet(uint32_t idx)439     uint32_t GetNumBitsSet(uint32_t idx) const {
440       PERFETTO_DCHECK(idx < kBits);
441       return static_cast<uint32_t>(PERFETTO_POPCOUNT(WordUntil(idx)));
442     }
443 
444     // Retains all bits up to and including the bit at |idx| and clears
445     // all bits after this point.
ClearAfter(uint32_t idx)446     void ClearAfter(uint32_t idx) {
447       PERFETTO_DCHECK(idx < kBits);
448       word_ = WordUntil(idx);
449     }
450 
451     // Sets all bits between the bit at |start| and |end| (inclusive).
Set(uint32_t start,uint32_t end)452     void Set(uint32_t start, uint32_t end) {
453       uint32_t diff = end - start;
454       word_ |= (MaskAllBitsSetUntil(diff) << static_cast<uint64_t>(start));
455     }
456 
457    private:
458     // Constant with all the low bit of every byte set.
459     static constexpr uint64_t L8 = 0x0101010101010101;
460 
461     // Constant with all the high bit of every byte set.
462     static constexpr uint64_t H8 = 0x8080808080808080;
463 
464     // Returns a packed uint64 encoding whether each byte of x is less
465     // than each corresponding byte of y.
466     // This is computed in the "broadword" style of programming; see
467     // IndexOfNthSet for details on this.
BwLessThan(uint64_t x,uint64_t y)468     static uint64_t BwLessThan(uint64_t x, uint64_t y) {
469       return (((y | H8) - (x & ~H8)) ^ x ^ y) & H8;
470     }
471 
472     // Returns a packed uint64 encoding whether each byte of x is greater
473     // than or equal zero.
474     // This is computed in the "broadword" style of programming; see
475     // IndexOfNthSet for details on this.
BwGtZero(uint64_t x)476     static uint64_t BwGtZero(uint64_t x) { return (((x | H8) - L8) | x) & H8; }
477 
478     // Returns the bits up to and including the bit at |idx|.
WordUntil(uint32_t idx)479     uint64_t WordUntil(uint32_t idx) const {
480       PERFETTO_DCHECK(idx < kBits);
481 
482       // To understand what is happeninng here, consider an example.
483       // Suppose we want to all the bits up to the 7th bit in the atom
484       //               7th
485       //                |
486       //                v
487       // atom: 01010101011111000
488       //
489       // The easiest way to do this would be if we had a mask with only
490       // the bottom 7 bits set:
491       // mask: 00000000001111111
492       uint64_t mask = MaskAllBitsSetUntil(idx);
493 
494       // Finish up by anding the the atom with the computed msk.
495       return word_ & mask;
496     }
497 
498     // Return a mask of all the bits up to and including bit at |idx|.
MaskAllBitsSetUntil(uint32_t idx)499     static uint64_t MaskAllBitsSetUntil(uint32_t idx) {
500       // Start with 1 and shift it up (idx + 1) bits we get:
501       // top : 00000000010000000
502       uint64_t top = 1ull << ((idx + 1ull) % kBits);
503 
504       // We need to handle the case where idx == 63. In this case |top| will be
505       // zero because 1 << ((idx + 1) % 64) == 1 << (64 % 64) == 1.
506       // In this case, we actually want top == 0. We can do this by shifting
507       // down by (idx + 1) / kBits - this will be a noop for every index other
508       // than idx == 63. This should also be free on x86 because of the mod
509       // instruction above.
510       top = top >> ((idx + 1) / kBits);
511 
512       // Then if we take away 1, we get precisely the mask we want.
513       return top - 1u;
514     }
515 
516     uint64_t word_ = 0;
517   };
518 
519   // Represents a group of bits with a bitcount such that it is
520   // efficient to work on these bits.
521   //
522   // On x86 architectures we generally target for trace processor, the
523   // size of a cache line is 64 bytes (or 512 bits). For this reason,
524   // we make the size of the block contain 8 atoms as 8 * 64 == 512.
525   //
526   // TODO(lalitm): investigate whether we should tune this value for
527   // WASM and ARM.
528   class Block {
529    public:
530     // See class documentation for how these constants are chosen.
531     static constexpr uint16_t kWords = 8;
532     static constexpr uint32_t kBits = kWords * BitWord::kBits;
533 
534     // Returns whether the bit at the given address is set.
IsSet(const BlockOffset & addr)535     bool IsSet(const BlockOffset& addr) const {
536       PERFETTO_DCHECK(addr.word_idx < kWords);
537 
538       return words_[addr.word_idx].IsSet(addr.bit_idx);
539     }
540 
541     // Sets the bit at the given address to true.
Set(const BlockOffset & addr)542     void Set(const BlockOffset& addr) {
543       PERFETTO_DCHECK(addr.word_idx < kWords);
544 
545       words_[addr.word_idx].Set(addr.bit_idx);
546     }
547 
548     // Sets the bit at the given address to false.
Clear(const BlockOffset & addr)549     void Clear(const BlockOffset& addr) {
550       PERFETTO_DCHECK(addr.word_idx < kWords);
551 
552       words_[addr.word_idx].Clear(addr.bit_idx);
553     }
554 
555     // Gets the offset of the nth set bit in this block.
IndexOfNthSet(uint32_t n)556     BlockOffset IndexOfNthSet(uint32_t n) const {
557       uint32_t count = 0;
558       for (uint16_t i = 0; i < kWords; ++i) {
559         // Keep a running count of all the set bits in the atom.
560         uint32_t value = count + words_[i].GetNumBitsSet();
561         if (value <= n) {
562           count = value;
563           continue;
564         }
565 
566         // The running count of set bits is more than |n|. That means this atom
567         // contains the bit we are looking for.
568 
569         // Take away the number of set bits to the start of this atom from |n|.
570         uint32_t set_in_atom = n - count;
571 
572         // Figure out the index of the set bit inside the atom and create the
573         // address of this bit from that.
574         uint16_t bit_idx = words_[i].IndexOfNthSet(set_in_atom);
575         PERFETTO_DCHECK(bit_idx < 64);
576         return BlockOffset{i, bit_idx};
577       }
578       PERFETTO_FATAL("Index out of bounds");
579     }
580 
581     // Gets the number of set bits within a block up to and including the bit
582     // at the given address.
GetNumBitsSet(const BlockOffset & addr)583     uint32_t GetNumBitsSet(const BlockOffset& addr) const {
584       PERFETTO_DCHECK(addr.word_idx < kWords);
585 
586       // Count all the set bits in the atom until we reach the last atom
587       // index.
588       uint32_t count = 0;
589       for (uint32_t i = 0; i < addr.word_idx; ++i) {
590         count += words_[i].GetNumBitsSet();
591       }
592 
593       // For the last atom, only count the bits upto and including the bit
594       // index.
595       return count + words_[addr.word_idx].GetNumBitsSet(addr.bit_idx);
596     }
597 
598     // Retains all bits up to and including the bit at |addr| and clears
599     // all bits after this point.
ClearAfter(const BlockOffset & offset)600     void ClearAfter(const BlockOffset& offset) {
601       PERFETTO_DCHECK(offset.word_idx < kWords);
602 
603       // In the first atom, keep the bits until the address specified.
604       words_[offset.word_idx].ClearAfter(offset.bit_idx);
605 
606       // For all subsequent atoms, we just clear the whole atom.
607       for (uint32_t i = offset.word_idx + 1; i < kWords; ++i) {
608         words_[i].ClearAll();
609       }
610     }
611 
612     // Set all the bits between the offsets given by |start| and |end|
613     // (inclusive).
Set(const BlockOffset & start,const BlockOffset & end)614     void Set(const BlockOffset& start, const BlockOffset& end) {
615       if (start.word_idx == end.word_idx) {
616         // If there is only one word we will change, just set the range within
617         // the word.
618         words_[start.word_idx].Set(start.bit_idx, end.bit_idx);
619         return;
620       }
621 
622       // Otherwise, we have more than one word to set. To do this, we will
623       // do this in three steps.
624 
625       // First, we set the first word from the start to the end of the word.
626       words_[start.word_idx].Set(start.bit_idx, BitWord::kBits - 1);
627 
628       // Next, we set all words (except the last).
629       for (uint32_t i = start.word_idx + 1; i < end.word_idx; ++i) {
630         words_[i].Set(0, BitWord::kBits - 1);
631       }
632 
633       // Finally, we set the word block from the start to the end offset.
634       words_[end.word_idx].Set(0, end.bit_idx);
635     }
636 
637     template <typename Filler>
FromFiller(uint32_t offset,Filler f)638     static Block FromFiller(uint32_t offset, Filler f) {
639       // We choose to iterate the bits as the outer loop as this allows us
640       // to reuse the mask and the bit offset between iterations of the loop.
641       // This makes a small (but noticable) impact in the performance of this
642       // function.
643       Block b;
644       for (uint32_t i = 0; i < BitWord::kBits; ++i) {
645         uint64_t mask = 1ull << i;
646         uint32_t offset_with_bit = offset + i;
647         for (uint32_t j = 0; j < Block::kWords; ++j) {
648           bool res = f(offset_with_bit + j * BitWord::kBits);
649           b.words_[j].Or(res ? mask : 0);
650         }
651       }
652       return b;
653     }
654 
655    private:
656     std::array<BitWord, kWords> words_{};
657   };
658 
659   BitVector(std::vector<Block> blocks,
660             std::vector<uint32_t> counts,
661             uint32_t size);
662 
663   BitVector(const BitVector&) = delete;
664   BitVector& operator=(const BitVector&) = delete;
665 
666   // Set all the bits between the addresses given by |start| and |end|
667   // (inclusive).
668   // Note: this method does not update the counts vector - that is the
669   // responibility of the caller.
Set(const Address & start,const Address & end)670   void Set(const Address& start, const Address& end) {
671     static constexpr BlockOffset kFirstBlockOffset = BlockOffset{0, 0};
672     static constexpr BlockOffset kLastBlockOffset =
673         BlockOffset{Block::kWords - 1, BitWord::kBits - 1};
674 
675     if (start.block_idx == end.block_idx) {
676       // If there is only one block we will change, just set the range within
677       // the block.
678       blocks_[start.block_idx].Set(start.block_offset, end.block_offset);
679       return;
680     }
681 
682     // Otherwise, we have more than one block to set. To do this, we will
683     // do this in three steps.
684 
685     // First, we set the first block from the start to the end of the block.
686     blocks_[start.block_idx].Set(start.block_offset, kLastBlockOffset);
687 
688     // Next, we set all blocks (except the last).
689     for (uint32_t i = start.block_idx + 1; i < end.block_idx; ++i) {
690       blocks_[i].Set(kFirstBlockOffset, kLastBlockOffset);
691     }
692 
693     // Finally, we set the last block from the start to the end offset.
694     blocks_[end.block_idx].Set(kFirstBlockOffset, end.block_offset);
695   }
696 
697   // Helper function to append a bit. Generally, prefer to call AppendTrue
698   // or AppendFalse instead of this function if you know the type - they will
699   // be faster.
Append(bool value)700   void Append(bool value) {
701     if (value) {
702       AppendTrue();
703     } else {
704       AppendFalse();
705     }
706   }
707 
IndexToAddress(uint32_t idx)708   static Address IndexToAddress(uint32_t idx) {
709     Address a;
710     a.block_idx = idx / Block::kBits;
711 
712     uint16_t bit_idx_inside_block = idx % Block::kBits;
713     a.block_offset.word_idx = bit_idx_inside_block / BitWord::kBits;
714     a.block_offset.bit_idx = bit_idx_inside_block % BitWord::kBits;
715     return a;
716   }
717 
AddressToIndex(Address addr)718   static uint32_t AddressToIndex(Address addr) {
719     return addr.block_idx * Block::kBits +
720            addr.block_offset.word_idx * BitWord::kBits +
721            addr.block_offset.bit_idx;
722   }
723 
724   // Rounds |idx| up to the nearest block boundary and returns the block
725   // index. If |idx| is already on a block boundary, the current block is
726   // returned.
727   //
728   // This is useful to be able to find indices where "fast" algorithms can start
729   // which work on entire blocks.
BlockCeil(uint32_t idx)730   static constexpr uint32_t BlockCeil(uint32_t idx) {
731     // Adding |Block::kBits - 1| gives us a quick way to get the ceil. We
732     // do this instead of adding 1 at the end because that gives incorrect
733     // answers for index % Block::kBits == 0.
734     return (idx + Block::kBits - 1) / Block::kBits;
735   }
736 
737   // Returns the index of the block which would store |idx|.
BlockFloor(uint32_t idx)738   static constexpr uint32_t BlockFloor(uint32_t idx) {
739     return idx / Block::kBits;
740   }
741 
742   // Converts a block index to a index in the BitVector.
BlockToIndex(uint32_t block)743   static constexpr uint32_t BlockToIndex(uint32_t block) {
744     return block * Block::kBits;
745   }
746 
747   uint32_t size_ = 0;
748   std::vector<uint32_t> counts_;
749   std::vector<Block> blocks_;
750 };
751 
752 }  // namespace trace_processor
753 }  // namespace perfetto
754 
755 #endif  // SRC_TRACE_PROCESSOR_CONTAINERS_BIT_VECTOR_H_
756