1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_INDEX_POSTING_LIST_IDENTIFIER_H_ 16 #define ICING_INDEX_POSTING_LIST_IDENTIFIER_H_ 17 18 #include "icing/index/main/index-block.h" 19 #include "icing/index/main/posting-list-free.h" 20 #include "icing/legacy/index/icing-bit-util.h" 21 22 namespace icing { 23 namespace lib { 24 25 // 1M blocks * 4K page size = 4GB index 26 inline constexpr int kBlockIndexBits = 20; 27 inline constexpr int kMaxBlockIndex = (1u << kBlockIndexBits) - 1; 28 29 // Class used to store information necessary to identify any posting list within 30 // the index. 31 // 32 // The 20 leftmost bits in this identifier encode the block index. The 12 33 // rightmost bits encode both the posting list index and the maximum number of 34 // bits required to encode a posting list index on that block. 35 // 36 // Ex. An index block containing a max of 68 posting lists each of size 60 37 // bytes (and thus 7 posting list bits), with a block index of 13 and a posting 38 // list index of 5. 39 // 0000 0000 0000 0000 1101 1111 0000 0101 40 // |__________block-index_______|__pad__|_pl-index_| 41 // 42 // "pad" is some region starting at kEncodedPostingListIndexBits (12) bit and 43 // continuing rightward until reaching a terminating "0". This padding encodes 44 // the posting list bits value - posting list bits value is the number of bits 45 // after the terminating '0' of the "pad" region. 46 // 47 // This value will eventually be stored in the Main Lexicon. 48 class PostingListIdentifier { 49 // 1 bit is wasted to encode max pl index bits so there can be at most 2^11 50 // posting lists per block. Block size would have to be >=40020 bytes for 51 // there to be more than 2K+ posting lists in a block. 52 static constexpr int kEncodedPostingListIndexBits = 12; 53 static_assert(kEncodedPostingListIndexBits + kBlockIndexBits <= 54 8 * sizeof(uint32_t), 55 "Not enough room in PostingListIdentifier value to encode " 56 "block index and posting list index."); 57 58 public: 59 static PostingListIdentifier kInvalid; 60 61 // 1. block_index - the index of this block within the FlashIndexStorage file 62 // 2. posting_list_index - the index of this posting list within the block 63 // 3. posting_list_index_bits - the number of bits needed to encode the 64 // largest posting_list_index that this block can have. PostingListIdentifier(uint32_t block_index,PostingListIndex posting_list_index,int posting_list_index_bits)65 PostingListIdentifier(uint32_t block_index, 66 PostingListIndex posting_list_index, 67 int posting_list_index_bits) { 68 val_ = 0; 69 BITFIELD_OR(val_, /*offset=*/0, /*len=*/posting_list_index_bits, 70 /*val=*/static_cast<uint64_t>(posting_list_index)); 71 BITFIELD_OR( 72 val_, /*offset=*/posting_list_index_bits + 1, 73 /*len=*/kEncodedPostingListIndexBits - posting_list_index_bits - 1, 74 /*val=*/~0u); 75 BITFIELD_OR(val_, /*offset=*/kEncodedPostingListIndexBits, 76 /*len=*/kBlockIndexBits, 77 /*val=*/block_index); 78 } 79 block_index()80 int block_index() const { 81 return BITFIELD_GET(val_, kEncodedPostingListIndexBits, kBlockIndexBits); 82 } 83 posting_list_index()84 PostingListIndex posting_list_index() const { 85 return BITFIELD_GET(val_, 0, posting_list_index_bits()); 86 } 87 88 // Returns the maximum number of bits that a posting list index on the block 89 // referred to by block_index could use. posting_list_index_bits()90 int posting_list_index_bits() const { 91 for (int bits = kEncodedPostingListIndexBits - 1; bits >= 0; --bits) { 92 if (((1u << bits) & val_) == 0) { 93 // Got to the zero bit. This is the start of pl index. 94 return bits; 95 } 96 } 97 return -1; 98 } 99 is_valid()100 bool is_valid() const { return *this != kInvalid; } 101 102 bool operator==(const PostingListIdentifier& rhs) const { 103 return val_ == rhs.val_; 104 } 105 bool operator!=(const PostingListIdentifier& rhs) const { 106 return !(*this == rhs); 107 } 108 109 private: 110 uint32_t val_; 111 }; 112 113 } // namespace lib 114 } // namespace icing 115 116 #endif // ICING_INDEX_POSTING_LIST_IDENTIFIER_H_ 117