1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_INDEX_POSTING_LIST_IDENTIFIER_H_
16 #define ICING_INDEX_POSTING_LIST_IDENTIFIER_H_
17 
18 #include "icing/index/main/index-block.h"
19 #include "icing/index/main/posting-list-free.h"
20 #include "icing/legacy/index/icing-bit-util.h"
21 
22 namespace icing {
23 namespace lib {
24 
25 // 1M blocks * 4K page size = 4GB index
26 inline constexpr int kBlockIndexBits = 20;
27 inline constexpr int kMaxBlockIndex = (1u << kBlockIndexBits) - 1;
28 
29 // Class used to store information necessary to identify any posting list within
30 // the index.
31 //
32 // The 20 leftmost bits in this identifier encode the block index. The 12
33 // rightmost bits encode both the posting list index and the maximum number of
34 // bits required to encode a posting list index on that block.
35 //
36 // Ex. An index block containing a max of 68 posting lists each of size 60
37 // bytes (and thus 7 posting list bits), with a block index of 13 and a posting
38 // list index of 5.
39 //   0000  0000  0000  0000  1101  1111  0000  0101
40 //  |__________block-index_______|__pad__|_pl-index_|
41 //
42 // "pad" is some region starting at kEncodedPostingListIndexBits (12) bit and
43 // continuing rightward until reaching a terminating "0". This padding encodes
44 // the posting list bits value - posting list bits value is the number of bits
45 // after the terminating '0' of the "pad" region.
46 //
47 // This value will eventually be stored in the Main Lexicon.
48 class PostingListIdentifier {
49   // 1 bit is wasted to encode max pl index bits so there can be at most 2^11
50   // posting lists per block. Block size would have to be >=40020 bytes for
51   // there to be more than 2K+ posting lists in a block.
52   static constexpr int kEncodedPostingListIndexBits = 12;
53   static_assert(kEncodedPostingListIndexBits + kBlockIndexBits <=
54                     8 * sizeof(uint32_t),
55                 "Not enough room in PostingListIdentifier value to encode "
56                 "block index and posting list index.");
57 
58  public:
59   static PostingListIdentifier kInvalid;
60 
61   // 1. block_index - the index of this block within the FlashIndexStorage file
62   // 2. posting_list_index - the index of this posting list within the block
63   // 3. posting_list_index_bits - the number of bits needed to encode the
64   //    largest posting_list_index that this block can have.
PostingListIdentifier(uint32_t block_index,PostingListIndex posting_list_index,int posting_list_index_bits)65   PostingListIdentifier(uint32_t block_index,
66                         PostingListIndex posting_list_index,
67                         int posting_list_index_bits) {
68     val_ = 0;
69     BITFIELD_OR(val_, /*offset=*/0, /*len=*/posting_list_index_bits,
70                 /*val=*/static_cast<uint64_t>(posting_list_index));
71     BITFIELD_OR(
72         val_, /*offset=*/posting_list_index_bits + 1,
73         /*len=*/kEncodedPostingListIndexBits - posting_list_index_bits - 1,
74         /*val=*/~0u);
75     BITFIELD_OR(val_, /*offset=*/kEncodedPostingListIndexBits,
76                 /*len=*/kBlockIndexBits,
77                 /*val=*/block_index);
78   }
79 
block_index()80   int block_index() const {
81     return BITFIELD_GET(val_, kEncodedPostingListIndexBits, kBlockIndexBits);
82   }
83 
posting_list_index()84   PostingListIndex posting_list_index() const {
85     return BITFIELD_GET(val_, 0, posting_list_index_bits());
86   }
87 
88   // Returns the maximum number of bits that a posting list index on the block
89   // referred to by block_index could use.
posting_list_index_bits()90   int posting_list_index_bits() const {
91     for (int bits = kEncodedPostingListIndexBits - 1; bits >= 0; --bits) {
92       if (((1u << bits) & val_) == 0) {
93         // Got to the zero bit. This is the start of pl index.
94         return bits;
95       }
96     }
97     return -1;
98   }
99 
is_valid()100   bool is_valid() const { return *this != kInvalid; }
101 
102   bool operator==(const PostingListIdentifier& rhs) const {
103     return val_ == rhs.val_;
104   }
105   bool operator!=(const PostingListIdentifier& rhs) const {
106     return !(*this == rhs);
107   }
108 
109  private:
110   uint32_t val_;
111 };
112 
113 }  //  namespace lib
114 }  //  namespace icing
115 
116 #endif  // ICING_INDEX_POSTING_LIST_IDENTIFIER_H_
117