1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_INDEX_MAIN_INDEX_BLOCK_H_
16 #define ICING_INDEX_MAIN_INDEX_BLOCK_H_
17 
18 #include <string.h>
19 #include <sys/mman.h>
20 
21 #include <algorithm>
22 #include <limits>
23 #include <memory>
24 #include <string>
25 #include <unordered_set>
26 #include <vector>
27 
28 #include "icing/file/memory-mapped-file.h"
29 #include "icing/index/hit/hit.h"
30 #include "icing/index/main/posting-list-free.h"
31 #include "icing/index/main/posting-list-used.h"
32 #include "icing/legacy/index/icing-bit-util.h"
33 
34 namespace icing {
35 namespace lib {
36 
37 inline constexpr uint32_t kInvalidBlockIndex = 0;
38 
39 // This class is used to manage I/O to a single flash block and to manage the
40 // division of that flash block into PostingLists. It provides an interface to
41 // allocate, free and read posting lists.
42 //
43 // An IndexBlock contains a small header and an array of fixed-size posting list
44 // buffers. Initially, all posting lists are chained in a singly-linked free
45 // list.
46 //
47 // When we want to get a new PostingList from an IndexBlock, we just
48 // pull one off the free list. When the user wants to return the
49 // PostingList to the free pool, we prepend it to the free list.
50 class IndexBlock {
51  public:
52   // What is the maximum posting list size in bytes that can be stored in this
53   // block.
CalculateMaxPostingListBytes(uint32_t block_size_in_bytes)54   static uint32_t CalculateMaxPostingListBytes(uint32_t block_size_in_bytes) {
55     return (block_size_in_bytes - sizeof(BlockHeader)) / sizeof(Hit) *
56            sizeof(Hit);
57   }
58 
59   // For a given min number of bits needed to store PostingListIndex for a
60   // block of "block_size", return the approximate number of hits that a full
61   // posting list in this block could accomodate.
62   static uint32_t ApproximateFullPostingListHitsForBlock(
63       uint32_t block_size, int posting_list_index_bits);
64 
65   // Create an IndexBlock to reference the previously used region of the
66   // mmapped_file starting at offset with size block_size
67   //
68   // RETURNS:
69   //   - a valid IndexBlock on success
70   //   - INVALID_ARGUMENT if size is too small for even just the BlockHeader or
71   //     if the posting list size stored in the region is not a valid posting
72   //     list size or it exceeds max_posting_list_bytes(size).
73   //   - INTERNAL_ERROR if unable to mmap the region [offset, offset+block_size)
74   static libtextclassifier3::StatusOr<IndexBlock>
75   CreateFromPreexistingIndexBlockRegion(const Filesystem& filesystem,
76                                         std::string_view file_path,
77                                         off_t offset, uint32_t block_size);
78 
79   // Create an IndexBlock to reference an uninitialized region of the
80   // mmapped_file starting at offset with size block_size. The IndexBlock will
81   // initialize the region to be an empty IndexBlock with posting lists of size
82   // posting_list_bytes.
83   //
84   // RETURNS:
85   //   - a valid IndexBlock on success
86   //   - INVALID_ARGUMENT if size is too small for even just the BlockHeader or
87   //   if posting_list_bytes is not a valid posting list size or it exceeds
88   //   max_posting_list_bytes(size).
89   //   - INTERNAL_ERROR if unable to mmap the region [offset, offset+block_size)
90   static libtextclassifier3::StatusOr<IndexBlock> CreateFromUninitializedRegion(
91       const Filesystem& filesystem, std::string_view file_path, off_t offset,
92       uint32_t block_size, uint32_t posting_list_bytes);
93 
94   IndexBlock(const IndexBlock&) = delete;
95   IndexBlock& operator=(const IndexBlock&) = delete;
96   IndexBlock(IndexBlock&&) = default;
97   IndexBlock& operator=(IndexBlock&&) = default;
98 
~IndexBlock()99   ~IndexBlock() {
100     if (mmapped_block_ != nullptr) {
101       mmapped_block_->PersistToDisk();
102     }
103   }
104 
105   // Instantiate a PostingListUsed at posting_list_index with the existing
106   // content in the IndexBlock.
107   //
108   // RETURNS:
109   //   - a valid PostingListUsed on success
110   //   - INVALID_ARGUMENT if posting_list_index >= max_num_posting_lists()
111   libtextclassifier3::StatusOr<PostingListUsed> GetAllocatedPostingList(
112       PostingListIndex posting_list_index);
113 
114   // Allocates a PostingListUsed in the IndexBlock, if possible.
115   //
116   // RETURNS:
117   //   - a valid PostingListIndex that can be used to retrieve the allocated
118   //     PostingListUsed via a call to GetAllocatedPostingList
119   //   - RESOURCE_EXHAUSTED if !has_free_posting_lists()
120   libtextclassifier3::StatusOr<PostingListIndex> AllocatePostingList();
121 
122   // Free posting list at posting_list_index.
123   //
124   // It is considered an error to "double-free" a posting list. You should never
125   // call FreePostingList(index) with the same index twice, unless that index
126   // was returned by an intervening AllocatePostingList() call.
127   //
128   // Ex.
129   //   PostingListIndex index = block.AllocatePostingList();
130   //   DoSomething(block.GetAllocatedPostingList(index));
131   //   block.FreePostingList(index);
132   //   block.FreePostingList(index);  // Argh! What are you doing?!
133   //   ...
134   //   PostingListIndex index = block.AllocatePostingList();
135   //   DoSomething(block.GetAllocatedPostingList(index));
136   //   block.FreePostingList(index);
137   //   index = block.AllocatePostingList();
138   //   DoSomethingElse(block.GetAllocatedPostingList(index));
139   //   // A-Ok! We called AllocatePostingList() since the last FreePostingList()
140   //   call. block.FreePostingList(index);
141   //
142   // Has no effect if posting_list_index >= max_num_posting_lists().
143   void FreePostingList(PostingListIndex posting_list_index);
144 
145   // Blocks can be chained. The interpretation of the chaining is up
146   // to the caller.
next_block_index()147   uint32_t next_block_index() const { return header_->next_block_index; }
148 
set_next_block_index(uint32_t next_block_index)149   void set_next_block_index(uint32_t next_block_index) {
150     header_->next_block_index = next_block_index;
151   }
152 
153   // Retrieves the size (in bytes) of the posting lists in this IndexBlock.
get_posting_list_bytes()154   uint32_t get_posting_list_bytes() const {
155     return header_->posting_list_bytes;
156   }
157 
158   // Maximum number of posting lists in the block.
max_num_posting_lists()159   uint32_t max_num_posting_lists() const {
160     return total_posting_lists_bytes() / get_posting_list_bytes();
161   }
162 
163   // Number of bits required to store the largest PostingListIndex in this
164   // block.
posting_list_index_bits()165   int posting_list_index_bits() const {
166     return BitsToStore(max_num_posting_lists());
167   }
168 
169   // Returns whether or not there are available posting lists in the free list.
has_free_posting_lists()170   bool has_free_posting_lists() const {
171     return header_->free_list_posting_list_index != kInvalidPostingListIndex;
172   }
173 
174  private:
175   // Assumes that mmapped_file already has established a valid mapping to the
176   // requested block.
177   IndexBlock(MemoryMappedFile mmapped_block);
178 
179   // Resets IndexBlock to hold posting lists of posting_list_bytes size and adds
180   // all posting lists to the free list.
181   //
182   // RETURNS:
183   //   - OK, on success
184   //   - INVALID_ARGUMENT if posting_list_bytes is a valid posting list size.
185   libtextclassifier3::Status Reset(int posting_list_bytes);
186 
187   char* get_posting_list_ptr(PostingListIndex posting_list_index);
188 
189   // Bytes in the block available for posting lists (minus header,
190   // alignment, etc.).
total_posting_lists_bytes()191   uint32_t total_posting_lists_bytes() const {
192     return block_size_in_bytes_ - sizeof(BlockHeader);
193   }
194 
195   struct BlockHeader {
196     // Index of the next block if this block is being chained or part of a free
197     // list.
198     uint32_t next_block_index;
199 
200     // Index to the first PostingListFree in the IndexBlock. This is the start
201     // of the free list.
202     PostingListIndex free_list_posting_list_index;
203 
204     // The size of each posting list in the IndexBlock.
205     uint32_t posting_list_bytes;
206   };
207   // Pointer to the header of this block. The header is used to store info about
208   // this block and its posting lists.
209   BlockHeader* header_;
210   // Pointer to the beginning of the posting lists region - the area the block
211   // after the header.
212   char* posting_lists_start_ptr_;
213   uint32_t block_size_in_bytes_;
214 
215   // MemoryMappedFile used to interact with the underlying flash block.
216   std::unique_ptr<MemoryMappedFile> mmapped_block_;
217 };
218 
219 }  // namespace lib
220 }  // namespace icing
221 
222 #endif  // ICING_INDEX_MAIN_INDEX_BLOCK_H_
223