1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_INDEX_MAIN_INDEX_BLOCK_H_ 16 #define ICING_INDEX_MAIN_INDEX_BLOCK_H_ 17 18 #include <string.h> 19 #include <sys/mman.h> 20 21 #include <algorithm> 22 #include <limits> 23 #include <memory> 24 #include <string> 25 #include <unordered_set> 26 #include <vector> 27 28 #include "icing/file/memory-mapped-file.h" 29 #include "icing/index/hit/hit.h" 30 #include "icing/index/main/posting-list-free.h" 31 #include "icing/index/main/posting-list-used.h" 32 #include "icing/legacy/index/icing-bit-util.h" 33 34 namespace icing { 35 namespace lib { 36 37 inline constexpr uint32_t kInvalidBlockIndex = 0; 38 39 // This class is used to manage I/O to a single flash block and to manage the 40 // division of that flash block into PostingLists. It provides an interface to 41 // allocate, free and read posting lists. 42 // 43 // An IndexBlock contains a small header and an array of fixed-size posting list 44 // buffers. Initially, all posting lists are chained in a singly-linked free 45 // list. 46 // 47 // When we want to get a new PostingList from an IndexBlock, we just 48 // pull one off the free list. When the user wants to return the 49 // PostingList to the free pool, we prepend it to the free list. 50 class IndexBlock { 51 public: 52 // What is the maximum posting list size in bytes that can be stored in this 53 // block. CalculateMaxPostingListBytes(uint32_t block_size_in_bytes)54 static uint32_t CalculateMaxPostingListBytes(uint32_t block_size_in_bytes) { 55 return (block_size_in_bytes - sizeof(BlockHeader)) / sizeof(Hit) * 56 sizeof(Hit); 57 } 58 59 // For a given min number of bits needed to store PostingListIndex for a 60 // block of "block_size", return the approximate number of hits that a full 61 // posting list in this block could accomodate. 62 static uint32_t ApproximateFullPostingListHitsForBlock( 63 uint32_t block_size, int posting_list_index_bits); 64 65 // Create an IndexBlock to reference the previously used region of the 66 // mmapped_file starting at offset with size block_size 67 // 68 // RETURNS: 69 // - a valid IndexBlock on success 70 // - INVALID_ARGUMENT if size is too small for even just the BlockHeader or 71 // if the posting list size stored in the region is not a valid posting 72 // list size or it exceeds max_posting_list_bytes(size). 73 // - INTERNAL_ERROR if unable to mmap the region [offset, offset+block_size) 74 static libtextclassifier3::StatusOr<IndexBlock> 75 CreateFromPreexistingIndexBlockRegion(const Filesystem& filesystem, 76 std::string_view file_path, 77 off_t offset, uint32_t block_size); 78 79 // Create an IndexBlock to reference an uninitialized region of the 80 // mmapped_file starting at offset with size block_size. The IndexBlock will 81 // initialize the region to be an empty IndexBlock with posting lists of size 82 // posting_list_bytes. 83 // 84 // RETURNS: 85 // - a valid IndexBlock on success 86 // - INVALID_ARGUMENT if size is too small for even just the BlockHeader or 87 // if posting_list_bytes is not a valid posting list size or it exceeds 88 // max_posting_list_bytes(size). 89 // - INTERNAL_ERROR if unable to mmap the region [offset, offset+block_size) 90 static libtextclassifier3::StatusOr<IndexBlock> CreateFromUninitializedRegion( 91 const Filesystem& filesystem, std::string_view file_path, off_t offset, 92 uint32_t block_size, uint32_t posting_list_bytes); 93 94 IndexBlock(const IndexBlock&) = delete; 95 IndexBlock& operator=(const IndexBlock&) = delete; 96 IndexBlock(IndexBlock&&) = default; 97 IndexBlock& operator=(IndexBlock&&) = default; 98 ~IndexBlock()99 ~IndexBlock() { 100 if (mmapped_block_ != nullptr) { 101 mmapped_block_->PersistToDisk(); 102 } 103 } 104 105 // Instantiate a PostingListUsed at posting_list_index with the existing 106 // content in the IndexBlock. 107 // 108 // RETURNS: 109 // - a valid PostingListUsed on success 110 // - INVALID_ARGUMENT if posting_list_index >= max_num_posting_lists() 111 libtextclassifier3::StatusOr<PostingListUsed> GetAllocatedPostingList( 112 PostingListIndex posting_list_index); 113 114 // Allocates a PostingListUsed in the IndexBlock, if possible. 115 // 116 // RETURNS: 117 // - a valid PostingListIndex that can be used to retrieve the allocated 118 // PostingListUsed via a call to GetAllocatedPostingList 119 // - RESOURCE_EXHAUSTED if !has_free_posting_lists() 120 libtextclassifier3::StatusOr<PostingListIndex> AllocatePostingList(); 121 122 // Free posting list at posting_list_index. 123 // 124 // It is considered an error to "double-free" a posting list. You should never 125 // call FreePostingList(index) with the same index twice, unless that index 126 // was returned by an intervening AllocatePostingList() call. 127 // 128 // Ex. 129 // PostingListIndex index = block.AllocatePostingList(); 130 // DoSomething(block.GetAllocatedPostingList(index)); 131 // block.FreePostingList(index); 132 // block.FreePostingList(index); // Argh! What are you doing?! 133 // ... 134 // PostingListIndex index = block.AllocatePostingList(); 135 // DoSomething(block.GetAllocatedPostingList(index)); 136 // block.FreePostingList(index); 137 // index = block.AllocatePostingList(); 138 // DoSomethingElse(block.GetAllocatedPostingList(index)); 139 // // A-Ok! We called AllocatePostingList() since the last FreePostingList() 140 // call. block.FreePostingList(index); 141 // 142 // Has no effect if posting_list_index >= max_num_posting_lists(). 143 void FreePostingList(PostingListIndex posting_list_index); 144 145 // Blocks can be chained. The interpretation of the chaining is up 146 // to the caller. next_block_index()147 uint32_t next_block_index() const { return header_->next_block_index; } 148 set_next_block_index(uint32_t next_block_index)149 void set_next_block_index(uint32_t next_block_index) { 150 header_->next_block_index = next_block_index; 151 } 152 153 // Retrieves the size (in bytes) of the posting lists in this IndexBlock. get_posting_list_bytes()154 uint32_t get_posting_list_bytes() const { 155 return header_->posting_list_bytes; 156 } 157 158 // Maximum number of posting lists in the block. max_num_posting_lists()159 uint32_t max_num_posting_lists() const { 160 return total_posting_lists_bytes() / get_posting_list_bytes(); 161 } 162 163 // Number of bits required to store the largest PostingListIndex in this 164 // block. posting_list_index_bits()165 int posting_list_index_bits() const { 166 return BitsToStore(max_num_posting_lists()); 167 } 168 169 // Returns whether or not there are available posting lists in the free list. has_free_posting_lists()170 bool has_free_posting_lists() const { 171 return header_->free_list_posting_list_index != kInvalidPostingListIndex; 172 } 173 174 private: 175 // Assumes that mmapped_file already has established a valid mapping to the 176 // requested block. 177 IndexBlock(MemoryMappedFile mmapped_block); 178 179 // Resets IndexBlock to hold posting lists of posting_list_bytes size and adds 180 // all posting lists to the free list. 181 // 182 // RETURNS: 183 // - OK, on success 184 // - INVALID_ARGUMENT if posting_list_bytes is a valid posting list size. 185 libtextclassifier3::Status Reset(int posting_list_bytes); 186 187 char* get_posting_list_ptr(PostingListIndex posting_list_index); 188 189 // Bytes in the block available for posting lists (minus header, 190 // alignment, etc.). total_posting_lists_bytes()191 uint32_t total_posting_lists_bytes() const { 192 return block_size_in_bytes_ - sizeof(BlockHeader); 193 } 194 195 struct BlockHeader { 196 // Index of the next block if this block is being chained or part of a free 197 // list. 198 uint32_t next_block_index; 199 200 // Index to the first PostingListFree in the IndexBlock. This is the start 201 // of the free list. 202 PostingListIndex free_list_posting_list_index; 203 204 // The size of each posting list in the IndexBlock. 205 uint32_t posting_list_bytes; 206 }; 207 // Pointer to the header of this block. The header is used to store info about 208 // this block and its posting lists. 209 BlockHeader* header_; 210 // Pointer to the beginning of the posting lists region - the area the block 211 // after the header. 212 char* posting_lists_start_ptr_; 213 uint32_t block_size_in_bytes_; 214 215 // MemoryMappedFile used to interact with the underlying flash block. 216 std::unique_ptr<MemoryMappedFile> mmapped_block_; 217 }; 218 219 } // namespace lib 220 } // namespace icing 221 222 #endif // ICING_INDEX_MAIN_INDEX_BLOCK_H_ 223