1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_INDEX_POSTING_LIST_ACCESSOR_H_ 16 #define ICING_INDEX_POSTING_LIST_ACCESSOR_H_ 17 18 #include <memory> 19 20 #include "icing/text_classifier/lib3/utils/base/status.h" 21 #include "icing/text_classifier/lib3/utils/base/statusor.h" 22 #include "icing/index/hit/hit.h" 23 #include "icing/index/main/flash-index-storage.h" 24 #include "icing/index/main/posting-list-identifier.h" 25 #include "icing/index/main/posting-list-used.h" 26 27 namespace icing { 28 namespace lib { 29 30 // This class serves to: 31 // 1. Expose PostingListUseds to clients of FlashIndexStorage 32 // 2. Ensure the corresponding instance of IndexBlock has the same lifecycle as 33 // the instance of PostingListUsed that the client has access to, while 34 // not exposing IndexBlock's api surface. 35 // 3. Ensure that PostingListUseds can only be freed by calling methods which 36 // will also properly maintain the FlashIndexStorage free list and prevent 37 // callers from modifying the Posting List after freeing. 38 39 // This class is used to provide a simple abstraction for adding hits to posting 40 // lists. PostingListAccessor handles 1) selection of properly-sized posting 41 // lists for the accumulated hits during Finalize() and 2) chaining of max-sized 42 // posting lists. 43 class PostingListAccessor { 44 public: 45 // Creates an empty PostingListAccessor. 46 // 47 // RETURNS: 48 // - On success, a valid instance of PostingListAccessor 49 // - INVALID_ARGUMENT error if storage has an invalid block_size. 50 static libtextclassifier3::StatusOr<PostingListAccessor> Create( 51 FlashIndexStorage* storage); 52 53 // Create a PostingListAccessor with an existing posting list identified by 54 // existing_posting_list_id. 55 // 56 // The PostingListAccessor will add hits to this posting list until it is 57 // necessary either to 1) chain the posting list (if it is max-sized) or 2) 58 // move its hits to a larger posting list. 59 // 60 // RETURNS: 61 // - On success, a valid instance of PostingListAccessor 62 // - INVALID_ARGUMENT if storage has an invalid block_size. 63 static libtextclassifier3::StatusOr<PostingListAccessor> CreateFromExisting( 64 FlashIndexStorage* storage, 65 PostingListIdentifier existing_posting_list_id); 66 67 // Retrieve the next batch of hits for the posting list chain 68 // 69 // RETURNS: 70 // - On success, a vector of hits in the posting list chain 71 // - INTERNAL if called on an instance of PostingListAccessor that was 72 // created via PostingListAccessor::Create, if unable to read the next 73 // posting list in the chain or if the posting list has been corrupted 74 // somehow. 75 libtextclassifier3::StatusOr<std::vector<Hit>> GetNextHitsBatch(); 76 77 // Prepend one hit. This may result in flushing the posting list to disk (if 78 // the PostingListAccessor holds a max-sized posting list that is full) or 79 // freeing a pre-existing posting list if it is too small to fit all hits 80 // necessary. 81 // 82 // RETURNS: 83 // - OK, on success 84 // - INVALID_ARGUMENT if !hit.is_valid() or if hit is not less than the 85 // previously added hit. 86 // - RESOURCE_EXHAUSTED error if unable to grow the index to allocate a new 87 // posting list. 88 libtextclassifier3::Status PrependHit(const Hit& hit); 89 90 struct FinalizeResult { 91 // - OK on success 92 // - INVALID_ARGUMENT if there was no pre-existing posting list and no 93 // hits were added 94 // - RESOURCE_EXHAUSTED error if unable to grow the index to allocate a 95 // new posting list. 96 libtextclassifier3::Status status; 97 // Id of the posting list chain that was finalized. Guaranteed to be valid 98 // if status is OK. May be valid if status is non-OK, but previous blocks 99 // were written. 100 PostingListIdentifier id; 101 }; 102 // Write all accumulated hits to storage. 103 // 104 // If accessor points to a posting list chain with multiple posting lists in 105 // the chain and unable to write the last posting list in the chain, Finalize 106 // will return the error and also populate id with the id of the 107 // second-to-last posting list. 108 static FinalizeResult Finalize(PostingListAccessor accessor); 109 110 private: PostingListAccessor(FlashIndexStorage * storage,std::unique_ptr<uint8_t[]> posting_list_buffer_array,PostingListUsed posting_list_buffer)111 explicit PostingListAccessor( 112 FlashIndexStorage* storage, 113 std::unique_ptr<uint8_t[]> posting_list_buffer_array, 114 PostingListUsed posting_list_buffer) 115 : storage_(storage), 116 prev_block_identifier_(PostingListIdentifier::kInvalid), 117 posting_list_buffer_array_(std::move(posting_list_buffer_array)), 118 posting_list_buffer_(std::move(posting_list_buffer)), 119 has_reached_posting_list_chain_end_(false) {} 120 121 // Flushes preexisting_posting_list_ to disk if it's a max-sized posting list 122 // and populates prev_block_identifier. 123 // If it's not a max-sized posting list, moves the contents of 124 // preexisting_posting_list_ to posting_list_buffer_ and frees 125 // preexisting_posting_list_. 126 // Sets preexisting_posting_list_ to nullptr. 127 void FlushPreexistingPostingList(); 128 129 // Flushes posting_list_buffer_ to a max-sized posting list on disk, setting 130 // its next pointer to prev_block_identifier_ and updating 131 // prev_block_identifier_ to point to the just-written posting list. 132 libtextclassifier3::Status FlushInMemoryPostingList(); 133 134 // Frees all posting lists in the posting list chain starting at 135 // prev_block_identifier_. 136 libtextclassifier3::Status FreePostingListChain(); 137 138 FlashIndexStorage* storage_; // Does not own. 139 140 // The PostingListIdentifier of the first max-sized posting list in the 141 // posting list chain or PostingListIdentifier::kInvalid if there is no 142 // posting list chain. 143 PostingListIdentifier prev_block_identifier_; 144 145 // An editor to an existing posting list on disk. If available (non-NULL), 146 // we'll try to add all hits to this posting list. Once this posting list 147 // fills up, we'll either 1) chain it (if a max-sized posting list) and put 148 // future hits in posting_list_buffer_ or 2) copy all of its hits into 149 // posting_list_buffer_ and free this pl (if not a max-sized posting list). 150 // TODO(tjbarron) provide a benchmark to demonstrate the effects that re-using 151 // existing posting lists has on latency. 152 std::unique_ptr<PostingListHolder> preexisting_posting_list_; 153 154 // In-memory posting list used to buffer hits before writing them to the 155 // smallest on-disk posting list that will fit them. 156 // posting_list_buffer_array_ owns the memory region that posting_list_buffer_ 157 // interprets. Therefore, posting_list_buffer_array_ must have the same 158 // lifecycle as posting_list_buffer_. 159 std::unique_ptr<uint8_t[]> posting_list_buffer_array_; 160 PostingListUsed posting_list_buffer_; 161 162 bool has_reached_posting_list_chain_end_; 163 }; 164 165 } // namespace lib 166 } // namespace icing 167 168 #endif // ICING_INDEX_POSTING_LIST_ACCESSOR_H_ 169