1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/index/main/posting-list-accessor.h"
16 
17 #include <memory>
18 
19 #include "icing/absl_ports/canonical_errors.h"
20 #include "icing/index/main/flash-index-storage.h"
21 #include "icing/index/main/index-block.h"
22 #include "icing/index/main/posting-list-identifier.h"
23 #include "icing/index/main/posting-list-used.h"
24 #include "icing/util/status-macros.h"
25 
26 namespace icing {
27 namespace lib {
28 
Create(FlashIndexStorage * storage)29 libtextclassifier3::StatusOr<PostingListAccessor> PostingListAccessor::Create(
30     FlashIndexStorage *storage) {
31   uint32_t max_posting_list_bytes =
32       IndexBlock::CalculateMaxPostingListBytes(storage->block_size());
33   std::unique_ptr<uint8_t[]> posting_list_buffer_array =
34       std::make_unique<uint8_t[]>(max_posting_list_bytes);
35   ICING_ASSIGN_OR_RETURN(
36       PostingListUsed posting_list_buffer,
37       PostingListUsed::CreateFromUnitializedRegion(
38           posting_list_buffer_array.get(), max_posting_list_bytes));
39   return PostingListAccessor(storage, std::move(posting_list_buffer_array),
40                              std::move(posting_list_buffer));
41 }
42 
43 libtextclassifier3::StatusOr<PostingListAccessor>
CreateFromExisting(FlashIndexStorage * storage,PostingListIdentifier existing_posting_list_id)44 PostingListAccessor::CreateFromExisting(
45     FlashIndexStorage *storage,
46     PostingListIdentifier existing_posting_list_id) {
47   // Our posting_list_buffer_ will start as empty.
48   ICING_ASSIGN_OR_RETURN(PostingListAccessor pl_accessor, Create(storage));
49   ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
50                          storage->GetPostingList(existing_posting_list_id));
51   pl_accessor.preexisting_posting_list_ =
52       std::make_unique<PostingListHolder>(std::move(holder));
53   return pl_accessor;
54 }
55 
56 // Returns the next batch of hits for the provided posting list.
57 libtextclassifier3::StatusOr<std::vector<Hit>>
GetNextHitsBatch()58 PostingListAccessor::GetNextHitsBatch() {
59   if (preexisting_posting_list_ == nullptr) {
60     if (has_reached_posting_list_chain_end_) {
61       return std::vector<Hit>();
62     }
63     return absl_ports::FailedPreconditionError(
64         "Cannot retrieve hits from a PostingListAccessor that was not creaated "
65         "from a preexisting posting list.");
66   }
67   ICING_ASSIGN_OR_RETURN(std::vector<Hit> batch,
68                          preexisting_posting_list_->posting_list.GetHits());
69   uint32_t block_index = preexisting_posting_list_->block.next_block_index();
70   if (block_index != kInvalidBlockIndex) {
71     PostingListIdentifier next_posting_list_id(
72         block_index, /*posting_list_index=*/0,
73         preexisting_posting_list_->block.posting_list_index_bits());
74     ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
75                            storage_->GetPostingList(next_posting_list_id));
76     preexisting_posting_list_ =
77         std::make_unique<PostingListHolder>(std::move(holder));
78   } else {
79     has_reached_posting_list_chain_end_ = true;
80     preexisting_posting_list_.reset();
81   }
82   return batch;
83 }
84 
PrependHit(const Hit & hit)85 libtextclassifier3::Status PostingListAccessor::PrependHit(const Hit &hit) {
86   PostingListUsed &active_pl = (preexisting_posting_list_ != nullptr)
87                                    ? preexisting_posting_list_->posting_list
88                                    : posting_list_buffer_;
89   libtextclassifier3::Status status = active_pl.PrependHit(hit);
90   if (!absl_ports::IsResourceExhausted(status)) {
91     return status;
92   }
93   // There is no more room to add hits to this current posting list! Therefore,
94   // we need to either move those hits to a larger posting list or flush this
95   // posting list and create another max-sized posting list in the chain.
96   if (preexisting_posting_list_ != nullptr) {
97     FlushPreexistingPostingList();
98   } else {
99     ICING_RETURN_IF_ERROR(FlushInMemoryPostingList());
100   }
101 
102   // Re-add hit. Should always fit since we just cleared posting_list_buffer_.
103   // It's fine to explicitly reference posting_list_buffer_ here because there's
104   // no way of reaching this line while preexisting_posting_list_ is still in
105   // use.
106   return posting_list_buffer_.PrependHit(hit);
107 }
108 
FlushPreexistingPostingList()109 void PostingListAccessor::FlushPreexistingPostingList() {
110   if (preexisting_posting_list_->block.max_num_posting_lists() == 1) {
111     // If this is a max-sized posting list, then just keep track of the id for
112     // chaining. It'll be flushed to disk when preexisting_posting_list_ is
113     // destructed.
114     prev_block_identifier_ = preexisting_posting_list_->id;
115   } else {
116     // If this is NOT a max-sized posting list, then our hits have outgrown this
117     // particular posting list. Move the hits into the in-memory posting list
118     // and free this posting list.
119     //
120     // Move will always succeed since posting_list_buffer_ is max_pl_bytes.
121     posting_list_buffer_.MoveFrom(&preexisting_posting_list_->posting_list);
122 
123     // Now that all the contents of this posting list have been copied, there's
124     // no more use for it. Make it available to be used for another posting
125     // list.
126     storage_->FreePostingList(std::move(*preexisting_posting_list_));
127   }
128   preexisting_posting_list_.reset();
129 }
130 
FlushInMemoryPostingList()131 libtextclassifier3::Status PostingListAccessor::FlushInMemoryPostingList() {
132   // We exceeded max_pl_bytes(). Need to flush posting_list_buffer_ and update
133   // the chain.
134   uint32_t max_posting_list_bytes =
135       IndexBlock::CalculateMaxPostingListBytes(storage_->block_size());
136   ICING_ASSIGN_OR_RETURN(PostingListHolder holder,
137                          storage_->AllocatePostingList(max_posting_list_bytes));
138   holder.block.set_next_block_index(prev_block_identifier_.block_index());
139   prev_block_identifier_ = holder.id;
140   return holder.posting_list.MoveFrom(&posting_list_buffer_);
141 }
142 
Finalize(PostingListAccessor accessor)143 PostingListAccessor::FinalizeResult PostingListAccessor::Finalize(
144     PostingListAccessor accessor) {
145   if (accessor.preexisting_posting_list_ != nullptr) {
146     // Our hits are already in an existing posting list. Nothing else to do, but
147     // return its id.
148     FinalizeResult result = {libtextclassifier3::Status::OK,
149                              accessor.preexisting_posting_list_->id};
150     return result;
151   }
152   if (accessor.posting_list_buffer_.BytesUsed() <= 0) {
153     FinalizeResult result = {absl_ports::InvalidArgumentError(
154                                  "Can't finalize an empty PostingListAccessor. "
155                                  "There's nothing to Finalize!"),
156                              PostingListIdentifier::kInvalid};
157     return result;
158   }
159   uint32_t posting_list_bytes =
160       accessor.posting_list_buffer_.MinPostingListSizeToFit();
161   if (accessor.prev_block_identifier_.is_valid()) {
162     posting_list_bytes = IndexBlock::CalculateMaxPostingListBytes(
163         accessor.storage_->block_size());
164   }
165   auto holder_or = accessor.storage_->AllocatePostingList(posting_list_bytes);
166   if (!holder_or.ok()) {
167     FinalizeResult result = {holder_or.status(),
168                              accessor.prev_block_identifier_};
169     return result;
170   }
171   PostingListHolder holder = std::move(holder_or).ValueOrDie();
172   if (accessor.prev_block_identifier_.is_valid()) {
173     holder.block.set_next_block_index(
174         accessor.prev_block_identifier_.block_index());
175   }
176 
177   // Move to allocated area. This should never actually return an error. We know
178   // that editor.posting_list() is valid because it wouldn't have successfully
179   // returned by AllocatePostingList if it wasn't. We know posting_list_buffer_
180   // is valid because we created it in-memory. And finally, we know that the
181   // hits from posting_list_buffer_ will fit in editor.posting_list() because we
182   // requested it be at at least posting_list_bytes large.
183   auto status = holder.posting_list.MoveFrom(&accessor.posting_list_buffer_);
184   if (!status.ok()) {
185     FinalizeResult result = {std::move(status),
186                              accessor.prev_block_identifier_};
187     return result;
188   }
189   FinalizeResult result = {libtextclassifier3::Status::OK, holder.id};
190   return result;
191 }
192 
193 }  // namespace lib
194 }  // namespace icing
195