1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_INDEX_POSTING_LIST_ACCESSOR_H_
16 #define ICING_INDEX_POSTING_LIST_ACCESSOR_H_
17 
18 #include <memory>
19 
20 #include "icing/text_classifier/lib3/utils/base/status.h"
21 #include "icing/text_classifier/lib3/utils/base/statusor.h"
22 #include "icing/index/hit/hit.h"
23 #include "icing/index/main/flash-index-storage.h"
24 #include "icing/index/main/posting-list-identifier.h"
25 #include "icing/index/main/posting-list-used.h"
26 
27 namespace icing {
28 namespace lib {
29 
30 // This class serves to:
31 //  1. Expose PostingListUseds to clients of FlashIndexStorage
32 //  2. Ensure the corresponding instance of IndexBlock has the same lifecycle as
33 //     the instance of PostingListUsed that the client has access to, while
34 //     not exposing IndexBlock's api surface.
35 //  3. Ensure that PostingListUseds can only be freed by calling methods which
36 //     will also properly maintain the FlashIndexStorage free list and prevent
37 //     callers from modifying the Posting List after freeing.
38 
39 // This class is used to provide a simple abstraction for adding hits to posting
40 // lists. PostingListAccessor handles 1) selection of properly-sized posting
41 // lists for the accumulated hits during Finalize() and 2) chaining of max-sized
42 // posting lists.
43 class PostingListAccessor {
44  public:
45   // Creates an empty PostingListAccessor.
46   //
47   // RETURNS:
48   //   - On success, a valid instance of PostingListAccessor
49   //   - INVALID_ARGUMENT error if storage has an invalid block_size.
50   static libtextclassifier3::StatusOr<PostingListAccessor> Create(
51       FlashIndexStorage* storage);
52 
53   // Create a PostingListAccessor with an existing posting list identified by
54   // existing_posting_list_id.
55   //
56   // The PostingListAccessor will add hits to this posting list until it is
57   // necessary either to 1) chain the posting list (if it is max-sized) or 2)
58   // move its hits to a larger posting list.
59   //
60   // RETURNS:
61   //   - On success, a valid instance of PostingListAccessor
62   //   - INVALID_ARGUMENT if storage has an invalid block_size.
63   static libtextclassifier3::StatusOr<PostingListAccessor> CreateFromExisting(
64       FlashIndexStorage* storage,
65       PostingListIdentifier existing_posting_list_id);
66 
67   // Retrieve the next batch of hits for the posting list chain
68   //
69   // RETURNS:
70   //   - On success, a vector of hits in the posting list chain
71   //   - INTERNAL if called on an instance of PostingListAccessor that was
72   //     created via PostingListAccessor::Create, if unable to read the next
73   //     posting list in the chain or if the posting list has been corrupted
74   //     somehow.
75   libtextclassifier3::StatusOr<std::vector<Hit>> GetNextHitsBatch();
76 
77   // Prepend one hit. This may result in flushing the posting list to disk (if
78   // the PostingListAccessor holds a max-sized posting list that is full) or
79   // freeing a pre-existing posting list if it is too small to fit all hits
80   // necessary.
81   //
82   // RETURNS:
83   //   - OK, on success
84   //   - INVALID_ARGUMENT if !hit.is_valid() or if hit is not less than the
85   //   previously added hit.
86   //   - RESOURCE_EXHAUSTED error if unable to grow the index to allocate a new
87   //   posting list.
88   libtextclassifier3::Status PrependHit(const Hit& hit);
89 
90   struct FinalizeResult {
91     //   - OK on success
92     //   - INVALID_ARGUMENT if there was no pre-existing posting list and no
93     //     hits were added
94     //   - RESOURCE_EXHAUSTED error if unable to grow the index to allocate a
95     //     new posting list.
96     libtextclassifier3::Status status;
97     // Id of the posting list chain that was finalized. Guaranteed to be valid
98     // if status is OK. May be valid if status is non-OK, but previous blocks
99     // were written.
100     PostingListIdentifier id;
101   };
102   // Write all accumulated hits to storage.
103   //
104   // If accessor points to a posting list chain with multiple posting lists in
105   // the chain and unable to write the last posting list in the chain, Finalize
106   // will return the error and also populate id with the id of the
107   // second-to-last posting list.
108   static FinalizeResult Finalize(PostingListAccessor accessor);
109 
110  private:
PostingListAccessor(FlashIndexStorage * storage,std::unique_ptr<uint8_t[]> posting_list_buffer_array,PostingListUsed posting_list_buffer)111   explicit PostingListAccessor(
112       FlashIndexStorage* storage,
113       std::unique_ptr<uint8_t[]> posting_list_buffer_array,
114       PostingListUsed posting_list_buffer)
115       : storage_(storage),
116         prev_block_identifier_(PostingListIdentifier::kInvalid),
117         posting_list_buffer_array_(std::move(posting_list_buffer_array)),
118         posting_list_buffer_(std::move(posting_list_buffer)),
119         has_reached_posting_list_chain_end_(false) {}
120 
121   // Flushes preexisting_posting_list_ to disk if it's a max-sized posting list
122   // and populates prev_block_identifier.
123   // If it's not a max-sized posting list, moves the contents of
124   // preexisting_posting_list_ to posting_list_buffer_ and frees
125   // preexisting_posting_list_.
126   // Sets preexisting_posting_list_ to nullptr.
127   void FlushPreexistingPostingList();
128 
129   // Flushes posting_list_buffer_ to a max-sized posting list on disk, setting
130   // its next pointer to prev_block_identifier_ and updating
131   // prev_block_identifier_ to point to the just-written posting list.
132   libtextclassifier3::Status FlushInMemoryPostingList();
133 
134   // Frees all posting lists in the posting list chain starting at
135   // prev_block_identifier_.
136   libtextclassifier3::Status FreePostingListChain();
137 
138   FlashIndexStorage* storage_;  // Does not own.
139 
140   // The PostingListIdentifier of the first max-sized posting list in the
141   // posting list chain or PostingListIdentifier::kInvalid if there is no
142   // posting list chain.
143   PostingListIdentifier prev_block_identifier_;
144 
145   // An editor to an existing posting list on disk. If available (non-NULL),
146   // we'll try to add all hits to this posting list. Once this posting list
147   // fills up, we'll either 1) chain it (if a max-sized posting list) and put
148   // future hits in posting_list_buffer_ or 2) copy all of its hits into
149   // posting_list_buffer_ and free this pl (if not a max-sized posting list).
150   // TODO(tjbarron) provide a benchmark to demonstrate the effects that re-using
151   // existing posting lists has on latency.
152   std::unique_ptr<PostingListHolder> preexisting_posting_list_;
153 
154   // In-memory posting list used to buffer hits before writing them to the
155   // smallest on-disk posting list that will fit them.
156   // posting_list_buffer_array_ owns the memory region that posting_list_buffer_
157   // interprets. Therefore, posting_list_buffer_array_ must have the same
158   // lifecycle as posting_list_buffer_.
159   std::unique_ptr<uint8_t[]> posting_list_buffer_array_;
160   PostingListUsed posting_list_buffer_;
161 
162   bool has_reached_posting_list_chain_end_;
163 };
164 
165 }  // namespace lib
166 }  // namespace icing
167 
168 #endif  // ICING_INDEX_POSTING_LIST_ACCESSOR_H_
169