1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #ifndef ICING_STORE_KEY_MAPPER_H_
16 #define ICING_STORE_KEY_MAPPER_H_
17
18 #include <cstdint>
19 #include <cstring>
20 #include <memory>
21 #include <string>
22 #include <string_view>
23 #include <type_traits>
24
25 #include "icing/text_classifier/lib3/utils/base/status.h"
26 #include "icing/text_classifier/lib3/utils/base/statusor.h"
27 #include "icing/absl_ports/canonical_errors.h"
28 #include "icing/absl_ports/str_cat.h"
29 #include "icing/file/filesystem.h"
30 #include "icing/legacy/index/icing-dynamic-trie.h"
31 #include "icing/legacy/index/icing-filesystem.h"
32 #include "icing/util/crc32.h"
33 #include "icing/util/status-macros.h"
34
35 namespace icing {
36 namespace lib {
37
38 // File-backed mapping between the string key and a trivially copyable value
39 // type.
40 //
41 // KeyMapper is thread-compatible
42 template <typename T>
43 class KeyMapper {
44 public:
45 // Returns an initialized instance of KeyMapper that can immediately handle
46 // read/write operations.
47 // Returns any encountered IO errors.
48 //
49 // base_dir : Base directory used to save all the files required to persist
50 // KeyMapper. If this base_dir was previously used to create a
51 // KeyMapper, then this existing data would be loaded. Otherwise,
52 // an empty KeyMapper would be created.
53 // maximum_size_bytes : The maximum allowable size of the key mapper storage.
54 static libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<T>>> Create(
55 const Filesystem& filesystem, std::string_view base_dir,
56 int maximum_size_bytes);
57
58 // Deletes all the files associated with the KeyMapper. Returns success or any
59 // encountered IO errors
60 //
61 // base_dir : Base directory used to save all the files required to persist
62 // KeyMapper. Should be the same as passed into Create().
63 static libtextclassifier3::Status Delete(const Filesystem& filesystem,
64 std::string_view base_dir);
65
66 ~KeyMapper() = default;
67
68 // Inserts/Updates value for key.
69 // Returns any encountered IO errors.
70 //
71 // NOTE: Put() doesn't automatically flush changes to disk and relies on
72 // either explicit calls to PersistToDisk() or a clean shutdown of the class.
73 libtextclassifier3::Status Put(std::string_view key, T value);
74
75 // Finds the current value for key and returns it. If key is not present, it
76 // is inserted with next_value and next_value is returned.
77 //
78 // Returns any IO errors that may occur during Put.
79 libtextclassifier3::StatusOr<T> GetOrPut(std::string_view key, T next_value);
80
81 // Returns the value corresponding to the key.
82 //
83 // Returns NOT_FOUND error if the key was missing.
84 // Returns any encountered IO errors.
85 libtextclassifier3::StatusOr<T> Get(std::string_view key) const;
86
87 // Deletes data related to the given key. Returns true on success.
88 bool Delete(std::string_view key);
89
90 // Returns a map of values to keys. Empty map if the mapper is empty.
91 std::unordered_map<T, std::string> GetValuesToKeys() const;
92
93 // Count of unique keys stored in the KeyMapper.
num_keys()94 int32_t num_keys() const { return trie_.size(); }
95
96 // Syncs all the changes made to the KeyMapper to disk.
97 // Returns any encountered IO errors.
98 //
99 // NOTE: To control disk-churn, Put() doesn't automatically persist every
100 // change to disk. The caller should explicitly call PersistToDisk() to make
101 // sure that the data is durable.
102 //
103 // Returns:
104 // OK on success
105 // INTERNAL on I/O error
106 libtextclassifier3::Status PersistToDisk();
107
108 // Calculates and returns the disk usage in bytes. Rounds up to the nearest
109 // block size.
110 //
111 // Returns:
112 // Disk usage on success
113 // INTERNAL_ERROR on IO error
114 libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const;
115
116 // Returns the size of the elements held in the key mapper. This excludes the
117 // size of any internal metadata of the key mapper, e.g. the key mapper's
118 // header.
119 //
120 // Returns:
121 // File size on success
122 // INTERNAL_ERROR on IO error
123 libtextclassifier3::StatusOr<int64_t> GetElementsSize() const;
124
125 // Computes and returns the checksum of the header and contents.
126 Crc32 ComputeChecksum();
127
128 private:
129 static constexpr char kKeyMapperDir[] = "key_mapper_dir";
130 static constexpr char kKeyMapperPrefix[] = "key_mapper";
131
132 // Use KeyMapper::Create() to instantiate.
133 explicit KeyMapper(std::string_view key_mapper_dir);
134
135 // Load any existing KeyMapper data from disk, or creates a new instance
136 // of KeyMapper on disk and gets ready to process read/write operations.
137 //
138 // Returns any encountered IO errors.
139 libtextclassifier3::Status Initialize(int maximum_size_bytes);
140
141 const std::string file_prefix_;
142
143 // TODO(adorokhine) Filesystem is a forked class that's available both in
144 // icing and icing namespaces. We will need icing::Filesystem in order
145 // to use IcingDynamicTrie. Filesystem class should be fully refactored
146 // to have a single definition across both namespaces. Such a class should
147 // use icing (and general google3) coding conventions and behave like
148 // a proper C++ class.
149 const IcingFilesystem icing_filesystem_;
150 IcingDynamicTrie trie_;
151
152 static_assert(std::is_trivially_copyable<T>::value,
153 "T must be trivially copyable");
154 };
155
156 template <typename T>
157 libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<T>>>
Create(const Filesystem & filesystem,std::string_view base_dir,int maximum_size_bytes)158 KeyMapper<T>::Create(const Filesystem& filesystem, std::string_view base_dir,
159 int maximum_size_bytes) {
160 // We create a subdirectory since the trie creates and stores multiple files.
161 // This makes it easier to isolate the trie files away from other files that
162 // could potentially be in the same base_dir, and makes it easier to delete.
163 const std::string key_mapper_dir =
164 absl_ports::StrCat(base_dir, "/", kKeyMapperDir);
165 if (!filesystem.CreateDirectoryRecursively(key_mapper_dir.c_str())) {
166 return absl_ports::InternalError(absl_ports::StrCat(
167 "Failed to create KeyMapper directory: ", key_mapper_dir));
168 }
169 auto mapper = std::unique_ptr<KeyMapper<T>>(new KeyMapper<T>(key_mapper_dir));
170 ICING_RETURN_IF_ERROR(mapper->Initialize(maximum_size_bytes));
171 return mapper;
172 }
173
174 template <typename T>
Delete(const Filesystem & filesystem,std::string_view base_dir)175 libtextclassifier3::Status KeyMapper<T>::Delete(const Filesystem& filesystem,
176 std::string_view base_dir) {
177 std::string key_mapper_dir = absl_ports::StrCat(base_dir, "/", kKeyMapperDir);
178 if (!filesystem.DeleteDirectoryRecursively(key_mapper_dir.c_str())) {
179 return absl_ports::InternalError(absl_ports::StrCat(
180 "Failed to delete KeyMapper directory: ", key_mapper_dir));
181 }
182 return libtextclassifier3::Status::OK;
183 }
184
185 template <typename T>
KeyMapper(std::string_view key_mapper_dir)186 KeyMapper<T>::KeyMapper(std::string_view key_mapper_dir)
187 : file_prefix_(absl_ports::StrCat(key_mapper_dir, "/", kKeyMapperPrefix)),
188 trie_(file_prefix_,
189 IcingDynamicTrie::RuntimeOptions().set_storage_policy(
190 IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc),
191 &icing_filesystem_) {}
192
193 template <typename T>
Initialize(int maximum_size_bytes)194 libtextclassifier3::Status KeyMapper<T>::Initialize(int maximum_size_bytes) {
195 IcingDynamicTrie::Options options;
196 // Divide the max space between the three internal arrays: nodes, nexts and
197 // suffixes. MaxNodes and MaxNexts are in units of their own data structures.
198 // MaxSuffixesSize is in units of bytes.
199 options.max_nodes = maximum_size_bytes / (3 * sizeof(IcingDynamicTrie::Node));
200 options.max_nexts = options.max_nodes;
201 options.max_suffixes_size =
202 sizeof(IcingDynamicTrie::Node) * options.max_nodes;
203 options.value_size = sizeof(T);
204
205 if (!trie_.CreateIfNotExist(options)) {
206 return absl_ports::InternalError(
207 absl_ports::StrCat("Failed to create KeyMapper file: ", file_prefix_));
208 }
209 if (!trie_.Init()) {
210 return absl_ports::InternalError(
211 absl_ports::StrCat("Failed to init KeyMapper file: ", file_prefix_));
212 }
213 return libtextclassifier3::Status::OK;
214 }
215
216 template <typename T>
GetOrPut(std::string_view key,T next_value)217 libtextclassifier3::StatusOr<T> KeyMapper<T>::GetOrPut(std::string_view key,
218 T next_value) {
219 std::string string_key(key);
220 uint32_t value_index;
221 if (!trie_.Insert(string_key.c_str(), &next_value, &value_index,
222 /*replace=*/false)) {
223 return absl_ports::InternalError(absl_ports::StrCat(
224 "Unable to insert key ", key, " into KeyMapper ", file_prefix_, "."));
225 }
226 // This memory address could be unaligned since we're just grabbing the value
227 // from somewhere in the trie's suffix array. The suffix array is filled with
228 // chars, so the address might not be aligned to T values.
229 const T* unaligned_value =
230 static_cast<const T*>(trie_.GetValueAtIndex(value_index));
231
232 // memcpy the value to ensure that the returned value here is in a T-aligned
233 // address
234 T aligned_value;
235 memcpy(&aligned_value, unaligned_value, sizeof(T));
236 return aligned_value;
237 }
238
239 template <typename T>
Put(std::string_view key,T value)240 libtextclassifier3::Status KeyMapper<T>::Put(std::string_view key, T value) {
241 std::string string_key(key);
242 if (!trie_.Insert(string_key.c_str(), &value)) {
243 return absl_ports::InternalError(absl_ports::StrCat(
244 "Unable to insert key ", key, " into KeyMapper ", file_prefix_, "."));
245 }
246 return libtextclassifier3::Status::OK;
247 }
248
249 template <typename T>
Get(std::string_view key)250 libtextclassifier3::StatusOr<T> KeyMapper<T>::Get(std::string_view key) const {
251 std::string string_key(key);
252 T value;
253 if (!trie_.Find(string_key.c_str(), &value)) {
254 return absl_ports::NotFoundError(absl_ports::StrCat(
255 "Key not found ", key, " in KeyMapper ", file_prefix_, "."));
256 }
257 return value;
258 }
259
260 template <typename T>
Delete(std::string_view key)261 bool KeyMapper<T>::Delete(std::string_view key) {
262 return trie_.Delete(key);
263 }
264
265 template <typename T>
GetValuesToKeys()266 std::unordered_map<T, std::string> KeyMapper<T>::GetValuesToKeys() const {
267 std::unordered_map<T, std::string> values_to_keys;
268 for (IcingDynamicTrie::Iterator itr(trie_, /*prefix=*/""); itr.IsValid();
269 itr.Advance()) {
270 if (itr.IsValid()) {
271 T value;
272 memcpy(&value, itr.GetValue(), sizeof(T));
273 values_to_keys.insert({value, itr.GetKey()});
274 }
275 }
276
277 return values_to_keys;
278 }
279
280 template <typename T>
PersistToDisk()281 libtextclassifier3::Status KeyMapper<T>::PersistToDisk() {
282 if (!trie_.Sync()) {
283 return absl_ports::InternalError(
284 absl_ports::StrCat("Failed to sync KeyMapper file: ", file_prefix_));
285 }
286
287 return libtextclassifier3::Status::OK;
288 }
289
290 template <typename T>
GetDiskUsage()291 libtextclassifier3::StatusOr<int64_t> KeyMapper<T>::GetDiskUsage() const {
292 int64_t size = trie_.GetDiskUsage();
293 if (size == IcingFilesystem::kBadFileSize || size < 0) {
294 return absl_ports::InternalError("Failed to get disk usage of key mapper");
295 }
296 return size;
297 }
298
299 template <typename T>
GetElementsSize()300 libtextclassifier3::StatusOr<int64_t> KeyMapper<T>::GetElementsSize() const {
301 int64_t size = trie_.GetElementsSize();
302 if (size == IcingFilesystem::kBadFileSize || size < 0) {
303 return absl_ports::InternalError(
304 "Failed to get disk usage of elements in the key mapper");
305 }
306 return size;
307 }
308
309 template <typename T>
ComputeChecksum()310 Crc32 KeyMapper<T>::ComputeChecksum() {
311 return Crc32(trie_.UpdateCrc());
312 }
313
314 } // namespace lib
315 } // namespace icing
316
317 #endif // ICING_STORE_KEY_MAPPER_H_
318