1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_STORE_KEY_MAPPER_H_
16 #define ICING_STORE_KEY_MAPPER_H_
17 
18 #include <cstdint>
19 #include <cstring>
20 #include <memory>
21 #include <string>
22 #include <string_view>
23 #include <type_traits>
24 
25 #include "icing/text_classifier/lib3/utils/base/status.h"
26 #include "icing/text_classifier/lib3/utils/base/statusor.h"
27 #include "icing/absl_ports/canonical_errors.h"
28 #include "icing/absl_ports/str_cat.h"
29 #include "icing/file/filesystem.h"
30 #include "icing/legacy/index/icing-dynamic-trie.h"
31 #include "icing/legacy/index/icing-filesystem.h"
32 #include "icing/util/crc32.h"
33 #include "icing/util/status-macros.h"
34 
35 namespace icing {
36 namespace lib {
37 
38 // File-backed mapping between the string key and a trivially copyable value
39 // type.
40 //
41 // KeyMapper is thread-compatible
42 template <typename T>
43 class KeyMapper {
44  public:
45   // Returns an initialized instance of KeyMapper that can immediately handle
46   // read/write operations.
47   // Returns any encountered IO errors.
48   //
49   // base_dir : Base directory used to save all the files required to persist
50   //            KeyMapper. If this base_dir was previously used to create a
51   //            KeyMapper, then this existing data would be loaded. Otherwise,
52   //            an empty KeyMapper would be created.
53   // maximum_size_bytes : The maximum allowable size of the key mapper storage.
54   static libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<T>>> Create(
55       const Filesystem& filesystem, std::string_view base_dir,
56       int maximum_size_bytes);
57 
58   // Deletes all the files associated with the KeyMapper. Returns success or any
59   // encountered IO errors
60   //
61   // base_dir : Base directory used to save all the files required to persist
62   //            KeyMapper. Should be the same as passed into Create().
63   static libtextclassifier3::Status Delete(const Filesystem& filesystem,
64                                            std::string_view base_dir);
65 
66   ~KeyMapper() = default;
67 
68   // Inserts/Updates value for key.
69   // Returns any encountered IO errors.
70   //
71   // NOTE: Put() doesn't automatically flush changes to disk and relies on
72   // either explicit calls to PersistToDisk() or a clean shutdown of the class.
73   libtextclassifier3::Status Put(std::string_view key, T value);
74 
75   // Finds the current value for key and returns it. If key is not present, it
76   // is inserted with next_value and next_value is returned.
77   //
78   // Returns any IO errors that may occur during Put.
79   libtextclassifier3::StatusOr<T> GetOrPut(std::string_view key, T next_value);
80 
81   // Returns the value corresponding to the key.
82   //
83   // Returns NOT_FOUND error if the key was missing.
84   // Returns any encountered IO errors.
85   libtextclassifier3::StatusOr<T> Get(std::string_view key) const;
86 
87   // Deletes data related to the given key. Returns true on success.
88   bool Delete(std::string_view key);
89 
90   // Returns a map of values to keys. Empty map if the mapper is empty.
91   std::unordered_map<T, std::string> GetValuesToKeys() const;
92 
93   // Count of unique keys stored in the KeyMapper.
num_keys()94   int32_t num_keys() const { return trie_.size(); }
95 
96   // Syncs all the changes made to the KeyMapper to disk.
97   // Returns any encountered IO errors.
98   //
99   // NOTE: To control disk-churn, Put() doesn't automatically persist every
100   // change to disk. The caller should explicitly call PersistToDisk() to make
101   // sure that the data is durable.
102   //
103   // Returns:
104   //   OK on success
105   //   INTERNAL on I/O error
106   libtextclassifier3::Status PersistToDisk();
107 
108   // Calculates and returns the disk usage in bytes. Rounds up to the nearest
109   // block size.
110   //
111   // Returns:
112   //   Disk usage on success
113   //   INTERNAL_ERROR on IO error
114   libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const;
115 
116   // Returns the size of the elements held in the key mapper. This excludes the
117   // size of any internal metadata of the key mapper, e.g. the key mapper's
118   // header.
119   //
120   // Returns:
121   //   File size on success
122   //   INTERNAL_ERROR on IO error
123   libtextclassifier3::StatusOr<int64_t> GetElementsSize() const;
124 
125   // Computes and returns the checksum of the header and contents.
126   Crc32 ComputeChecksum();
127 
128  private:
129   static constexpr char kKeyMapperDir[] = "key_mapper_dir";
130   static constexpr char kKeyMapperPrefix[] = "key_mapper";
131 
132   // Use KeyMapper::Create() to instantiate.
133   explicit KeyMapper(std::string_view key_mapper_dir);
134 
135   // Load any existing KeyMapper data from disk, or creates a new instance
136   // of KeyMapper on disk and gets ready to process read/write operations.
137   //
138   // Returns any encountered IO errors.
139   libtextclassifier3::Status Initialize(int maximum_size_bytes);
140 
141   const std::string file_prefix_;
142 
143   // TODO(adorokhine) Filesystem is a forked class that's available both in
144   // icing and icing namespaces. We will need icing::Filesystem in order
145   // to use IcingDynamicTrie. Filesystem class should be fully refactored
146   // to have a single definition across both namespaces. Such a class should
147   // use icing (and general google3) coding conventions and behave like
148   // a proper C++ class.
149   const IcingFilesystem icing_filesystem_;
150   IcingDynamicTrie trie_;
151 
152   static_assert(std::is_trivially_copyable<T>::value,
153                 "T must be trivially copyable");
154 };
155 
156 template <typename T>
157 libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<T>>>
Create(const Filesystem & filesystem,std::string_view base_dir,int maximum_size_bytes)158 KeyMapper<T>::Create(const Filesystem& filesystem, std::string_view base_dir,
159                      int maximum_size_bytes) {
160   // We create a subdirectory since the trie creates and stores multiple files.
161   // This makes it easier to isolate the trie files away from other files that
162   // could potentially be in the same base_dir, and makes it easier to delete.
163   const std::string key_mapper_dir =
164       absl_ports::StrCat(base_dir, "/", kKeyMapperDir);
165   if (!filesystem.CreateDirectoryRecursively(key_mapper_dir.c_str())) {
166     return absl_ports::InternalError(absl_ports::StrCat(
167         "Failed to create KeyMapper directory: ", key_mapper_dir));
168   }
169   auto mapper = std::unique_ptr<KeyMapper<T>>(new KeyMapper<T>(key_mapper_dir));
170   ICING_RETURN_IF_ERROR(mapper->Initialize(maximum_size_bytes));
171   return mapper;
172 }
173 
174 template <typename T>
Delete(const Filesystem & filesystem,std::string_view base_dir)175 libtextclassifier3::Status KeyMapper<T>::Delete(const Filesystem& filesystem,
176                                                 std::string_view base_dir) {
177   std::string key_mapper_dir = absl_ports::StrCat(base_dir, "/", kKeyMapperDir);
178   if (!filesystem.DeleteDirectoryRecursively(key_mapper_dir.c_str())) {
179     return absl_ports::InternalError(absl_ports::StrCat(
180         "Failed to delete KeyMapper directory: ", key_mapper_dir));
181   }
182   return libtextclassifier3::Status::OK;
183 }
184 
185 template <typename T>
KeyMapper(std::string_view key_mapper_dir)186 KeyMapper<T>::KeyMapper(std::string_view key_mapper_dir)
187     : file_prefix_(absl_ports::StrCat(key_mapper_dir, "/", kKeyMapperPrefix)),
188       trie_(file_prefix_,
189             IcingDynamicTrie::RuntimeOptions().set_storage_policy(
190                 IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc),
191             &icing_filesystem_) {}
192 
193 template <typename T>
Initialize(int maximum_size_bytes)194 libtextclassifier3::Status KeyMapper<T>::Initialize(int maximum_size_bytes) {
195   IcingDynamicTrie::Options options;
196   // Divide the max space between the three internal arrays: nodes, nexts and
197   // suffixes. MaxNodes and MaxNexts are in units of their own data structures.
198   // MaxSuffixesSize is in units of bytes.
199   options.max_nodes = maximum_size_bytes / (3 * sizeof(IcingDynamicTrie::Node));
200   options.max_nexts = options.max_nodes;
201   options.max_suffixes_size =
202       sizeof(IcingDynamicTrie::Node) * options.max_nodes;
203   options.value_size = sizeof(T);
204 
205   if (!trie_.CreateIfNotExist(options)) {
206     return absl_ports::InternalError(
207         absl_ports::StrCat("Failed to create KeyMapper file: ", file_prefix_));
208   }
209   if (!trie_.Init()) {
210     return absl_ports::InternalError(
211         absl_ports::StrCat("Failed to init KeyMapper file: ", file_prefix_));
212   }
213   return libtextclassifier3::Status::OK;
214 }
215 
216 template <typename T>
GetOrPut(std::string_view key,T next_value)217 libtextclassifier3::StatusOr<T> KeyMapper<T>::GetOrPut(std::string_view key,
218                                                        T next_value) {
219   std::string string_key(key);
220   uint32_t value_index;
221   if (!trie_.Insert(string_key.c_str(), &next_value, &value_index,
222                     /*replace=*/false)) {
223     return absl_ports::InternalError(absl_ports::StrCat(
224         "Unable to insert key ", key, " into KeyMapper ", file_prefix_, "."));
225   }
226   // This memory address could be unaligned since we're just grabbing the value
227   // from somewhere in the trie's suffix array. The suffix array is filled with
228   // chars, so the address might not be aligned to T values.
229   const T* unaligned_value =
230       static_cast<const T*>(trie_.GetValueAtIndex(value_index));
231 
232   // memcpy the value to ensure that the returned value here is in a T-aligned
233   // address
234   T aligned_value;
235   memcpy(&aligned_value, unaligned_value, sizeof(T));
236   return aligned_value;
237 }
238 
239 template <typename T>
Put(std::string_view key,T value)240 libtextclassifier3::Status KeyMapper<T>::Put(std::string_view key, T value) {
241   std::string string_key(key);
242   if (!trie_.Insert(string_key.c_str(), &value)) {
243     return absl_ports::InternalError(absl_ports::StrCat(
244         "Unable to insert key ", key, " into KeyMapper ", file_prefix_, "."));
245   }
246   return libtextclassifier3::Status::OK;
247 }
248 
249 template <typename T>
Get(std::string_view key)250 libtextclassifier3::StatusOr<T> KeyMapper<T>::Get(std::string_view key) const {
251   std::string string_key(key);
252   T value;
253   if (!trie_.Find(string_key.c_str(), &value)) {
254     return absl_ports::NotFoundError(absl_ports::StrCat(
255         "Key not found ", key, " in KeyMapper ", file_prefix_, "."));
256   }
257   return value;
258 }
259 
260 template <typename T>
Delete(std::string_view key)261 bool KeyMapper<T>::Delete(std::string_view key) {
262   return trie_.Delete(key);
263 }
264 
265 template <typename T>
GetValuesToKeys()266 std::unordered_map<T, std::string> KeyMapper<T>::GetValuesToKeys() const {
267   std::unordered_map<T, std::string> values_to_keys;
268   for (IcingDynamicTrie::Iterator itr(trie_, /*prefix=*/""); itr.IsValid();
269        itr.Advance()) {
270     if (itr.IsValid()) {
271       T value;
272       memcpy(&value, itr.GetValue(), sizeof(T));
273       values_to_keys.insert({value, itr.GetKey()});
274     }
275   }
276 
277   return values_to_keys;
278 }
279 
280 template <typename T>
PersistToDisk()281 libtextclassifier3::Status KeyMapper<T>::PersistToDisk() {
282   if (!trie_.Sync()) {
283     return absl_ports::InternalError(
284         absl_ports::StrCat("Failed to sync KeyMapper file: ", file_prefix_));
285   }
286 
287   return libtextclassifier3::Status::OK;
288 }
289 
290 template <typename T>
GetDiskUsage()291 libtextclassifier3::StatusOr<int64_t> KeyMapper<T>::GetDiskUsage() const {
292   int64_t size = trie_.GetDiskUsage();
293   if (size == IcingFilesystem::kBadFileSize || size < 0) {
294     return absl_ports::InternalError("Failed to get disk usage of key mapper");
295   }
296   return size;
297 }
298 
299 template <typename T>
GetElementsSize()300 libtextclassifier3::StatusOr<int64_t> KeyMapper<T>::GetElementsSize() const {
301   int64_t size = trie_.GetElementsSize();
302   if (size == IcingFilesystem::kBadFileSize || size < 0) {
303     return absl_ports::InternalError(
304         "Failed to get disk usage of elements in the key mapper");
305   }
306   return size;
307 }
308 
309 template <typename T>
ComputeChecksum()310 Crc32 KeyMapper<T>::ComputeChecksum() {
311   return Crc32(trie_.UpdateCrc());
312 }
313 
314 }  // namespace lib
315 }  // namespace icing
316 
317 #endif  // ICING_STORE_KEY_MAPPER_H_
318