1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/file/file-backed-bitmap.h"
16 
17 #include <cstdint>
18 
19 #include "icing/text_classifier/lib3/utils/base/status.h"
20 #include "icing/text_classifier/lib3/utils/base/statusor.h"
21 #include "icing/absl_ports/canonical_errors.h"
22 #include "icing/absl_ports/str_cat.h"
23 #include "icing/file/filesystem.h"
24 #include "icing/file/memory-mapped-file.h"
25 #include "icing/legacy/core/icing-string-util.h"
26 #include "icing/util/crc32.h"
27 #include "icing/util/logging.h"
28 #include "icing/util/math-util.h"
29 #include "icing/util/status-macros.h"
30 
31 namespace icing {
32 namespace lib {
33 
GetBlockCapacity(int num_blocks)34 int FileBackedBitmap::GetBlockCapacity(int num_blocks) {
35   // The first block has a lower capacity due to the Header.
36   const int capacity_bytes = kBlockByteSize * num_blocks - kHeaderByteSize;
37   return capacity_bytes * 8;
38 }
39 
40 libtextclassifier3::StatusOr<std::unique_ptr<FileBackedBitmap>>
Create(const Filesystem * filesystem,std::string_view file_path,MemoryMappedFile::Strategy mmap_strategy)41 FileBackedBitmap::Create(const Filesystem* filesystem,
42                          std::string_view file_path,
43                          MemoryMappedFile::Strategy mmap_strategy) {
44   if (mmap_strategy == MemoryMappedFile::Strategy::READ_WRITE_MANUAL_SYNC) {
45     return absl_ports::UnimplementedError(
46         "FileBackedBitmap currently doesn't support READ_WRITE_MANUAL_SYNC "
47         "mmap strategy.");
48   }
49 
50   auto bitmap = std::unique_ptr<FileBackedBitmap>(
51       new FileBackedBitmap(filesystem, file_path, mmap_strategy));
52 
53   // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
54   // that can support error logging.
55   libtextclassifier3::Status status = bitmap->Initialize();
56   if (!status.ok()) {
57     ICING_LOG(ERROR) << status.error_message();
58     return status;
59   }
60   return bitmap;
61 }
62 
FileBackedBitmap(const Filesystem * filesystem,std::string_view file_path,MemoryMappedFile::Strategy mmap_strategy)63 FileBackedBitmap::FileBackedBitmap(const Filesystem* filesystem,
64                                    std::string_view file_path,
65                                    MemoryMappedFile::Strategy mmap_strategy)
66     : filesystem_(filesystem),
67       file_path_(file_path),
68       mmapper_(new MemoryMappedFile(*filesystem, file_path, mmap_strategy)) {}
69 
~FileBackedBitmap()70 FileBackedBitmap::~FileBackedBitmap() {
71   // Only update if we have auto_sync setup, otherwise the checksum will be
72   // updated when the client calls PersistToDisk
73   if (mmapper_->strategy() ==
74       MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC) {
75     // Any valid, initialized file should at least have 1 block.
76     if (mmapper_->region_size() >= kBlockByteSize &&
77         header().version == kCurrentVersion &&
78         header().state == Header::ChecksumState::kStale) {
79       if (!PersistToDisk().ok()) {
80         ICING_LOG(WARNING)
81             << "Failed to persist bitmap to disk while destructing "
82             << file_path_;
83       }
84     }
85   }
86 }
87 
header() const88 const FileBackedBitmap::Header& FileBackedBitmap::header() const {
89   return reinterpret_cast<const Header&>(*mmapper_->region());
90 }
91 
mutable_header()92 FileBackedBitmap::Header* FileBackedBitmap::mutable_header() {
93   return reinterpret_cast<Header*>(mmapper_->mutable_region());
94 }
95 
Initialize()96 libtextclassifier3::Status FileBackedBitmap::FileBackedBitmap::Initialize() {
97   ICING_VLOG(1) << "Initialize bitmap file: " << file_path_;
98 
99   const bool is_new_bitmap = !filesystem_->FileExists(file_path_.c_str());
100 
101   int64_t file_size = 0;
102   if (is_new_bitmap) {
103     file_size = kBlockByteSize;
104     if (!filesystem_->Grow(file_path_.c_str(), file_size)) {
105       return absl_ports::InternalError(IcingStringUtil::StringPrintf(
106           "Unable to create a minimal bitmap; "
107           "filename: %s; target size: %lld",
108           file_path_.c_str(), static_cast<long long>(file_size)));
109     }
110 
111     ICING_VLOG(1) << "Creating new bitmap in file: " << file_path_
112                   << " of size: " << file_size;
113   } else {
114     file_size = filesystem_->GetFileSize(file_path_.c_str());
115     if (file_size == Filesystem::kBadFileSize) {
116       return absl_ports::InternalError(IcingStringUtil::StringPrintf(
117           "File corrupted; filename: %s; size: %lld.", file_path_.c_str(),
118           static_cast<long long>(file_size)));
119     }
120 
121     ICING_VLOG(1) << "Loading bitmap from  file: " << file_path_
122                   << " of size: " << file_size;
123   }
124 
125   // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
126   // that can support error logging.
127   libtextclassifier3::Status status = mmapper_->Remap(0, file_size);
128   if (!status.ok()) {
129     ICING_LOG(ERROR) << status.error_message();
130     return status;
131   }
132 
133   if (is_new_bitmap) {
134     mutable_header()->version = kCurrentVersion;
135     mutable_header()->state = Header::ChecksumState::kStale;
136     mutable_header()->checksum = 0;
137 
138     return mmapper_->PersistToDisk();
139   }
140 
141   if (header().state == Header::ChecksumState::kStale) {
142     return absl_ports::InternalError(absl_ports::StrCat(
143         "File corrupted, has partially flushed data; filename: ", file_path_));
144   }
145 
146   if (header().checksum != ComputeChecksum()) {
147     return absl_ports::InternalError(absl_ports::StrCat(
148         "File corrupted, checksum doesn't match; filename: ", file_path_));
149   }
150 
151   if (header().version != kCurrentVersion) {
152     return UpgradeToCurrentVersion();
153   }
154 
155   return libtextclassifier3::Status::OK;
156 }
157 
UpgradeToCurrentVersion()158 libtextclassifier3::Status FileBackedBitmap::UpgradeToCurrentVersion() {
159   // Currently, only 1 format is supported.
160   return absl_ports::InternalError(IcingStringUtil::StringPrintf(
161       "File corrupted, mismatched version; filename: %s; %d vs %d.",
162       file_path_.c_str(), header().version, kCurrentVersion));
163 }
164 
SetWord(int word_index,Word word)165 libtextclassifier3::Status FileBackedBitmap::SetWord(int word_index,
166                                                      Word word) {
167   if (word_index >= NumBits() / kNumWordBits) {
168     ICING_LOG(ERROR) << "word_index: " << word_index
169                      << ", number of words: " << NumBits() / kNumWordBits;
170     return absl_ports::InternalError("Trying to access invalid memory");
171   }
172 
173   Word* bitmap_data =
174       reinterpret_cast<Word*>(mmapper_->mutable_region() + kHeaderByteSize);
175 
176   bitmap_data[word_index] = word;
177 
178   return libtextclassifier3::Status::OK;
179 }
180 
GetWord(int word_index) const181 libtextclassifier3::StatusOr<FileBackedBitmap::Word> FileBackedBitmap::GetWord(
182     int word_index) const {
183   if (word_index >= NumBits() / kNumWordBits) {
184     ICING_LOG(ERROR) << "word_index: " << word_index
185                      << ", number of words: " << NumBits() / kNumWordBits;
186     return absl_ports::InternalError("Trying to access invalid memory");
187   }
188 
189   const Word* bitmap_data = reinterpret_cast<const Word*>(
190       mmapper_->mutable_region() + kHeaderByteSize);
191   return bitmap_data[word_index];
192 }
193 
NumBits() const194 int FileBackedBitmap::NumBits() const {
195   return (mmapper_->region_size() - kHeaderByteSize) * 8;
196 }
197 
Set(int bit_index,bool bit_value)198 libtextclassifier3::Status FileBackedBitmap::Set(int bit_index,
199                                                  bool bit_value) {
200   if (bit_index >= NumBits()) {
201     // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
202     // that can support error logging.
203     libtextclassifier3::Status status = GrowTo(bit_index);
204     if (!status.ok()) {
205       ICING_LOG(ERROR) << status.error_message();
206       return status;
207     }
208 
209     if (!bit_value) {
210       // All newly added bits are set to false.
211       return libtextclassifier3::Status::OK;
212     }
213   }
214 
215   // Figure out which word needs to be modified.
216   const int word_index = bit_index / kNumWordBits;
217   const int word_mask = 1u << (bit_index % kNumWordBits);
218 
219   ICING_ASSIGN_OR_RETURN(Word old_word, GetWord(word_index));
220   Word new_word = bit_value ? (old_word | word_mask) : old_word & ~word_mask;
221   if (new_word != old_word) {
222     ICING_RETURN_IF_ERROR(SetWord(word_index, new_word));
223     mutable_header()->state = Header::ChecksumState::kStale;
224   }
225 
226   return libtextclassifier3::Status::OK;
227 }
228 
Get(int bit_index) const229 libtextclassifier3::StatusOr<bool> FileBackedBitmap::Get(int bit_index) const {
230   if (bit_index >= NumBits()) {
231     return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
232         "Bitmap file %s is of size %d and can't read bit_index %d.",
233         file_path_.c_str(), NumBits(), bit_index));
234   }
235 
236   const Word word_index = bit_index / kNumWordBits;
237   const Word word_mask = 1u << (bit_index % kNumWordBits);
238 
239   ICING_ASSIGN_OR_RETURN(Word word, GetWord(word_index));
240   return word & word_mask;
241 }
242 
FileSizeForBits(int num_bits)243 size_t FileBackedBitmap::FileSizeForBits(int num_bits) {
244   const int word_index = num_bits / kNumWordBits;
245   size_t new_file_size = kHeaderByteSize + (word_index + 1) * sizeof(Word);
246   return math_util::RoundUpTo(new_file_size,
247                               static_cast<size_t>(kBlockByteSize));
248 }
249 
GrowTo(int new_num_bits)250 libtextclassifier3::Status FileBackedBitmap::GrowTo(int new_num_bits) {
251   if (new_num_bits > kMaxNumBits) {
252     return absl_ports::ResourceExhaustedError(IcingStringUtil::StringPrintf(
253         "Bitmap file %s has a max-capacity of %d bits and cannot fit %d bits",
254         file_path_.c_str(), kMaxNumBits, new_num_bits));
255   }
256 
257   const size_t new_file_size = FileSizeForBits(new_num_bits);
258   if (!filesystem_->Grow(file_path_.c_str(), new_file_size)) {
259     return absl_ports::InternalError(
260         IcingStringUtil::StringPrintf("Growing file %s to new size %zd failed",
261                                       file_path_.c_str(), new_file_size));
262   }
263 
264   // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
265   // that can support error logging.
266   libtextclassifier3::Status status = mmapper_->Remap(0, new_file_size);
267   if (!status.ok()) {
268     ICING_LOG(ERROR) << status.error_message();
269     return status;
270   }
271 
272   ICING_VLOG(1) << IcingStringUtil::StringPrintf(
273       "Grew file %s to new size %zd", file_path_.c_str(), new_file_size);
274   mutable_header()->state = Header::ChecksumState::kStale;
275   return libtextclassifier3::Status::OK;
276 }
277 
TruncateTo(int new_num_bits)278 libtextclassifier3::Status FileBackedBitmap::TruncateTo(int new_num_bits) {
279   if (new_num_bits > NumBits()) {
280     return libtextclassifier3::Status::OK;
281   }
282 
283   const size_t new_file_size = FileSizeForBits(new_num_bits);
284   // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
285   // that can support error logging.
286   libtextclassifier3::Status status = mmapper_->Remap(0, new_file_size);
287   if (!status.ok()) {
288     ICING_LOG(ERROR) << status.error_message();
289     return status;
290   }
291   if (!filesystem_->Truncate(file_path_.c_str(), new_file_size)) {
292     return absl_ports::InternalError(IcingStringUtil::StringPrintf(
293         "Truncating file %s to new size %zd failed", file_path_.c_str(),
294         new_file_size));
295   }
296 
297   const int word_index = new_num_bits / kNumWordBits;
298   // Mask to only keep bits <= new_num_bits and clear everything else.
299   const Word word_mask = (1u << (new_num_bits % kNumWordBits)) - 1;
300 
301   ICING_ASSIGN_OR_RETURN(Word old_word, GetWord(word_index));
302   Word new_word = old_word & word_mask;
303   ICING_RETURN_IF_ERROR(SetWord(word_index, new_word));
304 
305   // TODO(cassiewang) It might be worth replacing this with memset().
306   const int num_words = NumBits() / kNumWordBits;
307   for (int i = word_index + 1; i < num_words; ++i) {
308     ICING_RETURN_IF_ERROR(SetWord(i, 0));
309   }
310 
311   mutable_header()->state = Header::ChecksumState::kStale;
312   return libtextclassifier3::Status::OK;
313 }
314 
PersistToDisk()315 libtextclassifier3::Status FileBackedBitmap::PersistToDisk() {
316   mutable_header()->checksum = ComputeChecksum();
317   mutable_header()->state = Header::ChecksumState::kFresh;
318   return mmapper_->PersistToDisk();
319 }
320 
ComputeChecksum() const321 uint32_t FileBackedBitmap::ComputeChecksum() const {
322   std::string_view bitmap_bytes(mmapper_->region() + kHeaderByteSize,
323                                 NumBits() / 8);
324   return Crc32().Append(bitmap_bytes);
325 }
326 
327 }  // namespace lib
328 }  // namespace icing
329