1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/file/file-backed-bitmap.h"
16
17 #include <cstdint>
18
19 #include "icing/text_classifier/lib3/utils/base/status.h"
20 #include "icing/text_classifier/lib3/utils/base/statusor.h"
21 #include "icing/absl_ports/canonical_errors.h"
22 #include "icing/absl_ports/str_cat.h"
23 #include "icing/file/filesystem.h"
24 #include "icing/file/memory-mapped-file.h"
25 #include "icing/legacy/core/icing-string-util.h"
26 #include "icing/util/crc32.h"
27 #include "icing/util/logging.h"
28 #include "icing/util/math-util.h"
29 #include "icing/util/status-macros.h"
30
31 namespace icing {
32 namespace lib {
33
GetBlockCapacity(int num_blocks)34 int FileBackedBitmap::GetBlockCapacity(int num_blocks) {
35 // The first block has a lower capacity due to the Header.
36 const int capacity_bytes = kBlockByteSize * num_blocks - kHeaderByteSize;
37 return capacity_bytes * 8;
38 }
39
40 libtextclassifier3::StatusOr<std::unique_ptr<FileBackedBitmap>>
Create(const Filesystem * filesystem,std::string_view file_path,MemoryMappedFile::Strategy mmap_strategy)41 FileBackedBitmap::Create(const Filesystem* filesystem,
42 std::string_view file_path,
43 MemoryMappedFile::Strategy mmap_strategy) {
44 if (mmap_strategy == MemoryMappedFile::Strategy::READ_WRITE_MANUAL_SYNC) {
45 return absl_ports::UnimplementedError(
46 "FileBackedBitmap currently doesn't support READ_WRITE_MANUAL_SYNC "
47 "mmap strategy.");
48 }
49
50 auto bitmap = std::unique_ptr<FileBackedBitmap>(
51 new FileBackedBitmap(filesystem, file_path, mmap_strategy));
52
53 // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
54 // that can support error logging.
55 libtextclassifier3::Status status = bitmap->Initialize();
56 if (!status.ok()) {
57 ICING_LOG(ERROR) << status.error_message();
58 return status;
59 }
60 return bitmap;
61 }
62
FileBackedBitmap(const Filesystem * filesystem,std::string_view file_path,MemoryMappedFile::Strategy mmap_strategy)63 FileBackedBitmap::FileBackedBitmap(const Filesystem* filesystem,
64 std::string_view file_path,
65 MemoryMappedFile::Strategy mmap_strategy)
66 : filesystem_(filesystem),
67 file_path_(file_path),
68 mmapper_(new MemoryMappedFile(*filesystem, file_path, mmap_strategy)) {}
69
~FileBackedBitmap()70 FileBackedBitmap::~FileBackedBitmap() {
71 // Only update if we have auto_sync setup, otherwise the checksum will be
72 // updated when the client calls PersistToDisk
73 if (mmapper_->strategy() ==
74 MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC) {
75 // Any valid, initialized file should at least have 1 block.
76 if (mmapper_->region_size() >= kBlockByteSize &&
77 header().version == kCurrentVersion &&
78 header().state == Header::ChecksumState::kStale) {
79 if (!PersistToDisk().ok()) {
80 ICING_LOG(WARNING)
81 << "Failed to persist bitmap to disk while destructing "
82 << file_path_;
83 }
84 }
85 }
86 }
87
header() const88 const FileBackedBitmap::Header& FileBackedBitmap::header() const {
89 return reinterpret_cast<const Header&>(*mmapper_->region());
90 }
91
mutable_header()92 FileBackedBitmap::Header* FileBackedBitmap::mutable_header() {
93 return reinterpret_cast<Header*>(mmapper_->mutable_region());
94 }
95
Initialize()96 libtextclassifier3::Status FileBackedBitmap::FileBackedBitmap::Initialize() {
97 ICING_VLOG(1) << "Initialize bitmap file: " << file_path_;
98
99 const bool is_new_bitmap = !filesystem_->FileExists(file_path_.c_str());
100
101 int64_t file_size = 0;
102 if (is_new_bitmap) {
103 file_size = kBlockByteSize;
104 if (!filesystem_->Grow(file_path_.c_str(), file_size)) {
105 return absl_ports::InternalError(IcingStringUtil::StringPrintf(
106 "Unable to create a minimal bitmap; "
107 "filename: %s; target size: %lld",
108 file_path_.c_str(), static_cast<long long>(file_size)));
109 }
110
111 ICING_VLOG(1) << "Creating new bitmap in file: " << file_path_
112 << " of size: " << file_size;
113 } else {
114 file_size = filesystem_->GetFileSize(file_path_.c_str());
115 if (file_size == Filesystem::kBadFileSize) {
116 return absl_ports::InternalError(IcingStringUtil::StringPrintf(
117 "File corrupted; filename: %s; size: %lld.", file_path_.c_str(),
118 static_cast<long long>(file_size)));
119 }
120
121 ICING_VLOG(1) << "Loading bitmap from file: " << file_path_
122 << " of size: " << file_size;
123 }
124
125 // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
126 // that can support error logging.
127 libtextclassifier3::Status status = mmapper_->Remap(0, file_size);
128 if (!status.ok()) {
129 ICING_LOG(ERROR) << status.error_message();
130 return status;
131 }
132
133 if (is_new_bitmap) {
134 mutable_header()->version = kCurrentVersion;
135 mutable_header()->state = Header::ChecksumState::kStale;
136 mutable_header()->checksum = 0;
137
138 return mmapper_->PersistToDisk();
139 }
140
141 if (header().state == Header::ChecksumState::kStale) {
142 return absl_ports::InternalError(absl_ports::StrCat(
143 "File corrupted, has partially flushed data; filename: ", file_path_));
144 }
145
146 if (header().checksum != ComputeChecksum()) {
147 return absl_ports::InternalError(absl_ports::StrCat(
148 "File corrupted, checksum doesn't match; filename: ", file_path_));
149 }
150
151 if (header().version != kCurrentVersion) {
152 return UpgradeToCurrentVersion();
153 }
154
155 return libtextclassifier3::Status::OK;
156 }
157
UpgradeToCurrentVersion()158 libtextclassifier3::Status FileBackedBitmap::UpgradeToCurrentVersion() {
159 // Currently, only 1 format is supported.
160 return absl_ports::InternalError(IcingStringUtil::StringPrintf(
161 "File corrupted, mismatched version; filename: %s; %d vs %d.",
162 file_path_.c_str(), header().version, kCurrentVersion));
163 }
164
SetWord(int word_index,Word word)165 libtextclassifier3::Status FileBackedBitmap::SetWord(int word_index,
166 Word word) {
167 if (word_index >= NumBits() / kNumWordBits) {
168 ICING_LOG(ERROR) << "word_index: " << word_index
169 << ", number of words: " << NumBits() / kNumWordBits;
170 return absl_ports::InternalError("Trying to access invalid memory");
171 }
172
173 Word* bitmap_data =
174 reinterpret_cast<Word*>(mmapper_->mutable_region() + kHeaderByteSize);
175
176 bitmap_data[word_index] = word;
177
178 return libtextclassifier3::Status::OK;
179 }
180
GetWord(int word_index) const181 libtextclassifier3::StatusOr<FileBackedBitmap::Word> FileBackedBitmap::GetWord(
182 int word_index) const {
183 if (word_index >= NumBits() / kNumWordBits) {
184 ICING_LOG(ERROR) << "word_index: " << word_index
185 << ", number of words: " << NumBits() / kNumWordBits;
186 return absl_ports::InternalError("Trying to access invalid memory");
187 }
188
189 const Word* bitmap_data = reinterpret_cast<const Word*>(
190 mmapper_->mutable_region() + kHeaderByteSize);
191 return bitmap_data[word_index];
192 }
193
NumBits() const194 int FileBackedBitmap::NumBits() const {
195 return (mmapper_->region_size() - kHeaderByteSize) * 8;
196 }
197
Set(int bit_index,bool bit_value)198 libtextclassifier3::Status FileBackedBitmap::Set(int bit_index,
199 bool bit_value) {
200 if (bit_index >= NumBits()) {
201 // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
202 // that can support error logging.
203 libtextclassifier3::Status status = GrowTo(bit_index);
204 if (!status.ok()) {
205 ICING_LOG(ERROR) << status.error_message();
206 return status;
207 }
208
209 if (!bit_value) {
210 // All newly added bits are set to false.
211 return libtextclassifier3::Status::OK;
212 }
213 }
214
215 // Figure out which word needs to be modified.
216 const int word_index = bit_index / kNumWordBits;
217 const int word_mask = 1u << (bit_index % kNumWordBits);
218
219 ICING_ASSIGN_OR_RETURN(Word old_word, GetWord(word_index));
220 Word new_word = bit_value ? (old_word | word_mask) : old_word & ~word_mask;
221 if (new_word != old_word) {
222 ICING_RETURN_IF_ERROR(SetWord(word_index, new_word));
223 mutable_header()->state = Header::ChecksumState::kStale;
224 }
225
226 return libtextclassifier3::Status::OK;
227 }
228
Get(int bit_index) const229 libtextclassifier3::StatusOr<bool> FileBackedBitmap::Get(int bit_index) const {
230 if (bit_index >= NumBits()) {
231 return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
232 "Bitmap file %s is of size %d and can't read bit_index %d.",
233 file_path_.c_str(), NumBits(), bit_index));
234 }
235
236 const Word word_index = bit_index / kNumWordBits;
237 const Word word_mask = 1u << (bit_index % kNumWordBits);
238
239 ICING_ASSIGN_OR_RETURN(Word word, GetWord(word_index));
240 return word & word_mask;
241 }
242
FileSizeForBits(int num_bits)243 size_t FileBackedBitmap::FileSizeForBits(int num_bits) {
244 const int word_index = num_bits / kNumWordBits;
245 size_t new_file_size = kHeaderByteSize + (word_index + 1) * sizeof(Word);
246 return math_util::RoundUpTo(new_file_size,
247 static_cast<size_t>(kBlockByteSize));
248 }
249
GrowTo(int new_num_bits)250 libtextclassifier3::Status FileBackedBitmap::GrowTo(int new_num_bits) {
251 if (new_num_bits > kMaxNumBits) {
252 return absl_ports::ResourceExhaustedError(IcingStringUtil::StringPrintf(
253 "Bitmap file %s has a max-capacity of %d bits and cannot fit %d bits",
254 file_path_.c_str(), kMaxNumBits, new_num_bits));
255 }
256
257 const size_t new_file_size = FileSizeForBits(new_num_bits);
258 if (!filesystem_->Grow(file_path_.c_str(), new_file_size)) {
259 return absl_ports::InternalError(
260 IcingStringUtil::StringPrintf("Growing file %s to new size %zd failed",
261 file_path_.c_str(), new_file_size));
262 }
263
264 // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
265 // that can support error logging.
266 libtextclassifier3::Status status = mmapper_->Remap(0, new_file_size);
267 if (!status.ok()) {
268 ICING_LOG(ERROR) << status.error_message();
269 return status;
270 }
271
272 ICING_VLOG(1) << IcingStringUtil::StringPrintf(
273 "Grew file %s to new size %zd", file_path_.c_str(), new_file_size);
274 mutable_header()->state = Header::ChecksumState::kStale;
275 return libtextclassifier3::Status::OK;
276 }
277
TruncateTo(int new_num_bits)278 libtextclassifier3::Status FileBackedBitmap::TruncateTo(int new_num_bits) {
279 if (new_num_bits > NumBits()) {
280 return libtextclassifier3::Status::OK;
281 }
282
283 const size_t new_file_size = FileSizeForBits(new_num_bits);
284 // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
285 // that can support error logging.
286 libtextclassifier3::Status status = mmapper_->Remap(0, new_file_size);
287 if (!status.ok()) {
288 ICING_LOG(ERROR) << status.error_message();
289 return status;
290 }
291 if (!filesystem_->Truncate(file_path_.c_str(), new_file_size)) {
292 return absl_ports::InternalError(IcingStringUtil::StringPrintf(
293 "Truncating file %s to new size %zd failed", file_path_.c_str(),
294 new_file_size));
295 }
296
297 const int word_index = new_num_bits / kNumWordBits;
298 // Mask to only keep bits <= new_num_bits and clear everything else.
299 const Word word_mask = (1u << (new_num_bits % kNumWordBits)) - 1;
300
301 ICING_ASSIGN_OR_RETURN(Word old_word, GetWord(word_index));
302 Word new_word = old_word & word_mask;
303 ICING_RETURN_IF_ERROR(SetWord(word_index, new_word));
304
305 // TODO(cassiewang) It might be worth replacing this with memset().
306 const int num_words = NumBits() / kNumWordBits;
307 for (int i = word_index + 1; i < num_words; ++i) {
308 ICING_RETURN_IF_ERROR(SetWord(i, 0));
309 }
310
311 mutable_header()->state = Header::ChecksumState::kStale;
312 return libtextclassifier3::Status::OK;
313 }
314
PersistToDisk()315 libtextclassifier3::Status FileBackedBitmap::PersistToDisk() {
316 mutable_header()->checksum = ComputeChecksum();
317 mutable_header()->state = Header::ChecksumState::kFresh;
318 return mmapper_->PersistToDisk();
319 }
320
ComputeChecksum() const321 uint32_t FileBackedBitmap::ComputeChecksum() const {
322 std::string_view bitmap_bytes(mmapper_->region() + kHeaderByteSize,
323 NumBits() / 8);
324 return Crc32().Append(bitmap_bytes);
325 }
326
327 } // namespace lib
328 } // namespace icing
329