1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_UTIL_CRC32_H_ 16 #define ICING_UTIL_CRC32_H_ 17 18 #include <cstdint> 19 #include <string_view> 20 21 #include "icing/text_classifier/lib3/utils/base/statusor.h" 22 23 namespace icing { 24 namespace lib { 25 26 // Efficient mechanism to incrementally compute checksum of a file and keep it 27 // updated when its content changes. Internally uses zlib based crc32() 28 // implementation. 29 // 30 // See https://www.zlib.net/manual.html#Checksum for more details. 31 class Crc32 { 32 public: 33 // Default to the checksum of an empty string, that is "0". Crc32()34 Crc32() : crc_(0) {} 35 Crc32(uint32_t init_crc)36 explicit Crc32(uint32_t init_crc) : crc_(init_crc) {} 37 38 inline bool operator==(const Crc32& other) const { 39 return crc_ == other.Get(); 40 } 41 42 // Returns the checksum of all the data that has been processed till now. 43 uint32_t Get() const; 44 45 // Incrementally update the current checksum to reflect the fact that the 46 // underlying data has been appended with 'str'. It calculates a new crc32 47 // based on the current crc value and the newly appended string. 48 // 49 // NOTE: As this method accepts incremental appends, all these 3 will lead to 50 // the same checksum: 51 // 1) crc32.Append("AAA"); crc32.Append("BBB"); 52 // 2) crc32.Append("AAABBB"); 53 // 3) crc32.Append("AA"); crc32.Append("AB"); crc32.Append("BB"); 54 // 55 // NOTE: While this class internally uses zlib's crc32(), 56 // Crc32(base_crc).Append(str) is not the same as zlib::crc32(base_crc, str); 57 uint32_t Append(std::string_view str); 58 59 // Update a string's rolling crc when some content is modified in the middle 60 // at an offset. We need the xored_str, which is the new value xored with the 61 // original value. 62 // 63 // Original string: 64 // string(original_start | original_mid | original_end) 65 // -------------------------------------------> full_data_size 66 // ^ offset position 67 // 68 // Modified string: 69 // string(original_start | changed_mid | original_end) 70 // ^ offset position 71 // 72 // And where 73 // xored_str = changed_mid ^ original_mid 74 // xored_len = length(xored_str) 75 // full_data_size = the length of all the strings that have been Appended to 76 // generate the current checksum 77 // 78 // REQUIRES: offset position + xored_len <= full_data_size. 79 // 80 // E.g. 81 // Old data: ABCDEF; New data: ABXYZF 82 // 83 // Crc32 crc32; crc32.Append("ABCDEF"); 84 // crc32.UpdateWithXor("CDE" xor "XYZ", 6, 2); 85 // 86 // This is the same as 87 // Crc32 crc32; crc32.Append("ABXYZF"); 88 // 89 // See .cc file for implementation notes. 90 // 91 // Returns: 92 // Updated crc on success 93 // INVALID_ARGUMENT if offset position + xored_len > full_data_size 94 libtextclassifier3::StatusOr<uint32_t> UpdateWithXor( 95 std::string_view xored_str, int full_data_size, int position); 96 97 private: 98 uint32_t crc_; 99 }; 100 101 } // namespace lib 102 } // namespace icing 103 104 #endif // ICING_UTIL_CRC32_H_ 105