1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_UTIL_CRC32_H_
16 #define ICING_UTIL_CRC32_H_
17 
18 #include <cstdint>
19 #include <string_view>
20 
21 #include "icing/text_classifier/lib3/utils/base/statusor.h"
22 
23 namespace icing {
24 namespace lib {
25 
26 // Efficient mechanism to incrementally compute checksum of a file and keep it
27 // updated when its content changes. Internally uses zlib based crc32()
28 // implementation.
29 //
30 // See https://www.zlib.net/manual.html#Checksum for more details.
31 class Crc32 {
32  public:
33   // Default to the checksum of an empty string, that is "0".
Crc32()34   Crc32() : crc_(0) {}
35 
Crc32(uint32_t init_crc)36   explicit Crc32(uint32_t init_crc) : crc_(init_crc) {}
37 
38   inline bool operator==(const Crc32& other) const {
39     return crc_ == other.Get();
40   }
41 
42   // Returns the checksum of all the data that has been processed till now.
43   uint32_t Get() const;
44 
45   // Incrementally update the current checksum to reflect the fact that the
46   // underlying data has been appended with 'str'. It calculates a new crc32
47   // based on the current crc value and the newly appended string.
48   //
49   // NOTE: As this method accepts incremental appends, all these 3 will lead to
50   // the same checksum:
51   // 1) crc32.Append("AAA"); crc32.Append("BBB");
52   // 2) crc32.Append("AAABBB");
53   // 3) crc32.Append("AA"); crc32.Append("AB"); crc32.Append("BB");
54   //
55   // NOTE: While this class internally uses zlib's crc32(),
56   // Crc32(base_crc).Append(str) is not the same as zlib::crc32(base_crc, str);
57   uint32_t Append(std::string_view str);
58 
59   // Update a string's rolling crc when some content is modified in the middle
60   // at an offset. We need the xored_str, which is the new value xored with the
61   // original value.
62   //
63   // Original string:
64   //   string(original_start | original_mid | original_end)
65   //          -------------------------------------------> full_data_size
66   //                         ^ offset position
67   //
68   // Modified string:
69   //   string(original_start | changed_mid | original_end)
70   //                         ^ offset position
71   //
72   // And where
73   //   xored_str = changed_mid ^ original_mid
74   //   xored_len = length(xored_str)
75   //   full_data_size = the length of all the strings that have been Appended to
76   //                    generate the current checksum
77   //
78   // REQUIRES: offset position + xored_len <= full_data_size.
79   //
80   // E.g.
81   // Old data: ABCDEF; New data: ABXYZF
82   //
83   // Crc32 crc32; crc32.Append("ABCDEF");
84   // crc32.UpdateWithXor("CDE" xor "XYZ", 6, 2);
85   //
86   // This is the same as
87   // Crc32 crc32; crc32.Append("ABXYZF");
88   //
89   // See .cc file for implementation notes.
90   //
91   // Returns:
92   //   Updated crc on success
93   //   INVALID_ARGUMENT if offset position + xored_len > full_data_size
94   libtextclassifier3::StatusOr<uint32_t> UpdateWithXor(
95       std::string_view xored_str, int full_data_size, int position);
96 
97  private:
98   uint32_t crc_;
99 };
100 
101 }  // namespace lib
102 }  // namespace icing
103 
104 #endif  // ICING_UTIL_CRC32_H_
105