1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/util/crc32.h"
16 
17 #include <cstdint>
18 
19 #include "icing/text_classifier/lib3/utils/base/statusor.h"
20 #include "icing/absl_ports/canonical_errors.h"
21 #include "icing/legacy/core/icing-string-util.h"
22 #include "icing/portable/zlib.h"
23 
24 namespace icing {
25 namespace lib {
26 
27 namespace {
UpdateCrc32(uint32_t crc,const std::string_view str)28 uint32_t UpdateCrc32(uint32_t crc, const std::string_view str) {
29   if (str.length() > 0) {
30     // crc32() already includes a pre- and post-condition of taking the one's
31     // complement of the value.
32     crc =
33         ~crc32(~crc, reinterpret_cast<const Bytef*>(str.data()), str.length());
34   }
35   return crc;
36 }
37 }  // namespace
38 
Get() const39 uint32_t Crc32::Get() const { return crc_; }
40 
Append(const std::string_view str)41 uint32_t Crc32::Append(const std::string_view str) {
42   crc_ = UpdateCrc32(crc_, str);
43   return crc_;
44 }
45 
UpdateWithXor(const std::string_view xored_str,int full_data_size,int position)46 libtextclassifier3::StatusOr<uint32_t> Crc32::UpdateWithXor(
47     const std::string_view xored_str, int full_data_size, int position) {
48   // For appending, use Append().
49   if (position + xored_str.length() > full_data_size) {
50     return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
51         "offset position %d + length %zd > full data size %d", position,
52         xored_str.length(), full_data_size));
53   }
54 
55   // We have CRC(A|U|B) and we want CRC(A|V|B) where U is the slice
56   // that updated to V.
57   //
58   // xored_str = X = U ^ V
59   //
60   // Some terminology:
61   //   `|`: denotes concatenation, NOT the bitwise operator OR
62   //
63   //   (A|U|B): a concatenated string of A+U+B
64   //
65   //   CRC(A|U|B): The crc of a concatenated string of A+U+B
66   //
67   //   0_lenA: a string of 0's of the length of string A
68   //
69   //
70   // (A|V|B) = (0_lenA|X|0_lenB) ^ (A|U|B)
71   //
72   // since CRC(D) = CRC(E) ^ CRC(F), where D = E ^ F:
73   // CRC(A|V|B)
74   //   = CRC(0_lenA|X|0_lenB) ^ CRC(A|U|B)
75   //
76   // and CRC(D|E) = CRC_COMBINE(D, E), so
77   //   = CRC_COMBINE(CRC(0_lenA), CRC_COMBINE(CRC(X), CRC(0_lenB)) ^ CRC(A|U|B)
78   //
79   // and CRC(0) = 0, so
80   //   = CRC_COMBINE(0, CRC_COMBINE(CRC(X), CRC(0_lenB)) ^ CRC(A|U|B)
81   //
82   // and CRC(0|B) = CRC(B), so
83   //   = CRC_COMBINE(CRC(X), CRC(0_lenB)) ^ CRC(A|U|B)
84   //
85   // For more details, see this post by Mark Adler, one of the authors of zlib:
86   // https://stackoverflow.com/questions/23122312/crc-calculation-of-a-mostly-static-data-stream/23126768#23126768
87 
88   uint32_t update_crc = UpdateCrc32(0, xored_str);
89   update_crc = crc32_combine(update_crc, 0,
90                              full_data_size - (position + xored_str.length()));
91   crc_ ^= update_crc;
92   return crc_;
93 }
94 
95 }  // namespace lib
96 }  // namespace icing
97