1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/util/crc32.h"
16
17 #include <cstdint>
18
19 #include "icing/text_classifier/lib3/utils/base/statusor.h"
20 #include "icing/absl_ports/canonical_errors.h"
21 #include "icing/legacy/core/icing-string-util.h"
22 #include "icing/portable/zlib.h"
23
24 namespace icing {
25 namespace lib {
26
27 namespace {
UpdateCrc32(uint32_t crc,const std::string_view str)28 uint32_t UpdateCrc32(uint32_t crc, const std::string_view str) {
29 if (str.length() > 0) {
30 // crc32() already includes a pre- and post-condition of taking the one's
31 // complement of the value.
32 crc =
33 ~crc32(~crc, reinterpret_cast<const Bytef*>(str.data()), str.length());
34 }
35 return crc;
36 }
37 } // namespace
38
Get() const39 uint32_t Crc32::Get() const { return crc_; }
40
Append(const std::string_view str)41 uint32_t Crc32::Append(const std::string_view str) {
42 crc_ = UpdateCrc32(crc_, str);
43 return crc_;
44 }
45
UpdateWithXor(const std::string_view xored_str,int full_data_size,int position)46 libtextclassifier3::StatusOr<uint32_t> Crc32::UpdateWithXor(
47 const std::string_view xored_str, int full_data_size, int position) {
48 // For appending, use Append().
49 if (position + xored_str.length() > full_data_size) {
50 return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
51 "offset position %d + length %zd > full data size %d", position,
52 xored_str.length(), full_data_size));
53 }
54
55 // We have CRC(A|U|B) and we want CRC(A|V|B) where U is the slice
56 // that updated to V.
57 //
58 // xored_str = X = U ^ V
59 //
60 // Some terminology:
61 // `|`: denotes concatenation, NOT the bitwise operator OR
62 //
63 // (A|U|B): a concatenated string of A+U+B
64 //
65 // CRC(A|U|B): The crc of a concatenated string of A+U+B
66 //
67 // 0_lenA: a string of 0's of the length of string A
68 //
69 //
70 // (A|V|B) = (0_lenA|X|0_lenB) ^ (A|U|B)
71 //
72 // since CRC(D) = CRC(E) ^ CRC(F), where D = E ^ F:
73 // CRC(A|V|B)
74 // = CRC(0_lenA|X|0_lenB) ^ CRC(A|U|B)
75 //
76 // and CRC(D|E) = CRC_COMBINE(D, E), so
77 // = CRC_COMBINE(CRC(0_lenA), CRC_COMBINE(CRC(X), CRC(0_lenB)) ^ CRC(A|U|B)
78 //
79 // and CRC(0) = 0, so
80 // = CRC_COMBINE(0, CRC_COMBINE(CRC(X), CRC(0_lenB)) ^ CRC(A|U|B)
81 //
82 // and CRC(0|B) = CRC(B), so
83 // = CRC_COMBINE(CRC(X), CRC(0_lenB)) ^ CRC(A|U|B)
84 //
85 // For more details, see this post by Mark Adler, one of the authors of zlib:
86 // https://stackoverflow.com/questions/23122312/crc-calculation-of-a-mostly-static-data-stream/23126768#23126768
87
88 uint32_t update_crc = UpdateCrc32(0, xored_str);
89 update_crc = crc32_combine(update_crc, 0,
90 full_data_size - (position + xored_str.length()));
91 crc_ ^= update_crc;
92 return crc_;
93 }
94
95 } // namespace lib
96 } // namespace icing
97