1 // Copyright 2020 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14
15 // This tests the C hashing macros and C++ hashing functions.
16
17 #include "pw_tokenizer/hash.h"
18
19 #include <cstddef>
20 #include <cstdint>
21
22 #include "gtest/gtest.h"
23 #include "pw_preprocessor/util.h"
24 #include "pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_128_hash_macro.h"
25 #include "pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_256_hash_macro.h"
26 #include "pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_80_hash_macro.h"
27 #include "pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_96_hash_macro.h"
28 #include "pw_tokenizer_private/generated_hash_test_cases.h"
29
30 namespace pw::tokenizer {
31 namespace {
32
33 // Make sure the array test cases file isn't empty.
34 static_assert(PW_ARRAY_SIZE(kHashTests) > 10,
35 "The test cases array is suspiciously small");
36
CheckGeneratedCases()37 constexpr bool CheckGeneratedCases() {
38 for (const auto [string, hash_length, python_hash, macro_hash] : kHashTests) {
39 const uint32_t calculated_hash =
40 PwTokenizer65599FixedLengthHash(string, hash_length);
41 if (calculated_hash != python_hash || calculated_hash != macro_hash) {
42 return false;
43 }
44 }
45
46 return true;
47 }
48
49 static_assert(CheckGeneratedCases(),
50 "Hashes in the generated test cases must match");
TEST(Hashing,GeneratedCasesAtRuntime)51 TEST(Hashing, GeneratedCasesAtRuntime) {
52 for (const auto [string, hash_length, python_hash, macro_hash] : kHashTests) {
53 const uint32_t calculated_hash =
54 PwTokenizer65599FixedLengthHash(string, hash_length);
55 EXPECT_EQ(calculated_hash, python_hash);
56 EXPECT_EQ(calculated_hash, macro_hash);
57 }
58 }
59
60 // Gets the size of the string, excluding the null terminator. A uint32_t is
61 // used instead of a size_t since the hash calculation requires a uint32_t.
62 template <uint32_t kSizeWithNull>
StringLength(const char (&)[kSizeWithNull])63 constexpr uint32_t StringLength(const char (&)[kSizeWithNull]) {
64 static_assert(kSizeWithNull > 0u);
65 return kSizeWithNull - 1; // subtract the null terminator
66 }
67
TEST(Hashing,Runtime)68 TEST(Hashing, Runtime) PW_NO_SANITIZE("unsigned-integer-overflow") {
69 // Coefficients for the hash terms; k1 is 1.
70 static constexpr uint32_t k2 = k65599HashConstant;
71 static constexpr uint32_t k3 = k65599HashConstant * k2;
72 static constexpr uint32_t k4 = k65599HashConstant * k3;
73 static constexpr uint32_t k5 = k65599HashConstant * k4;
74
75 // Hash a few things at hash length 4
76 // The coefficient calculation is done modulo 0x100000000, so the unsigned
77 // integer overflows of the hash terms are intentional.
78 EXPECT_EQ(PwTokenizer65599FixedLengthHash("", 4), StringLength(""));
79 EXPECT_EQ(PwTokenizer65599FixedLengthHash("1", 4),
80 StringLength("1") + k2 * '1');
81 EXPECT_EQ(PwTokenizer65599FixedLengthHash("12", 4),
82 StringLength("12") + k2 * '1' + k3 * '2');
83 EXPECT_EQ(PwTokenizer65599FixedLengthHash("123", 4),
84 StringLength("123") + k2 * '1' + k3 * '2' + k4 * '3');
85 EXPECT_EQ(PwTokenizer65599FixedLengthHash("1234", 4),
86 StringLength("1234") + k2 * '1' + k3 * '2' + k4 * '3' + k5 * '4');
87 EXPECT_EQ(PwTokenizer65599FixedLengthHash("12345", 4),
88 StringLength("12345") + k2 * '1' + k3 * '2' + k4 * '3' + k5 * '4');
89 EXPECT_EQ(PwTokenizer65599FixedLengthHash("123456", 4),
90 StringLength("123456") + k2 * '1' + k3 * '2' + k4 * '3' + k5 * '4');
91 }
92
93 #define _CHECK_HASH_LENGTH(string, length) \
94 static_assert(PwTokenizer65599FixedLengthHash( \
95 std::string_view(string, sizeof(string) - 1), length) == \
96 PW_TOKENIZER_65599_FIXED_LENGTH_##length##_HASH(string), \
97 #length "-byte hash mismatch!")
98
99 // Use std::string_view so that \0 can appear in strings.
100 #define TEST_SUPPORTED_HASHES(string_literal) \
101 _CHECK_HASH_LENGTH(string_literal, 80); \
102 _CHECK_HASH_LENGTH(string_literal, 96); \
103 _CHECK_HASH_LENGTH(string_literal, 128); \
104 _CHECK_HASH_LENGTH(string_literal, 256); \
105 static_assert( \
106 PwTokenizer65599FixedLengthHash( \
107 std::string_view(string_literal, sizeof(string_literal) - 1), \
108 sizeof(string_literal) - 1) == Hash(string_literal), \
109 "Hash function mismatch!"); \
110 EXPECT_EQ(PwTokenizer65599FixedLengthHash( \
111 std::string_view(string_literal, sizeof(string_literal) - 1), \
112 sizeof(string_literal) - 1), \
113 pw_tokenizer_65599FixedLengthHash(string_literal, \
114 sizeof(string_literal) - 1, \
115 sizeof(string_literal) - 1))
116
TEST(HashMacro,Empty)117 TEST(HashMacro, Empty) { TEST_SUPPORTED_HASHES(""); }
118
TEST(HashMacro,NullTerminatorsAreIncludedInHash)119 TEST(HashMacro, NullTerminatorsAreIncludedInHash) {
120 using namespace std::literals; // use an sv literal to include the \0
121
122 TEST_SUPPORTED_HASHES("hello\0there");
123 TEST_SUPPORTED_HASHES("\0");
124 TEST_SUPPORTED_HASHES("\0\0");
125 TEST_SUPPORTED_HASHES("\0\0\0");
126
127 static_assert(
128 PwTokenizer65599FixedLengthHash(""sv, 80) !=
129 PwTokenizer65599FixedLengthHash("\0"sv, 80),
130 "Hashes of \\0 strings should differ since the lengths are included");
131 static_assert(
132 PwTokenizer65599FixedLengthHash("\0"sv, 80) !=
133 PwTokenizer65599FixedLengthHash("\0\0"sv, 80),
134 "Hashes of \\0 strings should differ since the lengths are included");
135 static_assert(
136 PwTokenizer65599FixedLengthHash("\0\0"sv, 80) !=
137 PwTokenizer65599FixedLengthHash("\0\0\0"sv, 80),
138 "Hashes of \\0 strings should differ since the lengths are included");
139 }
140
TEST(HashMacro,OneChar)141 TEST(HashMacro, OneChar) {
142 TEST_SUPPORTED_HASHES("a");
143 TEST_SUPPORTED_HASHES("c");
144 TEST_SUPPORTED_HASHES("A");
145 TEST_SUPPORTED_HASHES("B");
146 TEST_SUPPORTED_HASHES("C");
147 TEST_SUPPORTED_HASHES("$");
148 TEST_SUPPORTED_HASHES("%");
149 TEST_SUPPORTED_HASHES("^");
150 TEST_SUPPORTED_HASHES("\xa1");
151 TEST_SUPPORTED_HASHES("\xff");
152 }
153
TEST(HashMacro,Phrases)154 TEST(HashMacro, Phrases) {
155 TEST_SUPPORTED_HASHES("WASD");
156 TEST_SUPPORTED_HASHES("hello, world");
157 TEST_SUPPORTED_HASHES("Let the wookie win.");
158 TEST_SUPPORTED_HASHES("\x01 more test, just for \xffun");
159 }
160
TEST(HashMacro,HashesChange)161 TEST(HashMacro, HashesChange) {
162 static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("") !=
163 PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("\0"));
164 static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("a") !=
165 PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("b"));
166 static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("z") !=
167 PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("aa"));
168 static_assert(
169 PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("make sure hashes change") !=
170 PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("MAKE SURE HASHES CHANGE"));
171 }
172
173 // Define string literals to test boundary conditions.
174 #define LITERAL_79 \
175 "0123456789ABCDEF" \
176 "0123456789ABCDEF" \
177 "0123456789ABCDEF" \
178 "0123456789ABCDEF" \
179 "0123456789ABCDE"
180 #define LITERAL_80 LITERAL_79 "F"
181
182 #define LITERAL_96 LITERAL_80 "0123456789ABCDEF"
183
184 #define LITERAL_128 \
185 LITERAL_96 \
186 "0123456789ABCDEF" \
187 "0123456789ABCDEF"
188
189 static_assert(sizeof(LITERAL_79) - 1 == 79);
190 static_assert(sizeof(LITERAL_80) - 1 == 80);
191 static_assert(sizeof(LITERAL_96) - 1 == 96);
192 static_assert(sizeof(LITERAL_128) - 1 == 128);
193
TEST(HashMacro,HashNearFixedHashLength_80)194 TEST(HashMacro, HashNearFixedHashLength_80) {
195 // 79, 80, 81, 82 bytes
196 TEST_SUPPORTED_HASHES(LITERAL_79);
197 TEST_SUPPORTED_HASHES(LITERAL_80);
198 TEST_SUPPORTED_HASHES(LITERAL_80 "!");
199 TEST_SUPPORTED_HASHES(LITERAL_80 "!!");
200 }
201
TEST(HashMacro,HashNearFixedHashLength_96)202 TEST(HashMacro, HashNearFixedHashLength_96) {
203 // 95, 96, 97, 98 bytes
204 TEST_SUPPORTED_HASHES(LITERAL_80 "0123456789ABCDE");
205 TEST_SUPPORTED_HASHES(LITERAL_96);
206 TEST_SUPPORTED_HASHES(LITERAL_96 "!");
207 TEST_SUPPORTED_HASHES(LITERAL_96 "XY");
208 }
209
TEST(HashMacro,HashNearFixedHashLength_128)210 TEST(HashMacro, HashNearFixedHashLength_128) {
211 // 127, 128, 129, 130 bytes
212 TEST_SUPPORTED_HASHES(LITERAL_96
213 "0123456789ABCDEF"
214 "0123456789ABCDE");
215 TEST_SUPPORTED_HASHES(LITERAL_128);
216 TEST_SUPPORTED_HASHES(LITERAL_128 "!");
217 TEST_SUPPORTED_HASHES(LITERAL_128 "AB");
218 }
219
TEST(HashMacro,HashVeryLongStrings)220 TEST(HashMacro, HashVeryLongStrings) {
221 TEST_SUPPORTED_HASHES(
222 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
223 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
224 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
225 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
226 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0");
227 TEST_SUPPORTED_HASHES(LITERAL_128 LITERAL_128 LITERAL_128);
228 }
229
TEST(HashMacro,ExtraCharactersAreIgnored)230 TEST(HashMacro, ExtraCharactersAreIgnored) {
231 static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH(LITERAL_80 "?") ==
232 PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH(LITERAL_80 "."));
233
234 static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH(LITERAL_80 "ABCD") ==
235 PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH(LITERAL_80 "zyxw"));
236
237 static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_96_HASH(LITERAL_96 "?") ==
238 PW_TOKENIZER_65599_FIXED_LENGTH_96_HASH(LITERAL_96 "."));
239
240 static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_96_HASH(LITERAL_96 "ABCD") ==
241 PW_TOKENIZER_65599_FIXED_LENGTH_96_HASH(LITERAL_96 "zyxw"));
242
243 static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_128_HASH(LITERAL_128 "?") ==
244 PW_TOKENIZER_65599_FIXED_LENGTH_128_HASH(LITERAL_128 "."));
245
246 static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_128_HASH(LITERAL_128 "ABCD") ==
247 PW_TOKENIZER_65599_FIXED_LENGTH_128_HASH(LITERAL_128 "zyxw"));
248 }
249
250 } // namespace
251 } // namespace pw::tokenizer
252