1 // Copyright 2020 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14 
15 // This tests the C hashing macros and C++ hashing functions.
16 
17 #include "pw_tokenizer/hash.h"
18 
19 #include <cstddef>
20 #include <cstdint>
21 
22 #include "gtest/gtest.h"
23 #include "pw_preprocessor/util.h"
24 #include "pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_128_hash_macro.h"
25 #include "pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_256_hash_macro.h"
26 #include "pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_80_hash_macro.h"
27 #include "pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_96_hash_macro.h"
28 #include "pw_tokenizer_private/generated_hash_test_cases.h"
29 
30 namespace pw::tokenizer {
31 namespace {
32 
33 // Make sure the array test cases file isn't empty.
34 static_assert(PW_ARRAY_SIZE(kHashTests) > 10,
35               "The test cases array is suspiciously small");
36 
CheckGeneratedCases()37 constexpr bool CheckGeneratedCases() {
38   for (const auto [string, hash_length, python_hash, macro_hash] : kHashTests) {
39     const uint32_t calculated_hash =
40         PwTokenizer65599FixedLengthHash(string, hash_length);
41     if (calculated_hash != python_hash || calculated_hash != macro_hash) {
42       return false;
43     }
44   }
45 
46   return true;
47 }
48 
49 static_assert(CheckGeneratedCases(),
50               "Hashes in the generated test cases must match");
TEST(Hashing,GeneratedCasesAtRuntime)51 TEST(Hashing, GeneratedCasesAtRuntime) {
52   for (const auto [string, hash_length, python_hash, macro_hash] : kHashTests) {
53     const uint32_t calculated_hash =
54         PwTokenizer65599FixedLengthHash(string, hash_length);
55     EXPECT_EQ(calculated_hash, python_hash);
56     EXPECT_EQ(calculated_hash, macro_hash);
57   }
58 }
59 
60 // Gets the size of the string, excluding the null terminator. A uint32_t is
61 // used instead of a size_t since the hash calculation requires a uint32_t.
62 template <uint32_t kSizeWithNull>
StringLength(const char (&)[kSizeWithNull])63 constexpr uint32_t StringLength(const char (&)[kSizeWithNull]) {
64   static_assert(kSizeWithNull > 0u);
65   return kSizeWithNull - 1;  // subtract the null terminator
66 }
67 
TEST(Hashing,Runtime)68 TEST(Hashing, Runtime) PW_NO_SANITIZE("unsigned-integer-overflow") {
69   // Coefficients for the hash terms; k1 is 1.
70   static constexpr uint32_t k2 = k65599HashConstant;
71   static constexpr uint32_t k3 = k65599HashConstant * k2;
72   static constexpr uint32_t k4 = k65599HashConstant * k3;
73   static constexpr uint32_t k5 = k65599HashConstant * k4;
74 
75   // Hash a few things at hash length 4
76   // The coefficient calculation is done modulo 0x100000000, so the unsigned
77   // integer overflows of the hash terms are intentional.
78   EXPECT_EQ(PwTokenizer65599FixedLengthHash("", 4), StringLength(""));
79   EXPECT_EQ(PwTokenizer65599FixedLengthHash("1", 4),
80             StringLength("1") + k2 * '1');
81   EXPECT_EQ(PwTokenizer65599FixedLengthHash("12", 4),
82             StringLength("12") + k2 * '1' + k3 * '2');
83   EXPECT_EQ(PwTokenizer65599FixedLengthHash("123", 4),
84             StringLength("123") + k2 * '1' + k3 * '2' + k4 * '3');
85   EXPECT_EQ(PwTokenizer65599FixedLengthHash("1234", 4),
86             StringLength("1234") + k2 * '1' + k3 * '2' + k4 * '3' + k5 * '4');
87   EXPECT_EQ(PwTokenizer65599FixedLengthHash("12345", 4),
88             StringLength("12345") + k2 * '1' + k3 * '2' + k4 * '3' + k5 * '4');
89   EXPECT_EQ(PwTokenizer65599FixedLengthHash("123456", 4),
90             StringLength("123456") + k2 * '1' + k3 * '2' + k4 * '3' + k5 * '4');
91 }
92 
93 #define _CHECK_HASH_LENGTH(string, length)                                   \
94   static_assert(PwTokenizer65599FixedLengthHash(                             \
95                     std::string_view(string, sizeof(string) - 1), length) == \
96                     PW_TOKENIZER_65599_FIXED_LENGTH_##length##_HASH(string), \
97                 #length "-byte hash mismatch!")
98 
99 // Use std::string_view so that \0 can appear in strings.
100 #define TEST_SUPPORTED_HASHES(string_literal)                                 \
101   _CHECK_HASH_LENGTH(string_literal, 80);                                     \
102   _CHECK_HASH_LENGTH(string_literal, 96);                                     \
103   _CHECK_HASH_LENGTH(string_literal, 128);                                    \
104   _CHECK_HASH_LENGTH(string_literal, 256);                                    \
105   static_assert(                                                              \
106       PwTokenizer65599FixedLengthHash(                                        \
107           std::string_view(string_literal, sizeof(string_literal) - 1),       \
108           sizeof(string_literal) - 1) == Hash(string_literal),                \
109       "Hash function mismatch!");                                             \
110   EXPECT_EQ(PwTokenizer65599FixedLengthHash(                                  \
111                 std::string_view(string_literal, sizeof(string_literal) - 1), \
112                 sizeof(string_literal) - 1),                                  \
113             pw_tokenizer_65599FixedLengthHash(string_literal,                 \
114                                               sizeof(string_literal) - 1,     \
115                                               sizeof(string_literal) - 1))
116 
TEST(HashMacro,Empty)117 TEST(HashMacro, Empty) { TEST_SUPPORTED_HASHES(""); }
118 
TEST(HashMacro,NullTerminatorsAreIncludedInHash)119 TEST(HashMacro, NullTerminatorsAreIncludedInHash) {
120   using namespace std::literals;  // use an sv literal to include the \0
121 
122   TEST_SUPPORTED_HASHES("hello\0there");
123   TEST_SUPPORTED_HASHES("\0");
124   TEST_SUPPORTED_HASHES("\0\0");
125   TEST_SUPPORTED_HASHES("\0\0\0");
126 
127   static_assert(
128       PwTokenizer65599FixedLengthHash(""sv, 80) !=
129           PwTokenizer65599FixedLengthHash("\0"sv, 80),
130       "Hashes of \\0 strings should differ since the lengths are included");
131   static_assert(
132       PwTokenizer65599FixedLengthHash("\0"sv, 80) !=
133           PwTokenizer65599FixedLengthHash("\0\0"sv, 80),
134       "Hashes of \\0 strings should differ since the lengths are included");
135   static_assert(
136       PwTokenizer65599FixedLengthHash("\0\0"sv, 80) !=
137           PwTokenizer65599FixedLengthHash("\0\0\0"sv, 80),
138       "Hashes of \\0 strings should differ since the lengths are included");
139 }
140 
TEST(HashMacro,OneChar)141 TEST(HashMacro, OneChar) {
142   TEST_SUPPORTED_HASHES("a");
143   TEST_SUPPORTED_HASHES("c");
144   TEST_SUPPORTED_HASHES("A");
145   TEST_SUPPORTED_HASHES("B");
146   TEST_SUPPORTED_HASHES("C");
147   TEST_SUPPORTED_HASHES("$");
148   TEST_SUPPORTED_HASHES("%");
149   TEST_SUPPORTED_HASHES("^");
150   TEST_SUPPORTED_HASHES("\xa1");
151   TEST_SUPPORTED_HASHES("\xff");
152 }
153 
TEST(HashMacro,Phrases)154 TEST(HashMacro, Phrases) {
155   TEST_SUPPORTED_HASHES("WASD");
156   TEST_SUPPORTED_HASHES("hello, world");
157   TEST_SUPPORTED_HASHES("Let the wookie win.");
158   TEST_SUPPORTED_HASHES("\x01 more test, just for \xffun");
159 }
160 
TEST(HashMacro,HashesChange)161 TEST(HashMacro, HashesChange) {
162   static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("") !=
163                 PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("\0"));
164   static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("a") !=
165                 PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("b"));
166   static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("z") !=
167                 PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("aa"));
168   static_assert(
169       PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("make sure hashes change") !=
170       PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("MAKE SURE HASHES CHANGE"));
171 }
172 
173 // Define string literals to test boundary conditions.
174 #define LITERAL_79   \
175   "0123456789ABCDEF" \
176   "0123456789ABCDEF" \
177   "0123456789ABCDEF" \
178   "0123456789ABCDEF" \
179   "0123456789ABCDE"
180 #define LITERAL_80 LITERAL_79 "F"
181 
182 #define LITERAL_96 LITERAL_80 "0123456789ABCDEF"
183 
184 #define LITERAL_128  \
185   LITERAL_96         \
186   "0123456789ABCDEF" \
187   "0123456789ABCDEF"
188 
189 static_assert(sizeof(LITERAL_79) - 1 == 79);
190 static_assert(sizeof(LITERAL_80) - 1 == 80);
191 static_assert(sizeof(LITERAL_96) - 1 == 96);
192 static_assert(sizeof(LITERAL_128) - 1 == 128);
193 
TEST(HashMacro,HashNearFixedHashLength_80)194 TEST(HashMacro, HashNearFixedHashLength_80) {
195   // 79, 80, 81, 82 bytes
196   TEST_SUPPORTED_HASHES(LITERAL_79);
197   TEST_SUPPORTED_HASHES(LITERAL_80);
198   TEST_SUPPORTED_HASHES(LITERAL_80 "!");
199   TEST_SUPPORTED_HASHES(LITERAL_80 "!!");
200 }
201 
TEST(HashMacro,HashNearFixedHashLength_96)202 TEST(HashMacro, HashNearFixedHashLength_96) {
203   // 95, 96, 97, 98 bytes
204   TEST_SUPPORTED_HASHES(LITERAL_80 "0123456789ABCDE");
205   TEST_SUPPORTED_HASHES(LITERAL_96);
206   TEST_SUPPORTED_HASHES(LITERAL_96 "!");
207   TEST_SUPPORTED_HASHES(LITERAL_96 "XY");
208 }
209 
TEST(HashMacro,HashNearFixedHashLength_128)210 TEST(HashMacro, HashNearFixedHashLength_128) {
211   // 127, 128, 129, 130 bytes
212   TEST_SUPPORTED_HASHES(LITERAL_96
213                         "0123456789ABCDEF"
214                         "0123456789ABCDE");
215   TEST_SUPPORTED_HASHES(LITERAL_128);
216   TEST_SUPPORTED_HASHES(LITERAL_128 "!");
217   TEST_SUPPORTED_HASHES(LITERAL_128 "AB");
218 }
219 
TEST(HashMacro,HashVeryLongStrings)220 TEST(HashMacro, HashVeryLongStrings) {
221   TEST_SUPPORTED_HASHES(
222       "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
223       "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
224       "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
225       "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
226       "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0");
227   TEST_SUPPORTED_HASHES(LITERAL_128 LITERAL_128 LITERAL_128);
228 }
229 
TEST(HashMacro,ExtraCharactersAreIgnored)230 TEST(HashMacro, ExtraCharactersAreIgnored) {
231   static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH(LITERAL_80 "?") ==
232                 PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH(LITERAL_80 "."));
233 
234   static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH(LITERAL_80 "ABCD") ==
235                 PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH(LITERAL_80 "zyxw"));
236 
237   static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_96_HASH(LITERAL_96 "?") ==
238                 PW_TOKENIZER_65599_FIXED_LENGTH_96_HASH(LITERAL_96 "."));
239 
240   static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_96_HASH(LITERAL_96 "ABCD") ==
241                 PW_TOKENIZER_65599_FIXED_LENGTH_96_HASH(LITERAL_96 "zyxw"));
242 
243   static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_128_HASH(LITERAL_128 "?") ==
244                 PW_TOKENIZER_65599_FIXED_LENGTH_128_HASH(LITERAL_128 "."));
245 
246   static_assert(PW_TOKENIZER_65599_FIXED_LENGTH_128_HASH(LITERAL_128 "ABCD") ==
247                 PW_TOKENIZER_65599_FIXED_LENGTH_128_HASH(LITERAL_128 "zyxw"));
248 }
249 
250 }  // namespace
251 }  // namespace pw::tokenizer
252