1 // Copyright 2020 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14
15 #include "pw_tokenizer/token_database.h"
16
17 #include <cstring>
18 #include <string>
19 #include <string_view>
20
21 #include "gtest/gtest.h"
22
23 namespace pw::tokenizer {
24 namespace {
25
26 using namespace std::literals::string_view_literals;
27
// Well-formed database image shared by most tests in this file. Layout as
// written below: a 16-byte header (the magic "TOKENS", two zero bytes, a
// 4-byte entry count of 3, four zero bytes), three 8-byte entries, and then the
// strings "hi!", "goodbye" and ":)" (the last one is closed by the literal's
// implicit null terminator).
//
// Use alignas to ensure that the data is properly aligned for database entries.
// This avoids unaligned memory reads.
alignas(TokenDatabase::RawEntry) constexpr char kBasicData[] =
    "TOKENS\0\0\x03\x00\x00\x00\0\0\0\0"
    "\x01\0\0\0\0\0\0\0"  // Entry 0: the tests expect token 1.
    "\x02\0\0\0\0\0\0\0"  // Entry 1: the tests expect token 2.
    "\xFF\0\0\0\0\0\0\0"  // Entry 2: the tests expect token 0xFF.
    "hi!\0"
    "goodbye\0"
    ":)";
38
// Database image with an entry count of zero: only the 16-byte header is
// present. Fifteen bytes are spelled out; the sixteenth is the literal's
// implicit terminator.
alignas(TokenDatabase::RawEntry) constexpr char kEmptyData[] =
    "TOKENS\0\0\x00\x00\x00\x00\0\0\0";  // Last byte is null terminator.
41
// Image whose magic reads "TOKENs" (lower-case final 's') instead of "TOKENS".
// The ValidCheck test expects IsValid() to reject it. Note that the header also
// declares three entries while only one entry and one string follow.
alignas(TokenDatabase::RawEntry) constexpr char kBadMagic[] =
    "TOKENs\0\0\x03\x00\x00\x00\0\0\0\0"
    "\x01\0\0\0\0\0\0\0"
    "hi!\0";
46
// Header-only image in which the two bytes after the magic are \0\1 rather
// than the \0\0 used by the valid fixtures. The ValidCheck test expects
// IsValid() to reject it.
alignas(TokenDatabase::RawEntry) constexpr char kBadVersion[] =
    "TOKENS\0\1\x00\0\0\0\0\0\0\0";
49
// Header-only image whose entry-count field starts with 0xff although no
// entries or strings follow. The ValidCheck test expects IsValid() to reject
// it.
alignas(TokenDatabase::RawEntry) constexpr char kBadEntryCount[] =
    "TOKENS\0\0\xff\x00\x00\x00\0\0\0\0";
52
// Use signed data and a size with the top bit set to test that the entry count
// is read correctly, without per-byte sign extension.
//
// Layout: a header whose entry-count field begins with 0x80 (128), followed by
// rows of eight 8-byte "TOKNdate" placeholder entries (the trailing comments
// give the running entry total), and then a string table consisting solely of
// null bytes, i.e. empty strings. The final string row is one byte shorter
// than the others; the literal's implicit terminator supplies the last byte.
alignas(TokenDatabase::RawEntry) constexpr signed char kSignedWithTopBit[] =
    "TOKENS\0\0\x80\x00\x00\x00\0\0\0\0"
    // Entries
    "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"  // 32
    "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"  // 64
    "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"  // 96
    "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
    "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"  // 128
    // Strings (empty)
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"  // 32
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"  // 64
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"  // 96
    "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";  // 128
79
// Compile-time database view over kBasicData, shared by the iterator and
// lookup tests below. The static_assert pins the expected entry count.
constexpr TokenDatabase kBasicDatabase = TokenDatabase::Create<kBasicData>();
static_assert(kBasicDatabase.size() == 3u);
82
// Checks the entry count reported by size() for each well-formed fixture.
TEST(TokenDatabase, EntryCount) {
  // Compile-time checks, one through each Create() overload.
  static_assert(TokenDatabase::Create(kEmptyData).size() == 0u);
  static_assert(TokenDatabase::Create<kBasicData>().size() == 3u);

  // The signed fixture declares 0x80 entries; the count is checked at run
  // time.
  const TokenDatabase signed_db = TokenDatabase::Create<kSignedWithTopBit>();
  EXPECT_EQ(signed_db.size(), 0x80u);
}
88
// Exercises TokenDatabase::IsValid() on the file-level fixtures and on a set
// of inline byte strings, both at compile time and at run time.
TEST(TokenDatabase, ValidCheck) {
  // The well-formed fixtures are accepted in constant expressions.
  static_assert(TokenDatabase::IsValid(kBasicData));
  static_assert(TokenDatabase::IsValid(kEmptyData));
  static_assert(TokenDatabase::IsValid(kSignedWithTopBit));

  // The same bytes are accepted from a non-const buffer filled at run time.
  char runtime_copy[sizeof(kBasicData)];
  std::memcpy(runtime_copy, kBasicData, sizeof(kBasicData));
  EXPECT_TRUE(TokenDatabase::IsValid(runtime_copy));

  // The malformed fixtures are rejected.
  static_assert(!TokenDatabase::IsValid(kBadMagic));
  static_assert(!TokenDatabase::IsValid(kBadVersion));
  static_assert(!TokenDatabase::IsValid(kBadEntryCount));

  // Malformed headers given inline are rejected as well.
  static_assert(!TokenDatabase::IsValid("TOKENS\0\0\0\0"));  // too short
  static_assert(!TokenDatabase::IsValid("TOKENS\0\1\0\0\0\0\0\0\0\0"));
  static_assert(!TokenDatabase::IsValid("TOKENSv0\0\0\0\0\0\0\0\0"));
  static_assert(!TokenDatabase::IsValid("tokens\0\0\0\0\0\0\0\0\0\0"));

  // One entry, but no string table; this is one byte too short.
  static_assert(
      !TokenDatabase::IsValid("TOKENS\0\0\x01\x00\x00\x00\0\0\0\0WXYZdate"sv));

  // Add one byte for the string table and the image becomes valid.
  static_assert(
      TokenDatabase::IsValid("TOKENS\0\0\x01\x00\x00\x00\0\0\0\0WXYZdate\0"sv));

  // Two entries with a single terminator in the string table: rejected.
  static_assert(
      !TokenDatabase::IsValid("TOKENS\0\0\x02\x00\x00\x00\0\0\0\0"
                              "WXYZdate"
                              "WXYZdate"
                              "\0"sv));

  // Two entries with two terminated strings (the second one empty): accepted.
  static_assert(
      TokenDatabase::IsValid("TOKENS\0\0\x02\x00\x00\x00\0\0\0\0"
                             "WXYZdate"
                             "WXYZdate"
                             "hi\0\0"sv));

  // Two entries with two non-empty strings: accepted.
  static_assert(
      TokenDatabase::IsValid("TOKENS\0\0\x02\x00\x00\x00\0\0\0\0"
                             "WXYZdate"
                             "WXYZdate"
                             "hi\0hello\0"sv));
}
131
// Walks kBasicDatabase from begin() to end(), checking every entry's token and
// string along with the iterator's distance from the start.
TEST(TokenDatabase, Iterator) {
  const auto first = kBasicDatabase.begin();
  auto cursor = first;

  // Entry 0.
  EXPECT_EQ(cursor->token, 1u);
  EXPECT_STREQ(cursor.entry().string, "hi!");

  // Entry 1.
  ++cursor;
  EXPECT_EQ(cursor->token, 2u);
  EXPECT_STREQ(cursor.entry().string, "goodbye");
  EXPECT_EQ(cursor - first, 1);

  // Entry 2.
  ++cursor;
  EXPECT_EQ(cursor->token, 0xFFu);
  EXPECT_STREQ(cursor.entry().string, ":)");
  EXPECT_EQ(cursor - first, 2);

  // One more step reaches end(), size() entries away from begin().
  ++cursor;
  EXPECT_EQ(cursor, kBasicDatabase.end());
  EXPECT_EQ(static_cast<size_t>(cursor - first), kBasicDatabase.size());
}
152
// Pre-increment yields the advanced position and leaves the iterator there.
TEST(TokenDatabase, Iterator_PreIncrement) {
  auto cursor = kBasicDatabase.begin();

  // The result of ++cursor already refers to the second entry...
  EXPECT_EQ((++cursor)->token, 2u);
  // ...and so does the iterator itself afterwards.
  EXPECT_STREQ(cursor.entry().string, "goodbye");
}
158
// Post-increment yields the previous position while advancing the iterator.
TEST(TokenDatabase, Iterator_PostIncrement) {
  auto cursor = kBasicDatabase.begin();

  // The value of cursor++ still refers to the first entry.
  EXPECT_EQ((cursor++)->token, 1u);

  // The iterator itself has moved on to the second entry.
  EXPECT_EQ(cursor->token, 2u);
  EXPECT_STREQ(cursor.entry().string, "goodbye");
}
166
// Looking up the first entry's token returns exactly that entry.
TEST(TokenDatabase, SingleEntryLookup_FirstEntry) {
  TokenDatabase::Entries found = kBasicDatabase.Find(1);

  // Stop here if the size is wrong, so found[0] is only read when it exists.
  ASSERT_EQ(found.size(), 1u);
  EXPECT_FALSE(found.empty());
  EXPECT_STREQ(found[0].string, "hi!");

  // Range-based iteration visits the same single entry.
  for (const auto& hit : found) {
    EXPECT_EQ(hit.token, 1u);
    EXPECT_STREQ(hit.string, "hi!");
  }
}
178
// Looking up the middle entry's token returns exactly that entry.
TEST(TokenDatabase, SingleEntryLookup_MiddleEntry) {
  TokenDatabase::Entries found = kBasicDatabase.Find(2);

  // Stop here if the size is wrong, so found[0] is only read when it exists.
  ASSERT_EQ(found.size(), 1u);
  EXPECT_FALSE(found.empty());
  EXPECT_STREQ(found[0].string, "goodbye");
}
185
// Looking up the last entry's token returns exactly that entry.
TEST(TokenDatabase, SingleEntryLookup_LastEntry) {
  TokenDatabase::Entries found = kBasicDatabase.Find(0xff);

  // Stop here if the size is wrong, so found[0] is only read when it exists.
  ASSERT_EQ(found.size(), 1u);
  EXPECT_FALSE(found.empty());
  EXPECT_STREQ(found[0].string, ":)");
}
192
// Tokens that do not occur in kBasicDatabase produce an empty result, whether
// they lie below, between, or above the stored tokens.
TEST(TokenDatabase, SingleEntryLookup_NonPresent) {
  const TokenDatabase& db = kBasicDatabase;

  EXPECT_TRUE(db.Find(0).empty());            // Below the smallest token.
  EXPECT_TRUE(db.Find(3).empty());            // Between stored tokens.
  EXPECT_TRUE(db.Find(10239).empty());        // Above the largest token.
  EXPECT_TRUE(db.Find(0xFFFFFFFFu).empty());  // All bits set.
}
199
// A lookup that matches nothing yields an empty range that is safe to iterate.
TEST(TokenDatabase, SingleEntryLookup_NoMatches) {
  // Can also create the database at runtime.
  TokenDatabase runtime_db = TokenDatabase::Create(kBasicData);
  const auto found = runtime_db.Find(42);

  ASSERT_EQ(found.size(), 0u);
  EXPECT_TRUE(found.empty());

  // There were no matches, so the loop body should never execute.
  for ([[maybe_unused]] const auto& hit : found) {
    FAIL();
  }
}
212
// Database image in which three of the five entries begin with the same token
// byte (1). The string table holds "hi!", "goodbye" and ":)", followed by empty
// strings for the remaining entries.
alignas(TokenDatabase::RawEntry) constexpr char kCollisionsData[] =
    "TOKENS\0\0\x05\0\0\0\0\0\0\0"
    "\x01\0\0\0date"
    "\x01\0\0\0date"
    "\x01\0\0\0date"
    "\x02\0\0\0date"
    "\xFF\0\0\0date"
    "hi!\0goodbye\0:)\0\0";
221
// Compile-time database view over kCollisionsData, used to test lookups that
// match several entries. The static_assert pins the expected entry count.
constexpr TokenDatabase kCollisions = TokenDatabase::Create<kCollisionsData>();
static_assert(kCollisions.size() == 5u);
224
// A token shared by several entries yields all of them, in database order.
TEST(TokenDatabase, MultipleEntriesWithSameToken) {
  auto collisions = kCollisions.Find(1);

  // The range starts at the first entry with token 1; its end() lands on the
  // next entry in the database, whose token is 2.
  EXPECT_EQ(collisions.begin()->token, 1u);
  EXPECT_EQ(collisions.end()->token, 2u);

  // Stop here if the size is wrong, so the indexing below stays in range.
  ASSERT_EQ(collisions.size(), 3u);

  EXPECT_STREQ(collisions[0].string, "hi!");
  EXPECT_STREQ(collisions[1].string, "goodbye");
  EXPECT_STREQ(collisions[2].string, ":)");

  // Every entry in the range carries the token that was looked up.
  for (const auto& hit : collisions) {
    EXPECT_EQ(hit.token, 1u);
  }
}
240
// A valid database with zero entries is ok(), finds nothing, and iterates over
// nothing.
TEST(TokenDatabase, Empty) {
  constexpr TokenDatabase no_entries = TokenDatabase::Create<kEmptyData>();
  static_assert(no_entries.ok());
  static_assert(no_entries.size() == 0u);

  EXPECT_TRUE(no_entries.Find(0).empty());
  EXPECT_TRUE(no_entries.Find(123).empty());

  // The database is empty, so the loop body should never execute.
  for ([[maybe_unused]] const auto& entry : no_entries) {
    FAIL();
  }
}
254
// A default-constructed database has no entries, is not ok(), and finds
// nothing.
TEST(TokenDatabase, NullDatabase) {
  constexpr TokenDatabase null_db;

  static_assert(!null_db.ok());
  static_assert(null_db.size() == 0u);

  EXPECT_TRUE(null_db.Find(0).empty());
}
262
// Create() on bytes that are not a valid database returns a database that is
// not ok() and whose lookups come back empty.
TEST(TokenDatabase, InvalidData) {
  constexpr TokenDatabase truncated_db = TokenDatabase::Create("TOKENS\0\0");
  static_assert(!truncated_db.ok());

  EXPECT_TRUE(truncated_db.Find(0).empty());
}
269
// Create() accepts a std::string; contents that are not a database produce a
// database that is not ok().
TEST(TokenDatabase, FromString) {
  const std::string not_a_database("wow!");
  TokenDatabase from_string = TokenDatabase::Create(not_a_database);

  EXPECT_FALSE(from_string.ok());
}
275
276 } // namespace
277 } // namespace pw::tokenizer
278