1 // Copyright 2020 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14 
15 #include "pw_tokenizer/token_database.h"
16 
17 #include <cstring>
18 #include <string>
19 #include <string_view>
20 
21 #include "gtest/gtest.h"
22 
23 namespace pw::tokenizer {
24 namespace {
25 
26 using namespace std::literals::string_view_literals;
27 
28 // Use alignas to ensure that the data is properly aligned for database entries.
29 // This avoids unaligned memory reads.
30 alignas(TokenDatabase::RawEntry) constexpr char kBasicData[] =
31     "TOKENS\0\0\x03\x00\x00\x00\0\0\0\0"
32     "\x01\0\0\0\0\0\0\0"
33     "\x02\0\0\0\0\0\0\0"
34     "\xFF\0\0\0\0\0\0\0"
35     "hi!\0"
36     "goodbye\0"
37     ":)";
38 
39 alignas(TokenDatabase::RawEntry) constexpr char kEmptyData[] =
40     "TOKENS\0\0\x00\x00\x00\x00\0\0\0";  // Last byte is null terminator.
41 
42 alignas(TokenDatabase::RawEntry) constexpr char kBadMagic[] =
43     "TOKENs\0\0\x03\x00\x00\x00\0\0\0\0"
44     "\x01\0\0\0\0\0\0\0"
45     "hi!\0";
46 
47 alignas(TokenDatabase::RawEntry) constexpr char kBadVersion[] =
48     "TOKENS\0\1\x00\0\0\0\0\0\0\0";
49 
50 alignas(TokenDatabase::RawEntry) constexpr char kBadEntryCount[] =
51     "TOKENS\0\0\xff\x00\x00\x00\0\0\0\0";
52 
53 // Use signed data and a size with the top bit set to test that the entry count
54 // is read correctly, without per-byte sign extension.
55 alignas(TokenDatabase::RawEntry) constexpr signed char kSignedWithTopBit[] =
56     "TOKENS\0\0\x80\x00\x00\x00\0\0\0\0"
57     // Entries
58     "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
59     "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
60     "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
61     "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"  // 32
62     "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
63     "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
64     "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
65     "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"  // 64
66     "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
67     "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
68     "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
69     "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"  // 96
70     "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
71     "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
72     "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
73     "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"  // 128
74     // Strings (empty)
75     "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"  //  32
76     "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"  //  64
77     "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"  //  96
78     "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";   // 128
79 
80 constexpr TokenDatabase kBasicDatabase = TokenDatabase::Create<kBasicData>();
81 static_assert(kBasicDatabase.size() == 3u);
82 
// size() must report the entry count stored in each database's header.
TEST(TokenDatabase, EntryCount) {
  // Both the template-argument and function-argument forms of Create() are
  // usable in constant expressions.
  static_assert(TokenDatabase::Create(kEmptyData).size() == 0u);
  static_assert(TokenDatabase::Create<kBasicData>().size() == 3u);

  // The count byte 0x80 would become negative if sign-extended as signed char.
  const auto signed_entry_count =
      TokenDatabase::Create<kSignedWithTopBit>().size();
  EXPECT_EQ(signed_entry_count, 0x80u);
}
88 
// Exercises TokenDatabase::IsValid() on well-formed and malformed byte
// sequences, once at run time and otherwise in constant expressions.
TEST(TokenDatabase, ValidCheck) {
  // Run-time check against a mutable, non-constexpr copy of the basic data.
  char runtime_copy[sizeof(kBasicData)];
  std::memcpy(runtime_copy, kBasicData, sizeof(runtime_copy));
  EXPECT_TRUE(TokenDatabase::IsValid(runtime_copy));

  // Well-formed fixtures are accepted.
  static_assert(TokenDatabase::IsValid(kBasicData));
  static_assert(TokenDatabase::IsValid(kEmptyData));
  static_assert(TokenDatabase::IsValid(kSignedWithTopBit));

  // Deliberately corrupted fixtures are rejected.
  static_assert(!TokenDatabase::IsValid(kBadMagic));
  static_assert(!TokenDatabase::IsValid(kBadVersion));
  static_assert(!TokenDatabase::IsValid(kBadEntryCount));

  // Malformed headers given inline:
  //   - data that is too short,
  //   - a nonzero version byte,
  //   - text where the version bytes belong,
  //   - a lowercase magic string.
  static_assert(!TokenDatabase::IsValid("TOKENS\0\0\0\0"));
  static_assert(!TokenDatabase::IsValid("TOKENS\0\1\0\0\0\0\0\0\0\0"));
  static_assert(!TokenDatabase::IsValid("TOKENSv0\0\0\0\0\0\0\0\0"));
  static_assert(!TokenDatabase::IsValid("tokens\0\0\0\0\0\0\0\0\0\0"));

  // One entry but no string table at all: one byte short of valid.
  static_assert(
      !TokenDatabase::IsValid("TOKENS\0\0\x01\x00\x00\x00\0\0\0\0WXYZdate"sv));

  // The same data plus a single string-table byte is accepted.
  static_assert(
      TokenDatabase::IsValid("TOKENS\0\0\x01\x00\x00\x00\0\0\0\0WXYZdate\0"sv));

  // Two entries with only one string-table byte are rejected.
  static_assert(
      !TokenDatabase::IsValid("TOKENS\0\0\x02\x00\x00\x00\0\0\0\0"
                              "WXYZdate"
                              "WXYZdate"
                              "\0"sv));

  // Two entries with two terminated strings (the second one empty).
  static_assert(
      TokenDatabase::IsValid("TOKENS\0\0\x02\x00\x00\x00\0\0\0\0"
                             "WXYZdate"
                             "WXYZdate"
                             "hi\0\0"sv));

  // Two entries with two non-empty terminated strings.
  static_assert(
      TokenDatabase::IsValid("TOKENS\0\0\x02\x00\x00\x00\0\0\0\0"
                             "WXYZdate"
                             "WXYZdate"
                             "hi\0hello\0"sv));
}
131 
// Walks the whole basic database with an iterator, checking the token, the
// string and the distance from begin() at each position.
TEST(TokenDatabase, Iterator) {
  auto cursor = kBasicDatabase.begin();

  // First entry.
  EXPECT_EQ(cursor->token, 1u);
  EXPECT_STREQ(cursor.entry().string, "hi!");

  // Second entry.
  ++cursor;
  EXPECT_EQ(cursor->token, 2u);
  EXPECT_STREQ(cursor.entry().string, "goodbye");
  EXPECT_EQ(cursor - kBasicDatabase.begin(), 1);

  // Third and last entry.
  ++cursor;
  EXPECT_EQ(cursor->token, 0xFFu);
  EXPECT_STREQ(cursor.entry().string, ":)");
  EXPECT_EQ(cursor - kBasicDatabase.begin(), 2);

  // One more step reaches end(), size() entries away from begin().
  ++cursor;
  EXPECT_EQ(cursor, kBasicDatabase.end());
  EXPECT_EQ(static_cast<size_t>(cursor - kBasicDatabase.begin()),
            kBasicDatabase.size());
}
152 
// Pre-increment yields the already-advanced iterator.
TEST(TokenDatabase, Iterator_PreIncrement) {
  auto cursor = kBasicDatabase.begin();

  // The result of ++cursor refers to the second entry, as does cursor itself.
  EXPECT_EQ((++cursor)->token, 2u);
  EXPECT_STREQ(cursor.entry().string, "goodbye");
}
158 
// Post-increment yields the previous position while still advancing.
TEST(TokenDatabase, Iterator_PostIncrement) {
  auto cursor = kBasicDatabase.begin();

  // The expression's value still refers to the first entry...
  EXPECT_EQ((cursor++)->token, 1u);

  // ...whereas the iterator itself now refers to the second.
  EXPECT_EQ(cursor->token, 2u);
  EXPECT_STREQ(cursor.entry().string, "goodbye");
}
166 
// Looking up the first token returns exactly one entry, reachable both by
// index and by range-based iteration.
TEST(TokenDatabase, SingleEntryLookup_FirstEntry) {
  auto found = kBasicDatabase.Find(1);

  // Fatal check: the indexed access below needs exactly one result.
  ASSERT_EQ(found.size(), 1u);
  EXPECT_FALSE(found.empty());
  EXPECT_STREQ(found[0].string, "hi!");

  for (const auto& result : found) {
    EXPECT_EQ(result.token, 1u);
    EXPECT_STREQ(result.string, "hi!");
  }
}
178 
// Looking up the middle token returns exactly its one entry.
TEST(TokenDatabase, SingleEntryLookup_MiddleEntry) {
  auto found = kBasicDatabase.Find(2);

  // Fatal check: the indexed access below needs exactly one result.
  ASSERT_EQ(found.size(), 1u);
  EXPECT_FALSE(found.empty());
  EXPECT_STREQ(found[0].string, "goodbye");
}
185 
// Looking up the last token returns exactly its one entry.
TEST(TokenDatabase, SingleEntryLookup_LastEntry) {
  auto found = kBasicDatabase.Find(0xff);

  // Fatal check: the indexed access below needs exactly one result.
  ASSERT_EQ(found.size(), 1u);
  EXPECT_FALSE(found.empty());
  EXPECT_STREQ(found[0].string, ":)");
}
192 
// Tokens absent from the basic database produce empty results. The probes are
// below the smallest token, between stored tokens, and above the largest.
TEST(TokenDatabase, SingleEntryLookup_NonPresent) {
  // Below the smallest stored token (1).
  EXPECT_TRUE(kBasicDatabase.Find(0).empty());

  // Between the stored tokens 2 and 0xFF.
  EXPECT_TRUE(kBasicDatabase.Find(3).empty());

  // Above the largest stored token (0xFF).
  EXPECT_TRUE(kBasicDatabase.Find(10239).empty());
  EXPECT_TRUE(kBasicDatabase.Find(0xFFFFFFFFu).empty());
}
199 
// A failed lookup on a database created at run time yields an empty range that
// is safe to iterate.
TEST(TokenDatabase, SingleEntryLookup_NoMatches) {
  // Unlike kBasicDatabase, this database is not created in a constant
  // expression.
  TokenDatabase runtime_db = TokenDatabase::Create(kBasicData);

  const auto found = runtime_db.Find(42);
  ASSERT_EQ(found.size(), 0u);
  EXPECT_TRUE(found.empty());

  for (const auto& result : found) {
    static_cast<void>(result);  // Silences the unused-variable warning.
    FAIL();  // Nothing matched, so the loop body must never run.
  }
}
212 
213 alignas(TokenDatabase::RawEntry) constexpr char kCollisionsData[] =
214     "TOKENS\0\0\x05\0\0\0\0\0\0\0"
215     "\x01\0\0\0date"
216     "\x01\0\0\0date"
217     "\x01\0\0\0date"
218     "\x02\0\0\0date"
219     "\xFF\0\0\0date"
220     "hi!\0goodbye\0:)\0\0";
221 
222 constexpr TokenDatabase kCollisions = TokenDatabase::Create<kCollisionsData>();
223 static_assert(kCollisions.size() == 5u);
224 
// Looking up a token shared by several entries returns all of them, in
// database order.
TEST(TokenDatabase, MultipleEntriesWithSameToken) {
  TokenDatabase::Entries match = kCollisions.Find(1);

  // Verify the number of matches first, with a fatal assertion. Everything
  // below dereferences the range's iterators or indexes into it, which is only
  // meaningful once the range is known to hold the three expected entries.
  ASSERT_EQ(match.size(), 3u);

  // The range starts at the first entry with token 1. Its end is not the end
  // of the database, so end() may be dereferenced here: it refers to the next
  // entry, which has token 2.
  EXPECT_EQ(match.begin()->token, 1u);
  EXPECT_EQ(match.end()->token, 2u);

  EXPECT_STREQ(match[0].string, "hi!");
  EXPECT_STREQ(match[1].string, "goodbye");
  EXPECT_STREQ(match[2].string, ":)");

  // Every entry in the range carries the requested token.
  for (const auto& entry : match) {
    EXPECT_EQ(entry.token, 1u);
  }
}
240 
// A well-formed database with zero entries is ok(), finds nothing and iterates
// over nothing.
TEST(TokenDatabase, Empty) {
  constexpr TokenDatabase no_entries_db = TokenDatabase::Create<kEmptyData>();
  static_assert(no_entries_db.ok());
  static_assert(no_entries_db.size() == 0u);

  EXPECT_TRUE(no_entries_db.Find(0).empty());
  EXPECT_TRUE(no_entries_db.Find(123).empty());

  for (const auto& entry : no_entries_db) {
    static_cast<void>(entry);  // Silences the unused-variable warning.
    FAIL();  // There are no entries, so the loop body must never run.
  }
}
254 
// A default-constructed database is not ok(), reports a size of zero, and can
// still be queried.
TEST(TokenDatabase, NullDatabase) {
  constexpr TokenDatabase null_db;

  static_assert(!null_db.ok());
  static_assert(null_db.size() == 0u);
  EXPECT_TRUE(null_db.Find(0).empty());
}
262 
// Creating a database from malformed bytes yields one that is not ok() but can
// still be queried.
TEST(TokenDatabase, InvalidData) {
  // Only the magic and version bytes are supplied here.
  constexpr TokenDatabase invalid_db = TokenDatabase::Create("TOKENS\0\0");

  static_assert(!invalid_db.ok());
  EXPECT_TRUE(invalid_db.Find(0).empty());
}
269 
// Create() accepts a std::string; contents that are not a database give a
// database that is not ok().
TEST(TokenDatabase, FromString) {
  TokenDatabase from_string_db = TokenDatabase::Create(std::string("wow!"));

  EXPECT_FALSE(from_string_db.ok());
}
275 
276 }  // namespace
277 }  // namespace pw::tokenizer
278