1 // Copyright 2020 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14
15 #include "pw_tokenizer/detokenize.h"
16
17 #include <string_view>
18
19 #include "gtest/gtest.h"
20
21 namespace pw::tokenizer {
22 namespace {
23
24 using namespace std::literals::string_view_literals;
25
// Use a shorter name for the error string macro. ERR("message") is written
// next to ordinary string literals in the expected values below, so the
// expansion is concatenated with them at compile time.
#define ERR PW_TOKENIZER_ARG_DECODING_ERROR
28
// Use alignas to ensure that the data is properly aligned to be read from a
// token database entry struct. This avoids unaligned memory reads.
//
// Binary token database holding four strings without format arguments. Layout
// as it appears from this data and the tests that consume it (confirm against
// the TokenDatabase documentation): the 8 bytes "TOKENS\0\0", a 4-byte
// little-endian entry count, 4 zero bytes, one 8-byte record per entry (a
// 4-byte little-endian token followed by 4 more bytes, filled with "----"
// here), and finally the null-terminated strings in record order.
alignas(TokenDatabase::RawEntry) constexpr char kBasicData[] =
    "TOKENS\0\0"
    "\x04\x00\x00\x00"  // Entry count: 4
    "\0\0\0\0"
    "\x01\x00\x00\x00----"  // Token 00000001 -> "One"
    "\x05\x00\x00\x00----"  // Token 00000005 -> "TWO"
    "\xFF\x00\x00\x00----"  // Token 000000ff -> "333"
    "\xFF\xEE\xEE\xDD----"  // Token ddeeeeff -> "FOUR"
    "One\0"
    "TWO\0"
    "333\0"
    "FOUR";  // Terminated by the string literal's implicit trailing null.
43
44 class Detokenize : public ::testing::Test {
45 protected:
Detokenize()46 Detokenize() : detok_(TokenDatabase::Create<kBasicData>()) {}
47 Detokenizer detok_;
48 };
49
// Tokens whose strings take no arguments detokenize to the stored string.
TEST_F(Detokenize, NoFormatting) {
  auto one = detok_.Detokenize("\1\0\0\0"sv);
  EXPECT_EQ(one.BestString(), "One");

  auto two = detok_.Detokenize("\5\0\0\0"sv);
  EXPECT_EQ(two.BestString(), "TWO");

  auto three = detok_.Detokenize("\xff\x00\x00\x00"sv);
  EXPECT_EQ(three.BestString(), "333");

  auto four = detok_.Detokenize("\xff\xee\xee\xdd"sv);
  EXPECT_EQ(four.BestString(), "FOUR");
}
56
// Inputs with fewer than four bytes do not contain a full token, so the best
// string must be empty.
TEST_F(Detokenize, BestString_MissingToken_IsEmpty) {
  auto no_data = detok_.Detokenize("");
  EXPECT_FALSE(no_data.ok());

  // Pointer + size overload with zero and one byte of data.
  EXPECT_EQ(detok_.Detokenize("", 0u).BestString(), "");
  EXPECT_EQ(detok_.Detokenize("\1", 1u).BestString(), "");

  // string_view overload with two and three bytes of data.
  EXPECT_EQ(detok_.Detokenize("\1\0"sv).BestString(), "");
  EXPECT_EQ(detok_.Detokenize("\1\0\0"sv).BestString(), "");
  EXPECT_EQ(detok_.Detokenize("\0\0\0"sv).BestString(), "");
}
65
// A complete token that is not in the database yields a failed result with an
// empty best string.
TEST_F(Detokenize, BestString_UnknownToken_IsEmpty) {
  auto zero_token = detok_.Detokenize("\0\0\0\0"sv);
  EXPECT_FALSE(zero_token.ok());
  EXPECT_EQ(zero_token.BestString(), "");

  auto token_two = detok_.Detokenize("\2\0\0\0"sv);
  EXPECT_EQ(token_two.BestString(), "");

  // Unknown token followed by one extra byte of data.
  auto with_extra_byte = detok_.Detokenize("\x10\x32\x54\x76\x99"sv);
  EXPECT_EQ(with_extra_byte.BestString(), "");

  auto high_token = detok_.Detokenize("\x98\xba\xdc\xfe"sv);
  EXPECT_EQ(high_token.BestString(), "");
}
73
// Inputs with fewer than four bytes report a "missing token" error message.
TEST_F(Detokenize, BestStringWithErrors_MissingToken_ErrorMessage) {
  // Every case below is expected to produce this same message.
  constexpr std::string_view kMissingToken = ERR("missing token");

  auto no_data = detok_.Detokenize("");
  EXPECT_FALSE(no_data.ok());

  // Pointer + size overload.
  EXPECT_EQ(detok_.Detokenize("", 0u).BestStringWithErrors(), kMissingToken);
  EXPECT_EQ(detok_.Detokenize("\1", 1u).BestStringWithErrors(), kMissingToken);

  // string_view overload.
  EXPECT_EQ(detok_.Detokenize("\1\0"sv).BestStringWithErrors(), kMissingToken);
  EXPECT_EQ(detok_.Detokenize("\1\0\0"sv).BestStringWithErrors(),
            kMissingToken);
  EXPECT_EQ(detok_.Detokenize("\0\0\0"sv).BestStringWithErrors(),
            kMissingToken);
}
87
// Unknown tokens are reported in the error message as eight hex digits, with
// the encoded bytes interpreted as a little-endian value.
TEST_F(Detokenize, BestStringWithErrors_UnknownToken_ErrorMessage) {
  auto zero_token = detok_.Detokenize("\0\0\0\0"sv);
  EXPECT_FALSE(zero_token.ok());
  EXPECT_EQ(zero_token.BestStringWithErrors(), ERR("unknown token 00000000"));

  auto token_two = detok_.Detokenize("\2\0\0\0"sv);
  EXPECT_EQ(token_two.BestStringWithErrors(), ERR("unknown token 00000002"));

  // The trailing \x99 byte is not part of the token.
  auto with_extra_byte = detok_.Detokenize("\x10\x32\x54\x76\x99"sv);
  EXPECT_EQ(with_extra_byte.BestStringWithErrors(),
            ERR("unknown token 76543210"));

  auto high_token = detok_.Detokenize("\x98\xba\xdc\xfe"sv);
  EXPECT_EQ(high_token.BestStringWithErrors(), ERR("unknown token fedcba98"));
}
99
// Binary token database whose strings take printf-style arguments. The bytes
// are, in order: "TOKENS\0\0", a 4-byte little-endian entry count (9), 4 zero
// bytes, nine 8-byte records (4-byte token followed by "----"), and then the
// null-terminated strings in record order. The alignas matches the alignment
// of TokenDatabase::RawEntry.
alignas(TokenDatabase::RawEntry) constexpr char kDataWithArguments[] =
    "TOKENS\0\0"
    "\x09\x00\x00\x00"  // Entry count: 9
    "\0\0\0\0"
    "\x00\x00\x00\x00----"  // Empty string
    "\x0A\x0B\x0C\x0D----"  // "Use the %s, %s."
    "\x0E\x0F\x00\x01----"  // "Now there are %d of %s!"
    "\xAA\xAA\xAA\xAA----"  // "%c!"
    "\xBB\xBB\xBB\xBB----"  // "%hhu!"
    "\xCC\xCC\xCC\xCC----"  // "%hu!"
    "\xDD\xDD\xDD\xDD----"  // "%u!"
    "\xEE\xEE\xEE\xEE----"  // "%lu!"
    "\xFF\xFF\xFF\xFF----"  // "%llu!"
    "\0"                    // Empty string for the all-zero token
    "Use the %s, %s.\0"
    "Now there are %d of %s!\0"
    "%c!\0"    // AA
    "%hhu!\0"  // BB
    "%hu!\0"   // CC
    "%u!\0"    // DD
    "%lu!\0"   // EE
    "%llu!";   // FF

// Database created at compile time from kDataWithArguments.
constexpr TokenDatabase kWithArgs = TokenDatabase::Create<kDataWithArguments>();
124
// A test case pairs encoded input (first) with the expected output (second).
using Case = std::pair<std::string_view, std::string_view>;

// Packs the given Case values into a std::array, preserving their order, so
// that a test can iterate over them with a range-based for loop.
template <typename... Args>
auto TestCases(Args... args) {
  constexpr std::size_t kCaseCount = sizeof...(Args);
  std::array<Case, kCaseCount> cases{args...};
  return cases;
}
131
132 class DetokenizeWithArgs : public ::testing::Test {
133 protected:
DetokenizeWithArgs()134 DetokenizeWithArgs() : detok_(kWithArgs) {}
135
136 Detokenizer detok_;
137 };
138
// A token that is not in the database produces no matches.
TEST_F(DetokenizeWithArgs, NoMatches) {
  auto result = detok_.Detokenize("\x23\xab\xc9\x87"sv);
  EXPECT_EQ(result.matches().size(), 0u);
}
142
// A token that appears once in the database produces exactly one match.
TEST_F(DetokenizeWithArgs, SingleMatch) {
  auto result = detok_.Detokenize("\x00\x00\x00\x00"sv);
  EXPECT_EQ(result.matches().size(), 1u);
}
146
// The all-zero token maps to the empty string in kDataWithArguments.
TEST_F(DetokenizeWithArgs, Empty) {
  auto result = detok_.Detokenize("\x00\x00\x00\x00"sv);
  EXPECT_TRUE(result.BestString().empty());
}
150
// Messages with valid encoded arguments decode to the fully formatted string.
TEST_F(DetokenizeWithArgs, Successful) {
  // Run through test cases, but don't include cases that use %hhu or %llu since
  // these are not currently supported in arm-none-eabi-gcc.
  const auto cases = TestCases(
      Case{"\x0A\x0B\x0C\x0D\5force\4Luke"sv, "Use the force, Luke."},
      Case{"\x0E\x0F\x00\x01\4\4them"sv, "Now there are 2 of them!"},
      Case{"\xAA\xAA\xAA\xAA\xfc\x01"sv, "~!"},
      Case{"\xCC\xCC\xCC\xCC\xfe\xff\x07"sv, "65535!"},
      Case{"\xDD\xDD\xDD\xDD\xfe\xff\x07"sv, "65535!"},
      Case{"\xDD\xDD\xDD\xDD\xfe\xff\xff\xff\x1f"sv, "4294967295!"},
      Case{"\xEE\xEE\xEE\xEE\xfe\xff\x07"sv, "65535!"},
      Case{"\xEE\xEE\xEE\xEE\xfe\xff\xff\xff\x1f"sv, "4294967295!"});

  for (const auto& [encoded, expected] : cases) {
    auto result = detok_.Detokenize(encoded);
    EXPECT_EQ(result.BestString(), expected);
  }
}
166
// Bytes left over after a string with no arguments make the result an error,
// and the best string is empty.
TEST_F(DetokenizeWithArgs, ExtraDataError) {
  auto result = detok_.Detokenize("\x00\x00\x00\x00MORE data"sv);
  EXPECT_FALSE(result.ok());
  EXPECT_TRUE(result.BestString().empty());
}
172
// When the data ends before the second %s argument, the first argument is
// still substituted and the second is left as its specifier (or flagged as
// MISSING in the string with errors).
TEST_F(DetokenizeWithArgs, MissingArgumentError) {
  constexpr std::string_view kExpectedWithErrors =
      "Use the force, " ERR("%s MISSING") ".";

  auto result = detok_.Detokenize("\x0A\x0B\x0C\x0D\5force"sv);
  EXPECT_FALSE(result.ok());
  EXPECT_EQ(result.BestString(), "Use the force, %s.");
  EXPECT_EQ(result.BestStringWithErrors(), kExpectedWithErrors);
}
180
// When the %d argument fails to decode, it is reported as an ERROR and the
// following %s argument is reported as SKIPPED; the plain best string keeps
// both specifiers unexpanded.
TEST_F(DetokenizeWithArgs, DecodingError) {
  constexpr std::string_view kExpectedWithErrors =
      "Now there are " ERR("%d ERROR") " of " ERR("%s SKIPPED") "!";

  auto result = detok_.Detokenize("\x0E\x0F\x00\x01\xFF"sv);
  EXPECT_FALSE(result.ok());
  EXPECT_EQ(result.BestString(), "Now there are %d of %s!");
  EXPECT_EQ(result.BestStringWithErrors(), kExpectedWithErrors);
}
188
// Binary token database in which several strings share the same token. The
// bytes are, in order: "TOKENS\0\0", a 4-byte little-endian entry count (15),
// 4 zero bytes, fifteen 8-byte records, and then the null-terminated strings.
// The trailing hex digit comments (1-F) pair each record with its string.
//
// Each record is a 4-byte token followed by 4 more bytes. NOTE(review): the
// second field looks like a removal date, with \xff\xff\xff\xff meaning the
// string is still present -- records 2 and 8 use other values and are paired
// with the "removed" strings. Confirm against the TokenDatabase documentation.
alignas(TokenDatabase::RawEntry) constexpr char kDataWithCollisions[] =
    "TOKENS\0\0"
    "\x0F\x00\x00\x00"  // Entry count: 15
    "\0\0\0\0"
    "\x00\x00\x00\x00\xff\xff\xff\xff"  // 1
    "\x00\x00\x00\x00\x01\x02\x03\x04"  // 2
    "\x00\x00\x00\x00\xff\xff\xff\xff"  // 3
    "\x00\x00\x00\x00\xff\xff\xff\xff"  // 4
    "\x00\x00\x00\x00\xff\xff\xff\xff"  // 5
    "\x00\x00\x00\x00\xff\xff\xff\xff"  // 6
    "\x00\x00\x00\x00\xff\xff\xff\xff"  // 7
    "\xAA\xAA\xAA\xAA\x00\x00\x00\x00"  // 8
    "\xAA\xAA\xAA\xAA\xff\xff\xff\xff"  // 9
    "\xBB\xBB\xBB\xBB\xff\xff\xff\xff"  // A
    "\xBB\xBB\xBB\xBB\xff\xff\xff\xff"  // B
    "\xCC\xCC\xCC\xCC\xff\xff\xff\xff"  // C
    "\xCC\xCC\xCC\xCC\xff\xff\xff\xff"  // D
    "\xDD\xDD\xDD\xDD\xff\xff\xff\xff"  // E
    "\xDD\xDD\xDD\xDD\xff\xff\xff\xff"  // F
    // String table
    "This string is present\0"   // 1
    "This string is removed\0"   // 2
    "One arg %d\0"               // 3
    "One arg %s\0"               // 4
    "Two args %s %u\0"           // 5
    "Two args %s %s %% %% %%\0"  // 6
    "Four args %d %d %d %d\0"    // 7
    "This one is removed\0"      // 8
    "This one is present\0"      // 9
    "Two ints %d %d\0"           // A
    "Three ints %d %d %d\0"      // B
    "Three strings %s %s %s\0"   // C
    "Two strings %s %s\0"        // D
    "Three %s %s %s\0"           // E
    "Five %d %d %d %d %s\0";     // F

// Database created at compile time from kDataWithCollisions.
constexpr TokenDatabase kWithCollisions =
    TokenDatabase::Create<kDataWithCollisions>();
227
228 class DetokenizeWithCollisions : public ::testing::Test {
229 protected:
DetokenizeWithCollisions()230 DetokenizeWithCollisions() : detok_(kWithCollisions) {}
231
232 Detokenizer detok_;
233 };
234
// Among the strings sharing token 00000000, the best string is the one that
// decodes the given argument data successfully.
TEST_F(DetokenizeWithCollisions, Collision_AlwaysPreferSuccessfulDecode) {
  // No argument data.
  auto no_args = detok_.Detokenize("\0\0\0\0"sv);
  EXPECT_EQ(no_args.BestString(), "This string is present"sv);

  // A single byte of argument data.
  auto one_int = detok_.Detokenize("\0\0\0\0\x01"sv);
  EXPECT_EQ(one_int.BestString(), "One arg -1"sv);

  auto one_string = detok_.Detokenize("\0\0\0\0\x80"sv);
  EXPECT_EQ(one_string.BestString(), "One arg [...]"sv);

  // A four-character string followed by one more byte.
  auto string_and_int = detok_.Detokenize("\0\0\0\0\4Hey!\x04"sv);
  EXPECT_EQ(string_and_int.BestString(), "Two args Hey! 2"sv);
}
244
// Checks which colliding string is chosen as the best string for each input
// sharing token 00000000.
TEST_F(DetokenizeWithCollisions, Collision_PreferDecodingAllBytes) {
  const auto cases =
      TestCases(Case{"\0\0\0\0\x80\x80\x80\x80\x00"sv, "Two args [...] 0"},
                Case{"\0\0\0\0\x08?"sv, "One arg %s"},
                Case{"\0\0\0\0\x01!\x01\x80"sv, "Two args ! \x80 % % %"});

  for (const auto& [encoded, expected] : cases) {
    auto result = detok_.Detokenize(encoded);
    EXPECT_EQ(result.BestString(), expected);
  }
}
253
// When every colliding string hits a decoding error, the best string is the
// one with fewer arguments left undecoded.
TEST_F(DetokenizeWithCollisions, Collision_PreferFewestDecodingErrors) {
  // Token BBBBBBBB: "Two ints" is chosen over "Three ints".
  auto ints = detok_.Detokenize("\xBB\xBB\xBB\xBB\x00"sv);
  EXPECT_EQ(ints.BestString(), "Two ints 0 %d"sv);

  // Token CCCCCCCC: "Two strings" is chosen over "Three strings".
  auto strings = detok_.Detokenize("\xCC\xCC\xCC\xCC\2Yo\5?"sv);
  EXPECT_EQ(strings.BestString(), "Two strings Yo %s"sv);
}
261
// Both strings for token DDDDDDDD fail on their final %s argument. The match
// that decoded more arguments ("Five ...") must be ordered ahead of the one
// that decoded fewer ("Three ...").
TEST_F(DetokenizeWithCollisions, Collision_PreferMostDecodedArgs) {
  auto result = detok_.Detokenize("\xDD\xDD\xDD\xDD\x01\x02\x01\x04\x05"sv);

  // Token DDDDDDDD has two entries in kDataWithCollisions. Stop the test here
  // if the match count is wrong, since indexing matches() below would
  // otherwise read out of bounds.
  ASSERT_EQ(result.matches().size(), 2u);

  EXPECT_EQ(static_cast<std::string_view>(result.matches()[0].value()),
            "Five -1 1 -1 2 %s");
  EXPECT_EQ(static_cast<std::string_view>(result.matches()[1].value()),
            "Three \2 \4 %s"sv);
}
267
TEST_F(DetokenizeWithCollisions, Collision_PreferMostDecodedArgs_NoPercent) {
  // The "Two args %s %s ..." string successfully decodes this, and has more
  // "arguments", because of %%, but %% doesn't count as a decoded argument.
  auto result = detok_.Detokenize("\0\0\0\0\x01\x00\x01\x02"sv);
  EXPECT_EQ(result.BestString(), "Four args -1 0 -1 1");
}
274
// With no argument data, the "present" string is chosen over the "removed"
// string that shares its token.
TEST_F(DetokenizeWithCollisions, Collision_PreferStillPresentString) {
  const auto cases =
      TestCases(Case{"\x00\x00\x00\x00"sv, "This string is present"},
                Case{"\xAA\xAA\xAA\xAA"sv, "This one is present"});

  for (const auto& [encoded, expected] : cases) {
    auto result = detok_.Detokenize(encoded);
    EXPECT_EQ(result.BestString(), expected);
  }
}
282
// All seven database entries for token 00000000 are reported as matches.
TEST_F(DetokenizeWithCollisions, Collision_TracksAllMatches) {
  auto result = detok_.Detokenize("\0\0\0\0"sv);
  const auto match_count = result.matches().size();
  EXPECT_EQ(match_count, 7u);
}
287
288 } // namespace
289 } // namespace pw::tokenizer
290