1 // Copyright 2020 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14
15 // This program generates Python test data for decoder_test.py.
16 //
17 // To generate the test data, build the target
18 // pw_tokenizer_generate_decoding_test_data. Execute the binary and move the
19 // generated files to this directory.
20
21 #include <array>
22 #include <cctype>
23 #include <cinttypes>
24 #include <cstdarg>
25 #include <cstdint>
26 #include <cstdio>
27 #include <random>
28 #include <span>
29
30 #include "pw_tokenizer/internal/decode.h"
31 #include "pw_tokenizer/tokenize.h"
32 #include "pw_varint/varint.h"
33
34 namespace {
35
// Describes how the generated test data is rendered for one target language.
// All members point at string literals; nothing here owns memory.
struct SourceFileFormat {
  // Suffix appended to "<name>_test_data" to form the output file name.
  const char* extension;

  // Token that starts a line comment in the target language.
  const char* comment;

  // printf-style template written before the test cases. It is formatted with
  // the data set name followed by the C++ test case type.
  const char* header;

  // printf-style template written after the test cases. It is formatted with
  // the data set name.
  const char* footer;

  // Prefix for a test case entry. NOTE(review): no code visible in this file
  // reads this member; confirm before relying on it.
  const char* test_case_prefix;

  // Text emitted directly before and after each escaped binary string.
  const char* binary_string_prefix;
  const char* binary_string_suffix;
};
46
// License text written at the top of every generated file, one entry per
// output line. Entries do not include the comment token; an empty entry
// produces a line holding only the comment token.
//
// clang-format off
constexpr const char* kCopyrightLines[] = {
    "Copyright 2020 The Pigweed Authors",
    "",
    "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not",
    "use this file except in compliance with the License. You may obtain a copy of",
    "the License at",
    "",
    " https://www.apache.org/licenses/LICENSE-2.0",
    "",
    "Unless required by applicable law or agreed to in writing, software",
    "distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT",
    "WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the",
    "License for the specific language governing permissions and limitations under",
    "the License.",
};
// clang-format on
64
// Opening text of the generated C++ header. It is used as a printf format
// string with two %s arguments, in this order: the data set name (becomes the
// innermost namespace) and the C++ type aliased as TestCase. The text ends
// inside the kTestData initializer, so the test cases follow it directly.
constexpr char kCcHeader[] = R"(#pragma once

#include <string_view>
#include <tuple>

namespace pw::test::%s {

using namespace std::literals::string_view_literals;

// clang-format off
using TestCase = %s;

inline constexpr TestCase kTestData[] = {
)";
80
// Closing text of the generated C++ header: ends the kTestData initializer and
// the namespace. It is used as a printf format string with one %s argument,
// the data set name.
constexpr char kCcFooter[] = R"(
};

} // namespace pw::test::%s
)";
86
// Opening text of the generated Python module. It is used as a printf format
// string with two %s arguments, in this order: the data set name and the C++
// test case type (both only appear in a comment). The text ends inside the
// TEST_DATA tuple, so the test cases follow it directly.
//
// The body of the emitted TestCase() helper must be indented; without the
// indentation the generated module is not valid Python.
constexpr const char kPythonHeader[] = R"("""Generated test data."""

# pylint: disable=line-too-long
# C++ test case type for %s:
# %s


def TestCase(*args): # pylint: disable=invalid-name
    return tuple(args)


# yapf: disable
TEST_DATA = (
)";
101
102 constexpr SourceFileFormat kCcFormat{
103 ".h", "//", kCcHeader, kCcFooter, "TestCase", "\"", "\"sv"};
104
105 constexpr SourceFileFormat kPythonFormat{
106 ".py", "#", kPythonHeader, "\n)\n", "", "b'", "'"};
107
108 class TestDataFile {
109 public:
TestDataFile(const char * name,const SourceFileFormat & format,const char * test_case_format)110 TestDataFile(const char* name,
111 const SourceFileFormat& format,
112 const char* test_case_format)
113 : format_(format),
114 name_(name),
115 test_case_format_(test_case_format),
116 path_(std::string(name) + "_test_data" + format_.extension),
117 file_(std::fopen(path_.c_str(), "w")) {}
118
~TestDataFile()119 ~TestDataFile() { std::fclose(file_); }
120
fmt() const121 const SourceFileFormat& fmt() const { return format_; }
path() const122 const std::string& path() const { return path_; }
123
124 // Writes a file with test cases uses the provided function.
WriteTestCases(void (* function)(TestDataFile *))125 void WriteTestCases(void (*function)(TestDataFile*)) {
126 static constexpr const char* kFileBase =
127 &__FILE__[std::string_view(__FILE__).find_last_of('/') + 1];
128
129 for (const char* line : kCopyrightLines) {
130 printf("%s", fmt().comment);
131 if (line[0] == '\0') {
132 printf("\n");
133 } else {
134 printf(" %s\n", line);
135 }
136 }
137
138 printf("\n%s AUTOGENERATED - DO NOT EDIT\n", fmt().comment);
139 printf("%s This file contains test data generated by %s.\n",
140 fmt().comment,
141 kFileBase);
142
143 printf(fmt().header, name_, test_case_format_);
144 function(this);
145 printf(fmt().footer, name_);
146 }
147
148 // Starts a section of test cases in the file.
Section(const char * comment)149 void Section(const char* comment) {
150 printf("\n%s %s\n", fmt().comment, comment);
151 }
152
printf(const char * format,...)153 int printf(const char* format, ...) PW_PRINTF_FORMAT(2, 3) {
154 va_list args;
155 va_start(args, format);
156 const int result = std::vfprintf(file_, format, args);
157 va_end(args);
158 return result;
159 }
160
161 private:
162 SourceFileFormat format_;
163 const char* name_;
164 const char* test_case_format_;
165 std::string path_;
166 FILE* file_;
167 };
168
169 // Writes a decoding test case to the file.
TestCase(TestDataFile * file,std::span<const uint8_t> buffer,const char * format,const char * formatted)170 void TestCase(TestDataFile* file,
171 std::span<const uint8_t> buffer,
172 const char* format,
173 const char* formatted) {
174 file->printf(R"(TestCase("%s", "%s", %s)",
175 format,
176 formatted,
177 file->fmt().binary_string_prefix);
178
179 for (uint8_t byte : buffer) {
180 file->printf("\\x%02x", byte);
181 }
182
183 file->printf("%s),\n", file->fmt().binary_string_suffix);
184 }
185
186 template <size_t kSize>
TestCase(TestDataFile * file,const char * format,const char (& buffer)[kSize],const char * formatted)187 void TestCase(TestDataFile* file,
188 const char* format,
189 const char (&buffer)[kSize],
190 const char* formatted) {
191 TestCase(file,
192 std::span(reinterpret_cast<const uint8_t*>(buffer), kSize - 1),
193 format,
194 formatted);
195 }
196
// Records a test case whose expected values are computed rather than written
// by hand: the arguments are encoded with PW_TOKENIZE_TO_BUFFER and formatted
// with std::snprintf, and both results are passed to TestCase(). A variable
// named `file` must be in scope where the macro is used.
//
// __VA_ARGS__ is expanded twice, so ONLY variables / constants should be used.
#define MAKE_TEST_CASE(format, ...)                                         \
  do {                                                                      \
    std::array<uint8_t, 128> encoded;                                       \
    size_t encoded_size = encoded.size();                                   \
    PW_TOKENIZE_TO_BUFFER(                                                  \
        encoded.data(), &encoded_size, format, ##__VA_ARGS__);              \
                                                                            \
    std::array<char, 128> expected = {};                                    \
    std::snprintf(expected.data(), expected.size(), format, ##__VA_ARGS__); \
                                                                            \
    /* Skip the token; only the encoded arguments are recorded. */          \
    const auto arguments =                                                  \
        std::span(encoded).first(encoded_size).subspan(4);                  \
    TestCase(file, arguments, format, expected.data());                     \
  } while (0)

// Formats the contents like an error. ERROR_STR("...") is used next to other
// string literals in the expected strings, so its expansion is concatenated
// with them.
#define ERROR_STR PW_TOKENIZER_ARG_DECODING_ERROR
214
215 // Generates data to test tokenized string decoding.
GenerateEncodedStrings(TestDataFile * file)216 void GenerateEncodedStrings(TestDataFile* file) {
217 std::mt19937 random(6006411);
218 std::uniform_int_distribution<int64_t> big;
219 std::uniform_int_distribution<int32_t> medium;
220 std::uniform_int_distribution<char> small(' ', '~');
221 std::uniform_real_distribution<float> real;
222
223 file->Section("Simple strings");
224 TestCase(file, "%s", "\3SFO", "SFO");
225 TestCase(file, "%s", "\4KSJC", "KSJC");
226 TestCase(file, "%s", "\0", "");
227
228 TestCase(file, "%5s%s", "\2no\3fun", " nofun");
229 TestCase(file, "%5s%s", "\6abcdef\0", "abcdef");
230 TestCase(file, "%5s%s", "\0\6abcdef", " abcdef");
231
232 TestCase(file,
233 "%s %-6s%s%s%s",
234 "\5Intel\580586\7toaster\1 \4oven",
235 "Intel 80586 toaster oven");
236 TestCase(file,
237 "%s %-6s%s%s%s",
238 "\5Apple\x09"
239 "automatic\7 pencil\1 \x09sharpener",
240 "Apple automatic pencil sharpener");
241
242 file->Section("Zero-length strings");
243 TestCase(file, "%s-%s", "\x02so\x00", "so-");
244 TestCase(file, "%s-%s", "\x00\04cool", "-cool");
245 TestCase(file, "%s%s%3s%s", "\0\0\0\0", " ");
246 TestCase(file, "(%5s)(%2s)(%7s)", "\x80\0\x80", "([...])( )( [...])");
247
248 file->Section("Invalid strings");
249 TestCase(file, "%s", "\x03hi", ERROR_STR("%s ERROR (hi)"));
250 TestCase(file, "%30s", "\x03hi", ERROR_STR("%30s ERROR (hi)"));
251 TestCase(file, "%30s", "\x83hi", ERROR_STR("%30s ERROR (hi)"));
252 TestCase(file, "%s", "\x85yo!", ERROR_STR("%s ERROR (yo!)"));
253 TestCase(file, "%s", "\x01", ERROR_STR("%s ERROR"));
254 TestCase(file, "%30s", "\x81", ERROR_STR("%30s ERROR"));
255
256 file->Section("Continue after truncated string");
257 TestCase(file, "%s %d %s", "\x82go\4\5lunch", "go[...] 2 lunch");
258 TestCase(file, "%6s%s%s", "\x80\x85hello\x05there", " [...]hello[...]there");
259
260 file->Section("Floating point");
261 TestCase(file, "%1.1f", "\0\0\0\0", "0.0");
262 TestCase(file, "%0.5f", "\xdb\x0f\x49\x40", "3.14159");
263
264 file->Section("Character"); // ZigZag doubles the value of positive integers.
265 TestCase(file, "%c", "\x40", " "); // 0x20
266 TestCase(file, "%c", "\x48", "$"); // 0x24
267 TestCase(file, "%c", "\x48", "$"); // 0x24
268 TestCase(file, "100%c!", "\x4A", "100%!"); // 0x25
269
270 file->Section("Atypical argument types");
271 MAKE_TEST_CASE("%ju", static_cast<uintmax_t>(99));
272 MAKE_TEST_CASE("%jd", static_cast<intmax_t>(99));
273 MAKE_TEST_CASE("%zu", sizeof(uint64_t));
274 MAKE_TEST_CASE("%zd", static_cast<ssize_t>(123));
275 MAKE_TEST_CASE("%td", static_cast<ptrdiff_t>(99));
276
277 file->Section("Percent character");
278 TestCase(file, "%%", "", "%");
279 TestCase(file, "%%%%%%%%", "abc", "%%%%");
280 TestCase(file, "whoa%%%%wow%%%%!%%", "", "whoa%%wow%%!%");
281 TestCase(file, "This is %d%% effective", "\x02", "This is 1% effective");
282 TestCase(
283 file, "%% is 100%sa%%sign%%%s", "\x01%\x03OK?", "% is 100%a%sign%OK?");
284
285 file->Section("Percent character prints after errors");
286 TestCase(file, "%s%%", "\x83-10\0", "-10[...]%");
287 TestCase(
288 file, "%d%% is a good %%", "", ERROR_STR("%d MISSING") "% is a good %");
289
290 file->Section("Various format strings");
291 MAKE_TEST_CASE("!");
292 MAKE_TEST_CASE("%s", "%s");
293 MAKE_TEST_CASE("%s", "hello");
294 MAKE_TEST_CASE("%s%s", "Hello", "old");
295 MAKE_TEST_CASE("%s to the%c%s", "hello", ' ', "whirled");
296 MAKE_TEST_CASE("hello %s %d %d %d", "rolled", 1, 2, 3);
297
298 TestCase(file, "", "", "");
299 TestCase(file, "This has no specifiers", "", "This has no specifiers");
300 TestCase(file, "%s_or_%3s", "\x05hello\x02hi", "hello_or_ hi");
301 TestCase(file, "%s_or_%3d", "\x05hello\x7f", "hello_or_-64");
302 TestCase(file,
303 "%s or hi%c pi=%1.2e",
304 "\x05hello\x42\xdb\x0f\x49\x40",
305 "hello or hi! pi=3.14e+00");
306 TestCase(file,
307 "Why, %s there. My favorite number is %.2f%c",
308 "\x05hello\xdb\x0f\x49\x40\x42",
309 "Why, hello there. My favorite number is 3.14!");
310
311 file->Section("Various errors");
312 TestCase(file, "%d", "", ERROR_STR("%d MISSING"));
313
314 TestCase(file,
315 "ABC%d123%dabc%dABC",
316 "",
317 "ABC" ERROR_STR("%d MISSING") "123" ERROR_STR(
318 "%d SKIPPED") "abc" ERROR_STR("%d SKIPPED") "ABC");
319
320 TestCase(file,
321 "%sXY%+ldxy%a",
322 "\x83Yo!\x80",
323 "Yo![...]XY" ERROR_STR("%+ld ERROR") "xy" ERROR_STR("%a SKIPPED"));
324
325 TestCase(file, "%d", "", ERROR_STR("%d MISSING"));
326
327 TestCase(file,
328 "%sXY%+ldxy%a",
329 "\x83Yo!\x80",
330 "Yo![...]XY" ERROR_STR("%+ld ERROR") "xy" ERROR_STR("%a SKIPPED"));
331
332 TestCase(file,
333 "%s%lld%9u",
334 "\x81$\x80\x80",
335 "$[...]" ERROR_STR("%lld ERROR") ERROR_STR("%9u SKIPPED"));
336
337 file->Section("Alternate form (#)");
338 MAKE_TEST_CASE("Hex: %#x", 0xbeef);
339 MAKE_TEST_CASE("Hex: %#08X", 0xfeed);
340
341 file->Section("Random integers");
342 for (int i = 0; i < 100; ++i) {
343 float f = real(random);
344 MAKE_TEST_CASE(
345 "This is a number: %+08.3e%1.0E%02d%g%G%f%-3f", f, f, i, f, f, f, f);
346 }
347
348 for (int i = 0; i < 100; ++i) {
349 unsigned long long n1 = big(random);
350 int n2 = medium(random);
351 char ch = small(random);
352 if (ch == '"' || ch == '\\') {
353 ch = '\t';
354 }
355
356 MAKE_TEST_CASE("%s: %llu %d %c", std::to_string(i).c_str(), n1, n2, ch);
357 }
358
359 for (int i = 0; i < 100; ++i) {
360 const long long n1 = big(random);
361 const unsigned n2 = medium(random);
362 const char ch = small(random);
363
364 MAKE_TEST_CASE(
365 "%s: %lld 0x%16u%08X %d", std::to_string(i).c_str(), n1, n2, n2, ch);
366 }
367 }
368
369 template <typename T>
OutputVarintTest(TestDataFile * file,T i)370 void OutputVarintTest(TestDataFile* file, T i) {
371 if constexpr (sizeof(T) <= sizeof(int)) {
372 file->printf(R"(TestCase("%%d", "%d", "%%u", "%u", %s)",
373 static_cast<int>(i),
374 static_cast<unsigned>(i),
375 file->fmt().binary_string_prefix);
376 } else {
377 file->printf(R"(TestCase("%%lld", "%lld", "%%llu", "%llu", %s)",
378 static_cast<long long>(i),
379 static_cast<unsigned long long>(i),
380 file->fmt().binary_string_prefix);
381 }
382
383 std::array<uint8_t, 10> buffer;
384 // All integers are encoded as signed for tokenization.
385 size_t size =
386 pw::varint::Encode(i, std::as_writable_bytes(std::span(buffer)));
387
388 for (size_t i = 0; i < size; ++i) {
389 file->printf("\\x%02x", buffer[i]);
390 }
391
392 file->printf("%s),\n", file->fmt().binary_string_suffix);
393 }
394
395 // Generates data to test variable-length integer decoding.
GenerateVarints(TestDataFile * file)396 void GenerateVarints(TestDataFile* file) {
397 std::mt19937 random(6006411);
398 std::uniform_int_distribution<int64_t> signed64;
399 std::uniform_int_distribution<int32_t> signed32;
400 std::uniform_int_distribution<int16_t> signed16;
401
402 file->Section("Important numbers");
403 OutputVarintTest(file, 0);
404 OutputVarintTest(file, std::numeric_limits<int16_t>::min());
405 OutputVarintTest(file, std::numeric_limits<int16_t>::min() + 1);
406 OutputVarintTest(file, std::numeric_limits<int16_t>::max() - 1);
407 OutputVarintTest(file, std::numeric_limits<int16_t>::max());
408 OutputVarintTest(file, std::numeric_limits<int32_t>::min());
409 OutputVarintTest(file, std::numeric_limits<int32_t>::min() + 1);
410 OutputVarintTest(file, std::numeric_limits<int32_t>::max() - 1);
411 OutputVarintTest(file, std::numeric_limits<int32_t>::max());
412 OutputVarintTest(file, std::numeric_limits<int64_t>::min());
413 OutputVarintTest(file, std::numeric_limits<int64_t>::min() + 1);
414 OutputVarintTest(file, std::numeric_limits<int64_t>::max() - 1);
415 OutputVarintTest(file, std::numeric_limits<int64_t>::max());
416
417 file->Section("Random 64-bit ints");
418 for (int i = 0; i < 500; ++i) {
419 OutputVarintTest(file, signed64(random));
420 }
421 file->Section("Random 32-bit ints");
422 for (int i = 0; i < 100; ++i) {
423 OutputVarintTest(file, signed32(random));
424 }
425 file->Section("Random 16-bit ints");
426 for (int i = 0; i < 100; ++i) {
427 OutputVarintTest(file, signed16(random));
428 }
429
430 file->Section("All 8-bit numbers");
431 {
432 int i = std::numeric_limits<int8_t>::min();
433 while (true) {
434 OutputVarintTest(file, i);
435 if (i == std::numeric_limits<int8_t>::max()) {
436 break;
437 }
438 // Don't use an inline increment to avoid undefined behavior (overflow).
439 i += 1;
440 }
441 }
442 }
443
444 template <typename Function>
WriteFile(const char * name,const char * test_case_format,Function function)445 void WriteFile(const char* name,
446 const char* test_case_format,
447 Function function) {
448 for (const SourceFileFormat& file_format : {kCcFormat, kPythonFormat}) {
449 TestDataFile file(name, file_format, test_case_format);
450 file.WriteTestCases(function);
451
452 std::printf("Wrote %s\n", file.path().c_str());
453 }
454 }
455
456 } // namespace
457
main(int,char **)458 int main(int, char**) {
459 WriteFile("tokenized_string_decoding",
460 "std::tuple<const char*, std::string_view, std::string_view>",
461 GenerateEncodedStrings);
462 WriteFile("varint_decoding",
463 "std::tuple<const char*, const char*, const char*, const char*, "
464 "std::string_view>",
465 GenerateVarints);
466 return 0;
467 }
468