1 // Copyright 2020 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14 
// This program generates test data for the tokenizer decoding tests (e.g.
// decoder_test.py). Each data set is written twice: once as a C++ header (.h)
// and once as a Python file (.py).
//
// To generate the test data, build the target
// pw_tokenizer_generate_decoding_test_data. Execute the binary and move the
// generated files to this directory.
20 
#include <array>
#include <cctype>
#include <cinttypes>
#include <cstdarg>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <limits>
#include <random>
#include <span>
#include <string>
#include <string_view>
29 
30 #include "pw_tokenizer/internal/decode.h"
31 #include "pw_tokenizer/tokenize.h"
32 #include "pw_varint/varint.h"
33 
34 namespace {
35 
36 // Defines how to format test cases for the target language.
// Defines how to format test cases for the target language.
//
// Instances are built with positional aggregate initialization, so the member
// order below must not change.
struct SourceFileFormat {
  // Suffix appended to "<name>_test_data" to form the output file name.
  const char* extension;
  // Line-comment marker written in front of every generated comment line.
  const char* comment;
  // printf-style template written before the test cases. It is formatted with
  // the data set name and the C++ test case type, in that order.
  const char* header;
  // printf-style template written after the test cases. It is formatted with
  // the data set name.
  const char* footer;
  // NOTE(review): not read by any code visible in this file; presumably the
  // token that starts a test case in the target language -- confirm.
  const char* test_case_prefix;
  // Text that opens the literal holding the \x-escaped encoded bytes.
  const char* binary_string_prefix;
  // Text that closes the literal holding the \x-escaped encoded bytes.
  const char* binary_string_suffix;
};
46 
// Copyright notice written at the top of every generated file, one entry per
// output line. Each entry is emitted after the target language's comment
// marker; empty entries produce a line containing only the marker.
// clang-format off
constexpr const char* kCopyrightLines[] = {
"Copyright 2020 The Pigweed Authors",
"",
"Licensed under the Apache License, Version 2.0 (the \"License\"); you may not",
"use this file except in compliance with the License. You may obtain a copy of",
"the License at",
"",
"    https://www.apache.org/licenses/LICENSE-2.0",
"",
"Unless required by applicable law or agreed to in writing, software",
"distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT",
"WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the",
"License for the specific language governing permissions and limitations under",
"the License.",
};
// clang-format on
64 
// Opening text of a generated C++ header, used as a printf format string.
// The header includes a %s for the name and a %s for the test case type.
// The name becomes the innermost namespace; the test case type becomes the
// TestCase alias. The text ends inside the kTestData initializer list so that
// test cases can be appended directly after it.
constexpr const char kCcHeader[] = R"(#pragma once

#include <string_view>
#include <tuple>

namespace pw::test::%s {

using namespace std::literals::string_view_literals;

// clang-format off
using TestCase = %s;

inline constexpr TestCase kTestData[] = {
)";
80 
// Closing text of a generated C++ header, used as a printf format string.
// It closes the kTestData initializer list and the namespace; the single %s
// is the data set name, repeated in the namespace-closing comment.
constexpr const char kCcFooter[] = R"(
};

}  // namespace pw::test::%s
)";
86 
// Opening text of a generated Python file, used as a printf format string.
// It takes the same two %s arguments as the C++ header (name, then C++ test
// case type); here both only appear inside comments. The generated TestCase()
// helper turns its arguments into a tuple, and the text ends inside the
// TEST_DATA tuple so that test cases can be appended directly after it.
constexpr const char kPythonHeader[] = R"("""Generated test data."""

# pylint: disable=line-too-long
# C++ test case type for %s:
#     %s


def TestCase(*args):  # pylint: disable=invalid-name
    return tuple(args)


# yapf: disable
TEST_DATA = (
)";
101 
// Output format for C++ headers. Fields, in order: extension, comment marker,
// header, footer, test case prefix, binary string prefix, binary string
// suffix. Encoded bytes are emitted as a "..."sv string_view literal.
constexpr SourceFileFormat kCcFormat{
    ".h", "//", kCcHeader, kCcFooter, "TestCase", "\"", "\"sv"};
104 
// Output format for Python files. Fields, in order: extension, comment
// marker, header, footer, test case prefix, binary string prefix, binary
// string suffix. Encoded bytes are emitted as a b'...' bytes literal, and the
// footer only closes the TEST_DATA tuple.
constexpr SourceFileFormat kPythonFormat{
    ".py", "#", kPythonHeader, "\n)\n", "", "b'", "'"};
107 
108 class TestDataFile {
109  public:
TestDataFile(const char * name,const SourceFileFormat & format,const char * test_case_format)110   TestDataFile(const char* name,
111                const SourceFileFormat& format,
112                const char* test_case_format)
113       : format_(format),
114         name_(name),
115         test_case_format_(test_case_format),
116         path_(std::string(name) + "_test_data" + format_.extension),
117         file_(std::fopen(path_.c_str(), "w")) {}
118 
~TestDataFile()119   ~TestDataFile() { std::fclose(file_); }
120 
fmt() const121   const SourceFileFormat& fmt() const { return format_; }
path() const122   const std::string& path() const { return path_; }
123 
124   // Writes a file with test cases uses the provided function.
WriteTestCases(void (* function)(TestDataFile *))125   void WriteTestCases(void (*function)(TestDataFile*)) {
126     static constexpr const char* kFileBase =
127         &__FILE__[std::string_view(__FILE__).find_last_of('/') + 1];
128 
129     for (const char* line : kCopyrightLines) {
130       printf("%s", fmt().comment);
131       if (line[0] == '\0') {
132         printf("\n");
133       } else {
134         printf(" %s\n", line);
135       }
136     }
137 
138     printf("\n%s AUTOGENERATED - DO NOT EDIT\n", fmt().comment);
139     printf("%s This file contains test data generated by %s.\n",
140            fmt().comment,
141            kFileBase);
142 
143     printf(fmt().header, name_, test_case_format_);
144     function(this);
145     printf(fmt().footer, name_);
146   }
147 
148   // Starts a section of test cases in the file.
Section(const char * comment)149   void Section(const char* comment) {
150     printf("\n%s %s\n", fmt().comment, comment);
151   }
152 
printf(const char * format,...)153   int printf(const char* format, ...) PW_PRINTF_FORMAT(2, 3) {
154     va_list args;
155     va_start(args, format);
156     const int result = std::vfprintf(file_, format, args);
157     va_end(args);
158     return result;
159   }
160 
161  private:
162   SourceFileFormat format_;
163   const char* name_;
164   const char* test_case_format_;
165   std::string path_;
166   FILE* file_;
167 };
168 
169 // Writes a decoding test case to the file.
TestCase(TestDataFile * file,std::span<const uint8_t> buffer,const char * format,const char * formatted)170 void TestCase(TestDataFile* file,
171               std::span<const uint8_t> buffer,
172               const char* format,
173               const char* formatted) {
174   file->printf(R"(TestCase("%s", "%s", %s)",
175                format,
176                formatted,
177                file->fmt().binary_string_prefix);
178 
179   for (uint8_t byte : buffer) {
180     file->printf("\\x%02x", byte);
181   }
182 
183   file->printf("%s),\n", file->fmt().binary_string_suffix);
184 }
185 
186 template <size_t kSize>
TestCase(TestDataFile * file,const char * format,const char (& buffer)[kSize],const char * formatted)187 void TestCase(TestDataFile* file,
188               const char* format,
189               const char (&buffer)[kSize],
190               const char* formatted) {
191   TestCase(file,
192            std::span(reinterpret_cast<const uint8_t*>(buffer), kSize - 1),
193            format,
194            formatted);
195 }
196 
// Builds a test case from a real tokenization call: the arguments are encoded
// with PW_TOKENIZE_TO_BUFFER and the expected text is produced by snprintf
// with the same format and arguments. Expects a `TestDataFile* file` to be in
// scope at the point of use.
//
// __VA_ARGS__ is expanded twice, so ONLY variables / constants should be used.
#define MAKE_TEST_CASE(format, ...)                                          \
  do {                                                                       \
    /* Encoded message: the 4-byte token followed by the arguments. */       \
    std::array<uint8_t, 128> encoded;                                        \
    size_t encoded_size = encoded.size();                                    \
    PW_TOKENIZE_TO_BUFFER(                                                   \
        encoded.data(), &encoded_size, format, ##__VA_ARGS__);               \
                                                                             \
    /* Reference output from the C library's formatter. */                   \
    std::array<char, 128> expected = {};                                     \
    std::snprintf(expected.data(), expected.size(), format, ##__VA_ARGS__);  \
                                                                             \
    const std::span<const uint8_t> encoded_args =                            \
        std::span(encoded).first(encoded_size).subspan(4); /* skip token */  \
    TestCase(file, encoded_args, format, expected.data());                   \
  } while (0)

// Formats the contents like an error.
#define ERROR_STR PW_TOKENIZER_ARG_DECODING_ERROR
214 
215 // Generates data to test tokenized string decoding.
GenerateEncodedStrings(TestDataFile * file)216 void GenerateEncodedStrings(TestDataFile* file) {
217   std::mt19937 random(6006411);
218   std::uniform_int_distribution<int64_t> big;
219   std::uniform_int_distribution<int32_t> medium;
220   std::uniform_int_distribution<char> small(' ', '~');
221   std::uniform_real_distribution<float> real;
222 
223   file->Section("Simple strings");
224   TestCase(file, "%s", "\3SFO", "SFO");
225   TestCase(file, "%s", "\4KSJC", "KSJC");
226   TestCase(file, "%s", "\0", "");
227 
228   TestCase(file, "%5s%s", "\2no\3fun", "   nofun");
229   TestCase(file, "%5s%s", "\6abcdef\0", "abcdef");
230   TestCase(file, "%5s%s", "\0\6abcdef", "     abcdef");
231 
232   TestCase(file,
233            "%s %-6s%s%s%s",
234            "\5Intel\580586\7toaster\1 \4oven",
235            "Intel 80586 toaster oven");
236   TestCase(file,
237            "%s %-6s%s%s%s",
238            "\5Apple\x09"
239            "automatic\7 pencil\1 \x09sharpener",
240            "Apple automatic pencil sharpener");
241 
242   file->Section("Zero-length strings");
243   TestCase(file, "%s-%s", "\x02so\x00", "so-");
244   TestCase(file, "%s-%s", "\x00\04cool", "-cool");
245   TestCase(file, "%s%s%3s%s", "\0\0\0\0", "   ");
246   TestCase(file, "(%5s)(%2s)(%7s)", "\x80\0\x80", "([...])(  )(  [...])");
247 
248   file->Section("Invalid strings");
249   TestCase(file, "%s", "\x03hi", ERROR_STR("%s ERROR (hi)"));
250   TestCase(file, "%30s", "\x03hi", ERROR_STR("%30s ERROR (hi)"));
251   TestCase(file, "%30s", "\x83hi", ERROR_STR("%30s ERROR (hi)"));
252   TestCase(file, "%s", "\x85yo!", ERROR_STR("%s ERROR (yo!)"));
253   TestCase(file, "%s", "\x01", ERROR_STR("%s ERROR"));
254   TestCase(file, "%30s", "\x81", ERROR_STR("%30s ERROR"));
255 
256   file->Section("Continue after truncated string");
257   TestCase(file, "%s %d %s", "\x82go\4\5lunch", "go[...] 2 lunch");
258   TestCase(file, "%6s%s%s", "\x80\x85hello\x05there", " [...]hello[...]there");
259 
260   file->Section("Floating point");
261   TestCase(file, "%1.1f", "\0\0\0\0", "0.0");
262   TestCase(file, "%0.5f", "\xdb\x0f\x49\x40", "3.14159");
263 
264   file->Section("Character");  // ZigZag doubles the value of positive integers.
265   TestCase(file, "%c", "\x40", " ");          // 0x20
266   TestCase(file, "%c", "\x48", "$");          // 0x24
267   TestCase(file, "%c", "\x48", "$");          // 0x24
268   TestCase(file, "100%c!", "\x4A", "100%!");  // 0x25
269 
270   file->Section("Atypical argument types");
271   MAKE_TEST_CASE("%ju", static_cast<uintmax_t>(99));
272   MAKE_TEST_CASE("%jd", static_cast<intmax_t>(99));
273   MAKE_TEST_CASE("%zu", sizeof(uint64_t));
274   MAKE_TEST_CASE("%zd", static_cast<ssize_t>(123));
275   MAKE_TEST_CASE("%td", static_cast<ptrdiff_t>(99));
276 
277   file->Section("Percent character");
278   TestCase(file, "%%", "", "%");
279   TestCase(file, "%%%%%%%%", "abc", "%%%%");
280   TestCase(file, "whoa%%%%wow%%%%!%%", "", "whoa%%wow%%!%");
281   TestCase(file, "This is %d%% effective", "\x02", "This is 1% effective");
282   TestCase(
283       file, "%% is 100%sa%%sign%%%s", "\x01%\x03OK?", "% is 100%a%sign%OK?");
284 
285   file->Section("Percent character prints after errors");
286   TestCase(file, "%s%%", "\x83-10\0", "-10[...]%");
287   TestCase(
288       file, "%d%% is a good %%", "", ERROR_STR("%d MISSING") "% is a good %");
289 
290   file->Section("Various format strings");
291   MAKE_TEST_CASE("!");
292   MAKE_TEST_CASE("%s", "%s");
293   MAKE_TEST_CASE("%s", "hello");
294   MAKE_TEST_CASE("%s%s", "Hello", "old");
295   MAKE_TEST_CASE("%s to the%c%s", "hello", ' ', "whirled");
296   MAKE_TEST_CASE("hello %s %d %d %d", "rolled", 1, 2, 3);
297 
298   TestCase(file, "", "", "");
299   TestCase(file, "This has no specifiers", "", "This has no specifiers");
300   TestCase(file, "%s_or_%3s", "\x05hello\x02hi", "hello_or_ hi");
301   TestCase(file, "%s_or_%3d", "\x05hello\x7f", "hello_or_-64");
302   TestCase(file,
303            "%s or hi%c pi=%1.2e",
304            "\x05hello\x42\xdb\x0f\x49\x40",
305            "hello or hi! pi=3.14e+00");
306   TestCase(file,
307            "Why, %s there. My favorite number is %.2f%c",
308            "\x05hello\xdb\x0f\x49\x40\x42",
309            "Why, hello there. My favorite number is 3.14!");
310 
311   file->Section("Various errors");
312   TestCase(file, "%d", "", ERROR_STR("%d MISSING"));
313 
314   TestCase(file,
315            "ABC%d123%dabc%dABC",
316            "",
317            "ABC" ERROR_STR("%d MISSING") "123" ERROR_STR(
318                "%d SKIPPED") "abc" ERROR_STR("%d SKIPPED") "ABC");
319 
320   TestCase(file,
321            "%sXY%+ldxy%a",
322            "\x83Yo!\x80",
323            "Yo![...]XY" ERROR_STR("%+ld ERROR") "xy" ERROR_STR("%a SKIPPED"));
324 
325   TestCase(file, "%d", "", ERROR_STR("%d MISSING"));
326 
327   TestCase(file,
328            "%sXY%+ldxy%a",
329            "\x83Yo!\x80",
330            "Yo![...]XY" ERROR_STR("%+ld ERROR") "xy" ERROR_STR("%a SKIPPED"));
331 
332   TestCase(file,
333            "%s%lld%9u",
334            "\x81$\x80\x80",
335            "$[...]" ERROR_STR("%lld ERROR") ERROR_STR("%9u SKIPPED"));
336 
337   file->Section("Alternate form (#)");
338   MAKE_TEST_CASE("Hex: %#x", 0xbeef);
339   MAKE_TEST_CASE("Hex: %#08X", 0xfeed);
340 
341   file->Section("Random integers");
342   for (int i = 0; i < 100; ++i) {
343     float f = real(random);
344     MAKE_TEST_CASE(
345         "This is a number: %+08.3e%1.0E%02d%g%G%f%-3f", f, f, i, f, f, f, f);
346   }
347 
348   for (int i = 0; i < 100; ++i) {
349     unsigned long long n1 = big(random);
350     int n2 = medium(random);
351     char ch = small(random);
352     if (ch == '"' || ch == '\\') {
353       ch = '\t';
354     }
355 
356     MAKE_TEST_CASE("%s: %llu %d %c", std::to_string(i).c_str(), n1, n2, ch);
357   }
358 
359   for (int i = 0; i < 100; ++i) {
360     const long long n1 = big(random);
361     const unsigned n2 = medium(random);
362     const char ch = small(random);
363 
364     MAKE_TEST_CASE(
365         "%s: %lld 0x%16u%08X %d", std::to_string(i).c_str(), n1, n2, n2, ch);
366   }
367 }
368 
369 template <typename T>
OutputVarintTest(TestDataFile * file,T i)370 void OutputVarintTest(TestDataFile* file, T i) {
371   if constexpr (sizeof(T) <= sizeof(int)) {
372     file->printf(R"(TestCase("%%d", "%d", "%%u", "%u", %s)",
373                  static_cast<int>(i),
374                  static_cast<unsigned>(i),
375                  file->fmt().binary_string_prefix);
376   } else {
377     file->printf(R"(TestCase("%%lld", "%lld", "%%llu", "%llu", %s)",
378                  static_cast<long long>(i),
379                  static_cast<unsigned long long>(i),
380                  file->fmt().binary_string_prefix);
381   }
382 
383   std::array<uint8_t, 10> buffer;
384   // All integers are encoded as signed for tokenization.
385   size_t size =
386       pw::varint::Encode(i, std::as_writable_bytes(std::span(buffer)));
387 
388   for (size_t i = 0; i < size; ++i) {
389     file->printf("\\x%02x", buffer[i]);
390   }
391 
392   file->printf("%s),\n", file->fmt().binary_string_suffix);
393 }
394 
395 // Generates data to test variable-length integer decoding.
GenerateVarints(TestDataFile * file)396 void GenerateVarints(TestDataFile* file) {
397   std::mt19937 random(6006411);
398   std::uniform_int_distribution<int64_t> signed64;
399   std::uniform_int_distribution<int32_t> signed32;
400   std::uniform_int_distribution<int16_t> signed16;
401 
402   file->Section("Important numbers");
403   OutputVarintTest(file, 0);
404   OutputVarintTest(file, std::numeric_limits<int16_t>::min());
405   OutputVarintTest(file, std::numeric_limits<int16_t>::min() + 1);
406   OutputVarintTest(file, std::numeric_limits<int16_t>::max() - 1);
407   OutputVarintTest(file, std::numeric_limits<int16_t>::max());
408   OutputVarintTest(file, std::numeric_limits<int32_t>::min());
409   OutputVarintTest(file, std::numeric_limits<int32_t>::min() + 1);
410   OutputVarintTest(file, std::numeric_limits<int32_t>::max() - 1);
411   OutputVarintTest(file, std::numeric_limits<int32_t>::max());
412   OutputVarintTest(file, std::numeric_limits<int64_t>::min());
413   OutputVarintTest(file, std::numeric_limits<int64_t>::min() + 1);
414   OutputVarintTest(file, std::numeric_limits<int64_t>::max() - 1);
415   OutputVarintTest(file, std::numeric_limits<int64_t>::max());
416 
417   file->Section("Random 64-bit ints");
418   for (int i = 0; i < 500; ++i) {
419     OutputVarintTest(file, signed64(random));
420   }
421   file->Section("Random 32-bit ints");
422   for (int i = 0; i < 100; ++i) {
423     OutputVarintTest(file, signed32(random));
424   }
425   file->Section("Random 16-bit ints");
426   for (int i = 0; i < 100; ++i) {
427     OutputVarintTest(file, signed16(random));
428   }
429 
430   file->Section("All 8-bit numbers");
431   {
432     int i = std::numeric_limits<int8_t>::min();
433     while (true) {
434       OutputVarintTest(file, i);
435       if (i == std::numeric_limits<int8_t>::max()) {
436         break;
437       }
438       // Don't use an inline increment to avoid undefined behavior (overflow).
439       i += 1;
440     }
441   }
442 }
443 
444 template <typename Function>
WriteFile(const char * name,const char * test_case_format,Function function)445 void WriteFile(const char* name,
446                const char* test_case_format,
447                Function function) {
448   for (const SourceFileFormat& file_format : {kCcFormat, kPythonFormat}) {
449     TestDataFile file(name, file_format, test_case_format);
450     file.WriteTestCases(function);
451 
452     std::printf("Wrote %s\n", file.path().c_str());
453   }
454 }
455 
456 }  // namespace
457 
main(int,char **)458 int main(int, char**) {
459   WriteFile("tokenized_string_decoding",
460             "std::tuple<const char*, std::string_view, std::string_view>",
461             GenerateEncodedStrings);
462   WriteFile("varint_decoding",
463             "std::tuple<const char*, const char*, const char*, const char*, "
464             "std::string_view>",
465             GenerateVarints);
466   return 0;
467 }
468