// Copyright 2020 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

// This file implements a basic fuzz test for the Detokenizer.
// An instance of the Detokenizer is created from a minimal, nearly-empty token
// database. Fuzz data is fed to the detokenizer in one of its supported input
// argument formats, chosen at random; the detokenizer then decodes this data
// and tries to match it to tokens in the database.

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <span>
#include <string>
#include <vector>

#include "pw_fuzzer/fuzzed_data_provider.h"
#include "pw_preprocessor/util.h"
#include "pw_tokenizer/detokenize.h"

namespace pw::tokenizer {
namespace {

constexpr size_t kFuzzRangeMin = 0;
constexpr size_t kFuzzRangeMax = 10000;

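// Selects which Detokenize() input form the fuzzer exercises for a given
// chunk of input. FuzzedDataProvider::ConsumeEnum() requires the enum to
// define kMaxValue as its largest enumerator.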
enum DetokenizeBufferArgumentType : uint8_t {
  kSpan = 0,
  kStringView,
  kPtrAndLength,
  kMaxValue = kPtrAndLength
};

// To fuzz the detokenizer more thoroughly, rather than using an empty token
// database, we construct a minimal database with four entries from a string
// literal array that matches the token database format (see token_database.h
// for details on the entry format).
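// Layout of the literal (see token_database.h for the authoritative
// description): an 8-byte "TOKENS\0\0" magic/version header, a 4-byte
// little-endian entry count (4 here), 4 reserved bytes, then four 8-byte
// entries (a 4-byte token followed by a 4-byte date field, filled with
// placeholder "----" bytes here), and finally the four null-terminated
// strings that the tokens map to.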
alignas(TokenDatabase::RawEntry) constexpr char kBasicData[] =
    "TOKENS\0\0"
    "\x04\x00\x00\x00"
    "\0\0\0\0"
    "\x01\x00\x00\x00----"
    "\x05\x00\x00\x00----"
    "\xFF\x00\x00\x00----"
    "\xFF\xEE\xEE\xDD----"
    "One\0"
    "TWO\0"
    "333\0"
    "FOUR";

}  // namespace

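// libFuzzer entry point, invoked repeatedly by the fuzzing engine with
// engine-generated inputs.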
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
  static Detokenizer detokenizer(TokenDatabase::Create<kBasicData>());

  FuzzedDataProvider provider(data, size);

  while (provider.remaining_bytes() != 0) {
    // Use the fuzz data to pick one of the supported buffer argument types,
    // then feed the Detokenizer a random-length buffer to be detokenized in
    // that format. The detokenized string returned is itself of little
    // consequence to this test.
    switch (provider.ConsumeEnum<DetokenizeBufferArgumentType>()) {
      case kSpan: {
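        // Exercise the std::span overload of Detokenize().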
        size_t consumed_size = provider.ConsumeIntegralInRange<size_t>(
            kFuzzRangeMin, kFuzzRangeMax);
        std::vector<uint8_t> buffer =
            provider.ConsumeBytes<uint8_t>(consumed_size);
        // Use data() rather than &buffer[0] so an empty buffer is well defined.
        auto detokenized_string =
            detokenizer.Detokenize(std::span(buffer.data(), buffer.size()));
        static_cast<void>(detokenized_string);
        break;
      }

      case kStringView: {
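        // Exercise the std::string_view overload of Detokenize().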
        std::string str =
            provider.ConsumeRandomLengthString(provider.remaining_bytes());
        auto detokenized_string = detokenizer.Detokenize(str);
        static_cast<void>(detokenized_string);
        break;
      }

      case kPtrAndLength: {
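        // Exercise the pointer-and-length overload of Detokenize().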
        size_t consumed_size = provider.ConsumeIntegralInRange<size_t>(
            kFuzzRangeMin, kFuzzRangeMax);
        std::vector<uint8_t> buffer =
            provider.ConsumeBytes<uint8_t>(consumed_size);
        auto detokenized_string =
            detokenizer.Detokenize(buffer.data(), buffer.size());
        static_cast<void>(detokenized_string);
        break;
      }
    }
  }

  return 0;
}

}  // namespace pw::tokenizer