1 // Copyright 2020 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14 
15 // This file implements a basic fuzz test for the Detokenizer.
16 // An instance of the Detokenizer is created from a minimal, nearly-empty token
17 // database. Fuzz data is fed to the detokenizer in various supported input
// argument formats at random, which then decodes this data and tries to match
19 // it to tokens in the database.
20 
21 #include <cstddef>
22 #include <cstdint>
23 #include <cstring>
24 #include <vector>
25 
26 #include "pw_fuzzer/fuzzed_data_provider.h"
27 #include "pw_preprocessor/util.h"
28 #include "pw_tokenizer/detokenize.h"
29 
30 namespace pw::tokenizer {
31 namespace {
32 
33 constexpr size_t kFuzzRangeMin = 0;
34 constexpr size_t kFuzzRangeMax = 10000;
35 
36 enum DetokenizeBufferArgumentType : uint8_t {
37   kSpan = 0,
38   kStringView,
39   kPtrAndLength,
40   kMaxValue = kPtrAndLength
41 };
42 
43 // In order to better fuzz the detokenizer, rather than use an empty token
44 // database, we construct a minimal database with 4 entries out of a string
45 // literal array that matches the token database format (see token_database.h
46 // for detailed info on the database entry format)
47 alignas(TokenDatabase::RawEntry) constexpr char kBasicData[] =
48     "TOKENS\0\0"
49     "\x04\x00\x00\x00"
50     "\0\0\0\0"
51     "\x01\x00\x00\x00----"
52     "\x05\x00\x00\x00----"
53     "\xFF\x00\x00\x00----"
54     "\xFF\xEE\xEE\xDD----"
55     "One\0"
56     "TWO\0"
57     "333\0"
58     "FOUR";
59 
60 }  // namespace
61 
LLVMFuzzerTestOneInput(const uint8_t * data,size_t size)62 extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
63   static Detokenizer detokenizer(TokenDatabase::Create<kBasicData>());
64 
65   FuzzedDataProvider provider(data, size);
66 
67   while (provider.remaining_bytes() != 0) {
68     // Map the first word of the remaining fuzz data to a buffer argument
69     // type, and feed the Detokenizer with a random length buffer to be
70     // detokenized in the relevant format. The detokenized string returned
71     // is itself of little consequence to this test.
72     switch (provider.ConsumeEnum<DetokenizeBufferArgumentType>()) {
73       case kSpan: {
74         size_t consumed_size = provider.ConsumeIntegralInRange<size_t>(
75             kFuzzRangeMin, kFuzzRangeMax);
76         std::vector<uint8_t> buffer =
77             provider.ConsumeBytes<uint8_t>(consumed_size);
78         auto detokenized_string =
79             detokenizer.Detokenize(std::span(&buffer[0], buffer.size()));
80         static_cast<void>(detokenized_string);
81         break;
82       }
83 
84       case kStringView: {
85         std::string str =
86             provider.ConsumeRandomLengthString(provider.remaining_bytes());
87         auto detokenized_string = detokenizer.Detokenize(str);
88         static_cast<void>(detokenized_string);
89         break;
90       }
91 
92       case kPtrAndLength: {
93         size_t consumed_size = provider.ConsumeIntegralInRange<size_t>(
94             kFuzzRangeMin, kFuzzRangeMax);
95         std::vector<uint8_t> buffer =
96             provider.ConsumeBytes<uint8_t>(consumed_size);
97         auto detokenized_string =
98             detokenizer.Detokenize(&buffer[0], buffer.size());
99         static_cast<void>(detokenized_string);
100         break;
101       }
102     }
103   }
104 
105   return 0;
106 }
107 
108 }  // namespace pw::tokenizer
109