1 #include <assert.h>
2 #include <stddef.h>
3 #include <stdint.h>
4 #include <algorithm>
5 #include <clocale>
6 #include <memory>
7 #include <regex>
8 #include <string>
9 
10 #include "flatbuffers/idl.h"
11 #include "test_init.h"
12 
// Bit layout of the first fuzzer byte (`flags`).
// Low nibble: code of the scalar type under test.
static constexpr uint8_t flags_scalar_type{ 0x0F };
// Bit 4: kind of quote used for the quoted re-test (" when set, ' when clear).
static constexpr uint8_t flags_quotes_kind{ 0x10 };
// Reserved for future use: json {named} or [unnamed].
// static constexpr uint8_t flags_json_bracer = 0x20;
17 
// Finds every occurrence of `subj` in `s` (overlapping ones included) and
// overwrites the first character of each occurrence with `repl`.
// The search resumes one character after each hit.
//   BreakSequence("testest", "tes", 'X') -> "XesXest".
//   BreakSequence("xxx", "xx", 'Y') -> "YYx".
// A null or empty `subj` matches nothing and leaves `s` untouched: an empty
// needle would otherwise "match" at s.size() and index past the end.
static void BreakSequence(std::string &s, const char *subj, char repl) {
  if (subj == nullptr || *subj == '\0') return;
  for (size_t pos = s.find(subj); pos != std::string::npos;
       pos = s.find(subj, pos + 1)) {
    // `pos` is a valid index here because the needle is not empty.
    s[pos] = repl;
  }
}
28 
// Trims `s` on both sides: every leading and trailing character that belongs
// to the character set `pattern` is dropped, inner characters are kept.
//   StripString("xy{xy}y", "xy") -> "{xy}"
// If `pos` is not null it receives the offset inside `s` of the first kept
// character, or 0 when nothing is kept.
static std::string StripString(const std::string &s, const char *pattern,
                               size_t *pos = nullptr) {
  const auto begin = s.find_first_not_of(pattern);
  const bool nothing_left = (begin == std::string::npos);
  if (pos) *pos = nothing_left ? 0 : begin;
  if (nothing_left) return std::string();
  // At least one character survives, so the backward search must succeed.
  const auto end = s.find_last_not_of(pattern);
  assert(end < s.length());
  assert(begin <= end);
  const auto count = end - begin + 1;
  return s.substr(begin, count);
}
44 
45 class RegexMatcher {
46  protected:
47   virtual bool MatchNumber(const std::string &input) const = 0;
48 
49  public:
50   virtual ~RegexMatcher() = default;
51 
52   struct MatchResult {
53     size_t pos{ 0 };
54     size_t len{ 0 };
55     bool res{ false };
56     bool quoted{ false };
57   };
58 
Match(const std::string & input) const59   MatchResult Match(const std::string &input) const {
60     MatchResult r;
61     // strip leading and trailing "spaces" accepted by flatbuffer
62     auto test = StripString(input, "\t\r\n ", &r.pos);
63     r.len = test.size();
64     // check quotes
65     if (test.size() >= 2) {
66       auto fch = test.front();
67       auto lch = test.back();
68       r.quoted = (fch == lch) && (fch == '\'' || fch == '\"');
69       if (r.quoted) {
70         // remove quotes for regex test
71         test = test.substr(1, test.size() - 2);
72       }
73     }
74     // Fast check:
75     if (test.empty()) return r;
76     // A string with a valid scalar shouldn't have non-ascii or non-printable
77     // symbols.
78     for (auto c : test) {
79       if ((c < ' ') || (c > '~')) return r;
80     }
81     // Check with regex
82     r.res = MatchNumber(test);
83     return r;
84   }
85 
MatchRegexList(const std::string & input,const std::vector<std::regex> & re_list) const86   bool MatchRegexList(const std::string &input,
87                       const std::vector<std::regex> &re_list) const {
88     auto str = StripString(input, " ");
89     if (str.empty()) return false;
90     for (auto &re : re_list) {
91       std::smatch match;
92       if (std::regex_match(str, match, re)) return true;
93     }
94     return false;
95   }
96 };
97 
98 class IntegerRegex : public RegexMatcher {
99  protected:
MatchNumber(const std::string & input) const100   bool MatchNumber(const std::string &input) const override {
101     static const std::vector<std::regex> re_list = {
102       std::regex{ R"(^[-+]?[0-9]+$)", std::regex_constants::optimize },
103 
104       std::regex{
105           R"(^[-+]?0[xX][0-9a-fA-F]+$)", std::regex_constants::optimize }
106     };
107     return MatchRegexList(input, re_list);
108   }
109 
110  public:
111   IntegerRegex() = default;
112   virtual ~IntegerRegex() = default;
113 };
114 
115 class UIntegerRegex : public RegexMatcher {
116  protected:
MatchNumber(const std::string & input) const117   bool MatchNumber(const std::string &input) const override {
118     static const std::vector<std::regex> re_list = {
119       std::regex{ R"(^[+]?[0-9]+$)", std::regex_constants::optimize },
120       std::regex{
121           R"(^[+]?0[xX][0-9a-fA-F]+$)", std::regex_constants::optimize },
122       // accept -0 number
123       std::regex{ R"(^[-](?:0[xX])?0+$)", std::regex_constants::optimize }
124     };
125     return MatchRegexList(input, re_list);
126   }
127 
128  public:
129   UIntegerRegex() = default;
130   virtual ~UIntegerRegex() = default;
131 };
132 
133 class BooleanRegex : public IntegerRegex {
134  protected:
MatchNumber(const std::string & input) const135   bool MatchNumber(const std::string &input) const override {
136     if (input == "true" || input == "false") return true;
137     return IntegerRegex::MatchNumber(input);
138   }
139 
140  public:
141   BooleanRegex() = default;
142   virtual ~BooleanRegex() = default;
143 };
144 
145 class FloatRegex : public RegexMatcher {
146  protected:
MatchNumber(const std::string & input) const147   bool MatchNumber(const std::string &input) const override {
148     static const std::vector<std::regex> re_list = {
149       // hex-float
150       std::regex{
151           R"(^[-+]?0[xX](?:(?:[.][0-9a-fA-F]+)|(?:[0-9a-fA-F]+[.][0-9a-fA-F]*)|(?:[0-9a-fA-F]+))[pP][-+]?[0-9]+$)",
152           std::regex_constants::optimize },
153       // dec-float
154       std::regex{
155           R"(^[-+]?(?:(?:[.][0-9]+)|(?:[0-9]+[.][0-9]*)|(?:[0-9]+))(?:[eE][-+]?[0-9]+)?$)",
156           std::regex_constants::optimize },
157 
158       std::regex{ R"(^[-+]?(?:nan|inf|infinity)$)",
159                   std::regex_constants::optimize | std::regex_constants::icase }
160     };
161     return MatchRegexList(input, re_list);
162   }
163 
164  public:
165   FloatRegex() = default;
166   virtual ~FloatRegex() = default;
167 };
168 
169 class ScalarReferenceResult {
170  private:
ScalarReferenceResult(const char * _type,RegexMatcher::MatchResult _matched)171   ScalarReferenceResult(const char *_type, RegexMatcher::MatchResult _matched)
172       : type(_type), matched(_matched) {}
173 
174  public:
175   // Decode scalar type and check if the input string satisfies the scalar type.
Check(uint8_t code,const std::string & input)176   static ScalarReferenceResult Check(uint8_t code, const std::string &input) {
177     switch (code) {
178       case 0x0: return { "double", FloatRegex().Match(input) };
179       case 0x1: return { "float", FloatRegex().Match(input) };
180       case 0x2: return { "int8", IntegerRegex().Match(input) };
181       case 0x3: return { "int16", IntegerRegex().Match(input) };
182       case 0x4: return { "int32", IntegerRegex().Match(input) };
183       case 0x5: return { "int64", IntegerRegex().Match(input) };
184       case 0x6: return { "uint8", UIntegerRegex().Match(input) };
185       case 0x7: return { "uint16", UIntegerRegex().Match(input) };
186       case 0x8: return { "uint32", UIntegerRegex().Match(input) };
187       case 0x9: return { "uint64", UIntegerRegex().Match(input) };
188       case 0xA: return { "bool", BooleanRegex().Match(input) };
189       default: return { "float", FloatRegex().Match(input) };
190     };
191   }
192 
193   const char *type;
194   const RegexMatcher::MatchResult matched;
195 };
196 
Parse(flatbuffers::Parser & parser,const std::string & json,std::string * _text)197 bool Parse(flatbuffers::Parser &parser, const std::string &json,
198            std::string *_text) {
199   auto done = parser.Parse(json.c_str());
200   if (done) {
201     TEST_EQ(GenerateText(parser, parser.builder_.GetBufferPointer(), _text),
202             true);
203   } else {
204     *_text = parser.error_;
205   }
206   return done;
207 }
208 
// Utility for test run.
// Out-of-class definition of the static member `one_time_init_` of
// OneTimeTestInit.
// NOTE(review): presumably the class comes from "test_init.h" and its
// constructor does the one-time setup of the fuzzer (e.g. the test locale
// returned by OneTimeTestInit::test_locale()) - confirm in that header.
OneTimeTestInit OneTimeTestInit::one_time_init_;
211 
212 // llvm std::regex have problem with stack overflow, limit maximum length.
213 // ./scalar_fuzzer -max_len=3000
LLVMFuzzerTestOneInput(const uint8_t * data,size_t size)214 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
215   // Reserve one byte for Parser flags and one byte for repetition counter.
216   if (size < 3) return 0;
217   const uint8_t flags = data[0];
218   // normalize to ascii alphabet
219   const int extra_rep_number = data[1] >= '0' ? (data[1] - '0') : 0;
220   data += 2;
221   size -= 2;  // bypass
222 
223   // Guarantee 0-termination.
224   const std::string original(reinterpret_cast<const char *>(data), size);
225   auto input = std::string(original.c_str());  // until '\0'
226   if (input.empty()) return 0;
227 
228   // Break comments in json to avoid complexity with regex matcher.
229   // The string " 12345 /* text */" will be accepted if insert it to string
230   // expression: "table X { Y: " + " 12345 /* text */" + "; }.
231   // But strings like this will complicate regex matcher.
232   // We reject this by transform "/* text */ 12345" to "@* text */ 12345".
233   BreakSequence(input, "//", '@');  // "//" -> "@/"
234   BreakSequence(input, "/*", '@');  // "/*" -> "@*"
235   // Break all known scalar functions (todo: add them to regex?):
236   for (auto f : { "deg", "rad", "sin", "cos", "tan", "asin", "acos", "atan" }) {
237     BreakSequence(input, f, '_');  // ident -> ident
238   }
239 
240   // Extract type of scalar from 'flags' and check if the input string satisfies
241   // the scalar type.
242   const auto ref_res =
243       ScalarReferenceResult::Check(flags & flags_scalar_type, input);
244   auto &recheck = ref_res.matched;
245 
246   // Create parser
247   flatbuffers::IDLOptions opts;
248   opts.force_defaults = true;
249   opts.output_default_scalars_in_json = true;
250   opts.indent_step = -1;
251   opts.strict_json = true;
252 
253   flatbuffers::Parser parser(opts);
254   auto schema =
255       "table X { Y: " + std::string(ref_res.type) + "; } root_type X;";
256   TEST_EQ_FUNC(parser.Parse(schema.c_str()), true);
257 
258   // The fuzzer can adjust the number repetition if a side-effects have found.
259   // Each test should pass at least two times to ensure that the parser doesn't
260   // have any hidden-states or locale-depended effects.
261   for (auto cnt = 0; cnt < (extra_rep_number + 2); cnt++) {
262     // Each even run (0,2,4..) will test locale independed code.
263     auto use_locale = !!OneTimeTestInit::test_locale() && (0 == (cnt % 2));
264     // Set new locale.
265     if (use_locale) {
266       FLATBUFFERS_ASSERT(setlocale(LC_ALL, OneTimeTestInit::test_locale()));
267     }
268 
269     // Parse original input as-is.
270     auto orig_scalar = "{ \"Y\" : " + input + " }";
271     std::string orig_back;
272     auto orig_done = Parse(parser, orig_scalar, &orig_back);
273 
274     if (recheck.res != orig_done) {
275       // look for "does not fit" or "doesn't fit" or "out of range"
276       auto not_fit =
277           (true == recheck.res)
278               ? ((orig_back.find("does not fit") != std::string::npos) ||
279                  (orig_back.find("out of range") != std::string::npos))
280               : false;
281 
282       if (false == not_fit) {
283         TEST_OUTPUT_LINE("Stage 1 failed: Parser(%d) != Regex(%d)", orig_done,
284                          recheck.res);
285         TEST_EQ_STR(orig_back.c_str(),
286                     input.substr(recheck.pos, recheck.len).c_str());
287         TEST_EQ_FUNC(orig_done, recheck.res);
288       }
289     }
290 
291     // Try to make quoted string and test it.
292     std::string qouted_input;
293     if (true == recheck.quoted) {
294       // we can't simply remove quotes, they may be nested "'12'".
295       // Original string "\'12\'" converted to "'12'".
296       // The string can be an invalid string by JSON rules, but after quotes
297       // removed can transform to valid.
298       assert(recheck.len >= 2);
299     } else {
300       const auto quote = (flags & flags_quotes_kind) ? '\"' : '\'';
301       qouted_input = input;  // copy
302       qouted_input.insert(recheck.pos + recheck.len, 1, quote);
303       qouted_input.insert(recheck.pos, 1, quote);
304     }
305 
306     // Test quoted version of the string
307     if (!qouted_input.empty()) {
308       auto fix_scalar = "{ \"Y\" : " + qouted_input + " }";
309       std::string fix_back;
310       auto fix_done = Parse(parser, fix_scalar, &fix_back);
311 
312       if (orig_done != fix_done) {
313         TEST_OUTPUT_LINE("Stage 2 failed: Parser(%d) != Regex(%d)", fix_done,
314                          orig_done);
315         TEST_EQ_STR(fix_back.c_str(), orig_back.c_str());
316       }
317       if (orig_done) { TEST_EQ_STR(fix_back.c_str(), orig_back.c_str()); }
318       TEST_EQ_FUNC(fix_done, orig_done);
319     }
320 
321     // Create new parser and test default value
322     if (true == orig_done) {
323       flatbuffers::Parser def_parser(opts);  // re-use options
324       auto def_schema = "table X { Y: " + std::string(ref_res.type) + " = " +
325                         input + "; } root_type X;" +
326                         "{}";  // <- with empty json {}!
327 
328       auto def_done = def_parser.Parse(def_schema.c_str());
329       if (false == def_done) {
330         TEST_OUTPUT_LINE("Stage 3.1 failed with _error = %s",
331                          def_parser.error_.c_str());
332         FLATBUFFERS_ASSERT(false);
333       }
334       // Compare with print.
335       std::string ref_string, def_string;
336       FLATBUFFERS_ASSERT(GenerateText(
337           parser, parser.builder_.GetBufferPointer(), &ref_string));
338       FLATBUFFERS_ASSERT(GenerateText(
339           def_parser, def_parser.builder_.GetBufferPointer(), &def_string));
340       if (ref_string != def_string) {
341         TEST_OUTPUT_LINE("Stage 3.2 failed: '%s' != '%s'", def_string.c_str(),
342                          ref_string.c_str());
343         FLATBUFFERS_ASSERT(false);
344       }
345     }
346 
347     // Restore locale.
348     if (use_locale) { FLATBUFFERS_ASSERT(setlocale(LC_ALL, "C")); }
349   }
350   return 0;
351 }
352