1 /*
2  * Copyright 2014 Google Inc. All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <algorithm>
18 #include <cmath>
19 #include <list>
20 #include <string>
21 #include <utility>
22 
23 #include "flatbuffers/idl.h"
24 #include "flatbuffers/util.h"
25 
26 namespace flatbuffers {
27 
28 // Reflects the version at the compiling time of binary(lib/dll/so).
FLATBUFFERS_VERSION()29 const char *FLATBUFFERS_VERSION() {
30   // clang-format off
31   return
32       FLATBUFFERS_STRING(FLATBUFFERS_VERSION_MAJOR) "."
33       FLATBUFFERS_STRING(FLATBUFFERS_VERSION_MINOR) "."
34       FLATBUFFERS_STRING(FLATBUFFERS_VERSION_REVISION);
35   // clang-format on
36 }
37 
// Double-precision approximation of pi.
const double kPi = 3.14159265358979323846;
39 
40 // clang-format off
// Table of IDL type-name strings: one IDLTYPE entry for every type listed by
// FLATBUFFERS_GEN_TYPES, in that macro's order, followed by a terminating
// nullptr entry.
const char *const kTypeNames[] = {
  #define FLATBUFFERS_TD(ENUM, IDLTYPE, ...) \
    IDLTYPE,
    FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
  #undef FLATBUFFERS_TD
  nullptr
};
48 
// Table of sizeof(CTYPE) for every type listed by FLATBUFFERS_GEN_TYPES, in
// that macro's order.
// NOTE(review): each size is stored in a `char`, which presumably relies on
// every CTYPE being only a few bytes wide - confirm against the type list.
const char kTypeSizes[] = {
  #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, ...) \
    sizeof(CTYPE),
    FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
  #undef FLATBUFFERS_TD
};
55 // clang-format on
56 
// The enums in the reflection schema should match the ones we use internally.
// Only the last element is compared (BASE_TYPE_UNION against
// reflection::Union) as a compile-time check that the two have not gone out
// of sync.
static_assert(BASE_TYPE_UNION == static_cast<BaseType>(reflection::Union),
              "enums don't match");
61 
// Any parsing calls have to be wrapped in this macro, which automates
// handling of recursive error checking a bit. It evaluates `call` once, checks
// the received CheckedError object, and returns it from the enclosing function
// straight away on error.
// Because the expansion contains a `return`, the macro can only be used inside
// functions that return the checked error object.
#define ECHECK(call)           \
  {                            \
    auto ce = (call);          \
    if (ce.Check()) return ce; \
  }
70 
// These two functions are called hundreds of times below, so define a short
// form:
// NEXT() advances to the next token via Next(); EXPECT(tok) goes through
// Expect(tok). Both return early from the enclosing function on error
// through ECHECK.
#define NEXT() ECHECK(Next())
#define EXPECT(tok) ECHECK(Expect(tok))
75 
ValidateUTF8(const std::string & str)76 static bool ValidateUTF8(const std::string &str) {
77   const char *s = &str[0];
78   const char *const sEnd = s + str.length();
79   while (s < sEnd) {
80     if (FromUTF8(&s) < 0) { return false; }
81   }
82   return true;
83 }
84 
// Convert an underscore_based_identifier in to camelCase.
// Each '_' that is followed by another character is dropped and that following
// character is uppercased; a trailing '_' is kept as is.
// Also uppercases the first character if first is true.
std::string MakeCamel(const std::string &in, bool first) {
  std::string s;
  s.reserve(in.length());
  for (size_t i = 0; i < in.length(); i++) {
    // toupper() requires a value representable as unsigned char (or EOF);
    // passing a negative plain char (e.g. a UTF-8 byte) is undefined
    // behaviour, hence the casts through unsigned char.
    if (!i && first)
      s += static_cast<char>(toupper(static_cast<unsigned char>(in[0])));
    else if (in[i] == '_' && i + 1 < in.length())
      s += static_cast<char>(toupper(static_cast<unsigned char>(in[++i])));
    else
      s += in[i];
  }
  return s;
}
99 
// Convert an underscore_based_identifier in to screaming snake case:
// every character except '_' is uppercased, underscores are kept.
std::string MakeScreamingCamel(const std::string &in) {
  std::string s;
  s.reserve(in.length());
  for (const char ch : in) {
    if (ch != '_') {
      // toupper() must not be given a negative plain char (undefined
      // behaviour), so go through unsigned char first.
      s += static_cast<char>(toupper(static_cast<unsigned char>(ch)));
    } else {
      s += ch;
    }
  }
  return s;
}
111 
DeserializeDoc(std::vector<std::string> & doc,const Vector<Offset<String>> * documentation)112 void DeserializeDoc(std::vector<std::string> &doc,
113                     const Vector<Offset<String>> *documentation) {
114   if (documentation == nullptr) return;
115   for (uoffset_t index = 0; index < documentation->size(); index++)
116     doc.push_back(documentation->Get(index)->str());
117 }
118 
Message(const std::string & msg)119 void Parser::Message(const std::string &msg) {
120   if (!error_.empty()) error_ += "\n";  // log all warnings and errors
121   error_ += file_being_parsed_.length() ? AbsolutePath(file_being_parsed_) : "";
122   // clang-format off
123 
124   #ifdef _WIN32  // MSVC alike
125     error_ +=
126         "(" + NumToString(line_) + ", " + NumToString(CursorPosition()) + ")";
127   #else  // gcc alike
128     if (file_being_parsed_.length()) error_ += ":";
129     error_ += NumToString(line_) + ": " + NumToString(CursorPosition());
130   #endif
131   // clang-format on
132   error_ += ": " + msg;
133 }
134 
Warning(const std::string & msg)135 void Parser::Warning(const std::string &msg) { Message("warning: " + msg); }
136 
Error(const std::string & msg)137 CheckedError Parser::Error(const std::string &msg) {
138   Message("error: " + msg);
139   return CheckedError(true);
140 }
141 
// Returns a CheckedError constructed with `false`, i.e. the "no error" result
// that parsing functions return on success.
inline CheckedError NoError() { return CheckedError(false); }
143 
RecurseError()144 CheckedError Parser::RecurseError() {
145   return Error("maximum parsing recursion of " +
146                NumToString(FLATBUFFERS_MAX_PARSING_DEPTH) + " reached");
147 }
148 
Recurse(F f)149 template<typename F> CheckedError Parser::Recurse(F f) {
150   if (recurse_protection_counter >= (FLATBUFFERS_MAX_PARSING_DEPTH))
151     return RecurseError();
152   recurse_protection_counter++;
153   auto ce = f();
154   recurse_protection_counter--;
155   return ce;
156 }
157 
TypeToIntervalString()158 template<typename T> std::string TypeToIntervalString() {
159   return "[" + NumToString((flatbuffers::numeric_limits<T>::lowest)()) + "; " +
160          NumToString((flatbuffers::numeric_limits<T>::max)()) + "]";
161 }
162 
163 // atot: template version of atoi/atof: convert a string to an instance of T.
164 template<typename T>
atot(const char * s,Parser & parser,T * val)165 inline CheckedError atot(const char *s, Parser &parser, T *val) {
166   auto done = StringToNumber(s, val);
167   if (done) return NoError();
168   if (0 == *val)
169     return parser.Error("invalid number: \"" + std::string(s) + "\"");
170   else
171     return parser.Error("invalid number: \"" + std::string(s) + "\"" +
172                         ", constant does not fit " + TypeToIntervalString<T>());
173 }
174 template<>
atot(const char * s,Parser & parser,Offset<void> * val)175 inline CheckedError atot<Offset<void>>(const char *s, Parser &parser,
176                                        Offset<void> *val) {
177   (void)parser;
178   *val = Offset<void>(atoi(s));
179   return NoError();
180 }
181 
GetFullyQualifiedName(const std::string & name,size_t max_components) const182 std::string Namespace::GetFullyQualifiedName(const std::string &name,
183                                              size_t max_components) const {
184   // Early exit if we don't have a defined namespace.
185   if (components.empty() || !max_components) { return name; }
186   std::string stream_str;
187   for (size_t i = 0; i < std::min(components.size(), max_components); i++) {
188     if (i) { stream_str += '.'; }
189     stream_str += std::string(components[i]);
190   }
191   if (name.length()) {
192     stream_str += '.';
193     stream_str += name;
194   }
195   return stream_str;
196 }
197 
// Declare tokens we'll use. Single character tokens are represented by their
// ascii character code (e.g. '{'), all others by values starting at 256.
// Each TD(NAME, VALUE, STRING) entry yields the enumerator kToken<NAME> with
// the given value; STRING is its human-readable description.
// clang-format off
#define FLATBUFFERS_GEN_TOKENS(TD) \
  TD(Eof, 256, "end of file") \
  TD(StringConstant, 257, "string constant") \
  TD(IntegerConstant, 258, "integer constant") \
  TD(FloatConstant, 259, "float constant") \
  TD(Identifier, 260, "identifier")
#ifdef __GNUC__
__extension__  // Stop GCC complaining about trailing comma with -Wpedantic.
#endif
enum {
  #define FLATBUFFERS_TOKEN(NAME, VALUE, STRING) kToken ## NAME = VALUE,
    FLATBUFFERS_GEN_TOKENS(FLATBUFFERS_TOKEN)
  #undef FLATBUFFERS_TOKEN
};
215 
TokenToString(int t)216 static std::string TokenToString(int t) {
217   static const char * const tokens[] = {
218     #define FLATBUFFERS_TOKEN(NAME, VALUE, STRING) STRING,
219       FLATBUFFERS_GEN_TOKENS(FLATBUFFERS_TOKEN)
220     #undef FLATBUFFERS_TOKEN
221     #define FLATBUFFERS_TD(ENUM, IDLTYPE, ...) \
222       IDLTYPE,
223       FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
224     #undef FLATBUFFERS_TD
225   };
226   if (t < 256) {  // A single ascii char token.
227     std::string s;
228     s.append(1, static_cast<char>(t));
229     return s;
230   } else {       // Other tokens.
231     return tokens[t - 256];
232   }
233 }
234 // clang-format on
235 
TokenToStringId(int t) const236 std::string Parser::TokenToStringId(int t) const {
237   return t == kTokenIdentifier ? attribute_ : TokenToString(t);
238 }
239 
240 // Parses exactly nibbles worth of hex digits into a number, or error.
ParseHexNum(int nibbles,uint64_t * val)241 CheckedError Parser::ParseHexNum(int nibbles, uint64_t *val) {
242   FLATBUFFERS_ASSERT(nibbles > 0);
243   for (int i = 0; i < nibbles; i++)
244     if (!is_xdigit(cursor_[i]))
245       return Error("escape code must be followed by " + NumToString(nibbles) +
246                    " hex digits");
247   std::string target(cursor_, cursor_ + nibbles);
248   *val = StringToUInt(target.c_str(), 16);
249   cursor_ += nibbles;
250   return NoError();
251 }
252 
SkipByteOrderMark()253 CheckedError Parser::SkipByteOrderMark() {
254   if (static_cast<unsigned char>(*cursor_) != 0xef) return NoError();
255   cursor_++;
256   if (static_cast<unsigned char>(*cursor_) != 0xbb)
257     return Error("invalid utf-8 byte order mark");
258   cursor_++;
259   if (static_cast<unsigned char>(*cursor_) != 0xbf)
260     return Error("invalid utf-8 byte order mark");
261   cursor_++;
262   return NoError();
263 }
264 
IsIdentifierStart(char c)265 static inline bool IsIdentifierStart(char c) {
266   return is_alpha(c) || (c == '_');
267 }
268 
// Lexer step: scans forward from cursor_ to the next token, stores its kind in
// token_ and, where the token has text (string constants, identifiers,
// numbers), stores that text in attribute_. Whitespace and comments are
// skipped; `///` documentation comments are collected into doc_comment_.
// Returns NoError() when a token (including end of file) was produced, or the
// result of Error() when the input is malformed.
CheckedError Parser::Next() {
  doc_comment_.clear();
  // The very start of the source counts as being on a fresh line for the
  // documentation-comment check further down.
  bool seen_newline = cursor_ == source_;
  attribute_.clear();
  attr_is_trivial_ascii_string_ = true;
  for (;;) {
    char c = *cursor_++;
    // Default: a single-character token is its own character code.
    token_ = c;
    switch (c) {
      case '\0':
        // Step back onto the terminator so the cursor never moves past it.
        cursor_--;
        token_ = kTokenEof;
        return NoError();
      case ' ':
      case '\r':
      case '\t': break;
      case '\n':
        MarkNewLine();
        seen_newline = true;
        break;
      case '{':
      case '}':
      case '(':
      case ')':
      case '[':
      case ']':
      case ',':
      case ':':
      case ';':
      case '=': return NoError();
      case '\"':
      case '\'': {
        // String constant; it ends at the same quote character that opened it.
        // Holds a pending \uD800-\uDBFF escape until its low surrogate arrives,
        // -1 when none is pending.
        int unicode_high_surrogate = -1;

        while (*cursor_ != c) {
          // Reject control characters (this includes the '\0' terminator, so an
          // unterminated string ends here). The signed-char test keeps bytes
          // with the high bit set from being rejected when plain char is
          // signed.
          if (*cursor_ < ' ' && static_cast<signed char>(*cursor_) >= 0)
            return Error("illegal character in string constant");
          if (*cursor_ == '\\') {
            attr_is_trivial_ascii_string_ = false;  // has escape sequence
            cursor_++;
            // A pending high surrogate may only be followed by another \u
            // escape.
            if (unicode_high_surrogate != -1 && *cursor_ != 'u') {
              return Error(
                  "illegal Unicode sequence (unpaired high surrogate)");
            }
            switch (*cursor_) {
              case 'n':
                attribute_ += '\n';
                cursor_++;
                break;
              case 't':
                attribute_ += '\t';
                cursor_++;
                break;
              case 'r':
                attribute_ += '\r';
                cursor_++;
                break;
              case 'b':
                attribute_ += '\b';
                cursor_++;
                break;
              case 'f':
                attribute_ += '\f';
                cursor_++;
                break;
              case '\"':
                attribute_ += '\"';
                cursor_++;
                break;
              case '\'':
                attribute_ += '\'';
                cursor_++;
                break;
              case '\\':
                attribute_ += '\\';
                cursor_++;
                break;
              case '/':
                attribute_ += '/';
                cursor_++;
                break;
              case 'x': {  // Not in the JSON standard
                // \xHH: exactly two hex digits, appended as one raw byte.
                cursor_++;
                uint64_t val;
                ECHECK(ParseHexNum(2, &val));
                attribute_ += static_cast<char>(val);
                break;
              }
              case 'u': {
                // \uHHHH: exactly four hex digits forming a UTF-16 code unit.
                cursor_++;
                uint64_t val;
                ECHECK(ParseHexNum(4, &val));
                if (val >= 0xD800 && val <= 0xDBFF) {
                  // High surrogate: remember it and wait for the low half.
                  if (unicode_high_surrogate != -1) {
                    return Error(
                        "illegal Unicode sequence (multiple high surrogates)");
                  } else {
                    unicode_high_surrogate = static_cast<int>(val);
                  }
                } else if (val >= 0xDC00 && val <= 0xDFFF) {
                  // Low surrogate: only valid right after a high surrogate.
                  if (unicode_high_surrogate == -1) {
                    return Error(
                        "illegal Unicode sequence (unpaired low surrogate)");
                  } else {
                    // Combine the pair into one code point and emit it as
                    // UTF-8.
                    int code_point = 0x10000 +
                                     ((unicode_high_surrogate & 0x03FF) << 10) +
                                     (val & 0x03FF);
                    ToUTF8(code_point, &attribute_);
                    unicode_high_surrogate = -1;
                  }
                } else {
                  // Ordinary code unit outside the surrogate ranges.
                  if (unicode_high_surrogate != -1) {
                    return Error(
                        "illegal Unicode sequence (unpaired high surrogate)");
                  }
                  ToUTF8(static_cast<int>(val), &attribute_);
                }
                break;
              }
              default: return Error("unknown escape code in string constant");
            }
          } else {  // printable chars + UTF-8 bytes
            if (unicode_high_surrogate != -1) {
              return Error(
                  "illegal Unicode sequence (unpaired high surrogate)");
            }
            // reset if non-printable
            attr_is_trivial_ascii_string_ &=
                check_ascii_range(*cursor_, ' ', '~');

            attribute_ += *cursor_++;
          }
        }
        // The string ended while a high surrogate was still pending.
        if (unicode_high_surrogate != -1) {
          return Error("illegal Unicode sequence (unpaired high surrogate)");
        }
        cursor_++;  // Skip the closing quote.
        // Plain printable-ascii strings need no UTF-8 validation.
        if (!attr_is_trivial_ascii_string_ && !opts.allow_non_utf8 &&
            !ValidateUTF8(attribute_)) {
          return Error("illegal UTF-8 sequence");
        }
        token_ = kTokenStringConstant;
        return NoError();
      }
      case '/':
        if (*cursor_ == '/') {
          // Line comment: runs to the end of the line (or of the input).
          const char *start = ++cursor_;
          while (*cursor_ && *cursor_ != '\n' && *cursor_ != '\r') cursor_++;
          if (*start == '/') {  // documentation comment
            if (!seen_newline)
              return Error(
                  "a documentation comment should be on a line on its own");
            // Store the text following the third '/'.
            doc_comment_.push_back(std::string(start + 1, cursor_));
          }
          break;
        } else if (*cursor_ == '*') {
          cursor_++;
          // TODO: make nested.
          // Block comment: skip to the closing "*/", still counting newlines.
          while (*cursor_ != '*' || cursor_[1] != '/') {
            if (*cursor_ == '\n') MarkNewLine();
            if (!*cursor_) return Error("end of file in comment");
            cursor_++;
          }
          cursor_ += 2;
          break;
        }
        FLATBUFFERS_FALLTHROUGH();  // else fall thru
      default:
        const auto has_sign = (c == '+') || (c == '-');
        // '-'/'+' and following identifier - can be a predefined constant like:
        // NAN, INF, PI, etc.
        if (IsIdentifierStart(c) || (has_sign && IsIdentifierStart(*cursor_))) {
          // Collect all chars of an identifier:
          const char *start = cursor_ - 1;
          while (IsIdentifierStart(*cursor_) || is_digit(*cursor_)) cursor_++;
          attribute_.append(start, cursor_);
          // A signed identifier is handed on as a string constant rather than
          // as an identifier.
          token_ = has_sign ? kTokenStringConstant : kTokenIdentifier;
          return NoError();
        }

        auto dot_lvl =
            (c == '.') ? 0 : 1;  // dot_lvl==0 <=> exactly one '.' seen
        // A lone '.' that is not followed by a digit is returned as the
        // single-character token '.'.
        if (!dot_lvl && !is_digit(*cursor_)) return NoError();  // enum?
        // Parser accepts hexadecimal-floating-literal (see C++ 5.13.4).
        if (is_digit(c) || has_sign || !dot_lvl) {
          // `start` marks the first character of the literal (sign or '.'
          // included); `start_digits` marks where digits are required, so
          // that "cursor_ > start_digits" means at least one was consumed.
          const auto start = cursor_ - 1;
          auto start_digits = !is_digit(c) ? cursor_ : cursor_ - 1;
          if (!is_digit(c) && is_digit(*cursor_)) {
            start_digits = cursor_;  // see digit in cursor_ position
            c = *cursor_++;
          }
          // hex-float can't begin with '.'
          auto use_hex = dot_lvl && (c == '0') && is_alpha_char(*cursor_, 'X');
          if (use_hex) start_digits = ++cursor_;  // '0x' is the prefix, skip it
          // Read an integer number or mantissa of float-point number.
          // Each '.' decrements dot_lvl; a second '.' drives it below zero,
          // which ends the loop and marks the number as invalid.
          do {
            if (use_hex) {
              while (is_xdigit(*cursor_)) cursor_++;
            } else {
              while (is_digit(*cursor_)) cursor_++;
            }
          } while ((*cursor_ == '.') && (++cursor_) && (--dot_lvl >= 0));
          // Exponent of float-point number.
          if ((dot_lvl >= 0) && (cursor_ > start_digits)) {
            // The exponent suffix of hexadecimal float number is mandatory.
            if (use_hex && !dot_lvl) start_digits = cursor_;
            if ((use_hex && is_alpha_char(*cursor_, 'P')) ||
                is_alpha_char(*cursor_, 'E')) {
              dot_lvl = 0;  // Emulate dot to signal about float-point number.
              cursor_++;
              if (*cursor_ == '+' || *cursor_ == '-') cursor_++;
              start_digits = cursor_;  // the exponent-part has to have digits
              // Exponent is decimal integer number
              while (is_digit(*cursor_)) cursor_++;
              if (*cursor_ == '.') {
                cursor_++;  // If see a dot treat it as part of invalid number.
                dot_lvl = -1;  // Fall thru to Error().
              }
            }
          }
          // Finalize.
          if ((dot_lvl >= 0) && (cursor_ > start_digits)) {
            attribute_.append(start, cursor_);
            // dot_lvl still 1: no '.' or exponent was seen, so it is an
            // integer.
            token_ = dot_lvl ? kTokenIntegerConstant : kTokenFloatConstant;
            return NoError();
          } else {
            return Error("invalid number: " + std::string(start, cursor_));
          }
        }
        // Anything else is not part of the grammar. Characters outside the
        // printable ascii range are reported by numeric code.
        std::string ch;
        ch = c;
        if (false == check_ascii_range(c, ' ', '~'))
          ch = "code: " + NumToString(c);
        return Error("illegal character: " + ch);
    }
  }
}
506 
507 // Check if a given token is next.
Is(int t) const508 bool Parser::Is(int t) const { return t == token_; }
509 
IsIdent(const char * id) const510 bool Parser::IsIdent(const char *id) const {
511   return token_ == kTokenIdentifier && attribute_ == id;
512 }
513 
514 // Expect a given token to be next, consume it, or error if not present.
Expect(int t)515 CheckedError Parser::Expect(int t) {
516   if (t != token_) {
517     return Error("expecting: " + TokenToString(t) +
518                  " instead got: " + TokenToStringId(token_));
519   }
520   NEXT();
521   return NoError();
522 }
523 
ParseNamespacing(std::string * id,std::string * last)524 CheckedError Parser::ParseNamespacing(std::string *id, std::string *last) {
525   while (Is('.')) {
526     NEXT();
527     *id += ".";
528     *id += attribute_;
529     if (last) *last = attribute_;
530     EXPECT(kTokenIdentifier);
531   }
532   return NoError();
533 }
534 
LookupEnum(const std::string & id)535 EnumDef *Parser::LookupEnum(const std::string &id) {
536   // Search thru parent namespaces.
537   for (int components = static_cast<int>(current_namespace_->components.size());
538        components >= 0; components--) {
539     auto ed = enums_.Lookup(
540         current_namespace_->GetFullyQualifiedName(id, components));
541     if (ed) return ed;
542   }
543   return nullptr;
544 }
545 
LookupStruct(const std::string & id) const546 StructDef *Parser::LookupStruct(const std::string &id) const {
547   auto sd = structs_.Lookup(id);
548   if (sd) sd->refcount++;
549   return sd;
550 }
551 
ParseTypeIdent(Type & type)552 CheckedError Parser::ParseTypeIdent(Type &type) {
553   std::string id = attribute_;
554   EXPECT(kTokenIdentifier);
555   ECHECK(ParseNamespacing(&id, nullptr));
556   auto enum_def = LookupEnum(id);
557   if (enum_def) {
558     type = enum_def->underlying_type;
559     if (enum_def->is_union) type.base_type = BASE_TYPE_UNION;
560   } else {
561     type.base_type = BASE_TYPE_STRUCT;
562     type.struct_def = LookupCreateStruct(id);
563   }
564   return NoError();
565 }
566 
567 // Parse any IDL type.
ParseType(Type & type)568 CheckedError Parser::ParseType(Type &type) {
569   if (token_ == kTokenIdentifier) {
570     if (IsIdent("bool")) {
571       type.base_type = BASE_TYPE_BOOL;
572       NEXT();
573     } else if (IsIdent("byte") || IsIdent("int8")) {
574       type.base_type = BASE_TYPE_CHAR;
575       NEXT();
576     } else if (IsIdent("ubyte") || IsIdent("uint8")) {
577       type.base_type = BASE_TYPE_UCHAR;
578       NEXT();
579     } else if (IsIdent("short") || IsIdent("int16")) {
580       type.base_type = BASE_TYPE_SHORT;
581       NEXT();
582     } else if (IsIdent("ushort") || IsIdent("uint16")) {
583       type.base_type = BASE_TYPE_USHORT;
584       NEXT();
585     } else if (IsIdent("int") || IsIdent("int32")) {
586       type.base_type = BASE_TYPE_INT;
587       NEXT();
588     } else if (IsIdent("uint") || IsIdent("uint32")) {
589       type.base_type = BASE_TYPE_UINT;
590       NEXT();
591     } else if (IsIdent("long") || IsIdent("int64")) {
592       type.base_type = BASE_TYPE_LONG;
593       NEXT();
594     } else if (IsIdent("ulong") || IsIdent("uint64")) {
595       type.base_type = BASE_TYPE_ULONG;
596       NEXT();
597     } else if (IsIdent("float") || IsIdent("float32")) {
598       type.base_type = BASE_TYPE_FLOAT;
599       NEXT();
600     } else if (IsIdent("double") || IsIdent("float64")) {
601       type.base_type = BASE_TYPE_DOUBLE;
602       NEXT();
603     } else if (IsIdent("string")) {
604       type.base_type = BASE_TYPE_STRING;
605       NEXT();
606     } else {
607       ECHECK(ParseTypeIdent(type));
608     }
609   } else if (token_ == '[') {
610     NEXT();
611     Type subtype;
612     ECHECK(Recurse([&]() { return ParseType(subtype); }));
613     if (IsSeries(subtype)) {
614       // We could support this, but it will complicate things, and it's
615       // easier to work around with a struct around the inner vector.
616       return Error("nested vector types not supported (wrap in table first)");
617     }
618     if (token_ == ':') {
619       NEXT();
620       if (token_ != kTokenIntegerConstant) {
621         return Error("length of fixed-length array must be an integer value");
622       }
623       uint16_t fixed_length = 0;
624       bool check = StringToNumber(attribute_.c_str(), &fixed_length);
625       if (!check || fixed_length < 1) {
626         return Error(
627             "length of fixed-length array must be positive and fit to "
628             "uint16_t type");
629       }
630       type = Type(BASE_TYPE_ARRAY, subtype.struct_def, subtype.enum_def,
631                   fixed_length);
632       NEXT();
633     } else {
634       type = Type(BASE_TYPE_VECTOR, subtype.struct_def, subtype.enum_def);
635     }
636     type.element = subtype.base_type;
637     EXPECT(']');
638   } else {
639     return Error("illegal type syntax");
640   }
641   return NoError();
642 }
643 
AddField(StructDef & struct_def,const std::string & name,const Type & type,FieldDef ** dest)644 CheckedError Parser::AddField(StructDef &struct_def, const std::string &name,
645                               const Type &type, FieldDef **dest) {
646   auto &field = *new FieldDef();
647   field.value.offset =
648       FieldIndexToOffset(static_cast<voffset_t>(struct_def.fields.vec.size()));
649   field.name = name;
650   field.file = struct_def.file;
651   field.value.type = type;
652   if (struct_def.fixed) {  // statically compute the field offset
653     auto size = InlineSize(type);
654     auto alignment = InlineAlignment(type);
655     // structs_ need to have a predictable format, so we need to align to
656     // the largest scalar
657     struct_def.minalign = std::max(struct_def.minalign, alignment);
658     struct_def.PadLastField(alignment);
659     field.value.offset = static_cast<voffset_t>(struct_def.bytesize);
660     struct_def.bytesize += size;
661   }
662   if (struct_def.fields.Add(name, &field))
663     return Error("field already exists: " + name);
664   *dest = &field;
665   return NoError();
666 }
667 
ParseField(StructDef & struct_def)668 CheckedError Parser::ParseField(StructDef &struct_def) {
669   std::string name = attribute_;
670 
671   if (LookupCreateStruct(name, false, false))
672     return Error("field name can not be the same as table/struct name");
673 
674   std::vector<std::string> dc = doc_comment_;
675   EXPECT(kTokenIdentifier);
676   EXPECT(':');
677   Type type;
678   ECHECK(ParseType(type));
679 
680   if (struct_def.fixed && !IsScalar(type.base_type) && !IsStruct(type) &&
681       !IsArray(type))
682     return Error("structs_ may contain only scalar or struct fields");
683 
684   if (!struct_def.fixed && IsArray(type))
685     return Error("fixed-length array in table must be wrapped in struct");
686 
687   if (IsArray(type) && !SupportsAdvancedArrayFeatures()) {
688     return Error(
689         "Arrays are not yet supported in all "
690         "the specified programming languages.");
691   }
692 
693   FieldDef *typefield = nullptr;
694   if (type.base_type == BASE_TYPE_UNION) {
695     // For union fields, add a second auto-generated field to hold the type,
696     // with a special suffix.
697     ECHECK(AddField(struct_def, name + UnionTypeFieldSuffix(),
698                     type.enum_def->underlying_type, &typefield));
699   } else if (type.base_type == BASE_TYPE_VECTOR &&
700              type.element == BASE_TYPE_UNION) {
701     // Only cpp, js and ts supports the union vector feature so far.
702     if (!SupportsAdvancedUnionFeatures()) {
703       return Error(
704           "Vectors of unions are not yet supported in all "
705           "the specified programming languages.");
706     }
707     // For vector of union fields, add a second auto-generated vector field to
708     // hold the types, with a special suffix.
709     Type union_vector(BASE_TYPE_VECTOR, nullptr, type.enum_def);
710     union_vector.element = BASE_TYPE_UTYPE;
711     ECHECK(AddField(struct_def, name + UnionTypeFieldSuffix(), union_vector,
712                     &typefield));
713   }
714 
715   FieldDef *field;
716   ECHECK(AddField(struct_def, name, type, &field));
717 
718   if (token_ == '=') {
719     NEXT();
720     ECHECK(ParseSingleValue(&field->name, field->value, true));
721     if (!IsScalar(type.base_type) ||
722         (struct_def.fixed && field->value.constant != "0"))
723       return Error(
724           "default values currently only supported for scalars in tables");
725   }
726   // Append .0 if the value has not it (skip hex and scientific floats).
727   // This suffix needed for generated C++ code.
728   if (IsFloat(type.base_type)) {
729     auto &text = field->value.constant;
730     FLATBUFFERS_ASSERT(false == text.empty());
731     auto s = text.c_str();
732     while (*s == ' ') s++;
733     if (*s == '-' || *s == '+') s++;
734     // 1) A float constants (nan, inf, pi, etc) is a kind of identifier.
735     // 2) A float number needn't ".0" at the end if it has exponent.
736     if ((false == IsIdentifierStart(*s)) &&
737         (std::string::npos == field->value.constant.find_first_of(".eEpP"))) {
738       field->value.constant += ".0";
739     }
740   }
741   if (type.enum_def) {
742     // The type.base_type can only be scalar, union, array or vector.
743     // Table, struct or string can't have enum_def.
744     // Default value of union and vector in NONE, NULL translated to "0".
745     FLATBUFFERS_ASSERT(IsInteger(type.base_type) ||
746                        (type.base_type == BASE_TYPE_UNION) ||
747                        (type.base_type == BASE_TYPE_VECTOR) ||
748                        (type.base_type == BASE_TYPE_ARRAY));
749     if (type.base_type == BASE_TYPE_VECTOR) {
750       // Vector can't use initialization list.
751       FLATBUFFERS_ASSERT(field->value.constant == "0");
752     } else {
753       // All unions should have the NONE ("0") enum value.
754       auto in_enum = type.enum_def->attributes.Lookup("bit_flags") ||
755                      type.enum_def->FindByValue(field->value.constant);
756       if (false == in_enum)
757         return Error("default value of " + field->value.constant +
758                      " for field " + name + " is not part of enum " +
759                      type.enum_def->name);
760     }
761   }
762 
763   field->doc_comment = dc;
764   ECHECK(ParseMetaData(&field->attributes));
765   field->deprecated = field->attributes.Lookup("deprecated") != nullptr;
766   auto hash_name = field->attributes.Lookup("hash");
767   if (hash_name) {
768     switch ((type.base_type == BASE_TYPE_VECTOR) ? type.element
769                                                  : type.base_type) {
770       case BASE_TYPE_SHORT:
771       case BASE_TYPE_USHORT: {
772         if (FindHashFunction16(hash_name->constant.c_str()) == nullptr)
773           return Error("Unknown hashing algorithm for 16 bit types: " +
774                        hash_name->constant);
775         break;
776       }
777       case BASE_TYPE_INT:
778       case BASE_TYPE_UINT: {
779         if (FindHashFunction32(hash_name->constant.c_str()) == nullptr)
780           return Error("Unknown hashing algorithm for 32 bit types: " +
781                        hash_name->constant);
782         break;
783       }
784       case BASE_TYPE_LONG:
785       case BASE_TYPE_ULONG: {
786         if (FindHashFunction64(hash_name->constant.c_str()) == nullptr)
787           return Error("Unknown hashing algorithm for 64 bit types: " +
788                        hash_name->constant);
789         break;
790       }
791       default:
792         return Error(
793             "only short, ushort, int, uint, long and ulong data types support "
794             "hashing.");
795     }
796   }
797   auto cpp_type = field->attributes.Lookup("cpp_type");
798   if (cpp_type) {
799     if (!hash_name)
800       return Error("cpp_type can only be used with a hashed field");
801     /// forcing cpp_ptr_type to 'naked' if unset
802     auto cpp_ptr_type = field->attributes.Lookup("cpp_ptr_type");
803     if (!cpp_ptr_type) {
804       auto val = new Value();
805       val->type = cpp_type->type;
806       val->constant = "naked";
807       field->attributes.Add("cpp_ptr_type", val);
808     }
809   }
810   if (field->deprecated && struct_def.fixed)
811     return Error("can't deprecate fields in a struct");
812   field->required = field->attributes.Lookup("required") != nullptr;
813   if (field->required && (struct_def.fixed || IsScalar(type.base_type)))
814     return Error("only non-scalar fields in tables may be 'required'");
815   field->key = field->attributes.Lookup("key") != nullptr;
816   if (field->key) {
817     if (struct_def.has_key) return Error("only one field may be set as 'key'");
818     struct_def.has_key = true;
819     if (!IsScalar(type.base_type)) {
820       field->required = true;
821       if (type.base_type != BASE_TYPE_STRING)
822         return Error("'key' field must be string or scalar type");
823     }
824   }
825   field->shared = field->attributes.Lookup("shared") != nullptr;
826   if (field->shared && field->value.type.base_type != BASE_TYPE_STRING)
827     return Error("shared can only be defined on strings");
828 
829   auto field_native_custom_alloc =
830       field->attributes.Lookup("native_custom_alloc");
831   if (field_native_custom_alloc)
832     return Error(
833         "native_custom_alloc can only be used with a table or struct "
834         "definition");
835 
836   field->native_inline = field->attributes.Lookup("native_inline") != nullptr;
837   if (field->native_inline && !IsStruct(field->value.type))
838     return Error("native_inline can only be defined on structs");
839 
840   auto nested = field->attributes.Lookup("nested_flatbuffer");
841   if (nested) {
842     if (nested->type.base_type != BASE_TYPE_STRING)
843       return Error(
844           "nested_flatbuffer attribute must be a string (the root type)");
845     if (type.base_type != BASE_TYPE_VECTOR || type.element != BASE_TYPE_UCHAR)
846       return Error(
847           "nested_flatbuffer attribute may only apply to a vector of ubyte");
848     // This will cause an error if the root type of the nested flatbuffer
849     // wasn't defined elsewhere.
850     field->nested_flatbuffer = LookupCreateStruct(nested->constant);
851   }
852 
853   if (field->attributes.Lookup("flexbuffer")) {
854     field->flexbuffer = true;
855     uses_flexbuffers_ = true;
856     if (type.base_type != BASE_TYPE_VECTOR || type.element != BASE_TYPE_UCHAR)
857       return Error("flexbuffer attribute may only apply to a vector of ubyte");
858   }
859 
860   if (typefield) {
861     if (!IsScalar(typefield->value.type.base_type)) {
862       // this is a union vector field
863       typefield->required = field->required;
864     }
865     // If this field is a union, and it has a manually assigned id,
866     // the automatically added type field should have an id as well (of N - 1).
867     auto attr = field->attributes.Lookup("id");
868     if (attr) {
869       auto id = atoi(attr->constant.c_str());
870       auto val = new Value();
871       val->type = attr->type;
872       val->constant = NumToString(id - 1);
873       typefield->attributes.Add("id", val);
874     }
875   }
876 
877   EXPECT(';');
878   return NoError();
879 }
880 
ParseString(Value & val)881 CheckedError Parser::ParseString(Value &val) {
882   auto s = attribute_;
883   EXPECT(kTokenStringConstant);
884   val.constant = NumToString(builder_.CreateString(s).o);
885   return NoError();
886 }
887 
ParseComma()888 CheckedError Parser::ParseComma() {
889   if (!opts.protobuf_ascii_alike) EXPECT(',');
890   return NoError();
891 }
892 
// Parses one value from the input, dispatching on val.type.base_type, and
// leaves the result in val.constant (for strings, vectors and tables this is
// a builder offset formatted as a number; for structs/arrays it is the raw
// serialized bytes).
//
//  val               - in: the expected type; out: the parsed constant.
//  field             - schema field being parsed; may be null for the
//                      non-union cases (it is asserted non-null for unions).
//  parent_fieldn     - number of fields of the enclosing table already on
//                      field_stack_; bounds the search for a union type field.
//  parent_struct_def - enclosing table definition, needed to look ahead for a
//                      union type field that appears after the union value.
//  count             - index of the element when parsing inside a vector; it
//                      is also the number of field_stack_ entries to skip.
//  inside_vector     - true when the value is an element of a vector.
//
// Returns an error if the input doesn't match the expected type.
CheckedError Parser::ParseAnyValue(Value &val, FieldDef *field,
                                   size_t parent_fieldn,
                                   const StructDef *parent_struct_def,
                                   uoffset_t count, bool inside_vector) {
  switch (val.type.base_type) {
    case BASE_TYPE_UNION: {
      FLATBUFFERS_ASSERT(field);
      // Textual value of the union type field (single union case).
      std::string constant;
      // Already serialized vector of type ids (vector of unions case).
      Vector<uint8_t> *vector_of_union_types = nullptr;
      // Find corresponding type field we may have already parsed.
      // The first `count` entries from the back are skipped, then at most
      // `parent_fieldn` entries are inspected.
      for (auto elem = field_stack_.rbegin() + count;
           elem != field_stack_.rbegin() + parent_fieldn + count; ++elem) {
        auto &type = elem->second->value.type;
        if (type.enum_def == val.type.enum_def) {
          if (inside_vector) {
            if (type.base_type == BASE_TYPE_VECTOR &&
                type.element == BASE_TYPE_UTYPE) {
              // Vector of union type field.
              // Its constant holds a builder offset; convert that offset
              // (counted from the end of the buffer) into a pointer.
              uoffset_t offset;
              ECHECK(atot(elem->first.constant.c_str(), *this, &offset));
              vector_of_union_types = reinterpret_cast<Vector<uint8_t> *>(
                  builder_.GetCurrentBufferPointer() + builder_.GetSize() -
                  offset);
              break;
            }
          } else {
            if (type.base_type == BASE_TYPE_UTYPE) {
              // Union type field.
              constant = elem->first.constant;
              break;
            }
          }
        }
      }
      if (constant.empty() && !inside_vector) {
        // We haven't seen the type field yet. Sadly a lot of JSON writers
        // output these in alphabetical order, meaning it comes after this
        // value. So we scan past the value to find it, then come back here.
        // We currently don't do this for vectors of unions because the
        // scanning/serialization logic would get very complicated.
        auto type_name = field->name + UnionTypeFieldSuffix();
        FLATBUFFERS_ASSERT(parent_struct_def);
        auto type_field = parent_struct_def->fields.Lookup(type_name);
        FLATBUFFERS_ASSERT(type_field);  // Guaranteed by ParseField().
        // Remember where we are in the source file, so we can come back here.
        auto backup = *static_cast<ParserState *>(this);
        ECHECK(SkipAnyJsonValue());  // The table.
        ECHECK(ParseComma());
        // Only the field immediately following the union value is examined.
        auto next_name = attribute_;
        if (Is(kTokenStringConstant)) {
          NEXT();
        } else {
          EXPECT(kTokenIdentifier);
        }
        if (next_name == type_name) {
          EXPECT(':');
          Value type_val = type_field->value;
          ECHECK(ParseAnyValue(type_val, type_field, 0, nullptr, 0));
          constant = type_val.constant;
          // Got the information we needed, now rewind:
          *static_cast<ParserState *>(this) = backup;
        }
        // If the next field was not the type field, `constant` stays empty
        // and the error below is reported.
      }
      if (constant.empty() && !vector_of_union_types) {
        return Error("missing type field for this union value: " + field->name);
      }
      // Determine which union member this value is.
      uint8_t enum_idx;
      if (vector_of_union_types) {
        enum_idx = vector_of_union_types->Get(count);
      } else {
        ECHECK(atot(constant.c_str(), *this, &enum_idx));
      }
      auto enum_val = val.type.enum_def->ReverseLookup(enum_idx, true);
      if (!enum_val) return Error("illegal type id for: " + field->name);
      if (enum_val->union_type.base_type == BASE_TYPE_STRUCT) {
        ECHECK(ParseTable(*enum_val->union_type.struct_def, &val.constant,
                          nullptr));
        if (enum_val->union_type.struct_def->fixed) {
          // All BASE_TYPE_UNION values are offsets, so turn this into one.
          SerializeStruct(*enum_val->union_type.struct_def, val);
          builder_.ClearOffsets();
          val.constant = NumToString(builder_.GetSize());
        }
      } else if (enum_val->union_type.base_type == BASE_TYPE_STRING) {
        ECHECK(ParseString(val));
      } else {
        // Only struct/table and string union members are handled here.
        FLATBUFFERS_ASSERT(false);
      }
      break;
    }
    case BASE_TYPE_STRUCT:
      ECHECK(ParseTable(*val.type.struct_def, &val.constant, nullptr));
      break;
    case BASE_TYPE_STRING: {
      ECHECK(ParseString(val));
      break;
    }
    case BASE_TYPE_VECTOR: {
      uoffset_t off;
      ECHECK(ParseVector(val.type.VectorType(), &off, field, parent_fieldn));
      val.constant = NumToString(off);
      break;
    }
    case BASE_TYPE_ARRAY: {
      ECHECK(ParseArray(val));
      break;
    }
    case BASE_TYPE_INT:
    case BASE_TYPE_UINT:
    case BASE_TYPE_LONG:
    case BASE_TYPE_ULONG: {
      // A field carrying the "hash" attribute may be given as an identifier
      // or string, which is then handed to ParseHash() instead of being read
      // as a number.
      if (field && field->attributes.Lookup("hash") &&
          (token_ == kTokenIdentifier || token_ == kTokenStringConstant)) {
        ECHECK(ParseHash(val, field));
      } else {
        ECHECK(ParseSingleValue(field ? &field->name : nullptr, val, false));
      }
      break;
    }
    default:
      ECHECK(ParseSingleValue(field ? &field->name : nullptr, val, false));
      break;
  }
  return NoError();
}
1018 
SerializeStruct(const StructDef & struct_def,const Value & val)1019 void Parser::SerializeStruct(const StructDef &struct_def, const Value &val) {
1020   SerializeStruct(builder_, struct_def, val);
1021 }
1022 
SerializeStruct(FlatBufferBuilder & builder,const StructDef & struct_def,const Value & val)1023 void Parser::SerializeStruct(FlatBufferBuilder &builder,
1024                              const StructDef &struct_def, const Value &val) {
1025   FLATBUFFERS_ASSERT(val.constant.length() == struct_def.bytesize);
1026   builder.Align(struct_def.minalign);
1027   builder.PushBytes(reinterpret_cast<const uint8_t *>(val.constant.c_str()),
1028                     struct_def.bytesize);
1029   builder.AddStructOffset(val.offset, builder.GetSize());
1030 }
1031 
1032 template<typename F>
ParseTableDelimiters(size_t & fieldn,const StructDef * struct_def,F body)1033 CheckedError Parser::ParseTableDelimiters(size_t &fieldn,
1034                                           const StructDef *struct_def, F body) {
1035   // We allow tables both as JSON object{ .. } with field names
1036   // or vector[..] with all fields in order
1037   char terminator = '}';
1038   bool is_nested_vector = struct_def && Is('[');
1039   if (is_nested_vector) {
1040     NEXT();
1041     terminator = ']';
1042   } else {
1043     EXPECT('{');
1044   }
1045   for (;;) {
1046     if ((!opts.strict_json || !fieldn) && Is(terminator)) break;
1047     std::string name;
1048     if (is_nested_vector) {
1049       if (fieldn >= struct_def->fields.vec.size()) {
1050         return Error("too many unnamed fields in nested array");
1051       }
1052       name = struct_def->fields.vec[fieldn]->name;
1053     } else {
1054       name = attribute_;
1055       if (Is(kTokenStringConstant)) {
1056         NEXT();
1057       } else {
1058         EXPECT(opts.strict_json ? kTokenStringConstant : kTokenIdentifier);
1059       }
1060       if (!opts.protobuf_ascii_alike || !(Is('{') || Is('['))) EXPECT(':');
1061     }
1062     ECHECK(body(name, fieldn, struct_def));
1063     if (Is(terminator)) break;
1064     ECHECK(ParseComma());
1065   }
1066   NEXT();
1067   if (is_nested_vector && fieldn != struct_def->fields.vec.size()) {
1068     return Error("wrong number of unnamed fields in table vector");
1069   }
1070   return NoError();
1071 }
1072 
// Parses one table or struct value described by `struct_def` and serializes
// it with builder_.
//
// Parsed field values are first collected on field_stack_ (kept ordered by
// field offset), then required fields are verified, then the values are
// written to the builder and removed from field_stack_ again.
//
//  value  - for a fixed struct: receives the raw serialized bytes (must be
//           non-null). For a table: if non-null, receives the table offset
//           formatted as a number.
//  ovalue - for a table: if non-null, receives the table offset. Must be null
//           for a fixed struct.
//
// Returns an error for unknown fields (unless
// opts.skip_unexpected_fields_in_json), duplicated fields, missing required
// fields, or a struct with the wrong number of initializers.
CheckedError Parser::ParseTable(const StructDef &struct_def, std::string *value,
                                uoffset_t *ovalue) {
  // Number of fields of this table that were pushed onto field_stack_.
  size_t fieldn_outer = 0;
  auto err = ParseTableDelimiters(
      fieldn_outer, &struct_def,
      [&](const std::string &name, size_t &fieldn,
          const StructDef *struct_def_inner) -> CheckedError {
        // A "$schema" entry must be a string constant and is otherwise
        // ignored.
        if (name == "$schema") {
          ECHECK(Expect(kTokenStringConstant));
          return NoError();
        }
        auto field = struct_def_inner->fields.Lookup(name);
        if (!field) {
          if (!opts.skip_unexpected_fields_in_json) {
            return Error("unknown field: " + name);
          } else {
            ECHECK(SkipAnyJsonValue());
          }
        } else {
          // `null` for a non-scalar field means "field not present".
          if (IsIdent("null") && !IsScalar(field->value.type.base_type)) {
            ECHECK(Next());  // Ignore this field.
          } else {
            // Start from the schema value (type, offset, default constant).
            Value val = field->value;
            if (field->flexbuffer) {
              // Build the value as a FlexBuffer and embed it as a byte vector.
              flexbuffers::Builder builder(1024,
                                           flexbuffers::BUILDER_FLAG_SHARE_ALL);
              ECHECK(ParseFlexBufferValue(&builder));
              builder.Finish();
              // Force alignment for nested flexbuffer
              builder_.ForceVectorAlignment(builder.GetSize(), sizeof(uint8_t),
                                            sizeof(largest_scalar_t));
              auto off = builder_.CreateVector(builder.GetBuffer());
              val.constant = NumToString(off.o);
            } else if (field->nested_flatbuffer) {
              ECHECK(
                  ParseNestedFlatbuffer(val, field, fieldn, struct_def_inner));
            } else {
              ECHECK(Recurse([&]() {
                return ParseAnyValue(val, field, fieldn, struct_def_inner, 0);
              }));
            }
            // Hardcoded insertion-sort with error-check.
            // If fields are specified in order, then this loop exits
            // immediately.
            auto elem = field_stack_.rbegin();
            for (; elem != field_stack_.rbegin() + fieldn; ++elem) {
              auto existing_field = elem->second;
              if (existing_field == field)
                return Error("field set more than once: " + field->name);
              if (existing_field->value.offset < field->value.offset) break;
            }
            // Note: elem points to before the insertion point, thus .base()
            // points to the correct spot.
            field_stack_.insert(elem.base(), std::make_pair(val, field));
            fieldn++;
          }
        }
        return NoError();
      });
  ECHECK(err);

  // Check if all required fields are parsed.
  // The fields of this table are the last `fieldn_outer` entries of
  // field_stack_.
  for (auto field_it = struct_def.fields.vec.begin();
       field_it != struct_def.fields.vec.end(); ++field_it) {
    auto required_field = *field_it;
    if (!required_field->required) { continue; }
    bool found = false;
    for (auto pf_it = field_stack_.end() - fieldn_outer;
         pf_it != field_stack_.end(); ++pf_it) {
      auto parsed_field = pf_it->second;
      if (parsed_field == required_field) {
        found = true;
        break;
      }
    }
    if (!found) {
      return Error("required field is missing: " + required_field->name +
                   " in " + struct_def.name);
    }
  }

  // A fixed struct has no optional fields: every field must be initialized.
  if (struct_def.fixed && fieldn_outer != struct_def.fields.vec.size())
    return Error("struct: wrong number of initializers: " + struct_def.name);

  auto start = struct_def.fixed ? builder_.StartStruct(struct_def.minalign)
                                : builder_.StartTable();

  // When sorting by size, make one pass per scalar size, from the largest
  // down to 1, emitting only the fields of that size; otherwise a single pass
  // (size == 1) emits every field.
  for (size_t size = struct_def.sortbysize ? sizeof(largest_scalar_t) : 1; size;
       size /= 2) {
    // Go through elements in reverse, since we're building the data backwards.
    for (auto it = field_stack_.rbegin();
         it != field_stack_.rbegin() + fieldn_outer; ++it) {
      auto &field_value = it->first;
      auto field = it->second;
      if (!struct_def.sortbysize ||
          size == SizeOf(field_value.type.base_type)) {
        switch (field_value.type.base_type) {
          // clang-format off
          #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, ...) \
            case BASE_TYPE_ ## ENUM: \
              builder_.Pad(field->padding); \
              if (struct_def.fixed) { \
                CTYPE val; \
                ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
                builder_.PushElement(val); \
              } else { \
                CTYPE val, valdef; \
                ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
                ECHECK(atot(field->value.constant.c_str(), *this, &valdef)); \
                builder_.AddElement(field_value.offset, val, valdef); \
              } \
              break;
            FLATBUFFERS_GEN_TYPES_SCALAR(FLATBUFFERS_TD)
          #undef FLATBUFFERS_TD
          #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, ...) \
            case BASE_TYPE_ ## ENUM: \
              builder_.Pad(field->padding); \
              if (IsStruct(field->value.type)) { \
                SerializeStruct(*field->value.type.struct_def, field_value); \
              } else { \
                CTYPE val; \
                ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
                builder_.AddOffset(field_value.offset, val); \
              } \
              break;
            FLATBUFFERS_GEN_TYPES_POINTER(FLATBUFFERS_TD)
          #undef FLATBUFFERS_TD
            case BASE_TYPE_ARRAY:
              builder_.Pad(field->padding);
              builder_.PushBytes(
                reinterpret_cast<const uint8_t*>(field_value.constant.c_str()),
                InlineSize(field_value.type));
              break;
            // clang-format on
        }
      }
    }
  }
  // Remove this table's fields from the shared stack.
  for (size_t i = 0; i < fieldn_outer; i++) field_stack_.pop_back();

  if (struct_def.fixed) {
    builder_.ClearOffsets();
    builder_.EndStruct();
    FLATBUFFERS_ASSERT(value);
    // Temporarily store this struct in the value string, since it is to
    // be serialized in-place elsewhere.
    value->assign(
        reinterpret_cast<const char *>(builder_.GetCurrentBufferPointer()),
        struct_def.bytesize);
    builder_.PopBytes(struct_def.bytesize);
    FLATBUFFERS_ASSERT(!ovalue);
  } else {
    auto val = builder_.EndTable(start);
    if (ovalue) *ovalue = val;
    if (value) *value = NumToString(val);
  }
  return NoError();
}
1231 
1232 template<typename F>
ParseVectorDelimiters(uoffset_t & count,F body)1233 CheckedError Parser::ParseVectorDelimiters(uoffset_t &count, F body) {
1234   EXPECT('[');
1235   for (;;) {
1236     if ((!opts.strict_json || !count) && Is(']')) break;
1237     ECHECK(body(count));
1238     count++;
1239     if (Is(']')) break;
1240     ECHECK(ParseComma());
1241   }
1242   NEXT();
1243   return NoError();
1244 }
1245 
CompareType(const uint8_t * a,const uint8_t * b,BaseType ftype)1246 static bool CompareType(const uint8_t *a, const uint8_t *b, BaseType ftype) {
1247   switch (ftype) {
1248 #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, ...) \
1249   case BASE_TYPE_##ENUM: return ReadScalar<CTYPE>(a) < ReadScalar<CTYPE>(b);
1250     FLATBUFFERS_GEN_TYPES_SCALAR(FLATBUFFERS_TD)
1251 #undef FLATBUFFERS_TD
1252     case BASE_TYPE_STRING:
1253       // Indirect offset pointer to string pointer.
1254       a += ReadScalar<uoffset_t>(a);
1255       b += ReadScalar<uoffset_t>(b);
1256       return *reinterpret_cast<const String *>(a) <
1257              *reinterpret_cast<const String *>(b);
1258     default: return false;
1259   }
1260 }
1261 
// See below for why we need our own sort :(
//
// Minimal recursive in-place quicksort over a contiguous run of records.
//
//  begin, end - the range [begin, end) to sort, measured in units of T.
//  width      - size of one record, in units of T (1 when T is the record
//               itself, the struct byte size when T is uint8_t).
//  comparator - comparator(x, y) returns true when record x orders before
//               record y.
//  swapper    - swapper(x, y) exchanges the two records; it is only ever
//               called with x <= y.
//
// The first record is used as the pivot. All pointer stepping is done in
// whole records (`width` units), so records are never accessed at a
// misaligned position.
template<typename T, typename F, typename S>
void SimpleQsort(T *begin, T *end, size_t width, F comparator, S swapper) {
  // Zero or one record: nothing to do.
  if (end - begin <= static_cast<ptrdiff_t>(width)) return;
  auto l = begin + width;
  auto r = end;
  while (l < r) {
    if (comparator(begin, l)) {
      // Record at l is greater than the pivot: move it to the right part.
      r -= width;
      swapper(l, r);
    } else {
      // Step to the next whole record (not just the next T).
      l += width;
    }
  }
  // Place the pivot between the two partitions.
  l -= width;
  swapper(begin, l);
  SimpleQsort(begin, l, width, comparator, swapper);
  SimpleQsort(r, end, width, comparator, swapper);
}
1281 
ParseVector(const Type & type,uoffset_t * ovalue,FieldDef * field,size_t fieldn)1282 CheckedError Parser::ParseVector(const Type &type, uoffset_t *ovalue,
1283                                  FieldDef *field, size_t fieldn) {
1284   uoffset_t count = 0;
1285   auto err = ParseVectorDelimiters(count, [&](uoffset_t &) -> CheckedError {
1286     Value val;
1287     val.type = type;
1288     ECHECK(Recurse([&]() {
1289       return ParseAnyValue(val, field, fieldn, nullptr, count, true);
1290     }));
1291     field_stack_.push_back(std::make_pair(val, nullptr));
1292     return NoError();
1293   });
1294   ECHECK(err);
1295 
1296   const auto *force_align = field->attributes.Lookup("force_align");
1297   const size_t align =
1298       force_align ? static_cast<size_t>(atoi(force_align->constant.c_str()))
1299                   : 1;
1300   const size_t len = count * InlineSize(type) / InlineAlignment(type);
1301   const size_t elemsize = InlineAlignment(type);
1302   if (align > 1) { builder_.ForceVectorAlignment(len, elemsize, align); }
1303 
1304   builder_.StartVector(len, elemsize);
1305   for (uoffset_t i = 0; i < count; i++) {
1306     // start at the back, since we're building the data backwards.
1307     auto &val = field_stack_.back().first;
1308     switch (val.type.base_type) {
1309       // clang-format off
1310       #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE,...) \
1311         case BASE_TYPE_ ## ENUM: \
1312           if (IsStruct(val.type)) SerializeStruct(*val.type.struct_def, val); \
1313           else { \
1314              CTYPE elem; \
1315              ECHECK(atot(val.constant.c_str(), *this, &elem)); \
1316              builder_.PushElement(elem); \
1317           } \
1318           break;
1319         FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
1320       #undef FLATBUFFERS_TD
1321       // clang-format on
1322     }
1323     field_stack_.pop_back();
1324   }
1325 
1326   builder_.ClearOffsets();
1327   *ovalue = builder_.EndVector(count);
1328 
1329   if (type.base_type == BASE_TYPE_STRUCT && type.struct_def->has_key) {
1330     // We should sort this vector. Find the key first.
1331     const FieldDef *key = nullptr;
1332     for (auto it = type.struct_def->fields.vec.begin();
1333          it != type.struct_def->fields.vec.end(); ++it) {
1334       if ((*it)->key) {
1335         key = (*it);
1336         break;
1337       }
1338     }
1339     FLATBUFFERS_ASSERT(key);
1340     // Now sort it.
1341     // We can't use std::sort because for structs the size is not known at
1342     // compile time, and for tables our iterators dereference offsets, so can't
1343     // be used to swap elements.
1344     // And we can't use C qsort either, since that would force use to use
1345     // globals, making parsing thread-unsafe.
1346     // So for now, we use SimpleQsort above.
1347     // TODO: replace with something better, preferably not recursive.
1348     static voffset_t offset = key->value.offset;
1349     static BaseType ftype = key->value.type.base_type;
1350 
1351     if (type.struct_def->fixed) {
1352       auto v =
1353           reinterpret_cast<VectorOfAny *>(builder_.GetCurrentBufferPointer());
1354       SimpleQsort<uint8_t>(
1355           v->Data(), v->Data() + v->size() * type.struct_def->bytesize,
1356           type.struct_def->bytesize,
1357           [](const uint8_t *a, const uint8_t *b) -> bool {
1358             return CompareType(a + offset, b + offset, ftype);
1359           },
1360           [&](uint8_t *a, uint8_t *b) {
1361             // FIXME: faster?
1362             for (size_t i = 0; i < type.struct_def->bytesize; i++) {
1363               std::swap(a[i], b[i]);
1364             }
1365           });
1366     } else {
1367       auto v = reinterpret_cast<Vector<Offset<Table>> *>(
1368           builder_.GetCurrentBufferPointer());
1369       // Here also can't use std::sort. We do have an iterator type for it,
1370       // but it is non-standard as it will dereference the offsets, and thus
1371       // can't be used to swap elements.
1372       SimpleQsort<Offset<Table>>(
1373           v->data(), v->data() + v->size(), 1,
1374           [](const Offset<Table> *_a, const Offset<Table> *_b) -> bool {
1375             // Indirect offset pointer to table pointer.
1376             auto a = reinterpret_cast<const uint8_t *>(_a) +
1377                      ReadScalar<uoffset_t>(_a);
1378             auto b = reinterpret_cast<const uint8_t *>(_b) +
1379                      ReadScalar<uoffset_t>(_b);
1380             // Fetch field address from table.
1381             a = reinterpret_cast<const Table *>(a)->GetAddressOf(offset);
1382             b = reinterpret_cast<const Table *>(b)->GetAddressOf(offset);
1383             return CompareType(a, b, ftype);
1384           },
1385           [&](Offset<Table> *a, Offset<Table> *b) {
1386             // These are serialized offsets, so are relative where they are
1387             // stored in memory, so compute the distance between these pointers:
1388             ptrdiff_t diff = (b - a) * sizeof(Offset<Table>);
1389             FLATBUFFERS_ASSERT(diff >= 0);  // Guaranteed by SimpleQsort.
1390             auto udiff = static_cast<uoffset_t>(diff);
1391             a->o = EndianScalar(ReadScalar<uoffset_t>(a) - udiff);
1392             b->o = EndianScalar(ReadScalar<uoffset_t>(b) + udiff);
1393             std::swap(*a, *b);
1394           });
1395     }
1396   }
1397   return NoError();
1398 }
1399 
ParseArray(Value & array)1400 CheckedError Parser::ParseArray(Value &array) {
1401   std::vector<Value> stack;
1402   FlatBufferBuilder builder;
1403   const auto &type = array.type.VectorType();
1404   auto length = array.type.fixed_length;
1405   uoffset_t count = 0;
1406   auto err = ParseVectorDelimiters(count, [&](uoffset_t &) -> CheckedError {
1407     vector_emplace_back(&stack, Value());
1408     auto &val = stack.back();
1409     val.type = type;
1410     if (IsStruct(type)) {
1411       ECHECK(ParseTable(*val.type.struct_def, &val.constant, nullptr));
1412     } else {
1413       ECHECK(ParseSingleValue(nullptr, val, false));
1414     }
1415     return NoError();
1416   });
1417   ECHECK(err);
1418   if (length != count) return Error("Fixed-length array size is incorrect.");
1419 
1420   for (auto it = stack.rbegin(); it != stack.rend(); ++it) {
1421     auto &val = *it;
1422     // clang-format off
1423     switch (val.type.base_type) {
1424       #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, ...) \
1425         case BASE_TYPE_ ## ENUM: \
1426           if (IsStruct(val.type)) { \
1427             SerializeStruct(builder, *val.type.struct_def, val); \
1428           } else { \
1429             CTYPE elem; \
1430             ECHECK(atot(val.constant.c_str(), *this, &elem)); \
1431             builder.PushElement(elem); \
1432           } \
1433         break;
1434         FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
1435       #undef FLATBUFFERS_TD
1436       default: FLATBUFFERS_ASSERT(0);
1437     }
1438     // clang-format on
1439   }
1440 
1441   array.constant.assign(
1442       reinterpret_cast<const char *>(builder.GetCurrentBufferPointer()),
1443       InlineSize(array.type));
1444   return NoError();
1445 }
1446 
ParseNestedFlatbuffer(Value & val,FieldDef * field,size_t fieldn,const StructDef * parent_struct_def)1447 CheckedError Parser::ParseNestedFlatbuffer(Value &val, FieldDef *field,
1448                                            size_t fieldn,
1449                                            const StructDef *parent_struct_def) {
1450   if (token_ == '[') {  // backwards compat for 'legacy' ubyte buffers
1451     ECHECK(ParseAnyValue(val, field, fieldn, parent_struct_def, 0));
1452   } else {
1453     auto cursor_at_value_begin = cursor_;
1454     ECHECK(SkipAnyJsonValue());
1455     std::string substring(cursor_at_value_begin - 1, cursor_ - 1);
1456 
1457     // Create and initialize new parser
1458     Parser nested_parser;
1459     FLATBUFFERS_ASSERT(field->nested_flatbuffer);
1460     nested_parser.root_struct_def_ = field->nested_flatbuffer;
1461     nested_parser.enums_ = enums_;
1462     nested_parser.opts = opts;
1463     nested_parser.uses_flexbuffers_ = uses_flexbuffers_;
1464 
1465     // Parse JSON substring into new flatbuffer builder using nested_parser
1466     bool ok = nested_parser.Parse(substring.c_str(), nullptr, nullptr);
1467 
1468     // Clean nested_parser to avoid deleting the elements in
1469     // the SymbolTables on destruction
1470     nested_parser.enums_.dict.clear();
1471     nested_parser.enums_.vec.clear();
1472 
1473     if (!ok) { ECHECK(Error(nested_parser.error_)); }
1474     // Force alignment for nested flatbuffer
1475     builder_.ForceVectorAlignment(
1476         nested_parser.builder_.GetSize(), sizeof(uint8_t),
1477         nested_parser.builder_.GetBufferMinAlignment());
1478 
1479     auto off = builder_.CreateVector(nested_parser.builder_.GetBufferPointer(),
1480                                      nested_parser.builder_.GetSize());
1481     val.constant = NumToString(off.o);
1482   }
1483   return NoError();
1484 }
1485 
ParseMetaData(SymbolTable<Value> * attributes)1486 CheckedError Parser::ParseMetaData(SymbolTable<Value> *attributes) {
1487   if (Is('(')) {
1488     NEXT();
1489     for (;;) {
1490       auto name = attribute_;
1491       if (false == (Is(kTokenIdentifier) || Is(kTokenStringConstant)))
1492         return Error("attribute name must be either identifier or string: " +
1493                      name);
1494       if (known_attributes_.find(name) == known_attributes_.end())
1495         return Error("user define attributes must be declared before use: " +
1496                      name);
1497       NEXT();
1498       auto e = new Value();
1499       attributes->Add(name, e);
1500       if (Is(':')) {
1501         NEXT();
1502         ECHECK(ParseSingleValue(&name, *e, true));
1503       }
1504       if (Is(')')) {
1505         NEXT();
1506         break;
1507       }
1508       EXPECT(',');
1509     }
1510   }
1511   return NoError();
1512 }
1513 
TryTypedValue(const std::string * name,int dtoken,bool check,Value & e,BaseType req,bool * destmatch)1514 CheckedError Parser::TryTypedValue(const std::string *name, int dtoken,
1515                                    bool check, Value &e, BaseType req,
1516                                    bool *destmatch) {
1517   bool match = dtoken == token_;
1518   if (match) {
1519     FLATBUFFERS_ASSERT(*destmatch == false);
1520     *destmatch = true;
1521     e.constant = attribute_;
1522     // Check token match
1523     if (!check) {
1524       if (e.type.base_type == BASE_TYPE_NONE) {
1525         e.type.base_type = req;
1526       } else {
1527         return Error(
1528             std::string("type mismatch: expecting: ") +
1529             kTypeNames[e.type.base_type] + ", found: " + kTypeNames[req] +
1530             ", name: " + (name ? *name : "") + ", value: " + e.constant);
1531       }
1532     }
1533     // The exponent suffix of hexadecimal float-point number is mandatory.
1534     // A hex-integer constant is forbidden as an initializer of float number.
1535     if ((kTokenFloatConstant != dtoken) && IsFloat(e.type.base_type)) {
1536       const auto &s = e.constant;
1537       const auto k = s.find_first_of("0123456789.");
1538       if ((std::string::npos != k) && (s.length() > (k + 1)) &&
1539           (s[k] == '0' && is_alpha_char(s[k + 1], 'X')) &&
1540           (std::string::npos == s.find_first_of("pP", k + 2))) {
1541         return Error(
1542             "invalid number, the exponent suffix of hexadecimal "
1543             "floating-point literals is mandatory: \"" +
1544             s + "\"");
1545       }
1546     }
1547 
1548     NEXT();
1549   }
1550   return NoError();
1551 }
1552 
ParseEnumFromString(const Type & type,std::string * result)1553 CheckedError Parser::ParseEnumFromString(const Type &type,
1554                                          std::string *result) {
1555   const auto base_type =
1556       type.enum_def ? type.enum_def->underlying_type.base_type : type.base_type;
1557   if (!IsInteger(base_type)) return Error("not a valid value for this field");
1558   uint64_t u64 = 0;
1559   for (size_t pos = 0; pos != std::string::npos;) {
1560     const auto delim = attribute_.find_first_of(' ', pos);
1561     const auto last = (std::string::npos == delim);
1562     auto word = attribute_.substr(pos, !last ? delim - pos : std::string::npos);
1563     pos = !last ? delim + 1 : std::string::npos;
1564     const EnumVal *ev = nullptr;
1565     if (type.enum_def) {
1566       ev = type.enum_def->Lookup(word);
1567     } else {
1568       auto dot = word.find_first_of('.');
1569       if (std::string::npos == dot)
1570         return Error("enum values need to be qualified by an enum type");
1571       auto enum_def_str = word.substr(0, dot);
1572       const auto enum_def = LookupEnum(enum_def_str);
1573       if (!enum_def) return Error("unknown enum: " + enum_def_str);
1574       auto enum_val_str = word.substr(dot + 1);
1575       ev = enum_def->Lookup(enum_val_str);
1576     }
1577     if (!ev) return Error("unknown enum value: " + word);
1578     u64 |= ev->GetAsUInt64();
1579   }
1580   *result = IsUnsigned(base_type) ? NumToString(u64)
1581                                   : NumToString(static_cast<int64_t>(u64));
1582   return NoError();
1583 }
1584 
ParseHash(Value & e,FieldDef * field)1585 CheckedError Parser::ParseHash(Value &e, FieldDef *field) {
1586   FLATBUFFERS_ASSERT(field);
1587   Value *hash_name = field->attributes.Lookup("hash");
1588   switch (e.type.base_type) {
1589     case BASE_TYPE_SHORT: {
1590       auto hash = FindHashFunction16(hash_name->constant.c_str());
1591       int16_t hashed_value = static_cast<int16_t>(hash(attribute_.c_str()));
1592       e.constant = NumToString(hashed_value);
1593       break;
1594     }
1595     case BASE_TYPE_USHORT: {
1596       auto hash = FindHashFunction16(hash_name->constant.c_str());
1597       uint16_t hashed_value = hash(attribute_.c_str());
1598       e.constant = NumToString(hashed_value);
1599       break;
1600     }
1601     case BASE_TYPE_INT: {
1602       auto hash = FindHashFunction32(hash_name->constant.c_str());
1603       int32_t hashed_value = static_cast<int32_t>(hash(attribute_.c_str()));
1604       e.constant = NumToString(hashed_value);
1605       break;
1606     }
1607     case BASE_TYPE_UINT: {
1608       auto hash = FindHashFunction32(hash_name->constant.c_str());
1609       uint32_t hashed_value = hash(attribute_.c_str());
1610       e.constant = NumToString(hashed_value);
1611       break;
1612     }
1613     case BASE_TYPE_LONG: {
1614       auto hash = FindHashFunction64(hash_name->constant.c_str());
1615       int64_t hashed_value = static_cast<int64_t>(hash(attribute_.c_str()));
1616       e.constant = NumToString(hashed_value);
1617       break;
1618     }
1619     case BASE_TYPE_ULONG: {
1620       auto hash = FindHashFunction64(hash_name->constant.c_str());
1621       uint64_t hashed_value = hash(attribute_.c_str());
1622       e.constant = NumToString(hashed_value);
1623       break;
1624     }
1625     default: FLATBUFFERS_ASSERT(0);
1626   }
1627   NEXT();
1628   return NoError();
1629 }
1630 
TokenError()1631 CheckedError Parser::TokenError() {
1632   return Error("cannot parse value starting with: " + TokenToStringId(token_));
1633 }
1634 
1635 // Re-pack helper (ParseSingleValue) to normalize defaults of scalars.
SingleValueRepack(Value & e,T val)1636 template<typename T> inline void SingleValueRepack(Value &e, T val) {
1637   // Remove leading zeros.
1638   if (IsInteger(e.type.base_type)) { e.constant = NumToString(val); }
1639 }
#if defined(FLATBUFFERS_HAS_NEW_STRTOD) && (FLATBUFFERS_HAS_NEW_STRTOD > 0)
// Floating-point overloads: any NaN default is normalized to the text "nan".
// A value compares unequal to itself only when it is NaN.
static inline void SingleValueRepack(Value &e, float val) {
  const bool is_nan = (val != val);
  if (is_nan) { e.constant = "nan"; }
}
static inline void SingleValueRepack(Value &e, double val) {
  const bool is_nan = (val != val);
  if (is_nan) { e.constant = "nan"; }
}
#endif
1649 
// Parses a single scalar or string value at the current token into `e`.
//
// `name` may be null; it is only used to build error messages.
// Three forms are handled:
//   1. `func(value)` conversion calls (deg, rad, sin, cos, tan, asin, acos,
//      atan), allowed only when `e` already has a floating-point type.
//   2. String constants and identifiers: strings, true/false for bools, enum
//      value names for integers, and numbers spelled as text.
//   3. Float and integer constant tokens.
// When `check_now` is set (it is also forced on for form 2 when the early
// string/bool/enum attempts did not match) a scalar result is re-parsed with
// atot() and normalized through SingleValueRepack().
CheckedError Parser::ParseSingleValue(const std::string *name, Value &e,
                                      bool check_now) {
  // First see if this could be a conversion function:
  if (token_ == kTokenIdentifier && *cursor_ == '(') {
    // todo: Extract processing of conversion functions to ParseFunction.
    const auto functionname = attribute_;
    if (!IsFloat(e.type.base_type)) {
      return Error(functionname + ": type of argument mismatch, expecting: " +
                   kTypeNames[BASE_TYPE_DOUBLE] +
                   ", found: " + kTypeNames[e.type.base_type] +
                   ", name: " + (name ? *name : "") + ", value: " + e.constant);
    }
    NEXT();
    EXPECT('(');
    // The argument is itself a single value (possibly another function call);
    // it is parsed into `e` without the immediate check.
    ECHECK(Recurse([&]() { return ParseSingleValue(name, e, false); }));
    EXPECT(')');
    // calculate with double precision
    double x, y = 0.0;
    ECHECK(atot(e.constant.c_str(), *this, &x));
    auto func_match = false;
    // Each FLATBUFFERS_FN_DOUBLE line below applies `op` to x when the
    // function name matches and no earlier line has matched.
    // clang-format off
    #define FLATBUFFERS_FN_DOUBLE(name, op) \
      if (!func_match && functionname == name) { y = op; func_match = true; }
    FLATBUFFERS_FN_DOUBLE("deg", x / kPi * 180);
    FLATBUFFERS_FN_DOUBLE("rad", x * kPi / 180);
    FLATBUFFERS_FN_DOUBLE("sin", sin(x));
    FLATBUFFERS_FN_DOUBLE("cos", cos(x));
    FLATBUFFERS_FN_DOUBLE("tan", tan(x));
    FLATBUFFERS_FN_DOUBLE("asin", asin(x));
    FLATBUFFERS_FN_DOUBLE("acos", acos(x));
    FLATBUFFERS_FN_DOUBLE("atan", atan(x));
    // TODO(wvo): add more useful conversion functions here.
    #undef FLATBUFFERS_FN_DOUBLE
    // clang-format on
    if (true != func_match) {
      return Error(std::string("Unknown conversion function: ") + functionname +
                   ", field name: " + (name ? *name : "") +
                   ", value: " + e.constant);
    }
    // The computed result replaces the argument text.
    e.constant = NumToString(y);
    return NoError();
  }

  auto match = false;
  const auto in_type = e.type.base_type;
  // TRY_ECHECK attempts TryTypedValue only when `check` holds; FORCE_ECHECK
  // attempts it regardless. Both are skipped once `match` is set.
  // clang-format off
  #define IF_ECHECK_(force, dtoken, check, req)    \
    if (!match && ((check) || IsConstTrue(force))) \
    ECHECK(TryTypedValue(name, dtoken, check, e, req, &match))
  #define TRY_ECHECK(dtoken, check, req) IF_ECHECK_(false, dtoken, check, req)
  #define FORCE_ECHECK(dtoken, check, req) IF_ECHECK_(true, dtoken, check, req)
  // clang-format on

  if (token_ == kTokenStringConstant || token_ == kTokenIdentifier) {
    // Remember which of the two token kinds we started with; token_ changes
    // as soon as a TryTypedValue call matches and advances.
    const auto kTokenStringOrIdent = token_;
    // The string type is a most probable type, check it first.
    TRY_ECHECK(kTokenStringConstant, in_type == BASE_TYPE_STRING,
               BASE_TYPE_STRING);

    // avoid escaped and non-ascii in the string
    if (!match && (token_ == kTokenStringConstant) && IsScalar(in_type) &&
        !attr_is_trivial_ascii_string_) {
      return Error(
          std::string("type mismatch or invalid value, an initializer of "
                      "non-string field must be trivial ASCII string: type: ") +
          kTypeNames[in_type] + ", name: " + (name ? *name : "") +
          ", value: " + attribute_);
    }

    // A boolean as true/false. Boolean as Integer check below.
    if (!match && IsBool(in_type)) {
      auto is_true = attribute_ == "true";
      if (is_true || attribute_ == "false") {
        // Rewrite the keyword as a number so it is stored as "1" or "0".
        attribute_ = is_true ? "1" : "0";
        // accepts both kTokenStringConstant and kTokenIdentifier
        TRY_ECHECK(kTokenStringOrIdent, IsBool(in_type), BASE_TYPE_BOOL);
      }
    }
    // Check if this could be a string/identifier enum value.
    // Enum can have only true integer base type.
    if (!match && IsInteger(in_type) && !IsBool(in_type) &&
        IsIdentifierStart(*attribute_.c_str())) {
      ECHECK(ParseEnumFromString(e.type, &e.constant));
      NEXT();
      match = true;
    }
    // Parse a float/integer number from the string.
    if (!match) check_now = true;  // Re-pack if parsed from string literal.
    if (!match && (token_ == kTokenStringConstant) && IsScalar(in_type)) {
      // remove trailing whitespaces from attribute_
      auto last = attribute_.find_last_not_of(' ');
      if (std::string::npos != last)  // has non-whitespace
        attribute_.resize(last + 1);
    }
    // Float numbers or nan, inf, pi, etc.
    TRY_ECHECK(kTokenStringOrIdent, IsFloat(in_type), BASE_TYPE_FLOAT);
    // An integer constant in string.
    TRY_ECHECK(kTokenStringOrIdent, IsInteger(in_type), BASE_TYPE_INT);
    // Unknown tokens will be interpreted as string type.
    // An attribute value may be a scalar or string constant.
    FORCE_ECHECK(kTokenStringConstant, in_type == BASE_TYPE_STRING,
                 BASE_TYPE_STRING);
  } else {
    // Try a float number.
    TRY_ECHECK(kTokenFloatConstant, IsFloat(in_type), BASE_TYPE_FLOAT);
    // An integer token can initialize any scalar (integer or float).
    FORCE_ECHECK(kTokenIntegerConstant, IsScalar(in_type), BASE_TYPE_INT);
  }
#undef FORCE_ECHECK
#undef TRY_ECHECK
#undef IF_ECHECK_

  // None of the attempts above accepted the current token.
  if (!match) {
    std::string msg;
    msg += "Cannot assign token starting with '" + TokenToStringId(token_) +
           "' to value of <" + std::string(kTypeNames[in_type]) + "> type.";
    return Error(msg);
  }
  const auto match_type = e.type.base_type;  // may differ from in_type
  // The check_now flag must be true when parse a fbs-schema.
  // This flag forces to check default scalar values or metadata of field.
  // For JSON parser the flag should be false.
  // If it is set for JSON each value will be checked twice (see ParseTable).
  if (check_now && IsScalar(match_type)) {
    // One case per scalar base type: parse the constant as that type's C++
    // type (an error aborts parsing) and let SingleValueRepack normalize it.
    // clang-format off
    switch (match_type) {
    #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, ...) \
      case BASE_TYPE_ ## ENUM: {\
          CTYPE val; \
          ECHECK(atot(e.constant.c_str(), *this, &val)); \
          SingleValueRepack(e, val); \
        break; }
    FLATBUFFERS_GEN_TYPES_SCALAR(FLATBUFFERS_TD)
    #undef FLATBUFFERS_TD
    default: break;
    }
    // clang-format on
  }
  return NoError();
}
1790 
LookupCreateStruct(const std::string & name,bool create_if_new,bool definition)1791 StructDef *Parser::LookupCreateStruct(const std::string &name,
1792                                       bool create_if_new, bool definition) {
1793   std::string qualified_name = current_namespace_->GetFullyQualifiedName(name);
1794   // See if it exists pre-declared by an unqualified use.
1795   auto struct_def = LookupStruct(name);
1796   if (struct_def && struct_def->predecl) {
1797     if (definition) {
1798       // Make sure it has the current namespace, and is registered under its
1799       // qualified name.
1800       struct_def->defined_namespace = current_namespace_;
1801       structs_.Move(name, qualified_name);
1802     }
1803     return struct_def;
1804   }
1805   // See if it exists pre-declared by an qualified use.
1806   struct_def = LookupStruct(qualified_name);
1807   if (struct_def && struct_def->predecl) {
1808     if (definition) {
1809       // Make sure it has the current namespace.
1810       struct_def->defined_namespace = current_namespace_;
1811     }
1812     return struct_def;
1813   }
1814   if (!definition) {
1815     // Search thru parent namespaces.
1816     for (size_t components = current_namespace_->components.size();
1817          components && !struct_def; components--) {
1818       struct_def = LookupStruct(
1819           current_namespace_->GetFullyQualifiedName(name, components - 1));
1820     }
1821   }
1822   if (!struct_def && create_if_new) {
1823     struct_def = new StructDef();
1824     if (definition) {
1825       structs_.Add(qualified_name, struct_def);
1826       struct_def->name = name;
1827       struct_def->defined_namespace = current_namespace_;
1828     } else {
1829       // Not a definition.
1830       // Rather than failing, we create a "pre declared" StructDef, due to
1831       // circular references, and check for errors at the end of parsing.
1832       // It is defined in the current namespace, as the best guess what the
1833       // final namespace will be.
1834       structs_.Add(name, struct_def);
1835       struct_def->name = name;
1836       struct_def->defined_namespace = current_namespace_;
1837       struct_def->original_location.reset(
1838           new std::string(file_being_parsed_ + ":" + NumToString(line_)));
1839     }
1840   }
1841   return struct_def;
1842 }
1843 
MinValue() const1844 const EnumVal *EnumDef::MinValue() const {
1845   return vals.vec.empty() ? nullptr : vals.vec.front();
1846 }
MaxValue() const1847 const EnumVal *EnumDef::MaxValue() const {
1848   return vals.vec.empty() ? nullptr : vals.vec.back();
1849 }
1850 
// Returns the absolute difference between two values as an unsigned 64-bit
// number, independent of the argument order.
template<typename T> static uint64_t EnumDistanceImpl(T e1, T e2) {
  const bool first_is_smaller = e1 < e2;
  const T larger = first_is_smaller ? e2 : e1;
  const T smaller = first_is_smaller ? e1 : e2;
  // Subtract in the unsigned domain: a signed subtraction could overflow,
  // whereas unsigned arithmetic wraps modulo 2^n by definition.
  return static_cast<uint64_t>(larger) - static_cast<uint64_t>(smaller);
}
1857 
Distance(const EnumVal * v1,const EnumVal * v2) const1858 uint64_t EnumDef::Distance(const EnumVal *v1, const EnumVal *v2) const {
1859   return IsUInt64() ? EnumDistanceImpl(v1->GetAsUInt64(), v2->GetAsUInt64())
1860                     : EnumDistanceImpl(v1->GetAsInt64(), v2->GetAsInt64());
1861 }
1862 
AllFlags() const1863 std::string EnumDef::AllFlags() const {
1864   FLATBUFFERS_ASSERT(attributes.Lookup("bit_flags"));
1865   uint64_t u64 = 0;
1866   for (auto it = Vals().begin(); it != Vals().end(); ++it) {
1867     u64 |= (*it)->GetAsUInt64();
1868   }
1869   return IsUInt64() ? NumToString(u64) : NumToString(static_cast<int64_t>(u64));
1870 }
1871 
ReverseLookup(int64_t enum_idx,bool skip_union_default) const1872 EnumVal *EnumDef::ReverseLookup(int64_t enum_idx,
1873                                 bool skip_union_default) const {
1874   auto skip_first = static_cast<int>(is_union && skip_union_default);
1875   for (auto it = Vals().begin() + skip_first; it != Vals().end(); ++it) {
1876     if ((*it)->GetAsInt64() == enum_idx) { return *it; }
1877   }
1878   return nullptr;
1879 }
1880 
FindByValue(const std::string & constant) const1881 EnumVal *EnumDef::FindByValue(const std::string &constant) const {
1882   int64_t i64;
1883   auto done = false;
1884   if (IsUInt64()) {
1885     uint64_t u64;  // avoid reinterpret_cast of pointers
1886     done = StringToNumber(constant.c_str(), &u64);
1887     i64 = static_cast<int64_t>(u64);
1888   } else {
1889     done = StringToNumber(constant.c_str(), &i64);
1890   }
1891   FLATBUFFERS_ASSERT(done);
1892   if (!done) return nullptr;
1893   return ReverseLookup(i64, false);
1894 }
1895 
SortByValue()1896 void EnumDef::SortByValue() {
1897   auto &v = vals.vec;
1898   if (IsUInt64())
1899     std::sort(v.begin(), v.end(), [](const EnumVal *e1, const EnumVal *e2) {
1900       return e1->GetAsUInt64() < e2->GetAsUInt64();
1901     });
1902   else
1903     std::sort(v.begin(), v.end(), [](const EnumVal *e1, const EnumVal *e2) {
1904       return e1->GetAsInt64() < e2->GetAsInt64();
1905     });
1906 }
1907 
RemoveDuplicates()1908 void EnumDef::RemoveDuplicates() {
1909   // This method depends form SymbolTable implementation!
1910   // 1) vals.vec - owner (raw pointer)
1911   // 2) vals.dict - access map
1912   auto first = vals.vec.begin();
1913   auto last = vals.vec.end();
1914   if (first == last) return;
1915   auto result = first;
1916   while (++first != last) {
1917     if ((*result)->value != (*first)->value) {
1918       *(++result) = *first;
1919     } else {
1920       auto ev = *first;
1921       for (auto it = vals.dict.begin(); it != vals.dict.end(); ++it) {
1922         if (it->second == ev) it->second = *result;  // reassign
1923       }
1924       delete ev;  // delete enum value
1925       *first = nullptr;
1926     }
1927   }
1928   vals.vec.erase(++result, last);
1929 }
1930 
ChangeEnumValue(EnumVal * ev,T new_value)1931 template<typename T> void EnumDef::ChangeEnumValue(EnumVal *ev, T new_value) {
1932   ev->value = static_cast<int64_t>(new_value);
1933 }
1934 
1935 namespace EnumHelper {
1936 template<BaseType E> struct EnumValType { typedef int64_t type; };
1937 template<> struct EnumValType<BASE_TYPE_ULONG> { typedef uint64_t type; };
1938 }  // namespace EnumHelper
1939 
1940 struct EnumValBuilder {
CreateEnumeratorflatbuffers::EnumValBuilder1941   EnumVal *CreateEnumerator(const std::string &ev_name) {
1942     FLATBUFFERS_ASSERT(!temp);
1943     auto first = enum_def.vals.vec.empty();
1944     user_value = first;
1945     temp = new EnumVal(ev_name, first ? 0 : enum_def.vals.vec.back()->value);
1946     return temp;
1947   }
1948 
CreateEnumeratorflatbuffers::EnumValBuilder1949   EnumVal *CreateEnumerator(const std::string &ev_name, int64_t val) {
1950     FLATBUFFERS_ASSERT(!temp);
1951     user_value = true;
1952     temp = new EnumVal(ev_name, val);
1953     return temp;
1954   }
1955 
AcceptEnumeratorflatbuffers::EnumValBuilder1956   FLATBUFFERS_CHECKED_ERROR AcceptEnumerator(const std::string &name) {
1957     FLATBUFFERS_ASSERT(temp);
1958     ECHECK(ValidateValue(&temp->value, false == user_value));
1959     FLATBUFFERS_ASSERT((temp->union_type.enum_def == nullptr) ||
1960                        (temp->union_type.enum_def == &enum_def));
1961     auto not_unique = enum_def.vals.Add(name, temp);
1962     temp = nullptr;
1963     if (not_unique) return parser.Error("enum value already exists: " + name);
1964     return NoError();
1965   }
1966 
AcceptEnumeratorflatbuffers::EnumValBuilder1967   FLATBUFFERS_CHECKED_ERROR AcceptEnumerator() {
1968     return AcceptEnumerator(temp->name);
1969   }
1970 
AssignEnumeratorValueflatbuffers::EnumValBuilder1971   FLATBUFFERS_CHECKED_ERROR AssignEnumeratorValue(const std::string &value) {
1972     user_value = true;
1973     auto fit = false;
1974     auto ascending = false;
1975     if (enum_def.IsUInt64()) {
1976       uint64_t u64;
1977       fit = StringToNumber(value.c_str(), &u64);
1978       ascending = u64 > temp->GetAsUInt64();
1979       temp->value = static_cast<int64_t>(u64);  // well-defined since C++20.
1980     } else {
1981       int64_t i64;
1982       fit = StringToNumber(value.c_str(), &i64);
1983       ascending = i64 > temp->GetAsInt64();
1984       temp->value = i64;
1985     }
1986     if (!fit) return parser.Error("enum value does not fit, \"" + value + "\"");
1987     if (!ascending && strict_ascending && !enum_def.vals.vec.empty())
1988       return parser.Error("enum values must be specified in ascending order");
1989     return NoError();
1990   }
1991 
1992   template<BaseType E, typename CTYPE>
ValidateImplflatbuffers::EnumValBuilder1993   inline FLATBUFFERS_CHECKED_ERROR ValidateImpl(int64_t *ev, int m) {
1994     typedef typename EnumHelper::EnumValType<E>::type T;  // int64_t or uint64_t
1995     static_assert(sizeof(T) == sizeof(int64_t), "invalid EnumValType");
1996     const auto v = static_cast<T>(*ev);
1997     auto up = static_cast<T>((flatbuffers::numeric_limits<CTYPE>::max)());
1998     auto dn = static_cast<T>((flatbuffers::numeric_limits<CTYPE>::lowest)());
1999     if (v < dn || v > (up - m)) {
2000       return parser.Error("enum value does not fit, \"" + NumToString(v) +
2001                           (m ? " + 1\"" : "\"") + " out of " +
2002                           TypeToIntervalString<CTYPE>());
2003     }
2004     *ev = static_cast<int64_t>(v + m);  // well-defined since C++20.
2005     return NoError();
2006   }
2007 
ValidateValueflatbuffers::EnumValBuilder2008   FLATBUFFERS_CHECKED_ERROR ValidateValue(int64_t *ev, bool next) {
2009     // clang-format off
2010     switch (enum_def.underlying_type.base_type) {
2011     #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, ...)                   \
2012       case BASE_TYPE_##ENUM: {                                          \
2013         if (!IsInteger(BASE_TYPE_##ENUM)) break;                        \
2014         return ValidateImpl<BASE_TYPE_##ENUM, CTYPE>(ev, next ? 1 : 0); \
2015       }
2016       FLATBUFFERS_GEN_TYPES_SCALAR(FLATBUFFERS_TD)
2017     #undef FLATBUFFERS_TD
2018     default: break;
2019     }
2020     // clang-format on
2021     return parser.Error("fatal: invalid enum underlying type");
2022   }
2023 
EnumValBuilderflatbuffers::EnumValBuilder2024   EnumValBuilder(Parser &_parser, EnumDef &_enum_def, bool strict_order = true)
2025       : parser(_parser),
2026         enum_def(_enum_def),
2027         temp(nullptr),
2028         strict_ascending(strict_order),
2029         user_value(false) {}
2030 
~EnumValBuilderflatbuffers::EnumValBuilder2031   ~EnumValBuilder() { delete temp; }
2032 
2033   Parser &parser;
2034   EnumDef &enum_def;
2035   EnumVal *temp;
2036   const bool strict_ascending;
2037   bool user_value;
2038 };
2039 
// Parses an enum or union declaration, starting at its keyword token.
//
// `is_union` selects union parsing (values may carry a type after ':').
// `dest`, when non-null, receives the resulting EnumDef.
// Outside proto mode a plain enum must name an integral, non-bool underlying
// type, and a union or an empty enum automatically gets a leading "NONE"
// value. For "bit_flags" enums each declared value is treated as a bit index
// and replaced by `1 << index`. In proto mode explicit values are mandatory
// and the values are sorted after parsing.
// On success the enum is also registered in types_ under its fully qualified
// name.
CheckedError Parser::ParseEnum(const bool is_union, EnumDef **dest) {
  // Capture the doc comment before advancing past the keyword.
  std::vector<std::string> enum_comment = doc_comment_;
  NEXT();
  std::string enum_name = attribute_;
  EXPECT(kTokenIdentifier);
  EnumDef *enum_def;
  ECHECK(StartEnum(enum_name, is_union, &enum_def));
  enum_def->doc_comment = enum_comment;
  if (!is_union && !opts.proto_mode) {
    // Give specialized error message, since this type spec used to
    // be optional in the first FlatBuffers release.
    if (!Is(':')) {
      return Error(
          "must specify the underlying integer type for this"
          " enum (e.g. \': short\', which was the default).");
    } else {
      NEXT();
    }
    // Specify the integer type underlying this enum.
    ECHECK(ParseType(enum_def->underlying_type));
    if (!IsInteger(enum_def->underlying_type.base_type) ||
        IsBool(enum_def->underlying_type.base_type))
      return Error("underlying enum type must be integral");
    // Make this type refer back to the enum it was derived from.
    enum_def->underlying_type.enum_def = enum_def;
  }
  ECHECK(ParseMetaData(&enum_def->attributes));
  const auto underlying_type = enum_def->underlying_type.base_type;
  if (enum_def->attributes.Lookup("bit_flags") &&
      !IsUnsigned(underlying_type)) {
    // todo: Convert to the Error in the future?
    Warning("underlying type of bit_flags enum must be unsigned");
  }
  // Protobuf allows them to be specified in any order, so sort afterwards.
  const auto strict_ascending = (false == opts.proto_mode);
  EnumValBuilder evb(*this, *enum_def, strict_ascending);
  EXPECT('{');
  // A lot of code generators expect that an enum is not empty.
  if ((is_union || Is('}')) && !opts.proto_mode) {
    evb.CreateEnumerator("NONE");
    ECHECK(evb.AcceptEnumerator());
  }
  // (base type, struct) pairs seen so far; a repeated pair marks the union as
  // using multiple instances of the same type.
  std::set<std::pair<BaseType, StructDef *>> union_types;
  while (!Is('}')) {
    if (opts.proto_mode && attribute_ == "option") {
      ECHECK(ParseProtoOption());
    } else {
      auto &ev = *evb.CreateEnumerator(attribute_);
      auto full_name = ev.name;
      ev.doc_comment = doc_comment_;
      EXPECT(kTokenIdentifier);
      if (is_union) {
        ECHECK(ParseNamespacing(&full_name, &ev.name));
        if (opts.union_value_namespacing) {
          // Since we can't namespace the actual enum identifiers, turn
          // namespace parts into part of the identifier.
          ev.name = full_name;
          std::replace(ev.name.begin(), ev.name.end(), '.', '_');
        }
        if (Is(':')) {
          NEXT();
          ECHECK(ParseType(ev.union_type));
          if (ev.union_type.base_type != BASE_TYPE_STRUCT &&
              ev.union_type.base_type != BASE_TYPE_STRING)
            return Error("union value type may only be table/struct/string");
        } else {
          // Without an explicit type the value name doubles as the name of
          // the struct it refers to.
          ev.union_type = Type(BASE_TYPE_STRUCT, LookupCreateStruct(full_name));
        }
        if (!enum_def->uses_multiple_type_instances) {
          auto ins = union_types.insert(std::make_pair(
              ev.union_type.base_type, ev.union_type.struct_def));
          enum_def->uses_multiple_type_instances = (false == ins.second);
        }
      }

      if (Is('=')) {
        NEXT();
        // The value text is consumed before its token kind is verified.
        ECHECK(evb.AssignEnumeratorValue(attribute_));
        EXPECT(kTokenIntegerConstant);
      } else if (false == strict_ascending) {
        // The opts.proto_mode flag is active.
        return Error("Protobuf mode doesn't allow implicit enum values.");
      }

      ECHECK(evb.AcceptEnumerator());

      if (opts.proto_mode && Is('[')) {
        NEXT();
        // ignore attributes on enums.
        // NOTE(review): this loop has no end-of-input check; confirm that
        // NEXT() fails (or otherwise terminates) on truncated input.
        while (token_ != ']') NEXT();
        NEXT();
      }
    }
    // Values are separated by ';' in proto mode and by ',' otherwise.
    if (!Is(opts.proto_mode ? ';' : ',')) break;
    NEXT();
  }
  EXPECT('}');

  // At this point, the enum can be empty if input is invalid proto-file.
  if (!enum_def->size())
    return Error("incomplete enum declaration, values not found");

  if (enum_def->attributes.Lookup("bit_flags")) {
    const auto base_width = static_cast<uint64_t>(8 * SizeOf(underlying_type));
    for (auto it = enum_def->Vals().begin(); it != enum_def->Vals().end();
         ++it) {
      auto ev = *it;
      const auto u = ev->GetAsUInt64();
      // Stop manipulations with the sign.
      if (!IsUnsigned(underlying_type) && u == (base_width - 1))
        return Error("underlying type of bit_flags enum must be unsigned");
      if (u >= base_width)
        return Error("bit flag out of range of underlying integral type");
      // Turn the declared bit index into the corresponding bit mask.
      enum_def->ChangeEnumValue(ev, 1ULL << u);
    }
  }

  if (false == strict_ascending)
    enum_def->SortByValue();  // Must be sorted to use MinValue/MaxValue.

  if (dest) *dest = enum_def;
  types_.Add(current_namespace_->GetFullyQualifiedName(enum_def->name),
             new Type(BASE_TYPE_UNION, nullptr, enum_def));
  return NoError();
}
2165 
StartStruct(const std::string & name,StructDef ** dest)2166 CheckedError Parser::StartStruct(const std::string &name, StructDef **dest) {
2167   auto &struct_def = *LookupCreateStruct(name, true, true);
2168   if (!struct_def.predecl) return Error("datatype already exists: " + name);
2169   struct_def.predecl = false;
2170   struct_def.name = name;
2171   struct_def.file = file_being_parsed_;
2172   // Move this struct to the back of the vector just in case it was predeclared,
2173   // to preserve declaration order.
2174   *std::remove(structs_.vec.begin(), structs_.vec.end(), &struct_def) =
2175       &struct_def;
2176   *dest = &struct_def;
2177   return NoError();
2178 }
2179 
CheckClash(std::vector<FieldDef * > & fields,StructDef * struct_def,const char * suffix,BaseType basetype)2180 CheckedError Parser::CheckClash(std::vector<FieldDef *> &fields,
2181                                 StructDef *struct_def, const char *suffix,
2182                                 BaseType basetype) {
2183   auto len = strlen(suffix);
2184   for (auto it = fields.begin(); it != fields.end(); ++it) {
2185     auto &fname = (*it)->name;
2186     if (fname.length() > len &&
2187         fname.compare(fname.length() - len, len, suffix) == 0 &&
2188         (*it)->value.type.base_type != BASE_TYPE_UTYPE) {
2189       auto field =
2190           struct_def->fields.Lookup(fname.substr(0, fname.length() - len));
2191       if (field && field->value.type.base_type == basetype)
2192         return Error("Field " + fname +
2193                      " would clash with generated functions for field " +
2194                      field->name);
2195     }
2196   }
2197   return NoError();
2198 }
2199 
SupportsAdvancedUnionFeatures() const2200 bool Parser::SupportsAdvancedUnionFeatures() const {
2201   return opts.lang_to_generate != 0 &&
2202          (opts.lang_to_generate &
2203           ~(IDLOptions::kCpp | IDLOptions::kJs | IDLOptions::kTs |
2204             IDLOptions::kPhp | IDLOptions::kJava | IDLOptions::kCSharp |
2205             IDLOptions::kKotlin | IDLOptions::kBinary | IDLOptions::kSwift)) ==
2206              0;
2207 }
2208 
SupportsAdvancedArrayFeatures() const2209 bool Parser::SupportsAdvancedArrayFeatures() const {
2210   return (opts.lang_to_generate &
2211           ~(IDLOptions::kCpp | IDLOptions::kPython | IDLOptions::kJava |
2212             IDLOptions::kCSharp | IDLOptions::kJsonSchema | IDLOptions::kJson |
2213             IDLOptions::kBinary)) == 0;
2214 }
2215 
UniqueNamespace(Namespace * ns)2216 Namespace *Parser::UniqueNamespace(Namespace *ns) {
2217   for (auto it = namespaces_.begin(); it != namespaces_.end(); ++it) {
2218     if (ns->components == (*it)->components) {
2219       delete ns;
2220       return *it;
2221     }
2222   }
2223   namespaces_.push_back(ns);
2224   return ns;
2225 }
2226 
UnqualifiedName(const std::string & full_qualified_name)2227 std::string Parser::UnqualifiedName(const std::string &full_qualified_name) {
2228   Namespace *ns = new Namespace();
2229 
2230   std::size_t current, previous = 0;
2231   current = full_qualified_name.find('.');
2232   while (current != std::string::npos) {
2233     ns->components.push_back(
2234         full_qualified_name.substr(previous, current - previous));
2235     previous = current + 1;
2236     current = full_qualified_name.find('.', previous);
2237   }
2238   current_namespace_ = UniqueNamespace(ns);
2239   return full_qualified_name.substr(previous, current - previous);
2240 }
2241 
compareFieldDefs(const FieldDef * a,const FieldDef * b)2242 static bool compareFieldDefs(const FieldDef *a, const FieldDef *b) {
2243   auto a_id = atoi(a->attributes.Lookup("id")->constant.c_str());
2244   auto b_id = atoi(b->attributes.Lookup("id")->constant.c_str());
2245   return a_id < b_id;
2246 }
2247 
ParseDecl()2248 CheckedError Parser::ParseDecl() {
2249   std::vector<std::string> dc = doc_comment_;
2250   bool fixed = IsIdent("struct");
2251   if (!fixed && !IsIdent("table")) return Error("declaration expected");
2252   NEXT();
2253   std::string name = attribute_;
2254   EXPECT(kTokenIdentifier);
2255   StructDef *struct_def;
2256   ECHECK(StartStruct(name, &struct_def));
2257   struct_def->doc_comment = dc;
2258   struct_def->fixed = fixed;
2259   ECHECK(ParseMetaData(&struct_def->attributes));
2260   struct_def->sortbysize =
2261       struct_def->attributes.Lookup("original_order") == nullptr && !fixed;
2262   EXPECT('{');
2263   while (token_ != '}') ECHECK(ParseField(*struct_def));
2264   auto force_align = struct_def->attributes.Lookup("force_align");
2265   if (fixed) {
2266     if (force_align) {
2267       auto align = static_cast<size_t>(atoi(force_align->constant.c_str()));
2268       if (force_align->type.base_type != BASE_TYPE_INT ||
2269           align < struct_def->minalign || align > FLATBUFFERS_MAX_ALIGNMENT ||
2270           align & (align - 1))
2271         return Error(
2272             "force_align must be a power of two integer ranging from the"
2273             "struct\'s natural alignment to " +
2274             NumToString(FLATBUFFERS_MAX_ALIGNMENT));
2275       struct_def->minalign = align;
2276     }
2277     if (!struct_def->bytesize) return Error("size 0 structs not allowed");
2278   }
2279   struct_def->PadLastField(struct_def->minalign);
2280   // Check if this is a table that has manual id assignments
2281   auto &fields = struct_def->fields.vec;
2282   if (!fixed && fields.size()) {
2283     size_t num_id_fields = 0;
2284     for (auto it = fields.begin(); it != fields.end(); ++it) {
2285       if ((*it)->attributes.Lookup("id")) num_id_fields++;
2286     }
2287     // If any fields have ids..
2288     if (num_id_fields) {
2289       // Then all fields must have them.
2290       if (num_id_fields != fields.size())
2291         return Error(
2292             "either all fields or no fields must have an 'id' attribute");
2293       // Simply sort by id, then the fields are the same as if no ids had
2294       // been specified.
2295       std::sort(fields.begin(), fields.end(), compareFieldDefs);
2296       // Verify we have a contiguous set, and reassign vtable offsets.
2297       for (int i = 0; i < static_cast<int>(fields.size()); i++) {
2298         if (i != atoi(fields[i]->attributes.Lookup("id")->constant.c_str()))
2299           return Error("field id\'s must be consecutive from 0, id " +
2300                        NumToString(i) + " missing or set twice");
2301         fields[i]->value.offset = FieldIndexToOffset(static_cast<voffset_t>(i));
2302       }
2303     }
2304   }
2305 
2306   ECHECK(
2307       CheckClash(fields, struct_def, UnionTypeFieldSuffix(), BASE_TYPE_UNION));
2308   ECHECK(CheckClash(fields, struct_def, "Type", BASE_TYPE_UNION));
2309   ECHECK(CheckClash(fields, struct_def, "_length", BASE_TYPE_VECTOR));
2310   ECHECK(CheckClash(fields, struct_def, "Length", BASE_TYPE_VECTOR));
2311   ECHECK(CheckClash(fields, struct_def, "_byte_vector", BASE_TYPE_STRING));
2312   ECHECK(CheckClash(fields, struct_def, "ByteVector", BASE_TYPE_STRING));
2313   EXPECT('}');
2314   types_.Add(current_namespace_->GetFullyQualifiedName(struct_def->name),
2315              new Type(BASE_TYPE_STRUCT, struct_def, nullptr));
2316   return NoError();
2317 }
2318 
ParseService()2319 CheckedError Parser::ParseService() {
2320   std::vector<std::string> service_comment = doc_comment_;
2321   NEXT();
2322   auto service_name = attribute_;
2323   EXPECT(kTokenIdentifier);
2324   auto &service_def = *new ServiceDef();
2325   service_def.name = service_name;
2326   service_def.file = file_being_parsed_;
2327   service_def.doc_comment = service_comment;
2328   service_def.defined_namespace = current_namespace_;
2329   if (services_.Add(current_namespace_->GetFullyQualifiedName(service_name),
2330                     &service_def))
2331     return Error("service already exists: " + service_name);
2332   ECHECK(ParseMetaData(&service_def.attributes));
2333   EXPECT('{');
2334   do {
2335     std::vector<std::string> doc_comment = doc_comment_;
2336     auto rpc_name = attribute_;
2337     EXPECT(kTokenIdentifier);
2338     EXPECT('(');
2339     Type reqtype, resptype;
2340     ECHECK(ParseTypeIdent(reqtype));
2341     EXPECT(')');
2342     EXPECT(':');
2343     ECHECK(ParseTypeIdent(resptype));
2344     if (reqtype.base_type != BASE_TYPE_STRUCT || reqtype.struct_def->fixed ||
2345         resptype.base_type != BASE_TYPE_STRUCT || resptype.struct_def->fixed)
2346       return Error("rpc request and response types must be tables");
2347     auto &rpc = *new RPCCall();
2348     rpc.name = rpc_name;
2349     rpc.request = reqtype.struct_def;
2350     rpc.response = resptype.struct_def;
2351     rpc.doc_comment = doc_comment;
2352     if (service_def.calls.Add(rpc_name, &rpc))
2353       return Error("rpc already exists: " + rpc_name);
2354     ECHECK(ParseMetaData(&rpc.attributes));
2355     EXPECT(';');
2356   } while (token_ != '}');
2357   NEXT();
2358   return NoError();
2359 }
2360 
SetRootType(const char * name)2361 bool Parser::SetRootType(const char *name) {
2362   root_struct_def_ = LookupStruct(name);
2363   if (!root_struct_def_)
2364     root_struct_def_ =
2365         LookupStruct(current_namespace_->GetFullyQualifiedName(name));
2366   return root_struct_def_ != nullptr;
2367 }
2368 
MarkGenerated()2369 void Parser::MarkGenerated() {
2370   // This function marks all existing definitions as having already
2371   // been generated, which signals no code for included files should be
2372   // generated.
2373   for (auto it = enums_.vec.begin(); it != enums_.vec.end(); ++it) {
2374     (*it)->generated = true;
2375   }
2376   for (auto it = structs_.vec.begin(); it != structs_.vec.end(); ++it) {
2377     if (!(*it)->predecl) { (*it)->generated = true; }
2378   }
2379   for (auto it = services_.vec.begin(); it != services_.vec.end(); ++it) {
2380     (*it)->generated = true;
2381   }
2382 }
2383 
ParseNamespace()2384 CheckedError Parser::ParseNamespace() {
2385   NEXT();
2386   auto ns = new Namespace();
2387   namespaces_.push_back(ns);  // Store it here to not leak upon error.
2388   if (token_ != ';') {
2389     for (;;) {
2390       ns->components.push_back(attribute_);
2391       EXPECT(kTokenIdentifier);
2392       if (Is('.')) NEXT() else break;
2393     }
2394   }
2395   namespaces_.pop_back();
2396   current_namespace_ = UniqueNamespace(ns);
2397   EXPECT(';');
2398   return NoError();
2399 }
2400 
2401 // Best effort parsing of .proto declarations, with the aim to turn them
2402 // in the closest corresponding FlatBuffer equivalent.
2403 // We parse everything as identifiers instead of keywords, since we don't
2404 // want protobuf keywords to become invalid identifiers in FlatBuffers.
ParseProtoDecl()2405 CheckedError Parser::ParseProtoDecl() {
2406   bool isextend = IsIdent("extend");
2407   if (IsIdent("package")) {
2408     // These are identical in syntax to FlatBuffer's namespace decl.
2409     ECHECK(ParseNamespace());
2410   } else if (IsIdent("message") || isextend) {
2411     std::vector<std::string> struct_comment = doc_comment_;
2412     NEXT();
2413     StructDef *struct_def = nullptr;
2414     Namespace *parent_namespace = nullptr;
2415     if (isextend) {
2416       if (Is('.')) NEXT();  // qualified names may start with a . ?
2417       auto id = attribute_;
2418       EXPECT(kTokenIdentifier);
2419       ECHECK(ParseNamespacing(&id, nullptr));
2420       struct_def = LookupCreateStruct(id, false);
2421       if (!struct_def)
2422         return Error("cannot extend unknown message type: " + id);
2423     } else {
2424       std::string name = attribute_;
2425       EXPECT(kTokenIdentifier);
2426       ECHECK(StartStruct(name, &struct_def));
2427       // Since message definitions can be nested, we create a new namespace.
2428       auto ns = new Namespace();
2429       // Copy of current namespace.
2430       *ns = *current_namespace_;
2431       // But with current message name.
2432       ns->components.push_back(name);
2433       ns->from_table++;
2434       parent_namespace = current_namespace_;
2435       current_namespace_ = UniqueNamespace(ns);
2436     }
2437     struct_def->doc_comment = struct_comment;
2438     ECHECK(ParseProtoFields(struct_def, isextend, false));
2439     if (!isextend) { current_namespace_ = parent_namespace; }
2440     if (Is(';')) NEXT();
2441   } else if (IsIdent("enum")) {
2442     // These are almost the same, just with different terminator:
2443     EnumDef *enum_def;
2444     ECHECK(ParseEnum(false, &enum_def));
2445     if (Is(';')) NEXT();
2446     // Temp: remove any duplicates, as .fbs files can't handle them.
2447     enum_def->RemoveDuplicates();
2448   } else if (IsIdent("syntax")) {  // Skip these.
2449     NEXT();
2450     EXPECT('=');
2451     EXPECT(kTokenStringConstant);
2452     EXPECT(';');
2453   } else if (IsIdent("option")) {  // Skip these.
2454     ECHECK(ParseProtoOption());
2455     EXPECT(';');
2456   } else if (IsIdent("service")) {  // Skip these.
2457     NEXT();
2458     EXPECT(kTokenIdentifier);
2459     ECHECK(ParseProtoCurliesOrIdent());
2460   } else {
2461     return Error("don\'t know how to parse .proto declaration starting with " +
2462                  TokenToStringId(token_));
2463   }
2464   return NoError();
2465 }
2466 
StartEnum(const std::string & enum_name,bool is_union,EnumDef ** dest)2467 CheckedError Parser::StartEnum(const std::string &enum_name, bool is_union,
2468                                EnumDef **dest) {
2469   auto &enum_def = *new EnumDef();
2470   enum_def.name = enum_name;
2471   enum_def.file = file_being_parsed_;
2472   enum_def.doc_comment = doc_comment_;
2473   enum_def.is_union = is_union;
2474   enum_def.defined_namespace = current_namespace_;
2475   if (enums_.Add(current_namespace_->GetFullyQualifiedName(enum_name),
2476                  &enum_def))
2477     return Error("enum already exists: " + enum_name);
2478   enum_def.underlying_type.base_type =
2479       is_union ? BASE_TYPE_UTYPE : BASE_TYPE_INT;
2480   enum_def.underlying_type.enum_def = &enum_def;
2481   if (dest) *dest = &enum_def;
2482   return NoError();
2483 }
2484 
ParseProtoFields(StructDef * struct_def,bool isextend,bool inside_oneof)2485 CheckedError Parser::ParseProtoFields(StructDef *struct_def, bool isextend,
2486                                       bool inside_oneof) {
2487   EXPECT('{');
2488   while (token_ != '}') {
2489     if (IsIdent("message") || IsIdent("extend") || IsIdent("enum")) {
2490       // Nested declarations.
2491       ECHECK(ParseProtoDecl());
2492     } else if (IsIdent("extensions")) {  // Skip these.
2493       NEXT();
2494       EXPECT(kTokenIntegerConstant);
2495       if (Is(kTokenIdentifier)) {
2496         NEXT();  // to
2497         NEXT();  // num
2498       }
2499       EXPECT(';');
2500     } else if (IsIdent("option")) {  // Skip these.
2501       ECHECK(ParseProtoOption());
2502       EXPECT(';');
2503     } else if (IsIdent("reserved")) {  // Skip these.
2504       NEXT();
2505       while (!Is(';')) { NEXT(); }  // A variety of formats, just skip.
2506       NEXT();
2507     } else {
2508       std::vector<std::string> field_comment = doc_comment_;
2509       // Parse the qualifier.
2510       bool required = false;
2511       bool repeated = false;
2512       bool oneof = false;
2513       if (!inside_oneof) {
2514         if (IsIdent("optional")) {
2515           // This is the default.
2516           NEXT();
2517         } else if (IsIdent("required")) {
2518           required = true;
2519           NEXT();
2520         } else if (IsIdent("repeated")) {
2521           repeated = true;
2522           NEXT();
2523         } else if (IsIdent("oneof")) {
2524           oneof = true;
2525           NEXT();
2526         } else {
2527           // can't error, proto3 allows decls without any of the above.
2528         }
2529       }
2530       StructDef *anonymous_struct = nullptr;
2531       EnumDef *oneof_union = nullptr;
2532       Type type;
2533       if (IsIdent("group") || oneof) {
2534         if (!oneof) NEXT();
2535         if (oneof && opts.proto_oneof_union) {
2536           auto name = MakeCamel(attribute_, true) + "Union";
2537           ECHECK(StartEnum(name, true, &oneof_union));
2538           type = Type(BASE_TYPE_UNION, nullptr, oneof_union);
2539         } else {
2540           auto name = "Anonymous" + NumToString(anonymous_counter++);
2541           ECHECK(StartStruct(name, &anonymous_struct));
2542           type = Type(BASE_TYPE_STRUCT, anonymous_struct);
2543         }
2544       } else {
2545         ECHECK(ParseTypeFromProtoType(&type));
2546       }
2547       // Repeated elements get mapped to a vector.
2548       if (repeated) {
2549         type.element = type.base_type;
2550         type.base_type = BASE_TYPE_VECTOR;
2551         if (type.element == BASE_TYPE_VECTOR) {
2552           // We have a vector or vectors, which FlatBuffers doesn't support.
2553           // For now make it a vector of string (since the source is likely
2554           // "repeated bytes").
2555           // TODO(wvo): A better solution would be to wrap this in a table.
2556           type.element = BASE_TYPE_STRING;
2557         }
2558       }
2559       std::string name = attribute_;
2560       EXPECT(kTokenIdentifier);
2561       if (!oneof) {
2562         // Parse the field id. Since we're just translating schemas, not
2563         // any kind of binary compatibility, we can safely ignore these, and
2564         // assign our own.
2565         EXPECT('=');
2566         EXPECT(kTokenIntegerConstant);
2567       }
2568       FieldDef *field = nullptr;
2569       if (isextend) {
2570         // We allow a field to be re-defined when extending.
2571         // TODO: are there situations where that is problematic?
2572         field = struct_def->fields.Lookup(name);
2573       }
2574       if (!field) ECHECK(AddField(*struct_def, name, type, &field));
2575       field->doc_comment = field_comment;
2576       if (!IsScalar(type.base_type)) field->required = required;
2577       // See if there's a default specified.
2578       if (Is('[')) {
2579         NEXT();
2580         for (;;) {
2581           auto key = attribute_;
2582           ECHECK(ParseProtoKey());
2583           EXPECT('=');
2584           auto val = attribute_;
2585           ECHECK(ParseProtoCurliesOrIdent());
2586           if (key == "default") {
2587             // Temp: skip non-numeric defaults (enums).
2588             auto numeric = strpbrk(val.c_str(), "0123456789-+.");
2589             if (IsScalar(type.base_type) && numeric == val.c_str())
2590               field->value.constant = val;
2591           } else if (key == "deprecated") {
2592             field->deprecated = val == "true";
2593           }
2594           if (!Is(',')) break;
2595           NEXT();
2596         }
2597         EXPECT(']');
2598       }
2599       if (anonymous_struct) {
2600         ECHECK(ParseProtoFields(anonymous_struct, false, oneof));
2601         if (Is(';')) NEXT();
2602       } else if (oneof_union) {
2603         // Parse into a temporary StructDef, then transfer fields into an
2604         // EnumDef describing the oneof as a union.
2605         StructDef oneof_struct;
2606         ECHECK(ParseProtoFields(&oneof_struct, false, oneof));
2607         if (Is(';')) NEXT();
2608         for (auto field_it = oneof_struct.fields.vec.begin();
2609              field_it != oneof_struct.fields.vec.end(); ++field_it) {
2610           const auto &oneof_field = **field_it;
2611           const auto &oneof_type = oneof_field.value.type;
2612           if (oneof_type.base_type != BASE_TYPE_STRUCT ||
2613               !oneof_type.struct_def || oneof_type.struct_def->fixed)
2614             return Error("oneof '" + name +
2615                          "' cannot be mapped to a union because member '" +
2616                          oneof_field.name + "' is not a table type.");
2617           EnumValBuilder evb(*this, *oneof_union);
2618           auto ev = evb.CreateEnumerator(oneof_type.struct_def->name);
2619           ev->union_type = oneof_type;
2620           ev->doc_comment = oneof_field.doc_comment;
2621           ECHECK(evb.AcceptEnumerator(oneof_field.name));
2622         }
2623       } else {
2624         EXPECT(';');
2625       }
2626     }
2627   }
2628   NEXT();
2629   return NoError();
2630 }
2631 
ParseProtoKey()2632 CheckedError Parser::ParseProtoKey() {
2633   if (token_ == '(') {
2634     NEXT();
2635     // Skip "(a.b)" style custom attributes.
2636     while (token_ == '.' || token_ == kTokenIdentifier) NEXT();
2637     EXPECT(')');
2638     while (Is('.')) {
2639       NEXT();
2640       EXPECT(kTokenIdentifier);
2641     }
2642   } else {
2643     EXPECT(kTokenIdentifier);
2644   }
2645   return NoError();
2646 }
2647 
ParseProtoCurliesOrIdent()2648 CheckedError Parser::ParseProtoCurliesOrIdent() {
2649   if (Is('{')) {
2650     NEXT();
2651     for (int nesting = 1; nesting;) {
2652       if (token_ == '{')
2653         nesting++;
2654       else if (token_ == '}')
2655         nesting--;
2656       NEXT();
2657     }
2658   } else {
2659     NEXT();  // Any single token.
2660   }
2661   return NoError();
2662 }
2663 
ParseProtoOption()2664 CheckedError Parser::ParseProtoOption() {
2665   NEXT();
2666   ECHECK(ParseProtoKey());
2667   EXPECT('=');
2668   ECHECK(ParseProtoCurliesOrIdent());
2669   return NoError();
2670 }
2671 
2672 // Parse a protobuf type, and map it to the corresponding FlatBuffer one.
ParseTypeFromProtoType(Type * type)2673 CheckedError Parser::ParseTypeFromProtoType(Type *type) {
2674   struct type_lookup {
2675     const char *proto_type;
2676     BaseType fb_type, element;
2677   };
2678   static type_lookup lookup[] = {
2679     { "float", BASE_TYPE_FLOAT, BASE_TYPE_NONE },
2680     { "double", BASE_TYPE_DOUBLE, BASE_TYPE_NONE },
2681     { "int32", BASE_TYPE_INT, BASE_TYPE_NONE },
2682     { "int64", BASE_TYPE_LONG, BASE_TYPE_NONE },
2683     { "uint32", BASE_TYPE_UINT, BASE_TYPE_NONE },
2684     { "uint64", BASE_TYPE_ULONG, BASE_TYPE_NONE },
2685     { "sint32", BASE_TYPE_INT, BASE_TYPE_NONE },
2686     { "sint64", BASE_TYPE_LONG, BASE_TYPE_NONE },
2687     { "fixed32", BASE_TYPE_UINT, BASE_TYPE_NONE },
2688     { "fixed64", BASE_TYPE_ULONG, BASE_TYPE_NONE },
2689     { "sfixed32", BASE_TYPE_INT, BASE_TYPE_NONE },
2690     { "sfixed64", BASE_TYPE_LONG, BASE_TYPE_NONE },
2691     { "bool", BASE_TYPE_BOOL, BASE_TYPE_NONE },
2692     { "string", BASE_TYPE_STRING, BASE_TYPE_NONE },
2693     { "bytes", BASE_TYPE_VECTOR, BASE_TYPE_UCHAR },
2694     { nullptr, BASE_TYPE_NONE, BASE_TYPE_NONE }
2695   };
2696   for (auto tl = lookup; tl->proto_type; tl++) {
2697     if (attribute_ == tl->proto_type) {
2698       type->base_type = tl->fb_type;
2699       type->element = tl->element;
2700       NEXT();
2701       return NoError();
2702     }
2703   }
2704   if (Is('.')) NEXT();  // qualified names may start with a . ?
2705   ECHECK(ParseTypeIdent(*type));
2706   return NoError();
2707 }
2708 
SkipAnyJsonValue()2709 CheckedError Parser::SkipAnyJsonValue() {
2710   switch (token_) {
2711     case '{': {
2712       size_t fieldn_outer = 0;
2713       return ParseTableDelimiters(
2714           fieldn_outer, nullptr,
2715           [&](const std::string &, size_t &fieldn,
2716               const StructDef *) -> CheckedError {
2717             ECHECK(Recurse([&]() { return SkipAnyJsonValue(); }));
2718             fieldn++;
2719             return NoError();
2720           });
2721     }
2722     case '[': {
2723       uoffset_t count = 0;
2724       return ParseVectorDelimiters(count, [&](uoffset_t &) -> CheckedError {
2725         return Recurse([&]() { return SkipAnyJsonValue(); });
2726       });
2727     }
2728     case kTokenStringConstant:
2729     case kTokenIntegerConstant:
2730     case kTokenFloatConstant: NEXT(); break;
2731     default:
2732       if (IsIdent("true") || IsIdent("false") || IsIdent("null")) {
2733         NEXT();
2734       } else
2735         return TokenError();
2736   }
2737   return NoError();
2738 }
2739 
ParseFlexBufferValue(flexbuffers::Builder * builder)2740 CheckedError Parser::ParseFlexBufferValue(flexbuffers::Builder *builder) {
2741   switch (token_) {
2742     case '{': {
2743       auto start = builder->StartMap();
2744       size_t fieldn_outer = 0;
2745       auto err =
2746           ParseTableDelimiters(fieldn_outer, nullptr,
2747                                [&](const std::string &name, size_t &fieldn,
2748                                    const StructDef *) -> CheckedError {
2749                                  builder->Key(name);
2750                                  ECHECK(ParseFlexBufferValue(builder));
2751                                  fieldn++;
2752                                  return NoError();
2753                                });
2754       ECHECK(err);
2755       builder->EndMap(start);
2756       break;
2757     }
2758     case '[': {
2759       auto start = builder->StartVector();
2760       uoffset_t count = 0;
2761       ECHECK(ParseVectorDelimiters(count, [&](uoffset_t &) -> CheckedError {
2762         return ParseFlexBufferValue(builder);
2763       }));
2764       builder->EndVector(start, false, false);
2765       break;
2766     }
2767     case kTokenStringConstant:
2768       builder->String(attribute_);
2769       EXPECT(kTokenStringConstant);
2770       break;
2771     case kTokenIntegerConstant:
2772       builder->Int(StringToInt(attribute_.c_str()));
2773       EXPECT(kTokenIntegerConstant);
2774       break;
2775     case kTokenFloatConstant: {
2776       double d;
2777       StringToNumber(attribute_.c_str(), &d);
2778       builder->Double(d);
2779       EXPECT(kTokenFloatConstant);
2780       break;
2781     }
2782     default:
2783       if (IsIdent("true")) {
2784         builder->Bool(true);
2785         NEXT();
2786       } else if (IsIdent("false")) {
2787         builder->Bool(false);
2788         NEXT();
2789       } else if (IsIdent("null")) {
2790         builder->Null();
2791         NEXT();
2792       } else
2793         return TokenError();
2794   }
2795   return NoError();
2796 }
2797 
ParseFlexBuffer(const char * source,const char * source_filename,flexbuffers::Builder * builder)2798 bool Parser::ParseFlexBuffer(const char *source, const char *source_filename,
2799                              flexbuffers::Builder *builder) {
2800   auto ok = !StartParseFile(source, source_filename).Check() &&
2801             !ParseFlexBufferValue(builder).Check();
2802   if (ok) builder->Finish();
2803   return ok;
2804 }
2805 
Parse(const char * source,const char ** include_paths,const char * source_filename)2806 bool Parser::Parse(const char *source, const char **include_paths,
2807                    const char *source_filename) {
2808   FLATBUFFERS_ASSERT(0 == recurse_protection_counter);
2809   bool r;
2810 
2811   if (opts.use_flexbuffers) {
2812     r = ParseFlexBuffer(source, source_filename, &flex_builder_);
2813   } else {
2814     r = !ParseRoot(source, include_paths, source_filename).Check();
2815   }
2816   FLATBUFFERS_ASSERT(0 == recurse_protection_counter);
2817   return r;
2818 }
2819 
StartParseFile(const char * source,const char * source_filename)2820 CheckedError Parser::StartParseFile(const char *source,
2821                                     const char *source_filename) {
2822   file_being_parsed_ = source_filename ? source_filename : "";
2823   source_ = source;
2824   ResetState(source_);
2825   error_.clear();
2826   ECHECK(SkipByteOrderMark());
2827   NEXT();
2828   if (Is(kTokenEof)) return Error("input file is empty");
2829   return NoError();
2830 }
2831 
ParseRoot(const char * source,const char ** include_paths,const char * source_filename)2832 CheckedError Parser::ParseRoot(const char *source, const char **include_paths,
2833                                const char *source_filename) {
2834   ECHECK(DoParse(source, include_paths, source_filename, nullptr));
2835 
2836   // Check that all types were defined.
2837   for (auto it = structs_.vec.begin(); it != structs_.vec.end();) {
2838     auto &struct_def = **it;
2839     if (struct_def.predecl) {
2840       if (opts.proto_mode) {
2841         // Protos allow enums to be used before declaration, so check if that
2842         // is the case here.
2843         EnumDef *enum_def = nullptr;
2844         for (size_t components =
2845                  struct_def.defined_namespace->components.size() + 1;
2846              components && !enum_def; components--) {
2847           auto qualified_name =
2848               struct_def.defined_namespace->GetFullyQualifiedName(
2849                   struct_def.name, components - 1);
2850           enum_def = LookupEnum(qualified_name);
2851         }
2852         if (enum_def) {
2853           // This is pretty slow, but a simple solution for now.
2854           auto initial_count = struct_def.refcount;
2855           for (auto struct_it = structs_.vec.begin();
2856                struct_it != structs_.vec.end(); ++struct_it) {
2857             auto &sd = **struct_it;
2858             for (auto field_it = sd.fields.vec.begin();
2859                  field_it != sd.fields.vec.end(); ++field_it) {
2860               auto &field = **field_it;
2861               if (field.value.type.struct_def == &struct_def) {
2862                 field.value.type.struct_def = nullptr;
2863                 field.value.type.enum_def = enum_def;
2864                 auto &bt = field.value.type.base_type == BASE_TYPE_VECTOR
2865                                ? field.value.type.element
2866                                : field.value.type.base_type;
2867                 FLATBUFFERS_ASSERT(bt == BASE_TYPE_STRUCT);
2868                 bt = enum_def->underlying_type.base_type;
2869                 struct_def.refcount--;
2870                 enum_def->refcount++;
2871               }
2872             }
2873           }
2874           if (struct_def.refcount)
2875             return Error("internal: " + NumToString(struct_def.refcount) + "/" +
2876                          NumToString(initial_count) +
2877                          " use(s) of pre-declaration enum not accounted for: " +
2878                          enum_def->name);
2879           structs_.dict.erase(structs_.dict.find(struct_def.name));
2880           it = structs_.vec.erase(it);
2881           delete &struct_def;
2882           continue;  // Skip error.
2883         }
2884       }
2885       auto err = "type referenced but not defined (check namespace): " +
2886                  struct_def.name;
2887       if (struct_def.original_location)
2888         err += ", originally at: " + *struct_def.original_location;
2889       return Error(err);
2890     }
2891     ++it;
2892   }
2893 
2894   // This check has to happen here and not earlier, because only now do we
2895   // know for sure what the type of these are.
2896   for (auto it = enums_.vec.begin(); it != enums_.vec.end(); ++it) {
2897     auto &enum_def = **it;
2898     if (enum_def.is_union) {
2899       for (auto val_it = enum_def.Vals().begin();
2900            val_it != enum_def.Vals().end(); ++val_it) {
2901         auto &val = **val_it;
2902         if (!SupportsAdvancedUnionFeatures() && val.union_type.struct_def &&
2903             val.union_type.struct_def->fixed)
2904           return Error(
2905               "only tables can be union elements in the generated language: " +
2906               val.name);
2907       }
2908     }
2909   }
2910   return NoError();
2911 }
2912 
DoParse(const char * source,const char ** include_paths,const char * source_filename,const char * include_filename)2913 CheckedError Parser::DoParse(const char *source, const char **include_paths,
2914                              const char *source_filename,
2915                              const char *include_filename) {
2916   if (source_filename) {
2917     if (included_files_.find(source_filename) == included_files_.end()) {
2918       included_files_[source_filename] =
2919           include_filename ? include_filename : "";
2920       files_included_per_file_[source_filename] = std::set<std::string>();
2921     } else {
2922       return NoError();
2923     }
2924   }
2925   if (!include_paths) {
2926     static const char *current_directory[] = { "", nullptr };
2927     include_paths = current_directory;
2928   }
2929   field_stack_.clear();
2930   builder_.Clear();
2931   // Start with a blank namespace just in case this file doesn't have one.
2932   current_namespace_ = empty_namespace_;
2933 
2934   ECHECK(StartParseFile(source, source_filename));
2935 
2936   // Includes must come before type declarations:
2937   for (;;) {
2938     // Parse pre-include proto statements if any:
2939     if (opts.proto_mode && (attribute_ == "option" || attribute_ == "syntax" ||
2940                             attribute_ == "package")) {
2941       ECHECK(ParseProtoDecl());
2942     } else if (IsIdent("native_include")) {
2943       NEXT();
2944       vector_emplace_back(&native_included_files_, attribute_);
2945       EXPECT(kTokenStringConstant);
2946       EXPECT(';');
2947     } else if (IsIdent("include") || (opts.proto_mode && IsIdent("import"))) {
2948       NEXT();
2949       if (opts.proto_mode && attribute_ == "public") NEXT();
2950       auto name = flatbuffers::PosixPath(attribute_.c_str());
2951       EXPECT(kTokenStringConstant);
2952       // Look for the file in include_paths.
2953       std::string filepath;
2954       for (auto paths = include_paths; paths && *paths; paths++) {
2955         filepath = flatbuffers::ConCatPathFileName(*paths, name);
2956         if (FileExists(filepath.c_str())) break;
2957       }
2958       if (filepath.empty())
2959         return Error("unable to locate include file: " + name);
2960       if (source_filename)
2961         files_included_per_file_[source_filename].insert(filepath);
2962       if (included_files_.find(filepath) == included_files_.end()) {
2963         // We found an include file that we have not parsed yet.
2964         // Load it and parse it.
2965         std::string contents;
2966         if (!LoadFile(filepath.c_str(), true, &contents))
2967           return Error("unable to load include file: " + name);
2968         ECHECK(DoParse(contents.c_str(), include_paths, filepath.c_str(),
2969                        name.c_str()));
2970         // We generally do not want to output code for any included files:
2971         if (!opts.generate_all) MarkGenerated();
2972         // Reset these just in case the included file had them, and the
2973         // parent doesn't.
2974         root_struct_def_ = nullptr;
2975         file_identifier_.clear();
2976         file_extension_.clear();
2977         // This is the easiest way to continue this file after an include:
2978         // instead of saving and restoring all the state, we simply start the
2979         // file anew. This will cause it to encounter the same include
2980         // statement again, but this time it will skip it, because it was
2981         // entered into included_files_.
2982         // This is recursive, but only go as deep as the number of include
2983         // statements.
2984         if (source_filename) { included_files_.erase(source_filename); }
2985         return DoParse(source, include_paths, source_filename,
2986                        include_filename);
2987       }
2988       EXPECT(';');
2989     } else {
2990       break;
2991     }
2992   }
2993   // Now parse all other kinds of declarations:
2994   while (token_ != kTokenEof) {
2995     if (opts.proto_mode) {
2996       ECHECK(ParseProtoDecl());
2997     } else if (IsIdent("namespace")) {
2998       ECHECK(ParseNamespace());
2999     } else if (token_ == '{') {
3000       if (!root_struct_def_)
3001         return Error("no root type set to parse json with");
3002       if (builder_.GetSize()) {
3003         return Error("cannot have more than one json object in a file");
3004       }
3005       uoffset_t toff;
3006       ECHECK(ParseTable(*root_struct_def_, nullptr, &toff));
3007       if (opts.size_prefixed) {
3008         builder_.FinishSizePrefixed(
3009             Offset<Table>(toff),
3010             file_identifier_.length() ? file_identifier_.c_str() : nullptr);
3011       } else {
3012         builder_.Finish(Offset<Table>(toff), file_identifier_.length()
3013                                                  ? file_identifier_.c_str()
3014                                                  : nullptr);
3015       }
3016       // Check that JSON file doesn't contain more objects or IDL directives.
3017       // Comments after JSON are allowed.
3018       EXPECT(kTokenEof);
3019     } else if (IsIdent("enum")) {
3020       ECHECK(ParseEnum(false, nullptr));
3021     } else if (IsIdent("union")) {
3022       ECHECK(ParseEnum(true, nullptr));
3023     } else if (IsIdent("root_type")) {
3024       NEXT();
3025       auto root_type = attribute_;
3026       EXPECT(kTokenIdentifier);
3027       ECHECK(ParseNamespacing(&root_type, nullptr));
3028       if (opts.root_type.empty()) {
3029         if (!SetRootType(root_type.c_str()))
3030           return Error("unknown root type: " + root_type);
3031         if (root_struct_def_->fixed) return Error("root type must be a table");
3032       }
3033       EXPECT(';');
3034     } else if (IsIdent("file_identifier")) {
3035       NEXT();
3036       file_identifier_ = attribute_;
3037       EXPECT(kTokenStringConstant);
3038       if (file_identifier_.length() != FlatBufferBuilder::kFileIdentifierLength)
3039         return Error("file_identifier must be exactly " +
3040                      NumToString(FlatBufferBuilder::kFileIdentifierLength) +
3041                      " characters");
3042       EXPECT(';');
3043     } else if (IsIdent("file_extension")) {
3044       NEXT();
3045       file_extension_ = attribute_;
3046       EXPECT(kTokenStringConstant);
3047       EXPECT(';');
3048     } else if (IsIdent("include")) {
3049       return Error("includes must come before declarations");
3050     } else if (IsIdent("attribute")) {
3051       NEXT();
3052       auto name = attribute_;
3053       if (Is(kTokenIdentifier)) {
3054         NEXT();
3055       } else {
3056         EXPECT(kTokenStringConstant);
3057       }
3058       EXPECT(';');
3059       known_attributes_[name] = false;
3060     } else if (IsIdent("rpc_service")) {
3061       ECHECK(ParseService());
3062     } else {
3063       ECHECK(ParseDecl());
3064     }
3065   }
3066   return NoError();
3067 }
3068 
GetIncludedFilesRecursive(const std::string & file_name) const3069 std::set<std::string> Parser::GetIncludedFilesRecursive(
3070     const std::string &file_name) const {
3071   std::set<std::string> included_files;
3072   std::list<std::string> to_process;
3073 
3074   if (file_name.empty()) return included_files;
3075   to_process.push_back(file_name);
3076 
3077   while (!to_process.empty()) {
3078     std::string current = to_process.front();
3079     to_process.pop_front();
3080     included_files.insert(current);
3081 
3082     // Workaround the lack of const accessor in C++98 maps.
3083     auto &new_files =
3084         (*const_cast<std::map<std::string, std::set<std::string>> *>(
3085             &files_included_per_file_))[current];
3086     for (auto it = new_files.begin(); it != new_files.end(); ++it) {
3087       if (included_files.find(*it) == included_files.end())
3088         to_process.push_back(*it);
3089     }
3090   }
3091 
3092   return included_files;
3093 }
3094 
3095 // Schema serialization functionality:
3096 
// Ordering predicate: true when the fully qualified name of `a` sorts
// strictly before the fully qualified name of `b`.
template<typename T> bool compareName(const T *a, const T *b) {
  const auto lhs_name = a->defined_namespace->GetFullyQualifiedName(a->name);
  const auto rhs_name = b->defined_namespace->GetFullyQualifiedName(b->name);
  return lhs_name < rhs_name;
}
3101 
AssignIndices(const std::vector<T * > & defvec)3102 template<typename T> void AssignIndices(const std::vector<T *> &defvec) {
3103   // Pre-sort these vectors, such that we can set the correct indices for them.
3104   auto vec = defvec;
3105   std::sort(vec.begin(), vec.end(), compareName<T>);
3106   for (int i = 0; i < static_cast<int>(vec.size()); i++) vec[i]->index = i;
3107 }
3108 
Serialize()3109 void Parser::Serialize() {
3110   builder_.Clear();
3111   AssignIndices(structs_.vec);
3112   AssignIndices(enums_.vec);
3113   std::vector<Offset<reflection::Object>> object_offsets;
3114   for (auto it = structs_.vec.begin(); it != structs_.vec.end(); ++it) {
3115     auto offset = (*it)->Serialize(&builder_, *this);
3116     object_offsets.push_back(offset);
3117     (*it)->serialized_location = offset.o;
3118   }
3119   std::vector<Offset<reflection::Enum>> enum_offsets;
3120   for (auto it = enums_.vec.begin(); it != enums_.vec.end(); ++it) {
3121     auto offset = (*it)->Serialize(&builder_, *this);
3122     enum_offsets.push_back(offset);
3123     (*it)->serialized_location = offset.o;
3124   }
3125   std::vector<Offset<reflection::Service>> service_offsets;
3126   for (auto it = services_.vec.begin(); it != services_.vec.end(); ++it) {
3127     auto offset = (*it)->Serialize(&builder_, *this);
3128     service_offsets.push_back(offset);
3129     (*it)->serialized_location = offset.o;
3130   }
3131   auto objs__ = builder_.CreateVectorOfSortedTables(&object_offsets);
3132   auto enum__ = builder_.CreateVectorOfSortedTables(&enum_offsets);
3133   auto fiid__ = builder_.CreateString(file_identifier_);
3134   auto fext__ = builder_.CreateString(file_extension_);
3135   auto serv__ = builder_.CreateVectorOfSortedTables(&service_offsets);
3136   auto schema_offset = reflection::CreateSchema(
3137       builder_, objs__, enum__, fiid__, fext__,
3138       (root_struct_def_ ? root_struct_def_->serialized_location : 0), serv__);
3139   if (opts.size_prefixed) {
3140     builder_.FinishSizePrefixed(schema_offset, reflection::SchemaIdentifier());
3141   } else {
3142     builder_.Finish(schema_offset, reflection::SchemaIdentifier());
3143   }
3144 }
3145 
GetNamespace(const std::string & qualified_name,std::vector<Namespace * > & namespaces,std::map<std::string,Namespace * > & namespaces_index)3146 static Namespace *GetNamespace(
3147     const std::string &qualified_name, std::vector<Namespace *> &namespaces,
3148     std::map<std::string, Namespace *> &namespaces_index) {
3149   size_t dot = qualified_name.find_last_of('.');
3150   std::string namespace_name = (dot != std::string::npos)
3151                                    ? std::string(qualified_name.c_str(), dot)
3152                                    : "";
3153   Namespace *&ns = namespaces_index[namespace_name];
3154 
3155   if (!ns) {
3156     ns = new Namespace();
3157     namespaces.push_back(ns);
3158 
3159     size_t pos = 0;
3160 
3161     for (;;) {
3162       dot = qualified_name.find('.', pos);
3163       if (dot == std::string::npos) { break; }
3164       ns->components.push_back(qualified_name.substr(pos, dot - pos));
3165       pos = dot + 1;
3166     }
3167   }
3168 
3169   return ns;
3170 }
3171 
Serialize(FlatBufferBuilder * builder,const Parser & parser) const3172 Offset<reflection::Object> StructDef::Serialize(FlatBufferBuilder *builder,
3173                                                 const Parser &parser) const {
3174   std::vector<Offset<reflection::Field>> field_offsets;
3175   for (auto it = fields.vec.begin(); it != fields.vec.end(); ++it) {
3176     field_offsets.push_back((*it)->Serialize(
3177         builder, static_cast<uint16_t>(it - fields.vec.begin()), parser));
3178   }
3179   auto qualified_name = defined_namespace->GetFullyQualifiedName(name);
3180   auto name__ = builder->CreateString(qualified_name);
3181   auto flds__ = builder->CreateVectorOfSortedTables(&field_offsets);
3182   auto attr__ = SerializeAttributes(builder, parser);
3183   auto docs__ = parser.opts.binary_schema_comments
3184                     ? builder->CreateVectorOfStrings(doc_comment)
3185                     : 0;
3186   return reflection::CreateObject(*builder, name__, flds__, fixed,
3187                                   static_cast<int>(minalign),
3188                                   static_cast<int>(bytesize), attr__, docs__);
3189 }
3190 
Deserialize(Parser & parser,const reflection::Object * object)3191 bool StructDef::Deserialize(Parser &parser, const reflection::Object *object) {
3192   if (!DeserializeAttributes(parser, object->attributes())) return false;
3193   DeserializeDoc(doc_comment, object->documentation());
3194   name = parser.UnqualifiedName(object->name()->str());
3195   predecl = false;
3196   sortbysize = attributes.Lookup("original_order") == nullptr && !fixed;
3197   const auto &of = *(object->fields());
3198   auto indexes = std::vector<uoffset_t>(of.size());
3199   for (uoffset_t i = 0; i < of.size(); i++) indexes[of.Get(i)->id()] = i;
3200   size_t tmp_struct_size = 0;
3201   for (size_t i = 0; i < indexes.size(); i++) {
3202     auto field = of.Get(indexes[i]);
3203     auto field_def = new FieldDef();
3204     if (!field_def->Deserialize(parser, field) ||
3205         fields.Add(field_def->name, field_def)) {
3206       delete field_def;
3207       return false;
3208     }
3209     if (fixed) {
3210       // Recompute padding since that's currently not serialized.
3211       auto size = InlineSize(field_def->value.type);
3212       auto next_field =
3213           i + 1 < indexes.size() ? of.Get(indexes[i + 1]) : nullptr;
3214       tmp_struct_size += size;
3215       field_def->padding =
3216           next_field ? (next_field->offset() - field_def->value.offset) - size
3217                      : PaddingBytes(tmp_struct_size, minalign);
3218       tmp_struct_size += field_def->padding;
3219     }
3220   }
3221   FLATBUFFERS_ASSERT(static_cast<int>(tmp_struct_size) == object->bytesize());
3222   return true;
3223 }
3224 
Serialize(FlatBufferBuilder * builder,uint16_t id,const Parser & parser) const3225 Offset<reflection::Field> FieldDef::Serialize(FlatBufferBuilder *builder,
3226                                               uint16_t id,
3227                                               const Parser &parser) const {
3228   auto name__ = builder->CreateString(name);
3229   auto type__ = value.type.Serialize(builder);
3230   auto attr__ = SerializeAttributes(builder, parser);
3231   auto docs__ = parser.opts.binary_schema_comments
3232                     ? builder->CreateVectorOfStrings(doc_comment)
3233                     : 0;
3234   double d;
3235   StringToNumber(value.constant.c_str(), &d);
3236   return reflection::CreateField(
3237       *builder, name__, type__, id, value.offset,
3238       // Is uint64>max(int64) tested?
3239       IsInteger(value.type.base_type) ? StringToInt(value.constant.c_str()) : 0,
3240       // result may be platform-dependent if underlying is float (not double)
3241       IsFloat(value.type.base_type) ? d : 0.0, deprecated, required, key,
3242       attr__, docs__);
3243   // TODO: value.constant is almost always "0", we could save quite a bit of
3244   // space by sharing it. Same for common values of value.type.
3245 }
3246 
Deserialize(Parser & parser,const reflection::Field * field)3247 bool FieldDef::Deserialize(Parser &parser, const reflection::Field *field) {
3248   name = field->name()->str();
3249   defined_namespace = parser.current_namespace_;
3250   if (!value.type.Deserialize(parser, field->type())) return false;
3251   value.offset = field->offset();
3252   if (IsInteger(value.type.base_type)) {
3253     value.constant = NumToString(field->default_integer());
3254   } else if (IsFloat(value.type.base_type)) {
3255     value.constant = FloatToString(field->default_real(), 16);
3256     size_t last_zero = value.constant.find_last_not_of('0');
3257     if (last_zero != std::string::npos && last_zero != 0) {
3258       value.constant.erase(last_zero, std::string::npos);
3259     }
3260   }
3261   deprecated = field->deprecated();
3262   required = field->required();
3263   key = field->key();
3264   if (!DeserializeAttributes(parser, field->attributes())) return false;
3265   // TODO: this should probably be handled by a separate attribute
3266   if (attributes.Lookup("flexbuffer")) {
3267     flexbuffer = true;
3268     parser.uses_flexbuffers_ = true;
3269     if (value.type.base_type != BASE_TYPE_VECTOR ||
3270         value.type.element != BASE_TYPE_UCHAR)
3271       return false;
3272   }
3273   if (auto nested = attributes.Lookup("nested_flatbuffer")) {
3274     auto nested_qualified_name =
3275         parser.current_namespace_->GetFullyQualifiedName(nested->constant);
3276     nested_flatbuffer = parser.LookupStruct(nested_qualified_name);
3277     if (!nested_flatbuffer) return false;
3278   }
3279   DeserializeDoc(doc_comment, field->documentation());
3280   return true;
3281 }
3282 
Serialize(FlatBufferBuilder * builder,const Parser & parser) const3283 Offset<reflection::RPCCall> RPCCall::Serialize(FlatBufferBuilder *builder,
3284                                                const Parser &parser) const {
3285   auto name__ = builder->CreateString(name);
3286   auto attr__ = SerializeAttributes(builder, parser);
3287   auto docs__ = parser.opts.binary_schema_comments
3288                     ? builder->CreateVectorOfStrings(doc_comment)
3289                     : 0;
3290   return reflection::CreateRPCCall(
3291       *builder, name__, request->serialized_location,
3292       response->serialized_location, attr__, docs__);
3293 }
3294 
Deserialize(Parser & parser,const reflection::RPCCall * call)3295 bool RPCCall::Deserialize(Parser &parser, const reflection::RPCCall *call) {
3296   name = call->name()->str();
3297   if (!DeserializeAttributes(parser, call->attributes())) return false;
3298   DeserializeDoc(doc_comment, call->documentation());
3299   request = parser.structs_.Lookup(call->request()->name()->str());
3300   response = parser.structs_.Lookup(call->response()->name()->str());
3301   if (!request || !response) { return false; }
3302   return true;
3303 }
3304 
Serialize(FlatBufferBuilder * builder,const Parser & parser) const3305 Offset<reflection::Service> ServiceDef::Serialize(FlatBufferBuilder *builder,
3306                                                   const Parser &parser) const {
3307   std::vector<Offset<reflection::RPCCall>> servicecall_offsets;
3308   for (auto it = calls.vec.begin(); it != calls.vec.end(); ++it) {
3309     servicecall_offsets.push_back((*it)->Serialize(builder, parser));
3310   }
3311   auto qualified_name = defined_namespace->GetFullyQualifiedName(name);
3312   auto name__ = builder->CreateString(qualified_name);
3313   auto call__ = builder->CreateVector(servicecall_offsets);
3314   auto attr__ = SerializeAttributes(builder, parser);
3315   auto docs__ = parser.opts.binary_schema_comments
3316                     ? builder->CreateVectorOfStrings(doc_comment)
3317                     : 0;
3318   return reflection::CreateService(*builder, name__, call__, attr__, docs__);
3319 }
3320 
Deserialize(Parser & parser,const reflection::Service * service)3321 bool ServiceDef::Deserialize(Parser &parser,
3322                              const reflection::Service *service) {
3323   name = parser.UnqualifiedName(service->name()->str());
3324   if (service->calls()) {
3325     for (uoffset_t i = 0; i < service->calls()->size(); ++i) {
3326       auto call = new RPCCall();
3327       if (!call->Deserialize(parser, service->calls()->Get(i)) ||
3328           calls.Add(call->name, call)) {
3329         delete call;
3330         return false;
3331       }
3332     }
3333   }
3334   if (!DeserializeAttributes(parser, service->attributes())) return false;
3335   DeserializeDoc(doc_comment, service->documentation());
3336   return true;
3337 }
3338 
Serialize(FlatBufferBuilder * builder,const Parser & parser) const3339 Offset<reflection::Enum> EnumDef::Serialize(FlatBufferBuilder *builder,
3340                                             const Parser &parser) const {
3341   std::vector<Offset<reflection::EnumVal>> enumval_offsets;
3342   for (auto it = vals.vec.begin(); it != vals.vec.end(); ++it) {
3343     enumval_offsets.push_back((*it)->Serialize(builder, parser));
3344   }
3345   auto qualified_name = defined_namespace->GetFullyQualifiedName(name);
3346   auto name__ = builder->CreateString(qualified_name);
3347   auto vals__ = builder->CreateVector(enumval_offsets);
3348   auto type__ = underlying_type.Serialize(builder);
3349   auto attr__ = SerializeAttributes(builder, parser);
3350   auto docs__ = parser.opts.binary_schema_comments
3351                     ? builder->CreateVectorOfStrings(doc_comment)
3352                     : 0;
3353   return reflection::CreateEnum(*builder, name__, vals__, is_union, type__,
3354                                 attr__, docs__);
3355 }
3356 
Deserialize(Parser & parser,const reflection::Enum * _enum)3357 bool EnumDef::Deserialize(Parser &parser, const reflection::Enum *_enum) {
3358   name = parser.UnqualifiedName(_enum->name()->str());
3359   for (uoffset_t i = 0; i < _enum->values()->size(); ++i) {
3360     auto val = new EnumVal();
3361     if (!val->Deserialize(parser, _enum->values()->Get(i)) ||
3362         vals.Add(val->name, val)) {
3363       delete val;
3364       return false;
3365     }
3366   }
3367   is_union = _enum->is_union();
3368   if (!underlying_type.Deserialize(parser, _enum->underlying_type())) {
3369     return false;
3370   }
3371   if (!DeserializeAttributes(parser, _enum->attributes())) return false;
3372   DeserializeDoc(doc_comment, _enum->documentation());
3373   return true;
3374 }
3375 
Serialize(FlatBufferBuilder * builder,const Parser & parser) const3376 Offset<reflection::EnumVal> EnumVal::Serialize(FlatBufferBuilder *builder,
3377                                                const Parser &parser) const {
3378   auto name__ = builder->CreateString(name);
3379   auto type__ = union_type.Serialize(builder);
3380   auto docs__ = parser.opts.binary_schema_comments
3381                     ? builder->CreateVectorOfStrings(doc_comment)
3382                     : 0;
3383   return reflection::CreateEnumVal(
3384       *builder, name__, value,
3385       union_type.struct_def ? union_type.struct_def->serialized_location : 0,
3386       type__, docs__);
3387 }
3388 
Deserialize(const Parser & parser,const reflection::EnumVal * val)3389 bool EnumVal::Deserialize(const Parser &parser,
3390                           const reflection::EnumVal *val) {
3391   name = val->name()->str();
3392   value = val->value();
3393   if (!union_type.Deserialize(parser, val->union_type())) return false;
3394   DeserializeDoc(doc_comment, val->documentation());
3395   return true;
3396 }
3397 
Serialize(FlatBufferBuilder * builder) const3398 Offset<reflection::Type> Type::Serialize(FlatBufferBuilder *builder) const {
3399   return reflection::CreateType(
3400       *builder, static_cast<reflection::BaseType>(base_type),
3401       static_cast<reflection::BaseType>(element),
3402       struct_def ? struct_def->index : (enum_def ? enum_def->index : -1),
3403       fixed_length);
3404 }
3405 
Deserialize(const Parser & parser,const reflection::Type * type)3406 bool Type::Deserialize(const Parser &parser, const reflection::Type *type) {
3407   if (type == nullptr) return true;
3408   base_type = static_cast<BaseType>(type->base_type());
3409   element = static_cast<BaseType>(type->element());
3410   fixed_length = type->fixed_length();
3411   if (type->index() >= 0) {
3412     bool is_series = type->base_type() == reflection::Vector ||
3413                      type->base_type() == reflection::Array;
3414     if (type->base_type() == reflection::Obj ||
3415         (is_series && type->element() == reflection::Obj)) {
3416       if (static_cast<size_t>(type->index()) < parser.structs_.vec.size()) {
3417         struct_def = parser.structs_.vec[type->index()];
3418         struct_def->refcount++;
3419       } else {
3420         return false;
3421       }
3422     } else {
3423       if (static_cast<size_t>(type->index()) < parser.enums_.vec.size()) {
3424         enum_def = parser.enums_.vec[type->index()];
3425       } else {
3426         return false;
3427       }
3428     }
3429   }
3430   return true;
3431 }
3432 
3433 flatbuffers::Offset<
3434     flatbuffers::Vector<flatbuffers::Offset<reflection::KeyValue>>>
SerializeAttributes(FlatBufferBuilder * builder,const Parser & parser) const3435 Definition::SerializeAttributes(FlatBufferBuilder *builder,
3436                                 const Parser &parser) const {
3437   std::vector<flatbuffers::Offset<reflection::KeyValue>> attrs;
3438   for (auto kv = attributes.dict.begin(); kv != attributes.dict.end(); ++kv) {
3439     auto it = parser.known_attributes_.find(kv->first);
3440     FLATBUFFERS_ASSERT(it != parser.known_attributes_.end());
3441     if (parser.opts.binary_schema_builtins || !it->second) {
3442       auto key = builder->CreateString(kv->first);
3443       auto val = builder->CreateString(kv->second->constant);
3444       attrs.push_back(reflection::CreateKeyValue(*builder, key, val));
3445     }
3446   }
3447   if (attrs.size()) {
3448     return builder->CreateVectorOfSortedTables(&attrs);
3449   } else {
3450     return 0;
3451   }
3452 }
3453 
DeserializeAttributes(Parser & parser,const Vector<Offset<reflection::KeyValue>> * attrs)3454 bool Definition::DeserializeAttributes(
3455     Parser &parser, const Vector<Offset<reflection::KeyValue>> *attrs) {
3456   if (attrs == nullptr) return true;
3457   for (uoffset_t i = 0; i < attrs->size(); ++i) {
3458     auto kv = attrs->Get(i);
3459     auto value = new Value();
3460     if (kv->value()) { value->constant = kv->value()->str(); }
3461     if (attributes.Add(kv->key()->str(), value)) {
3462       delete value;
3463       return false;
3464     }
3465     parser.known_attributes_[kv->key()->str()];
3466   }
3467   return true;
3468 }
3469 
3470 /************************************************************************/
3471 /* DESERIALIZATION                                                      */
3472 /************************************************************************/
Deserialize(const uint8_t * buf,const size_t size)3473 bool Parser::Deserialize(const uint8_t *buf, const size_t size) {
3474   flatbuffers::Verifier verifier(reinterpret_cast<const uint8_t *>(buf), size);
3475   bool size_prefixed = false;
3476   if (!reflection::SchemaBufferHasIdentifier(buf)) {
3477     if (!flatbuffers::BufferHasIdentifier(buf, reflection::SchemaIdentifier(),
3478                                           true))
3479       return false;
3480     else
3481       size_prefixed = true;
3482   }
3483   auto verify_fn = size_prefixed ? &reflection::VerifySizePrefixedSchemaBuffer
3484                                  : &reflection::VerifySchemaBuffer;
3485   if (!verify_fn(verifier)) { return false; }
3486   auto schema = size_prefixed ? reflection::GetSizePrefixedSchema(buf)
3487                               : reflection::GetSchema(buf);
3488   return Deserialize(schema);
3489 }
3490 
Deserialize(const reflection::Schema * schema)3491 bool Parser::Deserialize(const reflection::Schema *schema) {
3492   file_identifier_ = schema->file_ident() ? schema->file_ident()->str() : "";
3493   file_extension_ = schema->file_ext() ? schema->file_ext()->str() : "";
3494   std::map<std::string, Namespace *> namespaces_index;
3495 
3496   // Create defs without deserializing so references from fields to structs and
3497   // enums can be resolved.
3498   for (auto it = schema->objects()->begin(); it != schema->objects()->end();
3499        ++it) {
3500     auto struct_def = new StructDef();
3501     struct_def->bytesize = it->bytesize();
3502     struct_def->fixed = it->is_struct();
3503     struct_def->minalign = it->minalign();
3504     if (structs_.Add(it->name()->str(), struct_def)) {
3505       delete struct_def;
3506       return false;
3507     }
3508     auto type = new Type(BASE_TYPE_STRUCT, struct_def, nullptr);
3509     if (types_.Add(it->name()->str(), type)) {
3510       delete type;
3511       return false;
3512     }
3513   }
3514   for (auto it = schema->enums()->begin(); it != schema->enums()->end(); ++it) {
3515     auto enum_def = new EnumDef();
3516     if (enums_.Add(it->name()->str(), enum_def)) {
3517       delete enum_def;
3518       return false;
3519     }
3520     auto type = new Type(BASE_TYPE_UNION, nullptr, enum_def);
3521     if (types_.Add(it->name()->str(), type)) {
3522       delete type;
3523       return false;
3524     }
3525   }
3526 
3527   // Now fields can refer to structs and enums by index.
3528   for (auto it = schema->objects()->begin(); it != schema->objects()->end();
3529        ++it) {
3530     std::string qualified_name = it->name()->str();
3531     auto struct_def = structs_.Lookup(qualified_name);
3532     struct_def->defined_namespace =
3533         GetNamespace(qualified_name, namespaces_, namespaces_index);
3534     if (!struct_def->Deserialize(*this, *it)) { return false; }
3535     if (schema->root_table() == *it) { root_struct_def_ = struct_def; }
3536   }
3537   for (auto it = schema->enums()->begin(); it != schema->enums()->end(); ++it) {
3538     std::string qualified_name = it->name()->str();
3539     auto enum_def = enums_.Lookup(qualified_name);
3540     enum_def->defined_namespace =
3541         GetNamespace(qualified_name, namespaces_, namespaces_index);
3542     if (!enum_def->Deserialize(*this, *it)) { return false; }
3543   }
3544 
3545   if (schema->services()) {
3546     for (auto it = schema->services()->begin(); it != schema->services()->end();
3547          ++it) {
3548       std::string qualified_name = it->name()->str();
3549       auto service_def = new ServiceDef();
3550       service_def->defined_namespace =
3551           GetNamespace(qualified_name, namespaces_, namespaces_index);
3552       if (!service_def->Deserialize(*this, *it) ||
3553           services_.Add(qualified_name, service_def)) {
3554         delete service_def;
3555         return false;
3556       }
3557     }
3558   }
3559 
3560   return true;
3561 }
3562 
ConformTo(const Parser & base)3563 std::string Parser::ConformTo(const Parser &base) {
3564   for (auto sit = structs_.vec.begin(); sit != structs_.vec.end(); ++sit) {
3565     auto &struct_def = **sit;
3566     auto qualified_name =
3567         struct_def.defined_namespace->GetFullyQualifiedName(struct_def.name);
3568     auto struct_def_base = base.LookupStruct(qualified_name);
3569     if (!struct_def_base) continue;
3570     for (auto fit = struct_def.fields.vec.begin();
3571          fit != struct_def.fields.vec.end(); ++fit) {
3572       auto &field = **fit;
3573       auto field_base = struct_def_base->fields.Lookup(field.name);
3574       if (field_base) {
3575         if (field.value.offset != field_base->value.offset)
3576           return "offsets differ for field: " + field.name;
3577         if (field.value.constant != field_base->value.constant)
3578           return "defaults differ for field: " + field.name;
3579         if (!EqualByName(field.value.type, field_base->value.type))
3580           return "types differ for field: " + field.name;
3581       } else {
3582         // Doesn't have to exist, deleting fields is fine.
3583         // But we should check if there is a field that has the same offset
3584         // but is incompatible (in the case of field renaming).
3585         for (auto fbit = struct_def_base->fields.vec.begin();
3586              fbit != struct_def_base->fields.vec.end(); ++fbit) {
3587           field_base = *fbit;
3588           if (field.value.offset == field_base->value.offset) {
3589             if (!EqualByName(field.value.type, field_base->value.type))
3590               return "field renamed to different type: " + field.name;
3591             break;
3592           }
3593         }
3594       }
3595     }
3596   }
3597   for (auto eit = enums_.vec.begin(); eit != enums_.vec.end(); ++eit) {
3598     auto &enum_def = **eit;
3599     auto qualified_name =
3600         enum_def.defined_namespace->GetFullyQualifiedName(enum_def.name);
3601     auto enum_def_base = base.enums_.Lookup(qualified_name);
3602     if (!enum_def_base) continue;
3603     for (auto evit = enum_def.Vals().begin(); evit != enum_def.Vals().end();
3604          ++evit) {
3605       auto &enum_val = **evit;
3606       auto enum_val_base = enum_def_base->Lookup(enum_val.name);
3607       if (enum_val_base) {
3608         if (enum_val != *enum_val_base)
3609           return "values differ for enum: " + enum_val.name;
3610       }
3611     }
3612   }
3613   return "";
3614 }
3615 
3616 }  // namespace flatbuffers
3617