1 /*
2  * Copyright 2014 Google Inc. All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <algorithm>
18 #include <list>
19 
20 #ifdef _WIN32
21 #if !defined(_USE_MATH_DEFINES)
22 #define _USE_MATH_DEFINES  // For M_PI.
23 #endif                     // !defined(_USE_MATH_DEFINES)
24 #endif                     // _WIN32
25 
26 #include <math.h>
27 
28 #include "flatbuffers/idl.h"
29 #include "flatbuffers/util.h"
30 
31 namespace flatbuffers {
32 
// IDL-level names of the base types: one IDLTYPE string per entry of
// FLATBUFFERS_GEN_TYPES, in that macro's order, followed by a nullptr
// terminator.
const char *const kTypeNames[] = {
  #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE) \
    IDLTYPE,
    FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
  #undef FLATBUFFERS_TD
  nullptr
};
40 
// sizeof() of the C type (CTYPE) of every entry of FLATBUFFERS_GEN_TYPES, in
// that macro's order. Unlike kTypeNames there is no terminating element.
const char kTypeSizes[] = {
  #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE) \
      sizeof(CTYPE),
    FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
  #undef FLATBUFFERS_TD
};
47 
// The enums in the reflection schema should match the ones we use internally.
// Only one pair of values (the union entry) is compared here, so this catches
// the two lists drifting apart at that position; it does not compare every
// enumerator individually.
static_assert(BASE_TYPE_UNION ==
              static_cast<BaseType>(reflection::Union),
              "enums don't match");
53 
// Any parsing calls have to be wrapped in this macro, which automates
// handling of recursive error checking a bit. It will check the received
// CheckedError object, and return straight away on error.
// The macro expands to a brace-enclosed block containing a `return ce;`, so it
// can only be used inside a function whose return type accepts a CheckedError.
// `call` is evaluated exactly once.
#define ECHECK(call) { auto ce = (call); if (ce.Check()) return ce; }

// These two functions are called hundreds of times below, so define a short
// form. Both forward to the Next() / Expect() members and therefore propagate
// their errors to the caller through ECHECK.
#define NEXT() ECHECK(Next())
#define EXPECT(tok) ECHECK(Expect(tok))
63 
ValidateUTF8(const std::string & str)64 static bool ValidateUTF8(const std::string &str) {
65   const char *s = &str[0];
66   const char * const sEnd = s + str.length();
67   while (s < sEnd) {
68     if (FromUTF8(&s) < 0) {
69       return false;
70     }
71   }
72   return true;
73 }
74 
Error(const std::string & msg)75 CheckedError Parser::Error(const std::string &msg) {
76   error_ = file_being_parsed_.length() ? AbsolutePath(file_being_parsed_) : "";
77   #ifdef _WIN32
78     error_ += "(" + NumToString(line_) + ")";  // MSVC alike
79   #else
80     if (file_being_parsed_.length()) error_ += ":";
81     error_ += NumToString(line_) + ":0";  // gcc alike
82   #endif
83   error_ += ": error: " + msg;
84   return CheckedError(true);
85 }
86 
NoError()87 inline CheckedError NoError() { return CheckedError(false); }
88 
89 // Ensure that integer values we parse fit inside the declared integer type.
CheckBitsFit(int64_t val,size_t bits)90 CheckedError Parser::CheckBitsFit(int64_t val, size_t bits) {
91   // Left-shifting a 64-bit value by 64 bits or more is undefined
92   // behavior (C99 6.5.7), so check *before* we shift.
93   if (bits < 64) {
94     // Bits we allow to be used.
95     auto mask = static_cast<int64_t>((1ull << bits) - 1);
96     if ((val & ~mask) != 0 &&  // Positive or unsigned.
97         (val |  mask) != -1)   // Negative.
98       return Error("constant does not fit in a " + NumToString(bits) +
99                    "-bit field");
100   }
101   return NoError();
102 }
103 
104 // atot: templated version of atoi/atof: convert a string to an instance of T.
atot(const char * s,Parser & parser,T * val)105 template<typename T> inline CheckedError atot(const char *s, Parser &parser,
106                                               T *val) {
107   int64_t i = StringToInt(s);
108   ECHECK(parser.CheckBitsFit(i, sizeof(T) * 8));
109   *val = (T)i;
110   return NoError();
111 }
atot(const char * s,Parser & parser,uint64_t * val)112 template<> inline CheckedError atot<uint64_t>(const char *s, Parser &parser,
113                                               uint64_t *val) {
114   (void)parser;
115   *val = StringToUInt(s);
116   return NoError();
117 }
atot(const char * s,Parser & parser,bool * val)118 template<> inline CheckedError atot<bool>(const char *s, Parser &parser,
119                                           bool *val) {
120   (void)parser;
121   *val = 0 != atoi(s);
122   return NoError();
123 }
atot(const char * s,Parser & parser,float * val)124 template<> inline CheckedError atot<float>(const char *s, Parser &parser,
125                                            float *val) {
126   (void)parser;
127   *val = static_cast<float>(strtod(s, nullptr));
128   return NoError();
129 }
atot(const char * s,Parser & parser,double * val)130 template<> inline CheckedError atot<double>(const char *s, Parser &parser,
131                                             double *val) {
132   (void)parser;
133   *val = strtod(s, nullptr);
134   return NoError();
135 }
136 
atot(const char * s,Parser & parser,Offset<void> * val)137 template<> inline CheckedError atot<Offset<void>>(const char *s, Parser &parser,
138                                                   Offset<void> *val) {
139   (void)parser;
140   *val = Offset<void>(atoi(s));
141   return NoError();
142 }
143 
GetFullyQualifiedName(const std::string & name,size_t max_components) const144 std::string Namespace::GetFullyQualifiedName(const std::string &name,
145                                              size_t max_components) const {
146   // Early exit if we don't have a defined namespace.
147   if (components.size() == 0 || !max_components) {
148     return name;
149   }
150   std::stringstream stream;
151   for (size_t i = 0; i < std::min(components.size(), max_components);
152        i++) {
153     if (i) {
154       stream << ".";
155     }
156     stream << components[i];
157   }
158   if (name.length()) stream << "." << name;
159   return stream.str();
160 }
161 
162 
163 
// Declare tokens we'll use. Single character tokens are represented by their
// ascii character code (e.g. '{'), all other tokens start at 256.
// Each TD(NAME, VALUE, STRING) entry gives the enumerator suffix, its numeric
// value and the human readable text used in error messages.
#define FLATBUFFERS_GEN_TOKENS(TD) \
  TD(Eof, 256, "end of file") \
  TD(StringConstant, 257, "string constant") \
  TD(IntegerConstant, 258, "integer constant") \
  TD(FloatConstant, 259, "float constant") \
  TD(Identifier, 260, "identifier") \
  TD(Table, 261, "table") \
  TD(Struct, 262, "struct") \
  TD(Enum, 263, "enum") \
  TD(Union, 264, "union") \
  TD(NameSpace, 265, "namespace") \
  TD(RootType, 266, "root_type") \
  TD(FileIdentifier, 267, "file_identifier") \
  TD(FileExtension, 268, "file_extension") \
  TD(Include, 269, "include") \
  TD(Attribute, 270, "attribute") \
  TD(Null, 271, "null") \
  TD(Service, 272, "rpc_service") \
  TD(NativeInclude, 273, "native_include")
#ifdef __GNUC__
__extension__  // Stop GCC complaining about trailing comma with -Wpedantic.
#endif
// The named tokens get the explicit values listed above. They are followed by
// one kToken<ENUM> per FLATBUFFERS_GEN_TYPES entry with no explicit value, so
// those continue numbering from the last named token. TokenToString() builds
// its string table in this same order and indexes it with (token - 256).
enum {
  #define FLATBUFFERS_TOKEN(NAME, VALUE, STRING) kToken ## NAME = VALUE,
    FLATBUFFERS_GEN_TOKENS(FLATBUFFERS_TOKEN)
  #undef FLATBUFFERS_TOKEN
  #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE) \
      kToken ## ENUM,
    FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
  #undef FLATBUFFERS_TD
};
197 
TokenToString(int t)198 static std::string TokenToString(int t) {
199   static const char *tokens[] = {
200     #define FLATBUFFERS_TOKEN(NAME, VALUE, STRING) STRING,
201       FLATBUFFERS_GEN_TOKENS(FLATBUFFERS_TOKEN)
202     #undef FLATBUFFERS_TOKEN
203     #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE) \
204       IDLTYPE,
205       FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
206     #undef FLATBUFFERS_TD
207   };
208   if (t < 256) {  // A single ascii char token.
209     std::string s;
210     s.append(1, static_cast<char>(t));
211     return s;
212   } else {       // Other tokens.
213     return tokens[t - 256];
214   }
215 }
216 
TokenToStringId(int t)217 std::string Parser::TokenToStringId(int t) {
218   return TokenToString(t) + (t == kTokenIdentifier ? ": " + attribute_ : "");
219 }
220 
221 // Parses exactly nibbles worth of hex digits into a number, or error.
ParseHexNum(int nibbles,uint64_t * val)222 CheckedError Parser::ParseHexNum(int nibbles, uint64_t *val) {
223   for (int i = 0; i < nibbles; i++)
224     if (!isxdigit(static_cast<const unsigned char>(cursor_[i])))
225       return Error("escape code must be followed by " + NumToString(nibbles) +
226                    " hex digits");
227   std::string target(cursor_, cursor_ + nibbles);
228   *val = StringToUInt(target.c_str(), nullptr, 16);
229   cursor_ += nibbles;
230   return NoError();
231 }
232 
SkipByteOrderMark()233 CheckedError Parser::SkipByteOrderMark() {
234   if (static_cast<unsigned char>(*cursor_) != 0xef) return NoError();
235   cursor_++;
236   if (static_cast<unsigned char>(*cursor_) != 0xbb) return Error("invalid utf-8 byte order mark");
237   cursor_++;
238   if (static_cast<unsigned char>(*cursor_) != 0xbf) return Error("invalid utf-8 byte order mark");
239   cursor_++;
240   return NoError();
241 }
242 
// Tells whether `c` may begin an identifier: an underscore, or any character
// isalpha() accepts. The value is passed to isalpha() as unsigned char.
bool IsIdentifierStart(char c) {
  if (c == '_') return true;
  return isalpha(static_cast<unsigned char>(c)) != 0;
}
246 
// The lexer: scans forward from cursor_ and produces the next token.
//
// Whitespace and comments are skipped. On return token_ holds the token kind
// (the character itself for single-character tokens, a kToken* value
// otherwise) and attribute_ holds the token text where there is one (string
// contents with escapes resolved, identifier spelling, numeric text).
// doc_comment_ collects the `///` comment lines seen while skipping.
// Returns an error for malformed input (bad string escapes, unterminated
// block comments, unexpected characters, ...).
CheckedError Parser::Next() {
  doc_comment_.clear();
  // Whether a '\n' was consumed by this call before the current position.
  bool seen_newline = false;
  attribute_.clear();
  for (;;) {
    char c = *cursor_++;
    token_ = c;
    switch (c) {
      // Step back onto the terminator so that cursor_ never moves past it.
      case '\0': cursor_--; token_ = kTokenEof; return NoError();
      case ' ': case '\r': case '\t': break;
      case '\n': line_++; seen_newline = true; break;
      // Single-character tokens: token_ already holds the character.
      case '{': case '}': case '(': case ')': case '[': case ']':
      case ',': case ':': case ';': case '=': return NoError();
      case '.':
        // A lone '.' is a token of its own; '.' directly followed by a digit
        // is rejected.
        if(!isdigit(static_cast<const unsigned char>(*cursor_))) return NoError();
        return Error("floating point constant can\'t start with \".\"");
      case '\"':
      case '\'': {
        // String constant. `c` is the opening quote character and the scan
        // runs until the same character is met again.
        // -1 means "no pending high surrogate"; otherwise the value of a
        // \uXXXX high surrogate that still needs its low surrogate.
        int unicode_high_surrogate = -1;

        while (*cursor_ != c) {
          // Rejects values 0..31, which includes the terminating '\0', so an
          // unterminated string ends here. Values that are negative as `char`
          // pass this test.
          if (*cursor_ < ' ' && *cursor_ >= 0)
            return Error("illegal character in string constant");
          if (*cursor_ == '\\') {
            cursor_++;
            // A pending high surrogate may only be followed by another \u.
            if (unicode_high_surrogate != -1 &&
                *cursor_ != 'u') {
              return Error(
                "illegal Unicode sequence (unpaired high surrogate)");
            }
            switch (*cursor_) {
              case 'n':  attribute_ += '\n'; cursor_++; break;
              case 't':  attribute_ += '\t'; cursor_++; break;
              case 'r':  attribute_ += '\r'; cursor_++; break;
              case 'b':  attribute_ += '\b'; cursor_++; break;
              case 'f':  attribute_ += '\f'; cursor_++; break;
              case '\"': attribute_ += '\"'; cursor_++; break;
              case '\'': attribute_ += '\''; cursor_++; break;
              case '\\': attribute_ += '\\'; cursor_++; break;
              case '/':  attribute_ += '/';  cursor_++; break;
              case 'x': {  // Not in the JSON standard
                // \xHH: two hex digits, appended as a single raw byte.
                cursor_++;
                uint64_t val;
                ECHECK(ParseHexNum(2, &val));
                attribute_ += static_cast<char>(val);
                break;
              }
              case 'u': {
                // \uXXXX: four hex digits giving a UTF-16 code unit.
                cursor_++;
                uint64_t val;
                ECHECK(ParseHexNum(4, &val));
                if (val >= 0xD800 && val <= 0xDBFF) {
                  // High surrogate: remember it and wait for the low half.
                  if (unicode_high_surrogate != -1) {
                    return Error(
                      "illegal Unicode sequence (multiple high surrogates)");
                  } else {
                    unicode_high_surrogate = static_cast<int>(val);
                  }
                } else if (val >= 0xDC00 && val <= 0xDFFF) {
                  // Low surrogate: combine with the pending high surrogate
                  // into one code point and emit it as UTF-8.
                  if (unicode_high_surrogate == -1) {
                    return Error(
                      "illegal Unicode sequence (unpaired low surrogate)");
                  } else {
                    int code_point = 0x10000 +
                      ((unicode_high_surrogate & 0x03FF) << 10) +
                      (val & 0x03FF);
                    ToUTF8(code_point, &attribute_);
                    unicode_high_surrogate = -1;
                  }
                } else {
                  // Not a surrogate: emit directly as UTF-8.
                  if (unicode_high_surrogate != -1) {
                    return Error(
                      "illegal Unicode sequence (unpaired high surrogate)");
                  }
                  ToUTF8(static_cast<int>(val), &attribute_);
                }
                break;
              }
              default: return Error("unknown escape code in string constant");
            }
          } else { // printable chars + UTF-8 bytes
            if (unicode_high_surrogate != -1) {
              return Error(
                "illegal Unicode sequence (unpaired high surrogate)");
            }
            attribute_ += *cursor_++;
          }
        }
        // The closing quote was reached with a high surrogate still pending.
        if (unicode_high_surrogate != -1) {
          return Error(
            "illegal Unicode sequence (unpaired high surrogate)");
        }
        // Step over the closing quote.
        cursor_++;
        if (!opts.allow_non_utf8 && !ValidateUTF8(attribute_)) {
          return Error("illegal UTF-8 sequence");
        }
        token_ = kTokenStringConstant;
        return NoError();
      }
      case '/':
        if (*cursor_ == '/') {
          // Line comment: runs up to (not including) the end of line or the
          // end of input.
          const char *start = ++cursor_;
          while (*cursor_ && *cursor_ != '\n' && *cursor_ != '\r') cursor_++;
          if (*start == '/') {  // documentation comment
            if (cursor_ != source_ && !seen_newline)
              return Error(
                    "a documentation comment should be on a line on its own");
            // Store the text following the third '/'.
            doc_comment_.push_back(std::string(start + 1, cursor_));
          }
          break;
        } else if (*cursor_ == '*') {
          // Block comment: skip to the closing "*/", still counting lines.
          cursor_++;
          // TODO: make nested.
          while (*cursor_ != '*' || cursor_[1] != '/') {
            if (*cursor_ == '\n') line_++;
            if (!*cursor_) return Error("end of file in comment");
            cursor_++;
          }
          cursor_ += 2;
          break;
        }
        // A '/' that starts neither kind of comment drops into the default
        // case, where it ends up reported as an illegal character.
        // fall thru
      default:
        if (IsIdentifierStart(c)) {
          // Collect all chars of an identifier:
          const char *start = cursor_ - 1;
          while (isalnum(static_cast<unsigned char>(*cursor_)) ||
                 *cursor_ == '_')
            cursor_++;
          attribute_.append(start, cursor_);
          // First, see if it is a type keyword from the table of types:
          #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, \
            PTYPE) \
            if (attribute_ == IDLTYPE) { \
              token_ = kToken ## ENUM; \
              return NoError(); \
            }
            FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
          #undef FLATBUFFERS_TD
          // If it's a boolean constant keyword, turn those into integers,
          // which simplifies our logic downstream.
          if (attribute_ == "true" || attribute_ == "false") {
            attribute_ = NumToString(attribute_ == "true");
            token_ = kTokenIntegerConstant;
            return NoError();
          }
          // Check for declaration keywords:
          if (attribute_ == "table") {
            token_ = kTokenTable;
            return NoError();
          }
          if (attribute_ == "struct") {
            token_ = kTokenStruct;
            return NoError();
          }
          if (attribute_ == "enum") {
            token_ = kTokenEnum;
            return NoError();
          }
          if (attribute_ == "union") {
            token_ = kTokenUnion;
            return NoError();
          }
          if (attribute_ == "namespace") {
            token_ = kTokenNameSpace;
            return NoError();
          }
          if (attribute_ == "root_type") {
            token_ = kTokenRootType;
            return NoError();
          }
          if (attribute_ == "include") {
            token_ = kTokenInclude;
            return NoError();
          }
          if (attribute_ == "attribute") {
            token_ = kTokenAttribute;
            return NoError();
          }
          if (attribute_ == "file_identifier") {
            token_ = kTokenFileIdentifier;
            return NoError();
          }
          if (attribute_ == "file_extension") {
            token_ = kTokenFileExtension;
            return NoError();
          }
          if (attribute_ == "null") {
            token_ = kTokenNull;
            return NoError();
          }
          if (attribute_ == "rpc_service") {
            token_ = kTokenService;
            return NoError();
          }
          if (attribute_ == "native_include") {
            token_ = kTokenNativeInclude;
            return NoError();
          }
          // If not, it is a user-defined identifier:
          token_ = kTokenIdentifier;
          return NoError();
        } else if (isdigit(static_cast<unsigned char>(c)) || c == '-') {
          // Numeric constant. `start` points at the first character of it.
          const char *start = cursor_ - 1;
          // "-0x..." : move the sign into attribute_ and carry on as if the
          // constant started at the '0', so the hex branch below handles it.
          if (c == '-' && *cursor_ == '0' &&
              (cursor_[1] == 'x' || cursor_[1] == 'X')) {
            ++start;
            ++cursor_;
            attribute_.append(&c, &c + 1);
            c = '0';
          }
          // Hex constant: the digits after "0x" are appended to attribute_
          // (after any sign stored above), converted, and attribute_ is
          // replaced by the decimal text of the result as a signed 64-bit
          // value.
          if (c == '0' && (*cursor_ == 'x' || *cursor_ == 'X')) {
              cursor_++;
              while (isxdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
              attribute_.append(start + 2, cursor_);
              attribute_ = NumToString(static_cast<int64_t>(
                             StringToUInt(attribute_.c_str(), nullptr, 16)));
              token_ = kTokenIntegerConstant;
              return NoError();
          }
          // Decimal constant: integer part first, then an optional fraction
          // and/or exponent, either of which makes it a float constant.
          while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
          if (*cursor_ == '.' || *cursor_ == 'e' || *cursor_ == 'E') {
            if (*cursor_ == '.') {
              cursor_++;
              while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
            }
            // See if this float has a scientific notation suffix. Both JSON
            // and C++ (through strtod() we use) have the same format:
            if (*cursor_ == 'e' || *cursor_ == 'E') {
              cursor_++;
              if (*cursor_ == '+' || *cursor_ == '-') cursor_++;
              while (isdigit(static_cast<unsigned char>(*cursor_))) cursor_++;
            }
            token_ = kTokenFloatConstant;
          } else {
            token_ = kTokenIntegerConstant;
          }
          // The token text is the raw source text of the constant.
          attribute_.append(start, cursor_);
          return NoError();
        }
        // Anything else is an error. Characters outside ' '..'~' are
        // described through NumToString() rather than shown as-is.
        std::string ch;
        ch = c;
        if (c < ' ' || c > '~') ch = "code: " + NumToString(c);
        return Error("illegal character: " + ch);
    }
  }
}
494 
495 // Check if a given token is next.
Is(int t)496 bool Parser::Is(int t) {
497   return t == token_;
498 }
499 
500 // Expect a given token to be next, consume it, or error if not present.
Expect(int t)501 CheckedError Parser::Expect(int t) {
502   if (t != token_) {
503     return Error("expecting: " + TokenToString(t) + " instead got: " +
504                  TokenToStringId(token_));
505   }
506   NEXT();
507   return NoError();
508 }
509 
ParseNamespacing(std::string * id,std::string * last)510 CheckedError Parser::ParseNamespacing(std::string *id, std::string *last) {
511   while (Is('.')) {
512     NEXT();
513     *id += ".";
514     *id += attribute_;
515     if (last) *last = attribute_;
516     EXPECT(kTokenIdentifier);
517   }
518   return NoError();
519 }
520 
LookupEnum(const std::string & id)521 EnumDef *Parser::LookupEnum(const std::string &id) {
522   // Search thru parent namespaces.
523   for (int components = static_cast<int>(namespaces_.back()->components.size());
524        components >= 0; components--) {
525     auto ed = enums_.Lookup(
526                 namespaces_.back()->GetFullyQualifiedName(id, components));
527     if (ed) return ed;
528   }
529   return nullptr;
530 }
531 
ParseTypeIdent(Type & type)532 CheckedError Parser::ParseTypeIdent(Type &type) {
533   std::string id = attribute_;
534   EXPECT(kTokenIdentifier);
535   ECHECK(ParseNamespacing(&id, nullptr));
536   auto enum_def = LookupEnum(id);
537   if (enum_def) {
538     type = enum_def->underlying_type;
539     if (enum_def->is_union) type.base_type = BASE_TYPE_UNION;
540   } else {
541     type.base_type = BASE_TYPE_STRUCT;
542     type.struct_def = LookupCreateStruct(id);
543   }
544   return NoError();
545 }
546 
547 // Parse any IDL type.
ParseType(Type & type)548 CheckedError Parser::ParseType(Type &type) {
549   if (token_ >= kTokenBOOL && token_ <= kTokenSTRING) {
550     type.base_type = static_cast<BaseType>(token_ - kTokenNONE);
551     NEXT();
552   } else {
553     if (token_ == kTokenIdentifier) {
554       ECHECK(ParseTypeIdent(type));
555     } else if (token_ == '[') {
556       NEXT();
557       Type subtype;
558       ECHECK(ParseType(subtype));
559       if (subtype.base_type == BASE_TYPE_VECTOR) {
560         // We could support this, but it will complicate things, and it's
561         // easier to work around with a struct around the inner vector.
562         return Error(
563               "nested vector types not supported (wrap in table first).");
564       }
565       type = Type(BASE_TYPE_VECTOR, subtype.struct_def, subtype.enum_def);
566       type.element = subtype.base_type;
567       EXPECT(']');
568     } else {
569       return Error("illegal type syntax");
570     }
571   }
572   return NoError();
573 }
574 
AddField(StructDef & struct_def,const std::string & name,const Type & type,FieldDef ** dest)575 CheckedError Parser::AddField(StructDef &struct_def, const std::string &name,
576                               const Type &type, FieldDef **dest) {
577   auto &field = *new FieldDef();
578   field.value.offset =
579     FieldIndexToOffset(static_cast<voffset_t>(struct_def.fields.vec.size()));
580   field.name = name;
581   field.file = struct_def.file;
582   field.value.type = type;
583   if (struct_def.fixed) {  // statically compute the field offset
584     auto size = InlineSize(type);
585     auto alignment = InlineAlignment(type);
586     // structs_ need to have a predictable format, so we need to align to
587     // the largest scalar
588     struct_def.minalign = std::max(struct_def.minalign, alignment);
589     struct_def.PadLastField(alignment);
590     field.value.offset = static_cast<voffset_t>(struct_def.bytesize);
591     struct_def.bytesize += size;
592   }
593   if (struct_def.fields.Add(name, &field))
594     return Error("field already exists: " + name);
595   *dest = &field;
596   return NoError();
597 }
598 
ParseField(StructDef & struct_def)599 CheckedError Parser::ParseField(StructDef &struct_def) {
600   std::string name = attribute_;
601   std::vector<std::string> dc = doc_comment_;
602   EXPECT(kTokenIdentifier);
603   EXPECT(':');
604   Type type;
605   ECHECK(ParseType(type));
606 
607   if (struct_def.fixed && !IsScalar(type.base_type) && !IsStruct(type))
608     return Error("structs_ may contain only scalar or struct fields");
609 
610   FieldDef *typefield = nullptr;
611   if (type.base_type == BASE_TYPE_UNION) {
612     // For union fields, add a second auto-generated field to hold the type,
613     // with a special suffix.
614     ECHECK(AddField(struct_def, name + UnionTypeFieldSuffix(),
615                     type.enum_def->underlying_type, &typefield));
616   } else if (type.base_type == BASE_TYPE_VECTOR &&
617              type.element == BASE_TYPE_UNION) {
618     // Only cpp supports the union vector feature so far.
619     if (opts.lang_to_generate != IDLOptions::kCpp) {
620       return Error("Vectors of unions are not yet supported in all "
621                    "the specified programming languages.");
622     }
623     // For vector of union fields, add a second auto-generated vector field to
624     // hold the types, with a special suffix.
625     Type union_vector(BASE_TYPE_VECTOR, nullptr, type.enum_def);
626     union_vector.element = BASE_TYPE_UTYPE;
627     ECHECK(AddField(struct_def, name + UnionTypeFieldSuffix(),
628                     union_vector, &typefield));
629   }
630 
631   FieldDef *field;
632   ECHECK(AddField(struct_def, name, type, &field));
633 
634   if (token_ == '=') {
635     NEXT();
636     if (!IsScalar(type.base_type))
637       return Error("default values currently only supported for scalars");
638     ECHECK(ParseSingleValue(field->value));
639   }
640   if (IsFloat(field->value.type.base_type)) {
641     if (!strpbrk(field->value.constant.c_str(), ".eE"))
642       field->value.constant += ".0";
643   }
644 
645   if (type.enum_def &&
646       IsScalar(type.base_type) &&
647       !struct_def.fixed &&
648       !type.enum_def->attributes.Lookup("bit_flags") &&
649       !type.enum_def->ReverseLookup(static_cast<int>(
650                          StringToInt(field->value.constant.c_str()))))
651     return Error("enum " + type.enum_def->name +
652           " does not have a declaration for this field\'s default of " +
653           field->value.constant);
654 
655   field->doc_comment = dc;
656   ECHECK(ParseMetaData(&field->attributes));
657   field->deprecated = field->attributes.Lookup("deprecated") != nullptr;
658   auto hash_name = field->attributes.Lookup("hash");
659   if (hash_name) {
660     switch (type.base_type) {
661       case BASE_TYPE_INT:
662       case BASE_TYPE_UINT: {
663         if (FindHashFunction32(hash_name->constant.c_str()) == nullptr)
664           return Error("Unknown hashing algorithm for 32 bit types: " +
665                 hash_name->constant);
666         break;
667       }
668       case BASE_TYPE_LONG:
669       case BASE_TYPE_ULONG: {
670         if (FindHashFunction64(hash_name->constant.c_str()) == nullptr)
671           return Error("Unknown hashing algorithm for 64 bit types: " +
672                 hash_name->constant);
673         break;
674       }
675       default:
676         return Error(
677               "only int, uint, long and ulong data types support hashing.");
678     }
679   }
680   auto cpp_type = field->attributes.Lookup("cpp_type");
681   if (cpp_type) {
682     if (!hash_name)
683       return Error("cpp_type can only be used with a hashed field");
684   }
685   if (field->deprecated && struct_def.fixed)
686     return Error("can't deprecate fields in a struct");
687   field->required = field->attributes.Lookup("required") != nullptr;
688   if (field->required && (struct_def.fixed ||
689                          IsScalar(field->value.type.base_type)))
690     return Error("only non-scalar fields in tables may be 'required'");
691   field->key = field->attributes.Lookup("key") != nullptr;
692   if (field->key) {
693     if (struct_def.has_key)
694       return Error("only one field may be set as 'key'");
695     struct_def.has_key = true;
696     if (!IsScalar(field->value.type.base_type)) {
697       field->required = true;
698       if (field->value.type.base_type != BASE_TYPE_STRING)
699         return Error("'key' field must be string or scalar type");
700     }
701   }
702 
703   field->native_inline = field->attributes.Lookup("native_inline") != nullptr;
704   if (field->native_inline && !IsStruct(field->value.type))
705     return Error("native_inline can only be defined on structs'");
706 
707   auto nested = field->attributes.Lookup("nested_flatbuffer");
708   if (nested) {
709     if (nested->type.base_type != BASE_TYPE_STRING)
710       return Error(
711             "nested_flatbuffer attribute must be a string (the root type)");
712     if (field->value.type.base_type != BASE_TYPE_VECTOR ||
713         field->value.type.element != BASE_TYPE_UCHAR)
714       return Error(
715             "nested_flatbuffer attribute may only apply to a vector of ubyte");
716     // This will cause an error if the root type of the nested flatbuffer
717     // wasn't defined elsewhere.
718     LookupCreateStruct(nested->constant);
719   }
720 
721   if (typefield) {
722     // If this field is a union, and it has a manually assigned id,
723     // the automatically added type field should have an id as well (of N - 1).
724     auto attr = field->attributes.Lookup("id");
725     if (attr) {
726       auto id = atoi(attr->constant.c_str());
727       auto val = new Value();
728       val->type = attr->type;
729       val->constant = NumToString(id - 1);
730       typefield->attributes.Add("id", val);
731     }
732   }
733 
734   EXPECT(';');
735   return NoError();
736 }
737 
// Parses a value of the type already stored in `val.type` and records the
// result in `val.constant`.
//
// `field` is the field the value belongs to; it must be non-null for union
// values and is consulted for the "hash" attribute on int/uint/long/ulong
// values. `parent_fieldn` is how many entries at the top of field_stack_ are
// searched for an already-parsed union type field. `parent_struct_def` is the
// definition holding `field`; it is needed (asserted) only when a union's type
// field has not been seen yet.
CheckedError Parser::ParseAnyValue(Value &val, FieldDef *field,
                                   size_t parent_fieldn,
                                   const StructDef *parent_struct_def) {
  switch (val.type.base_type) {
    case BASE_TYPE_UNION: {
      assert(field);
      // Text of the union's type id; stays empty until it has been found.
      std::string constant;
      // Find corresponding type field we may have already parsed.
      for (auto elem = field_stack_.rbegin();
           elem != field_stack_.rbegin() + parent_fieldn; ++elem) {
        auto &type = elem->second->value.type;
        if (type.base_type == BASE_TYPE_UTYPE &&
            type.enum_def == val.type.enum_def) {
          constant = elem->first.constant;
          break;
        }
      }
      if (constant.empty()) {
        // We haven't seen the type field yet. Sadly a lot of JSON writers
        // output these in alphabetical order, meaning it comes after this
        // value. So we scan past the value to find it, then come back here.
        auto type_name = field->name + UnionTypeFieldSuffix();
        assert(parent_struct_def);
        auto type_field = parent_struct_def->fields.Lookup(type_name);
        assert(type_field);  // Guaranteed by ParseField().
        // Remember where we are in the source file, so we can come back here.
        auto backup = *static_cast<ParserState *>(this);
        ECHECK(SkipAnyJsonValue());  // The table.
        EXPECT(',');
        // The key directly after the union value must be the type field; it
        // may be written either as a string constant or as a bare identifier.
        auto next_name = attribute_;
        if (Is(kTokenStringConstant)) {
          NEXT();
        } else {
          EXPECT(kTokenIdentifier);
        }
        if (next_name != type_name)
          return Error("missing type field after this union value: " +
                       type_name);
        EXPECT(':');
        // Parse the type value into a copy of the type field's value.
        Value type_val = type_field->value;
        ECHECK(ParseAnyValue(type_val, type_field, 0, nullptr));
        constant = type_val.constant;
        // Got the information we needed, now rewind:
        *static_cast<ParserState *>(this) = backup;
      }
      // Convert the type id and parse the value as the table belonging to
      // that union member.
      uint8_t enum_idx;
      ECHECK(atot(constant.c_str(), *this, &enum_idx));
      auto enum_val = val.type.enum_def->ReverseLookup(enum_idx);
      if (!enum_val) return Error("illegal type id for: " + field->name);
      ECHECK(ParseTable(*enum_val->struct_def, &val.constant, nullptr));
      break;
    }
    case BASE_TYPE_STRUCT:
      ECHECK(ParseTable(*val.type.struct_def, &val.constant, nullptr));
      break;
    case BASE_TYPE_STRING: {
      // Copy the text before EXPECT() advances to the next token.
      auto s = attribute_;
      EXPECT(kTokenStringConstant);
      // The string is written to the builder; the constant is its offset.
      val.constant = NumToString(builder_.CreateString(s).o);
      break;
    }
    case BASE_TYPE_VECTOR: {
      EXPECT('[');
      uoffset_t off;
      ECHECK(ParseVector(val.type.VectorType(), &off));
      val.constant = NumToString(off);
      break;
    }
    case BASE_TYPE_INT:
    case BASE_TYPE_UINT:
    case BASE_TYPE_LONG:
    case BASE_TYPE_ULONG: {
      // A field carrying the "hash" attribute may be given as an identifier
      // or string, which is handed to ParseHash() instead of being parsed as
      // a number.
      if (field && field->attributes.Lookup("hash") &&
          (token_ == kTokenIdentifier || token_ == kTokenStringConstant)) {
        ECHECK(ParseHash(val, field));
      } else {
        ECHECK(ParseSingleValue(val));
      }
      break;
    }
    default:
      ECHECK(ParseSingleValue(val));
      break;
  }
  return NoError();
}
824 
SerializeStruct(const StructDef & struct_def,const Value & val)825 void Parser::SerializeStruct(const StructDef &struct_def, const Value &val) {
826   assert(val.constant.length() == struct_def.bytesize);
827   builder_.Align(struct_def.minalign);
828   builder_.PushBytes(reinterpret_cast<const uint8_t *>(val.constant.c_str()),
829                      struct_def.bytesize);
830   builder_.AddStructOffset(val.offset, builder_.GetSize());
831 }
832 
ParseTable(const StructDef & struct_def,std::string * value,uoffset_t * ovalue)833 CheckedError Parser::ParseTable(const StructDef &struct_def, std::string *value,
834                                 uoffset_t *ovalue) {
835   EXPECT('{');
836   size_t fieldn = 0;
837   for (;;) {
838     if ((!opts.strict_json || !fieldn) && Is('}')) { NEXT(); break; }
839     std::string name = attribute_;
840     if (Is(kTokenStringConstant)) {
841       NEXT();
842     } else {
843       EXPECT(opts.strict_json ? kTokenStringConstant : kTokenIdentifier);
844     }
845     auto field = struct_def.fields.Lookup(name);
846     if (!field) {
847       if (!opts.skip_unexpected_fields_in_json) {
848         return Error("unknown field: " + name);
849       } else {
850         EXPECT(':');
851         ECHECK(SkipAnyJsonValue());
852       }
853     } else {
854       EXPECT(':');
855       if (Is(kTokenNull)) {
856         NEXT(); // Ignore this field.
857       } else {
858         Value val = field->value;
859         ECHECK(ParseAnyValue(val, field, fieldn, &struct_def));
860         // Hardcoded insertion-sort with error-check.
861         // If fields are specified in order, then this loop exits immediately.
862         auto elem = field_stack_.rbegin();
863         for (; elem != field_stack_.rbegin() + fieldn; ++elem) {
864           auto existing_field = elem->second;
865           if (existing_field == field)
866             return Error("field set more than once: " + field->name);
867           if (existing_field->value.offset < field->value.offset) break;
868         }
869         // Note: elem points to before the insertion point, thus .base() points
870         // to the correct spot.
871         field_stack_.insert(elem.base(), std::make_pair(val, field));
872         fieldn++;
873       }
874     }
875     if (Is('}')) { NEXT(); break; }
876     EXPECT(',');
877   }
878 
879   if (struct_def.fixed && fieldn != struct_def.fields.vec.size())
880     return Error("struct: wrong number of initializers: " + struct_def.name);
881 
882   auto start = struct_def.fixed
883                  ? builder_.StartStruct(struct_def.minalign)
884                  : builder_.StartTable();
885 
886   for (size_t size = struct_def.sortbysize ? sizeof(largest_scalar_t) : 1;
887        size;
888        size /= 2) {
889     // Go through elements in reverse, since we're building the data backwards.
890     for (auto it = field_stack_.rbegin();
891              it != field_stack_.rbegin() + fieldn; ++it) {
892       auto &field_value = it->first;
893       auto field = it->second;
894       if (!struct_def.sortbysize ||
895           size == SizeOf(field_value.type.base_type)) {
896         switch (field_value.type.base_type) {
897           #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, \
898             PTYPE) \
899             case BASE_TYPE_ ## ENUM: \
900               builder_.Pad(field->padding); \
901               if (struct_def.fixed) { \
902                 CTYPE val; \
903                 ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
904                 builder_.PushElement(val); \
905               } else { \
906                 CTYPE val, valdef; \
907                 ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
908                 ECHECK(atot(field->value.constant.c_str(), *this, &valdef)); \
909                 builder_.AddElement(field_value.offset, val, valdef); \
910               } \
911               break;
912             FLATBUFFERS_GEN_TYPES_SCALAR(FLATBUFFERS_TD);
913           #undef FLATBUFFERS_TD
914           #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, \
915             PTYPE) \
916             case BASE_TYPE_ ## ENUM: \
917               builder_.Pad(field->padding); \
918               if (IsStruct(field->value.type)) { \
919                 SerializeStruct(*field->value.type.struct_def, field_value); \
920               } else { \
921                 CTYPE val; \
922                 ECHECK(atot(field_value.constant.c_str(), *this, &val)); \
923                 builder_.AddOffset(field_value.offset, val); \
924               } \
925               break;
926             FLATBUFFERS_GEN_TYPES_POINTER(FLATBUFFERS_TD);
927           #undef FLATBUFFERS_TD
928         }
929       }
930     }
931   }
932   for (size_t i = 0; i < fieldn; i++) field_stack_.pop_back();
933 
934   if (struct_def.fixed) {
935     builder_.ClearOffsets();
936     builder_.EndStruct();
937     assert(value);
938     // Temporarily store this struct in the value string, since it is to
939     // be serialized in-place elsewhere.
940     value->assign(
941           reinterpret_cast<const char *>(builder_.GetCurrentBufferPointer()),
942           struct_def.bytesize);
943     builder_.PopBytes(struct_def.bytesize);
944     assert(!ovalue);
945   } else {
946     auto val = builder_.EndTable(start,
947                           static_cast<voffset_t>(struct_def.fields.vec.size()));
948     if (ovalue) *ovalue = val;
949     if (value) *value = NumToString(val);
950   }
951   return NoError();
952 }
953 
ParseVector(const Type & type,uoffset_t * ovalue)954 CheckedError Parser::ParseVector(const Type &type, uoffset_t *ovalue) {
955   int count = 0;
956   for (;;) {
957     if ((!opts.strict_json || !count) && Is(']')) { NEXT(); break; }
958     Value val;
959     val.type = type;
960     ECHECK(ParseAnyValue(val, nullptr, 0, nullptr));
961     field_stack_.push_back(std::make_pair(val, nullptr));
962     count++;
963     if (Is(']')) { NEXT(); break; }
964     EXPECT(',');
965   }
966 
967   builder_.StartVector(count * InlineSize(type) / InlineAlignment(type),
968                        InlineAlignment(type));
969   for (int i = 0; i < count; i++) {
970     // start at the back, since we're building the data backwards.
971     auto &val = field_stack_.back().first;
972     switch (val.type.base_type) {
973       #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, PTYPE) \
974         case BASE_TYPE_ ## ENUM: \
975           if (IsStruct(val.type)) SerializeStruct(*val.type.struct_def, val); \
976           else { \
977              CTYPE elem; \
978              ECHECK(atot(val.constant.c_str(), *this, &elem)); \
979              builder_.PushElement(elem); \
980           } \
981           break;
982         FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
983       #undef FLATBUFFERS_TD
984     }
985     field_stack_.pop_back();
986   }
987 
988   builder_.ClearOffsets();
989   *ovalue = builder_.EndVector(count);
990   return NoError();
991 }
992 
ParseMetaData(SymbolTable<Value> * attributes)993 CheckedError Parser::ParseMetaData(SymbolTable<Value> *attributes) {
994   if (Is('(')) {
995     NEXT();
996     for (;;) {
997       auto name = attribute_;
998       EXPECT(kTokenIdentifier);
999       if (known_attributes_.find(name) == known_attributes_.end())
1000         return Error("user define attributes must be declared before use: " +
1001                      name);
1002       auto e = new Value();
1003       attributes->Add(name, e);
1004       if (Is(':')) {
1005         NEXT();
1006         ECHECK(ParseSingleValue(*e));
1007       }
1008       if (Is(')')) { NEXT(); break; }
1009       EXPECT(',');
1010     }
1011   }
1012   return NoError();
1013 }
1014 
TryTypedValue(int dtoken,bool check,Value & e,BaseType req,bool * destmatch)1015 CheckedError Parser::TryTypedValue(int dtoken, bool check, Value &e,
1016                                    BaseType req, bool *destmatch) {
1017   bool match = dtoken == token_;
1018   if (match) {
1019     *destmatch = true;
1020     e.constant = attribute_;
1021     if (!check) {
1022       if (e.type.base_type == BASE_TYPE_NONE) {
1023         e.type.base_type = req;
1024       } else {
1025         return Error(std::string("type mismatch: expecting: ") +
1026                      kTypeNames[e.type.base_type] +
1027                      ", found: " +
1028                      kTypeNames[req]);
1029       }
1030     }
1031     NEXT();
1032   }
1033   return NoError();
1034 }
1035 
ParseEnumFromString(Type & type,int64_t * result)1036 CheckedError Parser::ParseEnumFromString(Type &type, int64_t *result) {
1037   *result = 0;
1038   // Parse one or more enum identifiers, separated by spaces.
1039   const char *next = attribute_.c_str();
1040   do {
1041     const char *divider = strchr(next, ' ');
1042     std::string word;
1043     if (divider) {
1044       word = std::string(next, divider);
1045       next = divider + strspn(divider, " ");
1046     } else {
1047       word = next;
1048       next += word.length();
1049     }
1050     if (type.enum_def) {  // The field has an enum type
1051       auto enum_val = type.enum_def->vals.Lookup(word);
1052       if (!enum_val)
1053         return Error("unknown enum value: " + word +
1054               ", for enum: " + type.enum_def->name);
1055       *result |= enum_val->value;
1056     } else {  // No enum type, probably integral field.
1057       if (!IsInteger(type.base_type))
1058         return Error("not a valid value for this field: " + word);
1059       // TODO: could check if its a valid number constant here.
1060       const char *dot = strrchr(word.c_str(), '.');
1061       if (!dot)
1062         return Error("enum values need to be qualified by an enum type");
1063       std::string enum_def_str(word.c_str(), dot);
1064       std::string enum_val_str(dot + 1, word.c_str() + word.length());
1065       auto enum_def = LookupEnum(enum_def_str);
1066       if (!enum_def) return Error("unknown enum: " + enum_def_str);
1067       auto enum_val = enum_def->vals.Lookup(enum_val_str);
1068       if (!enum_val) return Error("unknown enum value: " + enum_val_str);
1069       *result |= enum_val->value;
1070     }
1071   } while(*next);
1072   return NoError();
1073 }
1074 
1075 
ParseHash(Value & e,FieldDef * field)1076 CheckedError Parser::ParseHash(Value &e, FieldDef* field) {
1077   assert(field);
1078   Value *hash_name = field->attributes.Lookup("hash");
1079   switch (e.type.base_type) {
1080     case BASE_TYPE_INT:
1081     case BASE_TYPE_UINT: {
1082       auto hash = FindHashFunction32(hash_name->constant.c_str());
1083       uint32_t hashed_value = hash(attribute_.c_str());
1084       e.constant = NumToString(hashed_value);
1085       break;
1086     }
1087     case BASE_TYPE_LONG:
1088     case BASE_TYPE_ULONG: {
1089       auto hash = FindHashFunction64(hash_name->constant.c_str());
1090       uint64_t hashed_value = hash(attribute_.c_str());
1091       e.constant = NumToString(hashed_value);
1092       break;
1093     }
1094     default:
1095       assert(0);
1096   }
1097   NEXT();
1098   return NoError();
1099 }
1100 
ParseSingleValue(Value & e)1101 CheckedError Parser::ParseSingleValue(Value &e) {
1102   // First see if this could be a conversion function:
1103   if (token_ == kTokenIdentifier && *cursor_ == '(') {
1104     auto functionname = attribute_;
1105     NEXT();
1106     EXPECT('(');
1107     ECHECK(ParseSingleValue(e));
1108     EXPECT(')');
1109     #define FLATBUFFERS_FN_DOUBLE(name, op) \
1110       if (functionname == name) { \
1111         auto x = strtod(e.constant.c_str(), nullptr); \
1112         e.constant = NumToString(op); \
1113       }
1114     FLATBUFFERS_FN_DOUBLE("deg", x / M_PI * 180);
1115     FLATBUFFERS_FN_DOUBLE("rad", x * M_PI / 180);
1116     FLATBUFFERS_FN_DOUBLE("sin", sin(x));
1117     FLATBUFFERS_FN_DOUBLE("cos", cos(x));
1118     FLATBUFFERS_FN_DOUBLE("tan", tan(x));
1119     FLATBUFFERS_FN_DOUBLE("asin", asin(x));
1120     FLATBUFFERS_FN_DOUBLE("acos", acos(x));
1121     FLATBUFFERS_FN_DOUBLE("atan", atan(x));
1122     // TODO(wvo): add more useful conversion functions here.
1123     #undef FLATBUFFERS_FN_DOUBLE
1124   // Then check if this could be a string/identifier enum value:
1125   } else if (e.type.base_type != BASE_TYPE_STRING &&
1126       e.type.base_type != BASE_TYPE_NONE &&
1127       (token_ == kTokenIdentifier || token_ == kTokenStringConstant)) {
1128     if (IsIdentifierStart(attribute_[0])) {  // Enum value.
1129       int64_t val;
1130       ECHECK(ParseEnumFromString(e.type, &val));
1131       e.constant = NumToString(val);
1132       NEXT();
1133     } else {  // Numeric constant in string.
1134       if (IsInteger(e.type.base_type)) {
1135         char *end;
1136         e.constant = NumToString(StringToInt(attribute_.c_str(), &end));
1137         if (*end)
1138           return Error("invalid integer: " + attribute_);
1139       } else if (IsFloat(e.type.base_type)) {
1140         char *end;
1141         e.constant = NumToString(strtod(attribute_.c_str(), &end));
1142         if (*end)
1143           return Error("invalid float: " + attribute_);
1144       } else {
1145         assert(0);  // Shouldn't happen, we covered all types.
1146         e.constant = "0";
1147       }
1148       NEXT();
1149     }
1150   } else {
1151     bool match = false;
1152     ECHECK(TryTypedValue(kTokenIntegerConstant,
1153                          IsScalar(e.type.base_type),
1154                          e,
1155                          BASE_TYPE_INT,
1156                          &match));
1157     ECHECK(TryTypedValue(kTokenFloatConstant,
1158                          IsFloat(e.type.base_type),
1159                          e,
1160                          BASE_TYPE_FLOAT,
1161                          &match));
1162     ECHECK(TryTypedValue(kTokenStringConstant,
1163                          e.type.base_type == BASE_TYPE_STRING,
1164                          e,
1165                          BASE_TYPE_STRING,
1166                          &match));
1167     if (!match)
1168       return Error("cannot parse value starting with: " +
1169                    TokenToStringId(token_));
1170   }
1171   return NoError();
1172 }
1173 
LookupCreateStruct(const std::string & name,bool create_if_new,bool definition)1174 StructDef *Parser::LookupCreateStruct(const std::string &name,
1175                                       bool create_if_new, bool definition) {
1176   std::string qualified_name = namespaces_.back()->GetFullyQualifiedName(name);
1177   // See if it exists pre-declared by an unqualified use.
1178   auto struct_def = structs_.Lookup(name);
1179   if (struct_def && struct_def->predecl) {
1180     if (definition) {
1181       // Make sure it has the current namespace, and is registered under its
1182       // qualified name.
1183       struct_def->defined_namespace = namespaces_.back();
1184       structs_.Move(name, qualified_name);
1185     }
1186     return struct_def;
1187   }
1188   // See if it exists pre-declared by an qualified use.
1189   struct_def = structs_.Lookup(qualified_name);
1190   if (struct_def && struct_def->predecl) {
1191     if (definition) {
1192       // Make sure it has the current namespace.
1193       struct_def->defined_namespace = namespaces_.back();
1194     }
1195     return struct_def;
1196   }
1197   if (!definition) {
1198     // Search thru parent namespaces.
1199     for (size_t components = namespaces_.back()->components.size();
1200          components && !struct_def; components--) {
1201       struct_def = structs_.Lookup(
1202           namespaces_.back()->GetFullyQualifiedName(name, components - 1));
1203     }
1204   }
1205   if (!struct_def && create_if_new) {
1206     struct_def = new StructDef();
1207     if (definition) {
1208       structs_.Add(qualified_name, struct_def);
1209       struct_def->name = name;
1210       struct_def->defined_namespace = namespaces_.back();
1211     } else {
1212       // Not a definition.
1213       // Rather than failing, we create a "pre declared" StructDef, due to
1214       // circular references, and check for errors at the end of parsing.
1215       // It is defined in the root namespace, since we don't know what the
1216       // final namespace will be.
1217       // TODO: maybe safer to use special namespace?
1218       structs_.Add(name, struct_def);
1219       struct_def->name = name;
1220       struct_def->defined_namespace = new Namespace();
1221       namespaces_.insert(namespaces_.begin(), struct_def->defined_namespace);
1222     }
1223   }
1224   return struct_def;
1225 }
1226 
ParseEnum(bool is_union,EnumDef ** dest)1227 CheckedError Parser::ParseEnum(bool is_union, EnumDef **dest) {
1228   std::vector<std::string> enum_comment = doc_comment_;
1229   NEXT();
1230   std::string enum_name = attribute_;
1231   EXPECT(kTokenIdentifier);
1232   auto &enum_def = *new EnumDef();
1233   enum_def.name = enum_name;
1234   enum_def.file = file_being_parsed_;
1235   enum_def.doc_comment = enum_comment;
1236   enum_def.is_union = is_union;
1237   enum_def.defined_namespace = namespaces_.back();
1238   if (enums_.Add(namespaces_.back()->GetFullyQualifiedName(enum_name),
1239                  &enum_def))
1240     return Error("enum already exists: " + enum_name);
1241   if (is_union) {
1242     enum_def.underlying_type.base_type = BASE_TYPE_UTYPE;
1243     enum_def.underlying_type.enum_def = &enum_def;
1244   } else {
1245     if (opts.proto_mode) {
1246       enum_def.underlying_type.base_type = BASE_TYPE_INT;
1247     } else {
1248       // Give specialized error message, since this type spec used to
1249       // be optional in the first FlatBuffers release.
1250       if (!Is(':')) {
1251         return Error("must specify the underlying integer type for this"
1252               " enum (e.g. \': short\', which was the default).");
1253       } else {
1254         NEXT();
1255       }
1256       // Specify the integer type underlying this enum.
1257       ECHECK(ParseType(enum_def.underlying_type));
1258       if (!IsInteger(enum_def.underlying_type.base_type))
1259         return Error("underlying enum type must be integral");
1260     }
1261     // Make this type refer back to the enum it was derived from.
1262     enum_def.underlying_type.enum_def = &enum_def;
1263   }
1264   ECHECK(ParseMetaData(&enum_def.attributes));
1265   EXPECT('{');
1266   if (is_union) enum_def.vals.Add("NONE", new EnumVal("NONE", 0));
1267   for (;;) {
1268     if (opts.proto_mode && attribute_ == "option") {
1269       ECHECK(ParseProtoOption());
1270     } else {
1271       auto value_name = attribute_;
1272       auto full_name = value_name;
1273       std::vector<std::string> value_comment = doc_comment_;
1274       EXPECT(kTokenIdentifier);
1275       if (is_union) {
1276         ECHECK(ParseNamespacing(&full_name, &value_name));
1277         if (opts.union_value_namespacing) {
1278           // Since we can't namespace the actual enum identifiers, turn
1279           // namespace parts into part of the identifier.
1280           value_name = full_name;
1281           std::replace(value_name.begin(), value_name.end(), '.', '_');
1282         }
1283       }
1284       auto prevsize = enum_def.vals.vec.size();
1285       auto value = enum_def.vals.vec.size()
1286         ? enum_def.vals.vec.back()->value + 1
1287         : 0;
1288       auto &ev = *new EnumVal(value_name, value);
1289       if (enum_def.vals.Add(value_name, &ev))
1290         return Error("enum value already exists: " + value_name);
1291       ev.doc_comment = value_comment;
1292       if (is_union) {
1293         ev.struct_def = LookupCreateStruct(full_name);
1294       }
1295       if (Is('=')) {
1296         NEXT();
1297         ev.value = StringToInt(attribute_.c_str());
1298         EXPECT(kTokenIntegerConstant);
1299         if (!opts.proto_mode && prevsize &&
1300             enum_def.vals.vec[prevsize - 1]->value >= ev.value)
1301           return Error("enum values must be specified in ascending order");
1302       }
1303       if (opts.proto_mode && Is('[')) {
1304         NEXT();
1305         // ignore attributes on enums.
1306         while (token_ != ']') NEXT();
1307         NEXT();
1308       }
1309     }
1310     if (!Is(opts.proto_mode ? ';' : ',')) break;
1311     NEXT();
1312     if (Is('}')) break;
1313   }
1314   EXPECT('}');
1315   if (enum_def.attributes.Lookup("bit_flags")) {
1316     for (auto it = enum_def.vals.vec.begin(); it != enum_def.vals.vec.end();
1317          ++it) {
1318       if (static_cast<size_t>((*it)->value) >=
1319            SizeOf(enum_def.underlying_type.base_type) * 8)
1320         return Error("bit flag out of range of underlying integral type");
1321       (*it)->value = 1LL << (*it)->value;
1322     }
1323   }
1324   if (dest) *dest = &enum_def;
1325   types_.Add(namespaces_.back()->GetFullyQualifiedName(enum_def.name),
1326              new Type(BASE_TYPE_UNION, nullptr, &enum_def));
1327   return NoError();
1328 }
1329 
StartStruct(const std::string & name,StructDef ** dest)1330 CheckedError Parser::StartStruct(const std::string &name, StructDef **dest) {
1331   auto &struct_def = *LookupCreateStruct(name, true, true);
1332   if (!struct_def.predecl) return Error("datatype already exists: " + name);
1333   struct_def.predecl = false;
1334   struct_def.name = name;
1335   struct_def.file = file_being_parsed_;
1336   // Move this struct to the back of the vector just in case it was predeclared,
1337   // to preserve declaration order.
1338   *remove(structs_.vec.begin(), structs_.vec.end(), &struct_def) = &struct_def;
1339   *dest = &struct_def;
1340   return NoError();
1341 }
1342 
CheckClash(std::vector<FieldDef * > & fields,StructDef * struct_def,const char * suffix,BaseType basetype)1343 CheckedError Parser::CheckClash(std::vector<FieldDef*> &fields,
1344                                 StructDef *struct_def,
1345                                 const char *suffix,
1346                                 BaseType basetype) {
1347   auto len = strlen(suffix);
1348   for (auto it = fields.begin(); it != fields.end(); ++it) {
1349     auto &fname = (*it)->name;
1350     if (fname.length() > len &&
1351         fname.compare(fname.length() - len, len, suffix) == 0 &&
1352         (*it)->value.type.base_type != BASE_TYPE_UTYPE) {
1353       auto field = struct_def->fields.Lookup(
1354                                              fname.substr(0, fname.length() - len));
1355       if (field && field->value.type.base_type == basetype)
1356         return Error("Field " + fname +
1357                      " would clash with generated functions for field " +
1358                      field->name);
1359     }
1360   }
1361   return NoError();
1362 }
1363 
compareFieldDefs(const FieldDef * a,const FieldDef * b)1364 static bool compareFieldDefs(const FieldDef *a, const FieldDef *b) {
1365   auto a_id = atoi(a->attributes.Lookup("id")->constant.c_str());
1366   auto b_id = atoi(b->attributes.Lookup("id")->constant.c_str());
1367   return a_id < b_id;
1368 }
1369 
ParseDecl()1370 CheckedError Parser::ParseDecl() {
1371   std::vector<std::string> dc = doc_comment_;
1372   bool fixed = Is(kTokenStruct);
1373   if (fixed) NEXT() else EXPECT(kTokenTable);
1374   std::string name = attribute_;
1375   EXPECT(kTokenIdentifier);
1376   StructDef *struct_def;
1377   ECHECK(StartStruct(name, &struct_def));
1378   struct_def->doc_comment = dc;
1379   struct_def->fixed = fixed;
1380   ECHECK(ParseMetaData(&struct_def->attributes));
1381   struct_def->sortbysize =
1382     struct_def->attributes.Lookup("original_order") == nullptr && !fixed;
1383   EXPECT('{');
1384   while (token_ != '}') ECHECK(ParseField(*struct_def));
1385   auto force_align = struct_def->attributes.Lookup("force_align");
1386   if (fixed && force_align) {
1387     auto align = static_cast<size_t>(atoi(force_align->constant.c_str()));
1388     if (force_align->type.base_type != BASE_TYPE_INT ||
1389         align < struct_def->minalign ||
1390         align > FLATBUFFERS_MAX_ALIGNMENT ||
1391         align & (align - 1))
1392       return Error("force_align must be a power of two integer ranging from the"
1393                    "struct\'s natural alignment to " +
1394                    NumToString(FLATBUFFERS_MAX_ALIGNMENT));
1395     struct_def->minalign = align;
1396   }
1397   struct_def->PadLastField(struct_def->minalign);
1398   // Check if this is a table that has manual id assignments
1399   auto &fields = struct_def->fields.vec;
1400   if (!struct_def->fixed && fields.size()) {
1401     size_t num_id_fields = 0;
1402     for (auto it = fields.begin(); it != fields.end(); ++it) {
1403       if ((*it)->attributes.Lookup("id")) num_id_fields++;
1404     }
1405     // If any fields have ids..
1406     if (num_id_fields) {
1407       // Then all fields must have them.
1408       if (num_id_fields != fields.size())
1409         return Error(
1410               "either all fields or no fields must have an 'id' attribute");
1411       // Simply sort by id, then the fields are the same as if no ids had
1412       // been specified.
1413       std::sort(fields.begin(), fields.end(), compareFieldDefs);
1414       // Verify we have a contiguous set, and reassign vtable offsets.
1415       for (int i = 0; i < static_cast<int>(fields.size()); i++) {
1416         if (i != atoi(fields[i]->attributes.Lookup("id")->constant.c_str()))
1417           return Error("field id\'s must be consecutive from 0, id " +
1418                 NumToString(i) + " missing or set twice");
1419         fields[i]->value.offset = FieldIndexToOffset(static_cast<voffset_t>(i));
1420       }
1421     }
1422   }
1423 
1424   ECHECK(CheckClash(fields, struct_def, UnionTypeFieldSuffix(),
1425                     BASE_TYPE_UNION));
1426   ECHECK(CheckClash(fields, struct_def, "Type", BASE_TYPE_UNION));
1427   ECHECK(CheckClash(fields, struct_def, "_length", BASE_TYPE_VECTOR));
1428   ECHECK(CheckClash(fields, struct_def, "Length", BASE_TYPE_VECTOR));
1429   ECHECK(CheckClash(fields, struct_def, "_byte_vector", BASE_TYPE_STRING));
1430   ECHECK(CheckClash(fields, struct_def, "ByteVector", BASE_TYPE_STRING));
1431   EXPECT('}');
1432   types_.Add(namespaces_.back()->GetFullyQualifiedName(struct_def->name),
1433              new Type(BASE_TYPE_STRUCT, struct_def, nullptr));
1434   return NoError();
1435 }
1436 
ParseService()1437 CheckedError Parser::ParseService() {
1438   std::vector<std::string> service_comment = doc_comment_;
1439   NEXT();
1440   auto service_name = attribute_;
1441   EXPECT(kTokenIdentifier);
1442   auto &service_def = *new ServiceDef();
1443   service_def.name = service_name;
1444   service_def.file = file_being_parsed_;
1445   service_def.doc_comment = service_comment;
1446   service_def.defined_namespace = namespaces_.back();
1447   if (services_.Add(namespaces_.back()->GetFullyQualifiedName(service_name),
1448                     &service_def))
1449     return Error("service already exists: " + service_name);
1450   ECHECK(ParseMetaData(&service_def.attributes));
1451   EXPECT('{');
1452   do {
1453     auto rpc_name = attribute_;
1454     EXPECT(kTokenIdentifier);
1455     EXPECT('(');
1456     Type reqtype, resptype;
1457     ECHECK(ParseTypeIdent(reqtype));
1458     EXPECT(')');
1459     EXPECT(':');
1460     ECHECK(ParseTypeIdent(resptype));
1461     if (reqtype.base_type != BASE_TYPE_STRUCT || reqtype.struct_def->fixed ||
1462         resptype.base_type != BASE_TYPE_STRUCT || resptype.struct_def->fixed)
1463         return Error("rpc request and response types must be tables");
1464     auto &rpc = *new RPCCall();
1465     rpc.name = rpc_name;
1466     rpc.request = reqtype.struct_def;
1467     rpc.response = resptype.struct_def;
1468     if (service_def.calls.Add(rpc_name, &rpc))
1469       return Error("rpc already exists: " + rpc_name);
1470     ECHECK(ParseMetaData(&rpc.attributes));
1471     EXPECT(';');
1472   } while (token_ != '}');
1473   NEXT();
1474   return NoError();
1475 }
1476 
SetRootType(const char * name)1477 bool Parser::SetRootType(const char *name) {
1478   root_struct_def_ = structs_.Lookup(name);
1479   if (!root_struct_def_)
1480     root_struct_def_ = structs_.Lookup(
1481                          namespaces_.back()->GetFullyQualifiedName(name));
1482   return root_struct_def_ != nullptr;
1483 }
1484 
MarkGenerated()1485 void Parser::MarkGenerated() {
1486   // This function marks all existing definitions as having already
1487   // been generated, which signals no code for included files should be
1488   // generated.
1489   for (auto it = enums_.vec.begin();
1490            it != enums_.vec.end(); ++it) {
1491     (*it)->generated = true;
1492   }
1493   for (auto it = structs_.vec.begin();
1494            it != structs_.vec.end(); ++it) {
1495     (*it)->generated = true;
1496   }
1497   for (auto it = services_.vec.begin();
1498            it != services_.vec.end(); ++it) {
1499     (*it)->generated = true;
1500   }
1501 }
1502 
ParseNamespace()1503 CheckedError Parser::ParseNamespace() {
1504   NEXT();
1505   auto ns = new Namespace();
1506   namespaces_.push_back(ns);
1507   if (token_ != ';') {
1508     for (;;) {
1509       ns->components.push_back(attribute_);
1510       EXPECT(kTokenIdentifier);
1511       if (Is('.')) NEXT() else break;
1512     }
1513   }
1514   EXPECT(';');
1515   return NoError();
1516 }
1517 
compareEnumVals(const EnumVal * a,const EnumVal * b)1518 static bool compareEnumVals(const EnumVal *a, const EnumVal* b) {
1519   return a->value < b->value;
1520 }
1521 
// Best effort parsing of .proto declarations, with the aim to turn them
// into the closest corresponding FlatBuffer equivalent.
// We parse everything as identifiers instead of keywords, since we don't
// want protobuf keywords to become invalid identifiers in FlatBuffers.
ParseProtoDecl()1526 CheckedError Parser::ParseProtoDecl() {
1527   bool isextend = attribute_ == "extend";
1528   if (attribute_ == "package") {
1529     // These are identical in syntax to FlatBuffer's namespace decl.
1530     ECHECK(ParseNamespace());
1531   } else if (attribute_ == "message" || isextend) {
1532     std::vector<std::string> struct_comment = doc_comment_;
1533     NEXT();
1534     StructDef *struct_def = nullptr;
1535     if (isextend) {
1536       if (Is('.')) NEXT();  // qualified names may start with a . ?
1537       auto id = attribute_;
1538       EXPECT(kTokenIdentifier);
1539       ECHECK(ParseNamespacing(&id, nullptr));
1540       struct_def = LookupCreateStruct(id, false);
1541       if (!struct_def)
1542         return Error("cannot extend unknown message type: " + id);
1543     } else {
1544       std::string name = attribute_;
1545       EXPECT(kTokenIdentifier);
1546       ECHECK(StartStruct(name, &struct_def));
1547       // Since message definitions can be nested, we create a new namespace.
1548       auto ns = new Namespace();
1549       // Copy of current namespace.
1550       *ns = *namespaces_.back();
1551       // But with current message name.
1552       ns->components.push_back(name);
1553       namespaces_.push_back(ns);
1554     }
1555     struct_def->doc_comment = struct_comment;
1556     ECHECK(ParseProtoFields(struct_def, isextend, false));
1557     if (!isextend) {
1558       // We have to remove the nested namespace, but we can't just throw it
1559       // away, so put it at the beginning of the vector.
1560       auto ns = namespaces_.back();
1561       namespaces_.pop_back();
1562       namespaces_.insert(namespaces_.begin(), ns);
1563     }
1564     if (Is(';')) NEXT();
1565   } else if (attribute_ == "enum") {
1566     // These are almost the same, just with different terminator:
1567     EnumDef *enum_def;
1568     ECHECK(ParseEnum(false, &enum_def));
1569     if (Is(';')) NEXT();
1570     // Protobuf allows them to be specified in any order, so sort afterwards.
1571     auto &v = enum_def->vals.vec;
1572     std::sort(v.begin(), v.end(), compareEnumVals);
1573 
1574     // Temp: remove any duplicates, as .fbs files can't handle them.
1575     for (auto it = v.begin(); it != v.end(); ) {
1576       if (it != v.begin() && it[0]->value == it[-1]->value) it = v.erase(it);
1577       else ++it;
1578     }
1579   } else if (attribute_ == "syntax") {  // Skip these.
1580     NEXT();
1581     EXPECT('=');
1582     EXPECT(kTokenStringConstant);
1583     EXPECT(';');
1584   } else if (attribute_ == "option") {  // Skip these.
1585     ECHECK(ParseProtoOption());
1586     EXPECT(';');
1587   } else if (attribute_ == "service") {  // Skip these.
1588     NEXT();
1589     EXPECT(kTokenIdentifier);
1590     ECHECK(ParseProtoCurliesOrIdent());
1591   } else {
1592     return Error("don\'t know how to parse .proto declaration starting with " +
1593           TokenToStringId(token_));
1594   }
1595   return NoError();
1596 }
1597 
ParseProtoFields(StructDef * struct_def,bool isextend,bool inside_oneof)1598 CheckedError Parser::ParseProtoFields(StructDef *struct_def, bool isextend,
1599                                       bool inside_oneof) {
1600   EXPECT('{');
1601   while (token_ != '}') {
1602     if (attribute_ == "message" || attribute_ == "extend" ||
1603         attribute_ == "enum") {
1604       // Nested declarations.
1605       ECHECK(ParseProtoDecl());
1606     } else if (attribute_ == "extensions") {  // Skip these.
1607       NEXT();
1608       EXPECT(kTokenIntegerConstant);
1609       if (Is(kTokenIdentifier)) {
1610         NEXT();  // to
1611         NEXT();  // num
1612       }
1613       EXPECT(';');
1614     } else if (attribute_ == "option") {  // Skip these.
1615       ECHECK(ParseProtoOption());
1616       EXPECT(';');
1617     } else if (attribute_ == "reserved") {  // Skip these.
1618       NEXT();
1619       EXPECT(kTokenIntegerConstant);
1620       while (Is(',')) { NEXT(); EXPECT(kTokenIntegerConstant); }
1621       EXPECT(';');
1622     } else {
1623       std::vector<std::string> field_comment = doc_comment_;
1624       // Parse the qualifier.
1625       bool required = false;
1626       bool repeated = false;
1627       bool oneof = false;
1628       if (!inside_oneof) {
1629         if (attribute_ == "optional") {
1630           // This is the default.
1631           EXPECT(kTokenIdentifier);
1632         } else if (attribute_ == "required") {
1633           required = true;
1634           EXPECT(kTokenIdentifier);
1635         } else if (attribute_ == "repeated") {
1636           repeated = true;
1637           EXPECT(kTokenIdentifier);
1638         } else if (attribute_ == "oneof") {
1639           oneof = true;
1640           EXPECT(kTokenIdentifier);
1641         } else {
1642           // can't error, proto3 allows decls without any of the above.
1643         }
1644       }
1645       StructDef *anonymous_struct = nullptr;
1646       Type type;
1647       if (attribute_ == "group" || oneof) {
1648         if (!oneof) EXPECT(kTokenIdentifier);
1649         auto name = "Anonymous" + NumToString(anonymous_counter++);
1650         ECHECK(StartStruct(name, &anonymous_struct));
1651         type = Type(BASE_TYPE_STRUCT, anonymous_struct);
1652       } else {
1653         ECHECK(ParseTypeFromProtoType(&type));
1654       }
1655       // Repeated elements get mapped to a vector.
1656       if (repeated) {
1657         type.element = type.base_type;
1658         type.base_type = BASE_TYPE_VECTOR;
1659       }
1660       std::string name = attribute_;
1661       // Protos may use our keywords "attribute" & "namespace" as an identifier.
1662       if (Is(kTokenAttribute) || Is(kTokenNameSpace)) {
1663         NEXT();
1664         // TODO: simpler to just not make these keywords?
1665         name += "_";  // Have to make it not a keyword.
1666       } else {
1667         EXPECT(kTokenIdentifier);
1668       }
1669       if (!oneof) {
1670         // Parse the field id. Since we're just translating schemas, not
1671         // any kind of binary compatibility, we can safely ignore these, and
1672         // assign our own.
1673         EXPECT('=');
1674         EXPECT(kTokenIntegerConstant);
1675       }
1676       FieldDef *field = nullptr;
1677       if (isextend) {
1678         // We allow a field to be re-defined when extending.
1679         // TODO: are there situations where that is problematic?
1680         field = struct_def->fields.Lookup(name);
1681       }
1682       if (!field) ECHECK(AddField(*struct_def, name, type, &field));
1683       field->doc_comment = field_comment;
1684       if (!IsScalar(type.base_type)) field->required = required;
1685       // See if there's a default specified.
1686       if (Is('[')) {
1687         NEXT();
1688         for (;;) {
1689           auto key = attribute_;
1690           ECHECK(ParseProtoKey());
1691           EXPECT('=');
1692           auto val = attribute_;
1693           ECHECK(ParseProtoCurliesOrIdent());
1694           if (key == "default") {
1695             // Temp: skip non-numeric defaults (enums).
1696             auto numeric = strpbrk(val.c_str(), "0123456789-+.");
1697             if (IsScalar(type.base_type) && numeric == val.c_str())
1698               field->value.constant = val;
1699           } else if (key == "deprecated") {
1700             field->deprecated = val == "true";
1701           }
1702           if (!Is(',')) break;
1703           NEXT();
1704         }
1705         EXPECT(']');
1706       }
1707       if (anonymous_struct) {
1708         ECHECK(ParseProtoFields(anonymous_struct, false, oneof));
1709         if (Is(';')) NEXT();
1710       } else {
1711         EXPECT(';');
1712       }
1713     }
1714   }
1715   NEXT();
1716   return NoError();
1717 }
1718 
ParseProtoKey()1719 CheckedError Parser::ParseProtoKey() {
1720   if (token_ == '(') {
1721     NEXT();
1722     // Skip "(a.b)" style custom attributes.
1723     while (token_ == '.' || token_ == kTokenIdentifier) NEXT();
1724     EXPECT(')');
1725     while (Is('.')) { NEXT(); EXPECT(kTokenIdentifier); }
1726   } else {
1727     EXPECT(kTokenIdentifier);
1728   }
1729   return NoError();
1730 }
1731 
ParseProtoCurliesOrIdent()1732 CheckedError Parser::ParseProtoCurliesOrIdent() {
1733   if (Is('{')) {
1734     NEXT();
1735     for (int nesting = 1; nesting; ) {
1736       if (token_ == '{') nesting++;
1737       else if (token_ == '}') nesting--;
1738       NEXT();
1739     }
1740   } else {
1741     NEXT();  // Any single token.
1742   }
1743   return NoError();
1744 }
1745 
ParseProtoOption()1746 CheckedError Parser::ParseProtoOption() {
1747   NEXT();
1748   ECHECK(ParseProtoKey());
1749   EXPECT('=');
1750   ECHECK(ParseProtoCurliesOrIdent());
1751   return NoError();
1752 }
1753 
1754 // Parse a protobuf type, and map it to the corresponding FlatBuffer one.
ParseTypeFromProtoType(Type * type)1755 CheckedError Parser::ParseTypeFromProtoType(Type *type) {
1756   struct type_lookup { const char *proto_type; BaseType fb_type; };
1757   static type_lookup lookup[] = {
1758     { "float", BASE_TYPE_FLOAT },  { "double", BASE_TYPE_DOUBLE },
1759     { "int32", BASE_TYPE_INT },    { "int64", BASE_TYPE_LONG },
1760     { "uint32", BASE_TYPE_UINT },  { "uint64", BASE_TYPE_ULONG },
1761     { "sint32", BASE_TYPE_INT },   { "sint64", BASE_TYPE_LONG },
1762     { "fixed32", BASE_TYPE_UINT }, { "fixed64", BASE_TYPE_ULONG },
1763     { "sfixed32", BASE_TYPE_INT }, { "sfixed64", BASE_TYPE_LONG },
1764     { "bool", BASE_TYPE_BOOL },
1765     { "string", BASE_TYPE_STRING },
1766     { "bytes", BASE_TYPE_STRING },
1767     { nullptr, BASE_TYPE_NONE }
1768   };
1769   for (auto tl = lookup; tl->proto_type; tl++) {
1770     if (attribute_ == tl->proto_type) {
1771       type->base_type = tl->fb_type;
1772       NEXT();
1773       return NoError();
1774     }
1775   }
1776   if (Is('.')) NEXT();  // qualified names may start with a . ?
1777   ECHECK(ParseTypeIdent(*type));
1778   return NoError();
1779 }
1780 
SkipAnyJsonValue()1781 CheckedError Parser::SkipAnyJsonValue() {
1782   switch (token_) {
1783     case '{':
1784       ECHECK(SkipJsonObject());
1785       break;
1786     case kTokenStringConstant:
1787       ECHECK(SkipJsonString());
1788       break;
1789     case '[':
1790       ECHECK(SkipJsonArray());
1791       break;
1792     case kTokenIntegerConstant:
1793       EXPECT(kTokenIntegerConstant);
1794       break;
1795     case kTokenFloatConstant:
1796       EXPECT(kTokenFloatConstant);
1797       break;
1798     default:
1799       return Error(std::string("Unexpected token:") + std::string(1, static_cast<char>(token_)));
1800   }
1801   return NoError();
1802 }
1803 
SkipJsonObject()1804 CheckedError Parser::SkipJsonObject() {
1805   EXPECT('{');
1806   size_t fieldn = 0;
1807 
1808   for (;;) {
1809     if ((!opts.strict_json || !fieldn) && Is('}')) break;
1810 
1811     if (!Is(kTokenStringConstant)) {
1812       EXPECT(opts.strict_json ? kTokenStringConstant : kTokenIdentifier);
1813     }
1814     else {
1815       NEXT();
1816     }
1817 
1818     EXPECT(':');
1819     ECHECK(SkipAnyJsonValue());
1820     fieldn++;
1821 
1822     if (Is('}')) break;
1823     EXPECT(',');
1824   }
1825 
1826   NEXT();
1827   return NoError();
1828 }
1829 
SkipJsonArray()1830 CheckedError Parser::SkipJsonArray() {
1831   EXPECT('[');
1832 
1833   for (;;) {
1834     if (Is(']')) break;
1835 
1836     ECHECK(SkipAnyJsonValue());
1837 
1838     if (Is(']')) break;
1839     EXPECT(',');
1840   }
1841 
1842   NEXT();
1843   return NoError();
1844 }
1845 
SkipJsonString()1846 CheckedError Parser::SkipJsonString() {
1847   EXPECT(kTokenStringConstant);
1848   return NoError();
1849 }
1850 
Parse(const char * source,const char ** include_paths,const char * source_filename)1851 bool Parser::Parse(const char *source, const char **include_paths,
1852                    const char *source_filename) {
1853   return !DoParse(source, include_paths, source_filename).Check();
1854 }
1855 
DoParse(const char * source,const char ** include_paths,const char * source_filename)1856 CheckedError Parser::DoParse(const char *source, const char **include_paths,
1857                              const char *source_filename) {
1858   file_being_parsed_ = source_filename ? source_filename : "";
1859   if (source_filename &&
1860       included_files_.find(source_filename) == included_files_.end()) {
1861     included_files_[source_filename] = true;
1862     files_included_per_file_[source_filename] = std::set<std::string>();
1863   }
1864   if (!include_paths) {
1865     static const char *current_directory[] = { "", nullptr };
1866     include_paths = current_directory;
1867   }
1868   source_ = cursor_ = source;
1869   line_ = 1;
1870   error_.clear();
1871   field_stack_.clear();
1872   builder_.Clear();
1873   // Start with a blank namespace just in case this file doesn't have one.
1874   namespaces_.push_back(new Namespace());
1875   ECHECK(SkipByteOrderMark());
1876   NEXT();
1877   // Includes must come before type declarations:
1878   for (;;) {
1879     // Parse pre-include proto statements if any:
1880     if (opts.proto_mode &&
1881         (attribute_ == "option" || attribute_ == "syntax" ||
1882          attribute_ == "package")) {
1883         ECHECK(ParseProtoDecl());
1884     } else if (Is(kTokenNativeInclude)) {
1885       NEXT();
1886       native_included_files_.emplace_back(attribute_);
1887       EXPECT(kTokenStringConstant);
1888     } else if (Is(kTokenInclude) ||
1889                (opts.proto_mode &&
1890                 attribute_ == "import" &&
1891                 Is(kTokenIdentifier))) {
1892       NEXT();
1893       if (opts.proto_mode && attribute_ == "public") NEXT();
1894       auto name = attribute_;
1895       EXPECT(kTokenStringConstant);
1896       // Look for the file in include_paths.
1897       std::string filepath;
1898       for (auto paths = include_paths; paths && *paths; paths++) {
1899         filepath = flatbuffers::ConCatPathFileName(*paths, name);
1900         if(FileExists(filepath.c_str())) break;
1901       }
1902       if (filepath.empty())
1903         return Error("unable to locate include file: " + name);
1904       if (source_filename)
1905         files_included_per_file_[source_filename].insert(filepath);
1906       if (included_files_.find(filepath) == included_files_.end()) {
1907         // We found an include file that we have not parsed yet.
1908         // Load it and parse it.
1909         std::string contents;
1910         if (!LoadFile(filepath.c_str(), true, &contents))
1911           return Error("unable to load include file: " + name);
1912         ECHECK(DoParse(contents.c_str(), include_paths, filepath.c_str()));
1913         // We generally do not want to output code for any included files:
1914         if (!opts.generate_all) MarkGenerated();
1915         // This is the easiest way to continue this file after an include:
1916         // instead of saving and restoring all the state, we simply start the
1917         // file anew. This will cause it to encounter the same include
1918         // statement again, but this time it will skip it, because it was
1919         // entered into included_files_.
1920         // This is recursive, but only go as deep as the number of include
1921         // statements.
1922         return DoParse(source, include_paths, source_filename);
1923       }
1924       EXPECT(';');
1925     } else {
1926       break;
1927     }
1928   }
1929   // Now parse all other kinds of declarations:
1930   while (token_ != kTokenEof) {
1931     if (opts.proto_mode) {
1932       ECHECK(ParseProtoDecl());
1933     } else if (token_ == kTokenNameSpace) {
1934       ECHECK(ParseNamespace());
1935     } else if (token_ == '{') {
1936       if (!root_struct_def_)
1937         return Error("no root type set to parse json with");
1938       if (builder_.GetSize()) {
1939         return Error("cannot have more than one json object in a file");
1940       }
1941       uoffset_t toff;
1942       ECHECK(ParseTable(*root_struct_def_, nullptr, &toff));
1943       builder_.Finish(Offset<Table>(toff),
1944                 file_identifier_.length() ? file_identifier_.c_str() : nullptr);
1945     } else if (token_ == kTokenEnum) {
1946       ECHECK(ParseEnum(false, nullptr));
1947     } else if (token_ == kTokenUnion) {
1948       ECHECK(ParseEnum(true, nullptr));
1949     } else if (token_ == kTokenRootType) {
1950       NEXT();
1951       auto root_type = attribute_;
1952       EXPECT(kTokenIdentifier);
1953       ECHECK(ParseNamespacing(&root_type, nullptr));
1954       if (!SetRootType(root_type.c_str()))
1955         return Error("unknown root type: " + root_type);
1956       if (root_struct_def_->fixed)
1957         return Error("root type must be a table");
1958       EXPECT(';');
1959     } else if (token_ == kTokenFileIdentifier) {
1960       NEXT();
1961       file_identifier_ = attribute_;
1962       EXPECT(kTokenStringConstant);
1963       if (file_identifier_.length() !=
1964           FlatBufferBuilder::kFileIdentifierLength)
1965         return Error("file_identifier must be exactly " +
1966               NumToString(FlatBufferBuilder::kFileIdentifierLength) +
1967               " characters");
1968       EXPECT(';');
1969     } else if (token_ == kTokenFileExtension) {
1970       NEXT();
1971       file_extension_ = attribute_;
1972       EXPECT(kTokenStringConstant);
1973       EXPECT(';');
1974     } else if(token_ == kTokenInclude) {
1975       return Error("includes must come before declarations");
1976     } else if(token_ == kTokenAttribute) {
1977       NEXT();
1978       auto name = attribute_;
1979       EXPECT(kTokenStringConstant);
1980       EXPECT(';');
1981       known_attributes_[name] = false;
1982     } else if (token_ == kTokenService) {
1983       ECHECK(ParseService());
1984     } else {
1985       ECHECK(ParseDecl());
1986     }
1987   }
1988   for (auto it = structs_.vec.begin(); it != structs_.vec.end(); ++it) {
1989     if ((*it)->predecl) {
1990       return Error("type referenced but not defined: " + (*it)->name);
1991     }
1992   }
1993   for (auto it = enums_.vec.begin(); it != enums_.vec.end(); ++it) {
1994     auto &enum_def = **it;
1995     if (enum_def.is_union) {
1996       for (auto val_it = enum_def.vals.vec.begin();
1997            val_it != enum_def.vals.vec.end();
1998            ++val_it) {
1999         auto &val = **val_it;
2000         if (val.struct_def && val.struct_def->fixed)
2001           return Error("only tables can be union elements: " + val.name);
2002       }
2003     }
2004   }
2005   return NoError();
2006 }
2007 
GetIncludedFilesRecursive(const std::string & file_name) const2008 std::set<std::string> Parser::GetIncludedFilesRecursive(
2009     const std::string &file_name) const {
2010   std::set<std::string> included_files;
2011   std::list<std::string> to_process;
2012 
2013   if (file_name.empty()) return included_files;
2014   to_process.push_back(file_name);
2015 
2016   while (!to_process.empty()) {
2017     std::string current = to_process.front();
2018     to_process.pop_front();
2019     included_files.insert(current);
2020 
2021     auto new_files = files_included_per_file_.at(current);
2022     for (auto it = new_files.begin(); it != new_files.end(); ++it) {
2023       if (included_files.find(*it) == included_files.end())
2024         to_process.push_back(*it);
2025     }
2026   }
2027 
2028   return included_files;
2029 }
2030 
2031 // Schema serialization functionality:
2032 
// Ordering predicate for std::sort: compares two definitions by the fully
// qualified name obtained from their defining namespace. Returns true when
// `a` sorts before `b`.
template<typename T> bool compareName(const T* a, const T* b) {
  const auto lhs_name = a->defined_namespace->GetFullyQualifiedName(a->name);
  const auto rhs_name = b->defined_namespace->GetFullyQualifiedName(b->name);
  return lhs_name < rhs_name;
}
2037 
AssignIndices(const std::vector<T * > & defvec)2038 template<typename T> void AssignIndices(const std::vector<T *> &defvec) {
2039   // Pre-sort these vectors, such that we can set the correct indices for them.
2040   auto vec = defvec;
2041   std::sort(vec.begin(), vec.end(), compareName<T>);
2042   for (int i = 0; i < static_cast<int>(vec.size()); i++) vec[i]->index = i;
2043 }
2044 
Serialize()2045 void Parser::Serialize() {
2046   builder_.Clear();
2047   AssignIndices(structs_.vec);
2048   AssignIndices(enums_.vec);
2049   std::vector<Offset<reflection::Object>> object_offsets;
2050   for (auto it = structs_.vec.begin(); it != structs_.vec.end(); ++it) {
2051     auto offset = (*it)->Serialize(&builder_, *this);
2052     object_offsets.push_back(offset);
2053     (*it)->serialized_location = offset.o;
2054   }
2055   std::vector<Offset<reflection::Enum>> enum_offsets;
2056   for (auto it = enums_.vec.begin(); it != enums_.vec.end(); ++it) {
2057     auto offset = (*it)->Serialize(&builder_, *this);
2058     enum_offsets.push_back(offset);
2059     (*it)->serialized_location = offset.o;
2060   }
2061   auto schema_offset = reflection::CreateSchema(
2062                          builder_,
2063                          builder_.CreateVectorOfSortedTables(&object_offsets),
2064                          builder_.CreateVectorOfSortedTables(&enum_offsets),
2065                          builder_.CreateString(file_identifier_),
2066                          builder_.CreateString(file_extension_),
2067                          root_struct_def_
2068                            ? root_struct_def_->serialized_location
2069                            : 0);
2070   builder_.Finish(schema_offset, reflection::SchemaIdentifier());
2071 }
2072 
Serialize(FlatBufferBuilder * builder,const Parser & parser) const2073 Offset<reflection::Object> StructDef::Serialize(FlatBufferBuilder *builder,
2074                                                 const Parser &parser) const {
2075   std::vector<Offset<reflection::Field>> field_offsets;
2076   for (auto it = fields.vec.begin(); it != fields.vec.end(); ++it) {
2077     field_offsets.push_back(
2078       (*it)->Serialize(builder,
2079                        static_cast<uint16_t>(it - fields.vec.begin()), parser));
2080   }
2081   auto qualified_name = defined_namespace->GetFullyQualifiedName(name);
2082   return reflection::CreateObject(*builder,
2083                                   builder->CreateString(qualified_name),
2084                                   builder->CreateVectorOfSortedTables(
2085                                     &field_offsets),
2086                                   fixed,
2087                                   static_cast<int>(minalign),
2088                                   static_cast<int>(bytesize),
2089                                   SerializeAttributes(builder, parser),
2090                                   parser.opts.binary_schema_comments
2091                                     ? builder->CreateVectorOfStrings(
2092                                         doc_comment)
2093                                     : 0);
2094 }
2095 
Serialize(FlatBufferBuilder * builder,uint16_t id,const Parser & parser) const2096 Offset<reflection::Field> FieldDef::Serialize(FlatBufferBuilder *builder,
2097                                               uint16_t id,
2098                                               const Parser &parser) const {
2099   return reflection::CreateField(*builder,
2100                                  builder->CreateString(name),
2101                                  value.type.Serialize(builder),
2102                                  id,
2103                                  value.offset,
2104                                  IsInteger(value.type.base_type)
2105                                    ? StringToInt(value.constant.c_str())
2106                                    : 0,
2107                                  IsFloat(value.type.base_type)
2108                                    ? strtod(value.constant.c_str(), nullptr)
2109                                    : 0.0,
2110                                  deprecated,
2111                                  required,
2112                                  key,
2113                                  SerializeAttributes(builder, parser),
2114                                  parser.opts.binary_schema_comments
2115                                    ? builder->CreateVectorOfStrings(doc_comment)
2116                                    : 0);
2117   // TODO: value.constant is almost always "0", we could save quite a bit of
2118   // space by sharing it. Same for common values of value.type.
2119 }
2120 
Serialize(FlatBufferBuilder * builder,const Parser & parser) const2121 Offset<reflection::Enum> EnumDef::Serialize(FlatBufferBuilder *builder,
2122                                             const Parser &parser) const {
2123   std::vector<Offset<reflection::EnumVal>> enumval_offsets;
2124   for (auto it = vals.vec.begin(); it != vals.vec.end(); ++it) {
2125     enumval_offsets.push_back((*it)->Serialize(builder));
2126   }
2127   auto qualified_name = defined_namespace->GetFullyQualifiedName(name);
2128   return reflection::CreateEnum(*builder,
2129                                 builder->CreateString(qualified_name),
2130                                 builder->CreateVector(enumval_offsets),
2131                                 is_union,
2132                                 underlying_type.Serialize(builder),
2133                                 SerializeAttributes(builder, parser),
2134                                 parser.opts.binary_schema_comments
2135                                   ? builder->CreateVectorOfStrings(doc_comment)
2136                                   : 0);
2137 }
2138 
Serialize(FlatBufferBuilder * builder) const2139 Offset<reflection::EnumVal> EnumVal::Serialize(FlatBufferBuilder *builder) const
2140                                                                                {
2141   return reflection::CreateEnumVal(*builder,
2142                                    builder->CreateString(name),
2143                                    value,
2144                                    struct_def
2145                                      ? struct_def->serialized_location
2146                                      : 0);
2147 }
2148 
Serialize(FlatBufferBuilder * builder) const2149 Offset<reflection::Type> Type::Serialize(FlatBufferBuilder *builder) const {
2150   return reflection::CreateType(*builder,
2151                                 static_cast<reflection::BaseType>(base_type),
2152                                 static_cast<reflection::BaseType>(element),
2153                                 struct_def ? struct_def->index :
2154                                              (enum_def ? enum_def->index : -1));
2155 }
2156 
2157 flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<
2158   reflection::KeyValue>>>
SerializeAttributes(FlatBufferBuilder * builder,const Parser & parser) const2159     Definition::SerializeAttributes(FlatBufferBuilder *builder,
2160                                     const Parser &parser) const {
2161   std::vector<flatbuffers::Offset<reflection::KeyValue>> attrs;
2162   for (auto kv = attributes.dict.begin(); kv != attributes.dict.end(); ++kv) {
2163     auto it = parser.known_attributes_.find(kv->first);
2164     assert(it != parser.known_attributes_.end());
2165     if (!it->second) {  // Custom attribute.
2166       attrs.push_back(
2167           reflection::CreateKeyValue(*builder, builder->CreateString(kv->first),
2168                                      builder->CreateString(
2169                                          kv->second->constant)));
2170     }
2171   }
2172   if (attrs.size()) {
2173     return builder->CreateVectorOfSortedTables(&attrs);
2174   } else {
2175     return 0;
2176   }
2177 }
2178 
ConformTo(const Parser & base)2179 std::string Parser::ConformTo(const Parser &base) {
2180   for (auto sit = structs_.vec.begin(); sit != structs_.vec.end(); ++sit) {
2181     auto &struct_def = **sit;
2182     auto qualified_name =
2183         struct_def.defined_namespace->GetFullyQualifiedName(struct_def.name);
2184     auto struct_def_base = base.structs_.Lookup(qualified_name);
2185     if (!struct_def_base) continue;
2186     for (auto fit = struct_def.fields.vec.begin();
2187              fit != struct_def.fields.vec.end(); ++fit) {
2188       auto &field = **fit;
2189       auto field_base = struct_def_base->fields.Lookup(field.name);
2190       if (field_base) {
2191         if (field.value.offset != field_base->value.offset)
2192           return "offsets differ for field: " + field.name;
2193         if (field.value.constant != field_base->value.constant)
2194           return "defaults differ for field: " + field.name;
2195         if (!EqualByName(field.value.type, field_base->value.type))
2196           return "types differ for field: " + field.name;
2197       } else {
2198         // Doesn't have to exist, deleting fields is fine.
2199         // But we should check if there is a field that has the same offset
2200         // but is incompatible (in the case of field renaming).
2201         for (auto fbit = struct_def_base->fields.vec.begin();
2202                  fbit != struct_def_base->fields.vec.end(); ++fbit) {
2203           field_base = *fbit;
2204           if (field.value.offset == field_base->value.offset) {
2205             if (!EqualByName(field.value.type, field_base->value.type))
2206               return "field renamed to different type: " + field.name;
2207             break;
2208           }
2209         }
2210       }
2211     }
2212   }
2213   for (auto eit = enums_.vec.begin(); eit != enums_.vec.end(); ++eit) {
2214     auto &enum_def = **eit;
2215     auto qualified_name =
2216         enum_def.defined_namespace->GetFullyQualifiedName(enum_def.name);
2217     auto enum_def_base = base.enums_.Lookup(qualified_name);
2218     if (!enum_def_base) continue;
2219     for (auto evit = enum_def.vals.vec.begin();
2220              evit != enum_def.vals.vec.end(); ++evit) {
2221       auto &enum_val = **evit;
2222       auto enum_val_base = enum_def_base->vals.Lookup(enum_val.name);
2223       if (enum_val_base) {
2224         if (enum_val.value != enum_val_base->value)
2225           return "values differ for enum: " + enum_val.name;
2226       }
2227     }
2228   }
2229   return "";
2230 }
2231 
2232 }  // namespace flatbuffers
2233