1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // Recursive descent FTW.
36 
37 #include <float.h>
38 #include <google/protobuf/stubs/hash.h>
39 #include <limits>
40 
41 
42 #include <google/protobuf/compiler/parser.h>
43 #include <google/protobuf/descriptor.h>
44 #include <google/protobuf/descriptor.pb.h>
45 #include <google/protobuf/wire_format.h>
46 #include <google/protobuf/io/tokenizer.h>
47 #include <google/protobuf/stubs/common.h>
48 #include <google/protobuf/stubs/strutil.h>
49 #include <google/protobuf/stubs/map_util.h>
50 
51 namespace google {
52 namespace protobuf {
53 namespace compiler {
54 
55 using internal::WireFormat;
56 
57 namespace {
58 
59 typedef hash_map<string, FieldDescriptorProto::Type> TypeNameMap;
60 
MakeTypeNameTable()61 TypeNameMap MakeTypeNameTable() {
62   TypeNameMap result;
63 
64   result["double"  ] = FieldDescriptorProto::TYPE_DOUBLE;
65   result["float"   ] = FieldDescriptorProto::TYPE_FLOAT;
66   result["uint64"  ] = FieldDescriptorProto::TYPE_UINT64;
67   result["fixed64" ] = FieldDescriptorProto::TYPE_FIXED64;
68   result["fixed32" ] = FieldDescriptorProto::TYPE_FIXED32;
69   result["bool"    ] = FieldDescriptorProto::TYPE_BOOL;
70   result["string"  ] = FieldDescriptorProto::TYPE_STRING;
71   result["group"   ] = FieldDescriptorProto::TYPE_GROUP;
72 
73   result["bytes"   ] = FieldDescriptorProto::TYPE_BYTES;
74   result["uint32"  ] = FieldDescriptorProto::TYPE_UINT32;
75   result["sfixed32"] = FieldDescriptorProto::TYPE_SFIXED32;
76   result["sfixed64"] = FieldDescriptorProto::TYPE_SFIXED64;
77   result["int32"   ] = FieldDescriptorProto::TYPE_INT32;
78   result["int64"   ] = FieldDescriptorProto::TYPE_INT64;
79   result["sint32"  ] = FieldDescriptorProto::TYPE_SINT32;
80   result["sint64"  ] = FieldDescriptorProto::TYPE_SINT64;
81 
82   return result;
83 }
84 
85 const TypeNameMap kTypeNames = MakeTypeNameTable();
86 
87 }  // anonymous namespace
88 
// Readability helper for the parser.  "DO(foo);" evaluates foo and, if the
// result is false, makes the enclosing function return false immediately;
// otherwise execution continues with the next statement.  The do/while(0)
// wrapper makes the expansion behave as a single statement.
#define DO(STATEMENT) do { if (!(STATEMENT)) return false; } while (0)
93 
94 // ===================================================================
95 
Parser()96 Parser::Parser()
97   : input_(NULL),
98     error_collector_(NULL),
99     source_location_table_(NULL),
100     had_errors_(false),
101     require_syntax_identifier_(false),
102     stop_after_syntax_identifier_(false) {
103 }
104 
~Parser()105 Parser::~Parser() {
106 }
107 
108 // ===================================================================
109 
LookingAt(const char * text)110 inline bool Parser::LookingAt(const char* text) {
111   return input_->current().text == text;
112 }
113 
LookingAtType(io::Tokenizer::TokenType token_type)114 inline bool Parser::LookingAtType(io::Tokenizer::TokenType token_type) {
115   return input_->current().type == token_type;
116 }
117 
AtEnd()118 inline bool Parser::AtEnd() {
119   return LookingAtType(io::Tokenizer::TYPE_END);
120 }
121 
TryConsume(const char * text)122 bool Parser::TryConsume(const char* text) {
123   if (LookingAt(text)) {
124     input_->Next();
125     return true;
126   } else {
127     return false;
128   }
129 }
130 
Consume(const char * text,const char * error)131 bool Parser::Consume(const char* text, const char* error) {
132   if (TryConsume(text)) {
133     return true;
134   } else {
135     AddError(error);
136     return false;
137   }
138 }
139 
Consume(const char * text)140 bool Parser::Consume(const char* text) {
141   if (TryConsume(text)) {
142     return true;
143   } else {
144     AddError("Expected \"" + string(text) + "\".");
145     return false;
146   }
147 }
148 
ConsumeIdentifier(string * output,const char * error)149 bool Parser::ConsumeIdentifier(string* output, const char* error) {
150   if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
151     *output = input_->current().text;
152     input_->Next();
153     return true;
154   } else {
155     AddError(error);
156     return false;
157   }
158 }
159 
ConsumeInteger(int * output,const char * error)160 bool Parser::ConsumeInteger(int* output, const char* error) {
161   if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
162     uint64 value = 0;
163     if (!io::Tokenizer::ParseInteger(input_->current().text,
164                                      kint32max, &value)) {
165       AddError("Integer out of range.");
166       // We still return true because we did, in fact, parse an integer.
167     }
168     *output = value;
169     input_->Next();
170     return true;
171   } else {
172     AddError(error);
173     return false;
174   }
175 }
176 
ConsumeSignedInteger(int * output,const char * error)177 bool Parser::ConsumeSignedInteger(int* output, const char* error) {
178   bool is_negative = false;
179   uint64 max_value = kint32max;
180   if (TryConsume("-")) {
181     is_negative = true;
182     max_value += 1;
183   }
184   uint64 value = 0;
185   DO(ConsumeInteger64(max_value, &value, error));
186   if (is_negative) value *= -1;
187   *output = value;
188   return true;
189 }
190 
ConsumeInteger64(uint64 max_value,uint64 * output,const char * error)191 bool Parser::ConsumeInteger64(uint64 max_value, uint64* output,
192                               const char* error) {
193   if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
194     if (!io::Tokenizer::ParseInteger(input_->current().text, max_value,
195                                      output)) {
196       AddError("Integer out of range.");
197       // We still return true because we did, in fact, parse an integer.
198       *output = 0;
199     }
200     input_->Next();
201     return true;
202   } else {
203     AddError(error);
204     return false;
205   }
206 }
207 
ConsumeNumber(double * output,const char * error)208 bool Parser::ConsumeNumber(double* output, const char* error) {
209   if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
210     *output = io::Tokenizer::ParseFloat(input_->current().text);
211     input_->Next();
212     return true;
213   } else if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
214     // Also accept integers.
215     uint64 value = 0;
216     if (!io::Tokenizer::ParseInteger(input_->current().text,
217                                      kuint64max, &value)) {
218       AddError("Integer out of range.");
219       // We still return true because we did, in fact, parse a number.
220     }
221     *output = value;
222     input_->Next();
223     return true;
224   } else if (LookingAt("inf")) {
225     *output = numeric_limits<double>::infinity();
226     input_->Next();
227     return true;
228   } else if (LookingAt("nan")) {
229     *output = numeric_limits<double>::quiet_NaN();
230     input_->Next();
231     return true;
232   } else {
233     AddError(error);
234     return false;
235   }
236 }
237 
ConsumeString(string * output,const char * error)238 bool Parser::ConsumeString(string* output, const char* error) {
239   if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
240     io::Tokenizer::ParseString(input_->current().text, output);
241     input_->Next();
242     // Allow C++ like concatenation of adjacent string tokens.
243     while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
244       io::Tokenizer::ParseStringAppend(input_->current().text, output);
245       input_->Next();
246     }
247     return true;
248   } else {
249     AddError(error);
250     return false;
251   }
252 }
253 
TryConsumeEndOfDeclaration(const char * text,const LocationRecorder * location)254 bool Parser::TryConsumeEndOfDeclaration(const char* text,
255                                         const LocationRecorder* location) {
256   if (LookingAt(text)) {
257     string leading, trailing;
258     input_->NextWithComments(&trailing, NULL, &leading);
259 
260     // Save the leading comments for next time, and recall the leading comments
261     // from last time.
262     leading.swap(upcoming_doc_comments_);
263 
264     if (location != NULL) {
265       location->AttachComments(&leading, &trailing);
266     }
267     return true;
268   } else {
269     return false;
270   }
271 }
272 
ConsumeEndOfDeclaration(const char * text,const LocationRecorder * location)273 bool Parser::ConsumeEndOfDeclaration(const char* text,
274                                      const LocationRecorder* location) {
275   if (TryConsumeEndOfDeclaration(text, location)) {
276     return true;
277   } else {
278     AddError("Expected \"" + string(text) + "\".");
279     return false;
280   }
281 }
282 
283 // -------------------------------------------------------------------
284 
AddError(int line,int column,const string & error)285 void Parser::AddError(int line, int column, const string& error) {
286   if (error_collector_ != NULL) {
287     error_collector_->AddError(line, column, error);
288   }
289   had_errors_ = true;
290 }
291 
AddError(const string & error)292 void Parser::AddError(const string& error) {
293   AddError(input_->current().line, input_->current().column, error);
294 }
295 
296 // -------------------------------------------------------------------
297 
LocationRecorder(Parser * parser)298 Parser::LocationRecorder::LocationRecorder(Parser* parser)
299   : parser_(parser),
300     location_(parser_->source_code_info_->add_location()) {
301   location_->add_span(parser_->input_->current().line);
302   location_->add_span(parser_->input_->current().column);
303 }
304 
LocationRecorder(const LocationRecorder & parent)305 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent) {
306   Init(parent);
307 }
308 
LocationRecorder(const LocationRecorder & parent,int path1)309 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
310                                            int path1) {
311   Init(parent);
312   AddPath(path1);
313 }
314 
LocationRecorder(const LocationRecorder & parent,int path1,int path2)315 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
316                                            int path1, int path2) {
317   Init(parent);
318   AddPath(path1);
319   AddPath(path2);
320 }
321 
Init(const LocationRecorder & parent)322 void Parser::LocationRecorder::Init(const LocationRecorder& parent) {
323   parser_ = parent.parser_;
324   location_ = parser_->source_code_info_->add_location();
325   location_->mutable_path()->CopyFrom(parent.location_->path());
326 
327   location_->add_span(parser_->input_->current().line);
328   location_->add_span(parser_->input_->current().column);
329 }
330 
~LocationRecorder()331 Parser::LocationRecorder::~LocationRecorder() {
332   if (location_->span_size() <= 2) {
333     EndAt(parser_->input_->previous());
334   }
335 }
336 
AddPath(int path_component)337 void Parser::LocationRecorder::AddPath(int path_component) {
338   location_->add_path(path_component);
339 }
340 
StartAt(const io::Tokenizer::Token & token)341 void Parser::LocationRecorder::StartAt(const io::Tokenizer::Token& token) {
342   location_->set_span(0, token.line);
343   location_->set_span(1, token.column);
344 }
345 
StartAt(const LocationRecorder & other)346 void Parser::LocationRecorder::StartAt(const LocationRecorder& other) {
347   location_->set_span(0, other.location_->span(0));
348   location_->set_span(1, other.location_->span(1));
349 }
350 
EndAt(const io::Tokenizer::Token & token)351 void Parser::LocationRecorder::EndAt(const io::Tokenizer::Token& token) {
352   if (token.line != location_->span(0)) {
353     location_->add_span(token.line);
354   }
355   location_->add_span(token.end_column);
356 }
357 
RecordLegacyLocation(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location)358 void Parser::LocationRecorder::RecordLegacyLocation(const Message* descriptor,
359     DescriptorPool::ErrorCollector::ErrorLocation location) {
360   if (parser_->source_location_table_ != NULL) {
361     parser_->source_location_table_->Add(
362         descriptor, location, location_->span(0), location_->span(1));
363   }
364 }
365 
AttachComments(string * leading,string * trailing) const366 void Parser::LocationRecorder::AttachComments(
367     string* leading, string* trailing) const {
368   GOOGLE_CHECK(!location_->has_leading_comments());
369   GOOGLE_CHECK(!location_->has_trailing_comments());
370 
371   if (!leading->empty()) {
372     location_->mutable_leading_comments()->swap(*leading);
373   }
374   if (!trailing->empty()) {
375     location_->mutable_trailing_comments()->swap(*trailing);
376   }
377 }
378 
379 // -------------------------------------------------------------------
380 
SkipStatement()381 void Parser::SkipStatement() {
382   while (true) {
383     if (AtEnd()) {
384       return;
385     } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) {
386       if (TryConsumeEndOfDeclaration(";", NULL)) {
387         return;
388       } else if (TryConsume("{")) {
389         SkipRestOfBlock();
390         return;
391       } else if (LookingAt("}")) {
392         return;
393       }
394     }
395     input_->Next();
396   }
397 }
398 
SkipRestOfBlock()399 void Parser::SkipRestOfBlock() {
400   while (true) {
401     if (AtEnd()) {
402       return;
403     } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) {
404       if (TryConsumeEndOfDeclaration("}", NULL)) {
405         return;
406       } else if (TryConsume("{")) {
407         SkipRestOfBlock();
408       }
409     }
410     input_->Next();
411   }
412 }
413 
414 // ===================================================================
415 
Parse(io::Tokenizer * input,FileDescriptorProto * file)416 bool Parser::Parse(io::Tokenizer* input, FileDescriptorProto* file) {
417   input_ = input;
418   had_errors_ = false;
419   syntax_identifier_.clear();
420 
421   // Note that |file| could be NULL at this point if
422   // stop_after_syntax_identifier_ is true.  So, we conservatively allocate
423   // SourceCodeInfo on the stack, then swap it into the FileDescriptorProto
424   // later on.
425   SourceCodeInfo source_code_info;
426   source_code_info_ = &source_code_info;
427 
428   if (LookingAtType(io::Tokenizer::TYPE_START)) {
429     // Advance to first token.
430     input_->NextWithComments(NULL, NULL, &upcoming_doc_comments_);
431   }
432 
433   {
434     LocationRecorder root_location(this);
435 
436     if (require_syntax_identifier_ || LookingAt("syntax")) {
437       if (!ParseSyntaxIdentifier()) {
438         // Don't attempt to parse the file if we didn't recognize the syntax
439         // identifier.
440         return false;
441       }
442     } else if (!stop_after_syntax_identifier_) {
443       syntax_identifier_ = "proto2";
444     }
445 
446     if (stop_after_syntax_identifier_) return !had_errors_;
447 
448     // Repeatedly parse statements until we reach the end of the file.
449     while (!AtEnd()) {
450       if (!ParseTopLevelStatement(file, root_location)) {
451         // This statement failed to parse.  Skip it, but keep looping to parse
452         // other statements.
453         SkipStatement();
454 
455         if (LookingAt("}")) {
456           AddError("Unmatched \"}\".");
457           input_->NextWithComments(NULL, NULL, &upcoming_doc_comments_);
458         }
459       }
460     }
461   }
462 
463   input_ = NULL;
464   source_code_info_ = NULL;
465   source_code_info.Swap(file->mutable_source_code_info());
466   return !had_errors_;
467 }
468 
ParseSyntaxIdentifier()469 bool Parser::ParseSyntaxIdentifier() {
470   DO(Consume("syntax", "File must begin with 'syntax = \"proto2\";'."));
471   DO(Consume("="));
472   io::Tokenizer::Token syntax_token = input_->current();
473   string syntax;
474   DO(ConsumeString(&syntax, "Expected syntax identifier."));
475   DO(ConsumeEndOfDeclaration(";", NULL));
476 
477   syntax_identifier_ = syntax;
478 
479   if (syntax != "proto2" && !stop_after_syntax_identifier_) {
480     AddError(syntax_token.line, syntax_token.column,
481       "Unrecognized syntax identifier \"" + syntax + "\".  This parser "
482       "only recognizes \"proto2\".");
483     return false;
484   }
485 
486   return true;
487 }
488 
ParseTopLevelStatement(FileDescriptorProto * file,const LocationRecorder & root_location)489 bool Parser::ParseTopLevelStatement(FileDescriptorProto* file,
490                                     const LocationRecorder& root_location) {
491   if (TryConsumeEndOfDeclaration(";", NULL)) {
492     // empty statement; ignore
493     return true;
494   } else if (LookingAt("message")) {
495     LocationRecorder location(root_location,
496       FileDescriptorProto::kMessageTypeFieldNumber, file->message_type_size());
497     return ParseMessageDefinition(file->add_message_type(), location, file);
498   } else if (LookingAt("enum")) {
499     LocationRecorder location(root_location,
500       FileDescriptorProto::kEnumTypeFieldNumber, file->enum_type_size());
501     return ParseEnumDefinition(file->add_enum_type(), location, file);
502   } else if (LookingAt("service")) {
503     LocationRecorder location(root_location,
504       FileDescriptorProto::kServiceFieldNumber, file->service_size());
505     return ParseServiceDefinition(file->add_service(), location, file);
506   } else if (LookingAt("extend")) {
507     LocationRecorder location(root_location,
508         FileDescriptorProto::kExtensionFieldNumber);
509     return ParseExtend(file->mutable_extension(),
510                        file->mutable_message_type(),
511                        root_location,
512                        FileDescriptorProto::kMessageTypeFieldNumber,
513                        location, file);
514   } else if (LookingAt("import")) {
515     return ParseImport(file->mutable_dependency(),
516                        file->mutable_public_dependency(),
517                        file->mutable_weak_dependency(),
518                        root_location, file);
519   } else if (LookingAt("package")) {
520     return ParsePackage(file, root_location, file);
521   } else if (LookingAt("option")) {
522     LocationRecorder location(root_location,
523         FileDescriptorProto::kOptionsFieldNumber);
524     return ParseOption(file->mutable_options(), location, file,
525                        OPTION_STATEMENT);
526   } else {
527     AddError("Expected top-level statement (e.g. \"message\").");
528     return false;
529   }
530 }
531 
532 // -------------------------------------------------------------------
533 // Messages
534 
ParseMessageDefinition(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)535 bool Parser::ParseMessageDefinition(
536     DescriptorProto* message,
537     const LocationRecorder& message_location,
538     const FileDescriptorProto* containing_file) {
539   DO(Consume("message"));
540   {
541     LocationRecorder location(message_location,
542                               DescriptorProto::kNameFieldNumber);
543     location.RecordLegacyLocation(
544         message, DescriptorPool::ErrorCollector::NAME);
545     DO(ConsumeIdentifier(message->mutable_name(), "Expected message name."));
546   }
547   DO(ParseMessageBlock(message, message_location, containing_file));
548   return true;
549 }
550 
551 namespace {
552 
553 const int kMaxExtensionRangeSentinel = -1;
554 
IsMessageSetWireFormatMessage(const DescriptorProto & message)555 bool IsMessageSetWireFormatMessage(const DescriptorProto& message) {
556   const MessageOptions& options = message.options();
557   for (int i = 0; i < options.uninterpreted_option_size(); ++i) {
558     const UninterpretedOption& uninterpreted = options.uninterpreted_option(i);
559     if (uninterpreted.name_size() == 1 &&
560         uninterpreted.name(0).name_part() == "message_set_wire_format" &&
561         uninterpreted.identifier_value() == "true") {
562       return true;
563     }
564   }
565   return false;
566 }
567 
568 // Modifies any extension ranges that specified 'max' as the end of the
569 // extension range, and sets them to the type-specific maximum. The actual max
570 // tag number can only be determined after all options have been parsed.
AdjustExtensionRangesWithMaxEndNumber(DescriptorProto * message)571 void AdjustExtensionRangesWithMaxEndNumber(DescriptorProto* message) {
572   const bool is_message_set = IsMessageSetWireFormatMessage(*message);
573   const int max_extension_number = is_message_set ?
574       kint32max :
575       FieldDescriptor::kMaxNumber + 1;
576   for (int i = 0; i < message->extension_range_size(); ++i) {
577     if (message->extension_range(i).end() == kMaxExtensionRangeSentinel) {
578       message->mutable_extension_range(i)->set_end(max_extension_number);
579     }
580   }
581 }
582 
583 }  // namespace
584 
ParseMessageBlock(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)585 bool Parser::ParseMessageBlock(DescriptorProto* message,
586                                const LocationRecorder& message_location,
587                                const FileDescriptorProto* containing_file) {
588   DO(ConsumeEndOfDeclaration("{", &message_location));
589 
590   while (!TryConsumeEndOfDeclaration("}", NULL)) {
591     if (AtEnd()) {
592       AddError("Reached end of input in message definition (missing '}').");
593       return false;
594     }
595 
596     if (!ParseMessageStatement(message, message_location, containing_file)) {
597       // This statement failed to parse.  Skip it, but keep looping to parse
598       // other statements.
599       SkipStatement();
600     }
601   }
602 
603   if (message->extension_range_size() > 0) {
604     AdjustExtensionRangesWithMaxEndNumber(message);
605   }
606   return true;
607 }
608 
ParseMessageStatement(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)609 bool Parser::ParseMessageStatement(DescriptorProto* message,
610                                    const LocationRecorder& message_location,
611                                    const FileDescriptorProto* containing_file) {
612   if (TryConsumeEndOfDeclaration(";", NULL)) {
613     // empty statement; ignore
614     return true;
615   } else if (LookingAt("message")) {
616     LocationRecorder location(message_location,
617                               DescriptorProto::kNestedTypeFieldNumber,
618                               message->nested_type_size());
619     return ParseMessageDefinition(message->add_nested_type(), location,
620                                   containing_file);
621   } else if (LookingAt("enum")) {
622     LocationRecorder location(message_location,
623                               DescriptorProto::kEnumTypeFieldNumber,
624                               message->enum_type_size());
625     return ParseEnumDefinition(message->add_enum_type(), location,
626                                containing_file);
627   } else if (LookingAt("extensions")) {
628     LocationRecorder location(message_location,
629                               DescriptorProto::kExtensionRangeFieldNumber);
630     return ParseExtensions(message, location, containing_file);
631   } else if (LookingAt("extend")) {
632     LocationRecorder location(message_location,
633                               DescriptorProto::kExtensionFieldNumber);
634     return ParseExtend(message->mutable_extension(),
635                        message->mutable_nested_type(),
636                        message_location,
637                        DescriptorProto::kNestedTypeFieldNumber,
638                        location, containing_file);
639   } else if (LookingAt("option")) {
640     LocationRecorder location(message_location,
641                               DescriptorProto::kOptionsFieldNumber);
642     return ParseOption(message->mutable_options(), location,
643                        containing_file, OPTION_STATEMENT);
644   } else if (LookingAt("oneof")) {
645     int oneof_index = message->oneof_decl_size();
646     LocationRecorder oneof_location(message_location,
647                                     DescriptorProto::kOneofDeclFieldNumber,
648                                     oneof_index);
649 
650     return ParseOneof(message->add_oneof_decl(), message,
651                       oneof_index, oneof_location, message_location,
652                       containing_file);
653   } else {
654     LocationRecorder location(message_location,
655                               DescriptorProto::kFieldFieldNumber,
656                               message->field_size());
657     return ParseMessageField(message->add_field(),
658                              message->mutable_nested_type(),
659                              message_location,
660                              DescriptorProto::kNestedTypeFieldNumber,
661                              location,
662                              containing_file);
663   }
664 }
665 
ParseMessageField(FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)666 bool Parser::ParseMessageField(FieldDescriptorProto* field,
667                                RepeatedPtrField<DescriptorProto>* messages,
668                                const LocationRecorder& parent_location,
669                                int location_field_number_for_nested_type,
670                                const LocationRecorder& field_location,
671                                const FileDescriptorProto* containing_file) {
672   {
673     LocationRecorder location(field_location,
674                               FieldDescriptorProto::kLabelFieldNumber);
675     FieldDescriptorProto::Label label;
676     DO(ParseLabel(&label, containing_file));
677     field->set_label(label);
678   }
679 
680   return ParseMessageFieldNoLabel(field, messages, parent_location,
681                                   location_field_number_for_nested_type,
682                                   field_location,
683                                   containing_file);
684 }
685 
ParseMessageFieldNoLabel(FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)686 bool Parser::ParseMessageFieldNoLabel(
687     FieldDescriptorProto* field,
688     RepeatedPtrField<DescriptorProto>* messages,
689     const LocationRecorder& parent_location,
690     int location_field_number_for_nested_type,
691     const LocationRecorder& field_location,
692     const FileDescriptorProto* containing_file) {
693   // Parse type.
694   {
695     LocationRecorder location(field_location);  // add path later
696     location.RecordLegacyLocation(field, DescriptorPool::ErrorCollector::TYPE);
697 
698     FieldDescriptorProto::Type type = FieldDescriptorProto::TYPE_INT32;
699     string type_name;
700     DO(ParseType(&type, &type_name));
701     if (type_name.empty()) {
702       location.AddPath(FieldDescriptorProto::kTypeFieldNumber);
703       field->set_type(type);
704     } else {
705       location.AddPath(FieldDescriptorProto::kTypeNameFieldNumber);
706       field->set_type_name(type_name);
707     }
708   }
709 
710   // Parse name and '='.
711   io::Tokenizer::Token name_token = input_->current();
712   {
713     LocationRecorder location(field_location,
714                               FieldDescriptorProto::kNameFieldNumber);
715     location.RecordLegacyLocation(field, DescriptorPool::ErrorCollector::NAME);
716     DO(ConsumeIdentifier(field->mutable_name(), "Expected field name."));
717   }
718   DO(Consume("=", "Missing field number."));
719 
720   // Parse field number.
721   {
722     LocationRecorder location(field_location,
723                               FieldDescriptorProto::kNumberFieldNumber);
724     location.RecordLegacyLocation(
725         field, DescriptorPool::ErrorCollector::NUMBER);
726     int number;
727     DO(ConsumeInteger(&number, "Expected field number."));
728     field->set_number(number);
729   }
730 
731   // Parse options.
732   DO(ParseFieldOptions(field, field_location, containing_file));
733 
734   // Deal with groups.
735   if (field->has_type() && field->type() == FieldDescriptorProto::TYPE_GROUP) {
736     // Awkward:  Since a group declares both a message type and a field, we
737     //   have to create overlapping locations.
738     LocationRecorder group_location(parent_location);
739     group_location.StartAt(field_location);
740     group_location.AddPath(location_field_number_for_nested_type);
741     group_location.AddPath(messages->size());
742 
743     DescriptorProto* group = messages->Add();
744     group->set_name(field->name());
745 
746     // Record name location to match the field name's location.
747     {
748       LocationRecorder location(group_location,
749                                 DescriptorProto::kNameFieldNumber);
750       location.StartAt(name_token);
751       location.EndAt(name_token);
752       location.RecordLegacyLocation(
753           group, DescriptorPool::ErrorCollector::NAME);
754     }
755 
756     // The field's type_name also comes from the name.  Confusing!
757     {
758       LocationRecorder location(field_location,
759                                 FieldDescriptorProto::kTypeNameFieldNumber);
760       location.StartAt(name_token);
761       location.EndAt(name_token);
762     }
763 
764     // As a hack for backwards-compatibility, we force the group name to start
765     // with a capital letter and lower-case the field name.  New code should
766     // not use groups; it should use nested messages.
767     if (group->name()[0] < 'A' || 'Z' < group->name()[0]) {
768       AddError(name_token.line, name_token.column,
769         "Group names must start with a capital letter.");
770     }
771     LowerString(field->mutable_name());
772 
773     field->set_type_name(group->name());
774     if (LookingAt("{")) {
775       DO(ParseMessageBlock(group, group_location, containing_file));
776     } else {
777       AddError("Missing group body.");
778       return false;
779     }
780   } else {
781     DO(ConsumeEndOfDeclaration(";", &field_location));
782   }
783 
784   return true;
785 }
786 
ParseFieldOptions(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)787 bool Parser::ParseFieldOptions(FieldDescriptorProto* field,
788                                const LocationRecorder& field_location,
789                                const FileDescriptorProto* containing_file) {
790   if (!LookingAt("[")) return true;
791 
792   LocationRecorder location(field_location,
793                             FieldDescriptorProto::kOptionsFieldNumber);
794 
795   DO(Consume("["));
796 
797   // Parse field options.
798   do {
799     if (LookingAt("default")) {
800       // We intentionally pass field_location rather than location here, since
801       // the default value is not actually an option.
802       DO(ParseDefaultAssignment(field, field_location, containing_file));
803     } else {
804       DO(ParseOption(field->mutable_options(), location,
805                      containing_file, OPTION_ASSIGNMENT));
806     }
807   } while (TryConsume(","));
808 
809   DO(Consume("]"));
810   return true;
811 }
812 
ParseDefaultAssignment(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)813 bool Parser::ParseDefaultAssignment(
814     FieldDescriptorProto* field,
815     const LocationRecorder& field_location,
816     const FileDescriptorProto* containing_file) {
817   if (field->has_default_value()) {
818     AddError("Already set option \"default\".");
819     field->clear_default_value();
820   }
821 
822   DO(Consume("default"));
823   DO(Consume("="));
824 
825   LocationRecorder location(field_location,
826                             FieldDescriptorProto::kDefaultValueFieldNumber);
827   location.RecordLegacyLocation(
828       field, DescriptorPool::ErrorCollector::DEFAULT_VALUE);
829   string* default_value = field->mutable_default_value();
830 
831   if (!field->has_type()) {
832     // The field has a type name, but we don't know if it is a message or an
833     // enum yet. (If it were a primitive type, |field| would have a type set
834     // already.) In this case, simply take the current string as the default
835     // value; we will catch the error later if it is not a valid enum value.
836     // (N.B. that we do not check whether the current token is an identifier:
837     // doing so throws strange errors when the user mistypes a primitive
838     // typename and we assume it's an enum. E.g.: "optional int foo = 1 [default
839     // = 42]". In such a case the fundamental error is really that "int" is not
840     // a type, not that "42" is not an identifier. See b/12533582.)
841     *default_value = input_->current().text;
842     input_->Next();
843     return true;
844   }
845 
846   switch (field->type()) {
847     case FieldDescriptorProto::TYPE_INT32:
848     case FieldDescriptorProto::TYPE_INT64:
849     case FieldDescriptorProto::TYPE_SINT32:
850     case FieldDescriptorProto::TYPE_SINT64:
851     case FieldDescriptorProto::TYPE_SFIXED32:
852     case FieldDescriptorProto::TYPE_SFIXED64: {
853       uint64 max_value = kint64max;
854       if (field->type() == FieldDescriptorProto::TYPE_INT32 ||
855           field->type() == FieldDescriptorProto::TYPE_SINT32 ||
856           field->type() == FieldDescriptorProto::TYPE_SFIXED32) {
857         max_value = kint32max;
858       }
859 
860       // These types can be negative.
861       if (TryConsume("-")) {
862         default_value->append("-");
863         // Two's complement always has one more negative value than positive.
864         ++max_value;
865       }
866       // Parse the integer to verify that it is not out-of-range.
867       uint64 value;
868       DO(ConsumeInteger64(max_value, &value,
869                           "Expected integer for field default value."));
870       // And stringify it again.
871       default_value->append(SimpleItoa(value));
872       break;
873     }
874 
875     case FieldDescriptorProto::TYPE_UINT32:
876     case FieldDescriptorProto::TYPE_UINT64:
877     case FieldDescriptorProto::TYPE_FIXED32:
878     case FieldDescriptorProto::TYPE_FIXED64: {
879       uint64 max_value = kuint64max;
880       if (field->type() == FieldDescriptorProto::TYPE_UINT32 ||
881           field->type() == FieldDescriptorProto::TYPE_FIXED32) {
882         max_value = kuint32max;
883       }
884 
885       // Numeric, not negative.
886       if (TryConsume("-")) {
887         AddError("Unsigned field can't have negative default value.");
888       }
889       // Parse the integer to verify that it is not out-of-range.
890       uint64 value;
891       DO(ConsumeInteger64(max_value, &value,
892                           "Expected integer for field default value."));
893       // And stringify it again.
894       default_value->append(SimpleItoa(value));
895       break;
896     }
897 
898     case FieldDescriptorProto::TYPE_FLOAT:
899     case FieldDescriptorProto::TYPE_DOUBLE:
900       // These types can be negative.
901       if (TryConsume("-")) {
902         default_value->append("-");
903       }
904       // Parse the integer because we have to convert hex integers to decimal
905       // floats.
906       double value;
907       DO(ConsumeNumber(&value, "Expected number."));
908       // And stringify it again.
909       default_value->append(SimpleDtoa(value));
910       break;
911 
912     case FieldDescriptorProto::TYPE_BOOL:
913       if (TryConsume("true")) {
914         default_value->assign("true");
915       } else if (TryConsume("false")) {
916         default_value->assign("false");
917       } else {
918         AddError("Expected \"true\" or \"false\".");
919         return false;
920       }
921       break;
922 
923     case FieldDescriptorProto::TYPE_STRING:
924       // Note: When file opton java_string_check_utf8 is true, if a
925       // non-string representation (eg byte[]) is later supported, it must
926       // be checked for UTF-8-ness.
927       DO(ConsumeString(default_value, "Expected string for field default "
928                        "value."));
929       break;
930 
931     case FieldDescriptorProto::TYPE_BYTES:
932       DO(ConsumeString(default_value, "Expected string."));
933       *default_value = CEscape(*default_value);
934       break;
935 
936     case FieldDescriptorProto::TYPE_ENUM:
937       DO(ConsumeIdentifier(default_value, "Expected enum identifier for field "
938                                           "default value."));
939       break;
940 
941     case FieldDescriptorProto::TYPE_MESSAGE:
942     case FieldDescriptorProto::TYPE_GROUP:
943       AddError("Messages can't have default values.");
944       return false;
945   }
946 
947   return true;
948 }
949 
ParseOptionNamePart(UninterpretedOption * uninterpreted_option,const LocationRecorder & part_location,const FileDescriptorProto * containing_file)950 bool Parser::ParseOptionNamePart(UninterpretedOption* uninterpreted_option,
951                                  const LocationRecorder& part_location,
952                                  const FileDescriptorProto* containing_file) {
953   UninterpretedOption::NamePart* name = uninterpreted_option->add_name();
954   string identifier;  // We parse identifiers into this string.
955   if (LookingAt("(")) {  // This is an extension.
956     DO(Consume("("));
957 
958     {
959       LocationRecorder location(
960           part_location, UninterpretedOption::NamePart::kNamePartFieldNumber);
961       // An extension name consists of dot-separated identifiers, and may begin
962       // with a dot.
963       if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
964         DO(ConsumeIdentifier(&identifier, "Expected identifier."));
965         name->mutable_name_part()->append(identifier);
966       }
967       while (LookingAt(".")) {
968         DO(Consume("."));
969         name->mutable_name_part()->append(".");
970         DO(ConsumeIdentifier(&identifier, "Expected identifier."));
971         name->mutable_name_part()->append(identifier);
972       }
973     }
974 
975     DO(Consume(")"));
976     name->set_is_extension(true);
977   } else {  // This is a regular field.
978     LocationRecorder location(
979         part_location, UninterpretedOption::NamePart::kNamePartFieldNumber);
980     DO(ConsumeIdentifier(&identifier, "Expected identifier."));
981     name->mutable_name_part()->append(identifier);
982     name->set_is_extension(false);
983   }
984   return true;
985 }
986 
ParseUninterpretedBlock(string * value)987 bool Parser::ParseUninterpretedBlock(string* value) {
988   // Note that enclosing braces are not added to *value.
989   // We do NOT use ConsumeEndOfStatement for this brace because it's delimiting
990   // an expression, not a block of statements.
991   DO(Consume("{"));
992   int brace_depth = 1;
993   while (!AtEnd()) {
994     if (LookingAt("{")) {
995       brace_depth++;
996     } else if (LookingAt("}")) {
997       brace_depth--;
998       if (brace_depth == 0) {
999         input_->Next();
1000         return true;
1001       }
1002     }
1003     // TODO(sanjay): Interpret line/column numbers to preserve formatting
1004     if (!value->empty()) value->push_back(' ');
1005     value->append(input_->current().text);
1006     input_->Next();
1007   }
1008   AddError("Unexpected end of stream while parsing aggregate value.");
1009   return false;
1010 }
1011 
1012 // We don't interpret the option here. Instead we store it in an
1013 // UninterpretedOption, to be interpreted later.
ParseOption(Message * options,const LocationRecorder & options_location,const FileDescriptorProto * containing_file,OptionStyle style)1014 bool Parser::ParseOption(Message* options,
1015                          const LocationRecorder& options_location,
1016                          const FileDescriptorProto* containing_file,
1017                          OptionStyle style) {
1018   // Create an entry in the uninterpreted_option field.
1019   const FieldDescriptor* uninterpreted_option_field = options->GetDescriptor()->
1020       FindFieldByName("uninterpreted_option");
1021   GOOGLE_CHECK(uninterpreted_option_field != NULL)
1022       << "No field named \"uninterpreted_option\" in the Options proto.";
1023 
1024   const Reflection* reflection = options->GetReflection();
1025 
1026   LocationRecorder location(
1027       options_location, uninterpreted_option_field->number(),
1028       reflection->FieldSize(*options, uninterpreted_option_field));
1029 
1030   if (style == OPTION_STATEMENT) {
1031     DO(Consume("option"));
1032   }
1033 
1034   UninterpretedOption* uninterpreted_option = down_cast<UninterpretedOption*>(
1035       options->GetReflection()->AddMessage(options,
1036                                            uninterpreted_option_field));
1037 
1038   // Parse dot-separated name.
1039   {
1040     LocationRecorder name_location(location,
1041                                    UninterpretedOption::kNameFieldNumber);
1042     name_location.RecordLegacyLocation(
1043         uninterpreted_option, DescriptorPool::ErrorCollector::OPTION_NAME);
1044 
1045     {
1046       LocationRecorder part_location(name_location,
1047                                      uninterpreted_option->name_size());
1048       DO(ParseOptionNamePart(uninterpreted_option, part_location,
1049                              containing_file));
1050     }
1051 
1052     while (LookingAt(".")) {
1053       DO(Consume("."));
1054       LocationRecorder part_location(name_location,
1055                                      uninterpreted_option->name_size());
1056       DO(ParseOptionNamePart(uninterpreted_option, part_location,
1057                              containing_file));
1058     }
1059   }
1060 
1061   DO(Consume("="));
1062 
1063   {
1064     LocationRecorder value_location(location);
1065     value_location.RecordLegacyLocation(
1066         uninterpreted_option, DescriptorPool::ErrorCollector::OPTION_VALUE);
1067 
1068     // All values are a single token, except for negative numbers, which consist
1069     // of a single '-' symbol, followed by a positive number.
1070     bool is_negative = TryConsume("-");
1071 
1072     switch (input_->current().type) {
1073       case io::Tokenizer::TYPE_START:
1074         GOOGLE_LOG(FATAL) << "Trying to read value before any tokens have been read.";
1075         return false;
1076 
1077       case io::Tokenizer::TYPE_END:
1078         AddError("Unexpected end of stream while parsing option value.");
1079         return false;
1080 
1081       case io::Tokenizer::TYPE_IDENTIFIER: {
1082         value_location.AddPath(
1083             UninterpretedOption::kIdentifierValueFieldNumber);
1084         if (is_negative) {
1085           AddError("Invalid '-' symbol before identifier.");
1086           return false;
1087         }
1088         string value;
1089         DO(ConsumeIdentifier(&value, "Expected identifier."));
1090         uninterpreted_option->set_identifier_value(value);
1091         break;
1092       }
1093 
1094       case io::Tokenizer::TYPE_INTEGER: {
1095         uint64 value;
1096         uint64 max_value =
1097             is_negative ? static_cast<uint64>(kint64max) + 1 : kuint64max;
1098         DO(ConsumeInteger64(max_value, &value, "Expected integer."));
1099         if (is_negative) {
1100           value_location.AddPath(
1101               UninterpretedOption::kNegativeIntValueFieldNumber);
1102           uninterpreted_option->set_negative_int_value(
1103               -static_cast<int64>(value));
1104         } else {
1105           value_location.AddPath(
1106               UninterpretedOption::kPositiveIntValueFieldNumber);
1107           uninterpreted_option->set_positive_int_value(value);
1108         }
1109         break;
1110       }
1111 
1112       case io::Tokenizer::TYPE_FLOAT: {
1113         value_location.AddPath(UninterpretedOption::kDoubleValueFieldNumber);
1114         double value;
1115         DO(ConsumeNumber(&value, "Expected number."));
1116         uninterpreted_option->set_double_value(is_negative ? -value : value);
1117         break;
1118       }
1119 
1120       case io::Tokenizer::TYPE_STRING: {
1121         value_location.AddPath(UninterpretedOption::kStringValueFieldNumber);
1122         if (is_negative) {
1123           AddError("Invalid '-' symbol before string.");
1124           return false;
1125         }
1126         string value;
1127         DO(ConsumeString(&value, "Expected string."));
1128         uninterpreted_option->set_string_value(value);
1129         break;
1130       }
1131 
1132       case io::Tokenizer::TYPE_SYMBOL:
1133         if (LookingAt("{")) {
1134           value_location.AddPath(
1135               UninterpretedOption::kAggregateValueFieldNumber);
1136           DO(ParseUninterpretedBlock(
1137               uninterpreted_option->mutable_aggregate_value()));
1138         } else {
1139           AddError("Expected option value.");
1140           return false;
1141         }
1142         break;
1143     }
1144   }
1145 
1146   if (style == OPTION_STATEMENT) {
1147     DO(ConsumeEndOfDeclaration(";", &location));
1148   }
1149 
1150 
1151   return true;
1152 }
1153 
ParseExtensions(DescriptorProto * message,const LocationRecorder & extensions_location,const FileDescriptorProto * containing_file)1154 bool Parser::ParseExtensions(DescriptorProto* message,
1155                              const LocationRecorder& extensions_location,
1156                              const FileDescriptorProto* containing_file) {
1157   // Parse the declaration.
1158   DO(Consume("extensions"));
1159 
1160   do {
1161     // Note that kExtensionRangeFieldNumber was already pushed by the parent.
1162     LocationRecorder location(extensions_location,
1163                               message->extension_range_size());
1164 
1165     DescriptorProto::ExtensionRange* range = message->add_extension_range();
1166     location.RecordLegacyLocation(
1167         range, DescriptorPool::ErrorCollector::NUMBER);
1168 
1169     int start, end;
1170     io::Tokenizer::Token start_token;
1171 
1172     {
1173       LocationRecorder start_location(
1174           location, DescriptorProto::ExtensionRange::kStartFieldNumber);
1175       start_token = input_->current();
1176       DO(ConsumeInteger(&start, "Expected field number range."));
1177     }
1178 
1179     if (TryConsume("to")) {
1180       LocationRecorder end_location(
1181           location, DescriptorProto::ExtensionRange::kEndFieldNumber);
1182       if (TryConsume("max")) {
1183         // Set to the sentinel value - 1 since we increment the value below.
1184         // The actual value of the end of the range should be set with
1185         // AdjustExtensionRangesWithMaxEndNumber.
1186         end = kMaxExtensionRangeSentinel - 1;
1187       } else {
1188         DO(ConsumeInteger(&end, "Expected integer."));
1189       }
1190     } else {
1191       LocationRecorder end_location(
1192           location, DescriptorProto::ExtensionRange::kEndFieldNumber);
1193       end_location.StartAt(start_token);
1194       end_location.EndAt(start_token);
1195       end = start;
1196     }
1197 
1198     // Users like to specify inclusive ranges, but in code we like the end
1199     // number to be exclusive.
1200     ++end;
1201 
1202     range->set_start(start);
1203     range->set_end(end);
1204   } while (TryConsume(","));
1205 
1206   DO(ConsumeEndOfDeclaration(";", &extensions_location));
1207   return true;
1208 }
1209 
ParseExtend(RepeatedPtrField<FieldDescriptorProto> * extensions,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & extend_location,const FileDescriptorProto * containing_file)1210 bool Parser::ParseExtend(RepeatedPtrField<FieldDescriptorProto>* extensions,
1211                          RepeatedPtrField<DescriptorProto>* messages,
1212                          const LocationRecorder& parent_location,
1213                          int location_field_number_for_nested_type,
1214                          const LocationRecorder& extend_location,
1215                          const FileDescriptorProto* containing_file) {
1216   DO(Consume("extend"));
1217 
1218   // Parse the extendee type.
1219   io::Tokenizer::Token extendee_start = input_->current();
1220   string extendee;
1221   DO(ParseUserDefinedType(&extendee));
1222   io::Tokenizer::Token extendee_end = input_->previous();
1223 
1224   // Parse the block.
1225   DO(ConsumeEndOfDeclaration("{", &extend_location));
1226 
1227   bool is_first = true;
1228 
1229   do {
1230     if (AtEnd()) {
1231       AddError("Reached end of input in extend definition (missing '}').");
1232       return false;
1233     }
1234 
1235     // Note that kExtensionFieldNumber was already pushed by the parent.
1236     LocationRecorder location(extend_location, extensions->size());
1237 
1238     FieldDescriptorProto* field = extensions->Add();
1239 
1240     {
1241       LocationRecorder extendee_location(
1242           location, FieldDescriptorProto::kExtendeeFieldNumber);
1243       extendee_location.StartAt(extendee_start);
1244       extendee_location.EndAt(extendee_end);
1245 
1246       if (is_first) {
1247         extendee_location.RecordLegacyLocation(
1248             field, DescriptorPool::ErrorCollector::EXTENDEE);
1249         is_first = false;
1250       }
1251     }
1252 
1253     field->set_extendee(extendee);
1254 
1255     if (!ParseMessageField(field, messages, parent_location,
1256                            location_field_number_for_nested_type,
1257                            location,
1258                            containing_file)) {
1259       // This statement failed to parse.  Skip it, but keep looping to parse
1260       // other statements.
1261       SkipStatement();
1262     }
1263   } while (!TryConsumeEndOfDeclaration("}", NULL));
1264 
1265   return true;
1266 }
1267 
ParseOneof(OneofDescriptorProto * oneof_decl,DescriptorProto * containing_type,int oneof_index,const LocationRecorder & oneof_location,const LocationRecorder & containing_type_location,const FileDescriptorProto * containing_file)1268 bool Parser::ParseOneof(OneofDescriptorProto* oneof_decl,
1269                         DescriptorProto* containing_type,
1270                         int oneof_index,
1271                         const LocationRecorder& oneof_location,
1272                         const LocationRecorder& containing_type_location,
1273                         const FileDescriptorProto* containing_file) {
1274   DO(Consume("oneof"));
1275 
1276   {
1277     LocationRecorder name_location(oneof_location,
1278                                    OneofDescriptorProto::kNameFieldNumber);
1279     DO(ConsumeIdentifier(oneof_decl->mutable_name(), "Expected oneof name."));
1280   }
1281 
1282   DO(ConsumeEndOfDeclaration("{", &oneof_location));
1283 
1284   do {
1285     if (AtEnd()) {
1286       AddError("Reached end of input in oneof definition (missing '}').");
1287       return false;
1288     }
1289 
1290     // Print a nice error if the user accidentally tries to place a label
1291     // on an individual member of a oneof.
1292     if (LookingAt("required") ||
1293         LookingAt("optional") ||
1294         LookingAt("repeated")) {
1295       AddError("Fields in oneofs must not have labels (required / optional "
1296                "/ repeated).");
1297       // We can continue parsing here because we understand what the user
1298       // meant.  The error report will still make parsing fail overall.
1299       input_->Next();
1300     }
1301 
1302     LocationRecorder field_location(containing_type_location,
1303                                     DescriptorProto::kFieldFieldNumber,
1304                                     containing_type->field_size());
1305 
1306     FieldDescriptorProto* field = containing_type->add_field();
1307     field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1308     field->set_oneof_index(oneof_index);
1309 
1310     if (!ParseMessageFieldNoLabel(field,
1311                                   containing_type->mutable_nested_type(),
1312                                   containing_type_location,
1313                                   DescriptorProto::kNestedTypeFieldNumber,
1314                                   field_location,
1315                                   containing_file)) {
1316       // This statement failed to parse.  Skip it, but keep looping to parse
1317       // other statements.
1318       SkipStatement();
1319     }
1320   } while (!TryConsumeEndOfDeclaration("}", NULL));
1321 
1322   return true;
1323 }
1324 
1325 // -------------------------------------------------------------------
1326 // Enums
1327 
ParseEnumDefinition(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)1328 bool Parser::ParseEnumDefinition(EnumDescriptorProto* enum_type,
1329                                  const LocationRecorder& enum_location,
1330                                  const FileDescriptorProto* containing_file) {
1331   DO(Consume("enum"));
1332 
1333   {
1334     LocationRecorder location(enum_location,
1335                               EnumDescriptorProto::kNameFieldNumber);
1336     location.RecordLegacyLocation(
1337         enum_type, DescriptorPool::ErrorCollector::NAME);
1338     DO(ConsumeIdentifier(enum_type->mutable_name(), "Expected enum name."));
1339   }
1340 
1341   DO(ParseEnumBlock(enum_type, enum_location, containing_file));
1342   return true;
1343 }
1344 
ParseEnumBlock(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)1345 bool Parser::ParseEnumBlock(EnumDescriptorProto* enum_type,
1346                             const LocationRecorder& enum_location,
1347                             const FileDescriptorProto* containing_file) {
1348   DO(ConsumeEndOfDeclaration("{", &enum_location));
1349 
1350   while (!TryConsumeEndOfDeclaration("}", NULL)) {
1351     if (AtEnd()) {
1352       AddError("Reached end of input in enum definition (missing '}').");
1353       return false;
1354     }
1355 
1356     if (!ParseEnumStatement(enum_type, enum_location, containing_file)) {
1357       // This statement failed to parse.  Skip it, but keep looping to parse
1358       // other statements.
1359       SkipStatement();
1360     }
1361   }
1362 
1363   return true;
1364 }
1365 
ParseEnumStatement(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)1366 bool Parser::ParseEnumStatement(EnumDescriptorProto* enum_type,
1367                                 const LocationRecorder& enum_location,
1368                                 const FileDescriptorProto* containing_file) {
1369   if (TryConsumeEndOfDeclaration(";", NULL)) {
1370     // empty statement; ignore
1371     return true;
1372   } else if (LookingAt("option")) {
1373     LocationRecorder location(enum_location,
1374                               EnumDescriptorProto::kOptionsFieldNumber);
1375     return ParseOption(enum_type->mutable_options(), location,
1376                        containing_file, OPTION_STATEMENT);
1377   } else {
1378     LocationRecorder location(enum_location,
1379         EnumDescriptorProto::kValueFieldNumber, enum_type->value_size());
1380     return ParseEnumConstant(enum_type->add_value(), location, containing_file);
1381   }
1382 }
1383 
ParseEnumConstant(EnumValueDescriptorProto * enum_value,const LocationRecorder & enum_value_location,const FileDescriptorProto * containing_file)1384 bool Parser::ParseEnumConstant(EnumValueDescriptorProto* enum_value,
1385                                const LocationRecorder& enum_value_location,
1386                                const FileDescriptorProto* containing_file) {
1387   // Parse name.
1388   {
1389     LocationRecorder location(enum_value_location,
1390                               EnumValueDescriptorProto::kNameFieldNumber);
1391     location.RecordLegacyLocation(
1392         enum_value, DescriptorPool::ErrorCollector::NAME);
1393     DO(ConsumeIdentifier(enum_value->mutable_name(),
1394                          "Expected enum constant name."));
1395   }
1396 
1397   DO(Consume("=", "Missing numeric value for enum constant."));
1398 
1399   // Parse value.
1400   {
1401     LocationRecorder location(
1402         enum_value_location, EnumValueDescriptorProto::kNumberFieldNumber);
1403     location.RecordLegacyLocation(
1404         enum_value, DescriptorPool::ErrorCollector::NUMBER);
1405 
1406     int number;
1407     DO(ConsumeSignedInteger(&number, "Expected integer."));
1408     enum_value->set_number(number);
1409   }
1410 
1411   DO(ParseEnumConstantOptions(enum_value, enum_value_location,
1412                               containing_file));
1413 
1414   DO(ConsumeEndOfDeclaration(";", &enum_value_location));
1415 
1416   return true;
1417 }
1418 
ParseEnumConstantOptions(EnumValueDescriptorProto * value,const LocationRecorder & enum_value_location,const FileDescriptorProto * containing_file)1419 bool Parser::ParseEnumConstantOptions(
1420     EnumValueDescriptorProto* value,
1421     const LocationRecorder& enum_value_location,
1422     const FileDescriptorProto* containing_file) {
1423   if (!LookingAt("[")) return true;
1424 
1425   LocationRecorder location(
1426       enum_value_location, EnumValueDescriptorProto::kOptionsFieldNumber);
1427 
1428   DO(Consume("["));
1429 
1430   do {
1431     DO(ParseOption(value->mutable_options(), location,
1432                    containing_file, OPTION_ASSIGNMENT));
1433   } while (TryConsume(","));
1434 
1435   DO(Consume("]"));
1436   return true;
1437 }
1438 
1439 // -------------------------------------------------------------------
1440 // Services
1441 
ParseServiceDefinition(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)1442 bool Parser::ParseServiceDefinition(
1443     ServiceDescriptorProto* service,
1444     const LocationRecorder& service_location,
1445     const FileDescriptorProto* containing_file) {
1446   DO(Consume("service"));
1447 
1448   {
1449     LocationRecorder location(service_location,
1450                               ServiceDescriptorProto::kNameFieldNumber);
1451     location.RecordLegacyLocation(
1452         service, DescriptorPool::ErrorCollector::NAME);
1453     DO(ConsumeIdentifier(service->mutable_name(), "Expected service name."));
1454   }
1455 
1456   DO(ParseServiceBlock(service, service_location, containing_file));
1457   return true;
1458 }
1459 
ParseServiceBlock(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)1460 bool Parser::ParseServiceBlock(ServiceDescriptorProto* service,
1461                                const LocationRecorder& service_location,
1462                                const FileDescriptorProto* containing_file) {
1463   DO(ConsumeEndOfDeclaration("{", &service_location));
1464 
1465   while (!TryConsumeEndOfDeclaration("}", NULL)) {
1466     if (AtEnd()) {
1467       AddError("Reached end of input in service definition (missing '}').");
1468       return false;
1469     }
1470 
1471     if (!ParseServiceStatement(service, service_location, containing_file)) {
1472       // This statement failed to parse.  Skip it, but keep looping to parse
1473       // other statements.
1474       SkipStatement();
1475     }
1476   }
1477 
1478   return true;
1479 }
1480 
ParseServiceStatement(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)1481 bool Parser::ParseServiceStatement(ServiceDescriptorProto* service,
1482                                    const LocationRecorder& service_location,
1483                                    const FileDescriptorProto* containing_file) {
1484   if (TryConsumeEndOfDeclaration(";", NULL)) {
1485     // empty statement; ignore
1486     return true;
1487   } else if (LookingAt("option")) {
1488     LocationRecorder location(
1489         service_location, ServiceDescriptorProto::kOptionsFieldNumber);
1490     return ParseOption(service->mutable_options(), location,
1491                        containing_file, OPTION_STATEMENT);
1492   } else {
1493     LocationRecorder location(service_location,
1494         ServiceDescriptorProto::kMethodFieldNumber, service->method_size());
1495     return ParseServiceMethod(service->add_method(), location, containing_file);
1496   }
1497 }
1498 
ParseServiceMethod(MethodDescriptorProto * method,const LocationRecorder & method_location,const FileDescriptorProto * containing_file)1499 bool Parser::ParseServiceMethod(MethodDescriptorProto* method,
1500                                 const LocationRecorder& method_location,
1501                                 const FileDescriptorProto* containing_file) {
1502   DO(Consume("rpc"));
1503 
1504   {
1505     LocationRecorder location(method_location,
1506                               MethodDescriptorProto::kNameFieldNumber);
1507     location.RecordLegacyLocation(
1508         method, DescriptorPool::ErrorCollector::NAME);
1509     DO(ConsumeIdentifier(method->mutable_name(), "Expected method name."));
1510   }
1511 
1512   // Parse input type.
1513   DO(Consume("("));
1514   {
1515     LocationRecorder location(method_location,
1516                               MethodDescriptorProto::kInputTypeFieldNumber);
1517     location.RecordLegacyLocation(
1518         method, DescriptorPool::ErrorCollector::INPUT_TYPE);
1519     DO(ParseUserDefinedType(method->mutable_input_type()));
1520   }
1521   DO(Consume(")"));
1522 
1523   // Parse output type.
1524   DO(Consume("returns"));
1525   DO(Consume("("));
1526   {
1527     LocationRecorder location(method_location,
1528                               MethodDescriptorProto::kOutputTypeFieldNumber);
1529     location.RecordLegacyLocation(
1530         method, DescriptorPool::ErrorCollector::OUTPUT_TYPE);
1531     DO(ParseUserDefinedType(method->mutable_output_type()));
1532   }
1533   DO(Consume(")"));
1534 
1535   if (LookingAt("{")) {
1536     // Options!
1537     DO(ParseOptions(method_location,
1538                     containing_file,
1539                     MethodDescriptorProto::kOptionsFieldNumber,
1540                     method->mutable_options()));
1541   } else {
1542     DO(ConsumeEndOfDeclaration(";", &method_location));
1543   }
1544 
1545   return true;
1546 }
1547 
1548 
ParseOptions(const LocationRecorder & parent_location,const FileDescriptorProto * containing_file,const int optionsFieldNumber,Message * mutable_options)1549 bool Parser::ParseOptions(const LocationRecorder& parent_location,
1550                           const FileDescriptorProto* containing_file,
1551                           const int optionsFieldNumber,
1552                           Message* mutable_options) {
1553   // Options!
1554   ConsumeEndOfDeclaration("{", &parent_location);
1555   while (!TryConsumeEndOfDeclaration("}", NULL)) {
1556     if (AtEnd()) {
1557       AddError("Reached end of input in method options (missing '}').");
1558       return false;
1559     }
1560 
1561     if (TryConsumeEndOfDeclaration(";", NULL)) {
1562       // empty statement; ignore
1563     } else {
1564       LocationRecorder location(parent_location,
1565                                 optionsFieldNumber);
1566       if (!ParseOption(mutable_options, location, containing_file,
1567                        OPTION_STATEMENT)) {
1568         // This statement failed to parse.  Skip it, but keep looping to
1569         // parse other statements.
1570         SkipStatement();
1571       }
1572     }
1573   }
1574 
1575   return true;
1576 }
1577 
1578 // -------------------------------------------------------------------
1579 
ParseLabel(FieldDescriptorProto::Label * label,const FileDescriptorProto * containing_file)1580 bool Parser::ParseLabel(FieldDescriptorProto::Label* label,
1581                         const FileDescriptorProto* containing_file) {
1582   if (TryConsume("optional")) {
1583     *label = FieldDescriptorProto::LABEL_OPTIONAL;
1584     return true;
1585   } else if (TryConsume("repeated")) {
1586     *label = FieldDescriptorProto::LABEL_REPEATED;
1587     return true;
1588   } else if (TryConsume("required")) {
1589     *label = FieldDescriptorProto::LABEL_REQUIRED;
1590     return true;
1591   } else {
1592     AddError("Expected \"required\", \"optional\", or \"repeated\".");
1593     // We can actually reasonably recover here by just assuming the user
1594     // forgot the label altogether.
1595     *label = FieldDescriptorProto::LABEL_OPTIONAL;
1596     return true;
1597   }
1598 }
1599 
ParseType(FieldDescriptorProto::Type * type,string * type_name)1600 bool Parser::ParseType(FieldDescriptorProto::Type* type,
1601                        string* type_name) {
1602   TypeNameMap::const_iterator iter = kTypeNames.find(input_->current().text);
1603   if (iter != kTypeNames.end()) {
1604     *type = iter->second;
1605     input_->Next();
1606   } else {
1607     DO(ParseUserDefinedType(type_name));
1608   }
1609   return true;
1610 }
1611 
ParseUserDefinedType(string * type_name)1612 bool Parser::ParseUserDefinedType(string* type_name) {
1613   type_name->clear();
1614 
1615   TypeNameMap::const_iterator iter = kTypeNames.find(input_->current().text);
1616   if (iter != kTypeNames.end()) {
1617     // Note:  The only place enum types are allowed is for field types, but
1618     //   if we are parsing a field type then we would not get here because
1619     //   primitives are allowed there as well.  So this error message doesn't
1620     //   need to account for enums.
1621     AddError("Expected message type.");
1622 
1623     // Pretend to accept this type so that we can go on parsing.
1624     *type_name = input_->current().text;
1625     input_->Next();
1626     return true;
1627   }
1628 
1629   // A leading "." means the name is fully-qualified.
1630   if (TryConsume(".")) type_name->append(".");
1631 
1632   // Consume the first part of the name.
1633   string identifier;
1634   DO(ConsumeIdentifier(&identifier, "Expected type name."));
1635   type_name->append(identifier);
1636 
1637   // Consume more parts.
1638   while (TryConsume(".")) {
1639     type_name->append(".");
1640     DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1641     type_name->append(identifier);
1642   }
1643 
1644   return true;
1645 }
1646 
1647 // ===================================================================
1648 
ParsePackage(FileDescriptorProto * file,const LocationRecorder & root_location,const FileDescriptorProto * containing_file)1649 bool Parser::ParsePackage(FileDescriptorProto* file,
1650                           const LocationRecorder& root_location,
1651                           const FileDescriptorProto* containing_file) {
1652   if (file->has_package()) {
1653     AddError("Multiple package definitions.");
1654     // Don't append the new package to the old one.  Just replace it.  Not
1655     // that it really matters since this is an error anyway.
1656     file->clear_package();
1657   }
1658 
1659   DO(Consume("package"));
1660 
1661   {
1662     LocationRecorder location(root_location,
1663                               FileDescriptorProto::kPackageFieldNumber);
1664     location.RecordLegacyLocation(file, DescriptorPool::ErrorCollector::NAME);
1665 
1666     while (true) {
1667       string identifier;
1668       DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1669       file->mutable_package()->append(identifier);
1670       if (!TryConsume(".")) break;
1671       file->mutable_package()->append(".");
1672     }
1673 
1674     location.EndAt(input_->previous());
1675 
1676     DO(ConsumeEndOfDeclaration(";", &location));
1677   }
1678 
1679   return true;
1680 }
1681 
ParseImport(RepeatedPtrField<string> * dependency,RepeatedField<int32> * public_dependency,RepeatedField<int32> * weak_dependency,const LocationRecorder & root_location,const FileDescriptorProto * containing_file)1682 bool Parser::ParseImport(RepeatedPtrField<string>* dependency,
1683                          RepeatedField<int32>* public_dependency,
1684                          RepeatedField<int32>* weak_dependency,
1685                          const LocationRecorder& root_location,
1686                          const FileDescriptorProto* containing_file) {
1687   DO(Consume("import"));
1688   if (LookingAt("public")) {
1689     LocationRecorder location(
1690         root_location, FileDescriptorProto::kPublicDependencyFieldNumber,
1691         public_dependency->size());
1692     DO(Consume("public"));
1693     *public_dependency->Add() = dependency->size();
1694   } else if (LookingAt("weak")) {
1695     LocationRecorder location(
1696         root_location, FileDescriptorProto::kWeakDependencyFieldNumber,
1697         weak_dependency->size());
1698     DO(Consume("weak"));
1699     *weak_dependency->Add() = dependency->size();
1700   }
1701   {
1702     LocationRecorder location(root_location,
1703                               FileDescriptorProto::kDependencyFieldNumber,
1704                               dependency->size());
1705     DO(ConsumeString(dependency->Add(),
1706       "Expected a string naming the file to import."));
1707 
1708     location.EndAt(input_->previous());
1709 
1710     DO(ConsumeEndOfDeclaration(";", &location));
1711   }
1712   return true;
1713 }
1714 
1715 // ===================================================================
1716 
SourceLocationTable()1717 SourceLocationTable::SourceLocationTable() {}
~SourceLocationTable()1718 SourceLocationTable::~SourceLocationTable() {}
1719 
Find(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,int * line,int * column) const1720 bool SourceLocationTable::Find(
1721     const Message* descriptor,
1722     DescriptorPool::ErrorCollector::ErrorLocation location,
1723     int* line, int* column) const {
1724   const pair<int, int>* result =
1725     FindOrNull(location_map_, make_pair(descriptor, location));
1726   if (result == NULL) {
1727     *line   = -1;
1728     *column = 0;
1729     return false;
1730   } else {
1731     *line   = result->first;
1732     *column = result->second;
1733     return true;
1734   }
1735 }
1736 
Add(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,int line,int column)1737 void SourceLocationTable::Add(
1738     const Message* descriptor,
1739     DescriptorPool::ErrorCollector::ErrorLocation location,
1740     int line, int column) {
1741   location_map_[make_pair(descriptor, location)] = make_pair(line, column);
1742 }
1743 
Clear()1744 void SourceLocationTable::Clear() {
1745   location_map_.clear();
1746 }
1747 
1748 }  // namespace compiler
1749 }  // namespace protobuf
1750 }  // namespace google
1751