1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // Recursive descent FTW.
36 
37 #include <float.h>
38 #include <google/protobuf/stubs/hash.h>
39 #include <limits>
40 
41 
42 #include <google/protobuf/compiler/parser.h>
43 #include <google/protobuf/descriptor.h>
44 #include <google/protobuf/descriptor.pb.h>
45 #include <google/protobuf/wire_format.h>
46 #include <google/protobuf/io/tokenizer.h>
47 #include <google/protobuf/stubs/logging.h>
48 #include <google/protobuf/stubs/common.h>
49 #include <google/protobuf/stubs/strutil.h>
50 #include <google/protobuf/stubs/map_util.h>
51 
52 namespace google {
53 namespace protobuf {
54 namespace compiler {
55 
56 using internal::WireFormat;
57 
58 namespace {
59 
60 typedef hash_map<string, FieldDescriptorProto::Type> TypeNameMap;
61 
MakeTypeNameTable()62 TypeNameMap MakeTypeNameTable() {
63   TypeNameMap result;
64 
65   result["double"  ] = FieldDescriptorProto::TYPE_DOUBLE;
66   result["float"   ] = FieldDescriptorProto::TYPE_FLOAT;
67   result["uint64"  ] = FieldDescriptorProto::TYPE_UINT64;
68   result["fixed64" ] = FieldDescriptorProto::TYPE_FIXED64;
69   result["fixed32" ] = FieldDescriptorProto::TYPE_FIXED32;
70   result["bool"    ] = FieldDescriptorProto::TYPE_BOOL;
71   result["string"  ] = FieldDescriptorProto::TYPE_STRING;
72   result["group"   ] = FieldDescriptorProto::TYPE_GROUP;
73 
74   result["bytes"   ] = FieldDescriptorProto::TYPE_BYTES;
75   result["uint32"  ] = FieldDescriptorProto::TYPE_UINT32;
76   result["sfixed32"] = FieldDescriptorProto::TYPE_SFIXED32;
77   result["sfixed64"] = FieldDescriptorProto::TYPE_SFIXED64;
78   result["int32"   ] = FieldDescriptorProto::TYPE_INT32;
79   result["int64"   ] = FieldDescriptorProto::TYPE_INT64;
80   result["sint32"  ] = FieldDescriptorProto::TYPE_SINT32;
81   result["sint64"  ] = FieldDescriptorProto::TYPE_SINT64;
82 
83   return result;
84 }
85 
86 const TypeNameMap kTypeNames = MakeTypeNameTable();
87 
// Derives the name of the generated map entry message from a map field name:
// underscores are dropped, the first character and every character that
// follows an underscore is upper-cased (ASCII 'a'..'z' only), and "Entry" is
// appended.
// e.g. map<KeyType, ValueType> foo_map => FooMapEntry
string MapEntryName(const string& field_name) {
  static const char kSuffix[] = "Entry";
  string result;
  result.reserve(field_name.size() + sizeof(kSuffix));
  bool cap_next = true;
  // Index with the string's own size type so the loop bound comparison does
  // not mix signed and unsigned operands.
  for (string::size_type i = 0; i < field_name.size(); ++i) {
    const char c = field_name[i];
    if (c == '_') {
      cap_next = true;
      continue;
    }
    // Note: Do not use ctype.h due to locales.
    if (cap_next && 'a' <= c && c <= 'z') {
      result.push_back(c - 'a' + 'A');
    } else {
      result.push_back(c);
    }
    cap_next = false;
  }
  result.append(kSuffix);
  return result;
}
113 
114 }  // anonymous namespace
115 
// Makes code slightly more readable.  The meaning of "DO(foo)" is
// "Execute foo and fail if it fails.", where failure is indicated by
// returning false.
//
// The expansion is an if/else with an empty "then" branch, so it is a single
// statement that needs a trailing semicolon at the use site and makes the
// enclosing function return false when STATEMENT evaluates to false.
#define DO(STATEMENT) if (STATEMENT) {} else return false
120 
121 // ===================================================================
122 
Parser()123 Parser::Parser()
124   : input_(NULL),
125     error_collector_(NULL),
126     source_location_table_(NULL),
127     had_errors_(false),
128     require_syntax_identifier_(false),
129     stop_after_syntax_identifier_(false) {
130 }
131 
~Parser()132 Parser::~Parser() {
133 }
134 
135 // ===================================================================
136 
LookingAt(const char * text)137 inline bool Parser::LookingAt(const char* text) {
138   return input_->current().text == text;
139 }
140 
LookingAtType(io::Tokenizer::TokenType token_type)141 inline bool Parser::LookingAtType(io::Tokenizer::TokenType token_type) {
142   return input_->current().type == token_type;
143 }
144 
AtEnd()145 inline bool Parser::AtEnd() {
146   return LookingAtType(io::Tokenizer::TYPE_END);
147 }
148 
TryConsume(const char * text)149 bool Parser::TryConsume(const char* text) {
150   if (LookingAt(text)) {
151     input_->Next();
152     return true;
153   } else {
154     return false;
155   }
156 }
157 
Consume(const char * text,const char * error)158 bool Parser::Consume(const char* text, const char* error) {
159   if (TryConsume(text)) {
160     return true;
161   } else {
162     AddError(error);
163     return false;
164   }
165 }
166 
Consume(const char * text)167 bool Parser::Consume(const char* text) {
168   if (TryConsume(text)) {
169     return true;
170   } else {
171     AddError("Expected \"" + string(text) + "\".");
172     return false;
173   }
174 }
175 
ConsumeIdentifier(string * output,const char * error)176 bool Parser::ConsumeIdentifier(string* output, const char* error) {
177   if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
178     *output = input_->current().text;
179     input_->Next();
180     return true;
181   } else {
182     AddError(error);
183     return false;
184   }
185 }
186 
ConsumeInteger(int * output,const char * error)187 bool Parser::ConsumeInteger(int* output, const char* error) {
188   if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
189     uint64 value = 0;
190     if (!io::Tokenizer::ParseInteger(input_->current().text,
191                                      kint32max, &value)) {
192       AddError("Integer out of range.");
193       // We still return true because we did, in fact, parse an integer.
194     }
195     *output = value;
196     input_->Next();
197     return true;
198   } else {
199     AddError(error);
200     return false;
201   }
202 }
203 
ConsumeSignedInteger(int * output,const char * error)204 bool Parser::ConsumeSignedInteger(int* output, const char* error) {
205   bool is_negative = false;
206   uint64 max_value = kint32max;
207   if (TryConsume("-")) {
208     is_negative = true;
209     max_value += 1;
210   }
211   uint64 value = 0;
212   DO(ConsumeInteger64(max_value, &value, error));
213   if (is_negative) value *= -1;
214   *output = value;
215   return true;
216 }
217 
ConsumeInteger64(uint64 max_value,uint64 * output,const char * error)218 bool Parser::ConsumeInteger64(uint64 max_value, uint64* output,
219                               const char* error) {
220   if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
221     if (!io::Tokenizer::ParseInteger(input_->current().text, max_value,
222                                      output)) {
223       AddError("Integer out of range.");
224       // We still return true because we did, in fact, parse an integer.
225       *output = 0;
226     }
227     input_->Next();
228     return true;
229   } else {
230     AddError(error);
231     return false;
232   }
233 }
234 
ConsumeNumber(double * output,const char * error)235 bool Parser::ConsumeNumber(double* output, const char* error) {
236   if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
237     *output = io::Tokenizer::ParseFloat(input_->current().text);
238     input_->Next();
239     return true;
240   } else if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
241     // Also accept integers.
242     uint64 value = 0;
243     if (!io::Tokenizer::ParseInteger(input_->current().text,
244                                      kuint64max, &value)) {
245       AddError("Integer out of range.");
246       // We still return true because we did, in fact, parse a number.
247     }
248     *output = value;
249     input_->Next();
250     return true;
251   } else if (LookingAt("inf")) {
252     *output = numeric_limits<double>::infinity();
253     input_->Next();
254     return true;
255   } else if (LookingAt("nan")) {
256     *output = numeric_limits<double>::quiet_NaN();
257     input_->Next();
258     return true;
259   } else {
260     AddError(error);
261     return false;
262   }
263 }
264 
ConsumeString(string * output,const char * error)265 bool Parser::ConsumeString(string* output, const char* error) {
266   if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
267     io::Tokenizer::ParseString(input_->current().text, output);
268     input_->Next();
269     // Allow C++ like concatenation of adjacent string tokens.
270     while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
271       io::Tokenizer::ParseStringAppend(input_->current().text, output);
272       input_->Next();
273     }
274     return true;
275   } else {
276     AddError(error);
277     return false;
278   }
279 }
280 
TryConsumeEndOfDeclaration(const char * text,const LocationRecorder * location)281 bool Parser::TryConsumeEndOfDeclaration(
282     const char* text, const LocationRecorder* location) {
283   if (LookingAt(text)) {
284     string leading, trailing;
285     vector<string> detached;
286     input_->NextWithComments(&trailing, &detached, &leading);
287 
288     // Save the leading comments for next time, and recall the leading comments
289     // from last time.
290     leading.swap(upcoming_doc_comments_);
291 
292     if (location != NULL) {
293       upcoming_detached_comments_.swap(detached);
294       location->AttachComments(&leading, &trailing, &detached);
295     } else if (strcmp(text, "}") == 0) {
296       // If the current location is null and we are finishing the current scope,
297       // drop pending upcoming detached comments.
298       upcoming_detached_comments_.swap(detached);
299     } else {
300       // Otherwise, append the new detached comments to the existing upcoming
301       // detached comments.
302       upcoming_detached_comments_.insert(upcoming_detached_comments_.end(),
303                                          detached.begin(), detached.end());
304     }
305 
306     return true;
307   } else {
308     return false;
309   }
310 }
311 
ConsumeEndOfDeclaration(const char * text,const LocationRecorder * location)312 bool Parser::ConsumeEndOfDeclaration(
313     const char* text, const LocationRecorder* location) {
314   if (TryConsumeEndOfDeclaration(text, location)) {
315     return true;
316   } else {
317     AddError("Expected \"" + string(text) + "\".");
318     return false;
319   }
320 }
321 
322 // -------------------------------------------------------------------
323 
AddError(int line,int column,const string & error)324 void Parser::AddError(int line, int column, const string& error) {
325   if (error_collector_ != NULL) {
326     error_collector_->AddError(line, column, error);
327   }
328   had_errors_ = true;
329 }
330 
AddError(const string & error)331 void Parser::AddError(const string& error) {
332   AddError(input_->current().line, input_->current().column, error);
333 }
334 
335 // -------------------------------------------------------------------
336 
LocationRecorder(Parser * parser)337 Parser::LocationRecorder::LocationRecorder(Parser* parser)
338   : parser_(parser),
339     location_(parser_->source_code_info_->add_location()) {
340   location_->add_span(parser_->input_->current().line);
341   location_->add_span(parser_->input_->current().column);
342 }
343 
LocationRecorder(const LocationRecorder & parent)344 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent) {
345   Init(parent);
346 }
347 
LocationRecorder(const LocationRecorder & parent,int path1)348 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
349                                            int path1) {
350   Init(parent);
351   AddPath(path1);
352 }
353 
LocationRecorder(const LocationRecorder & parent,int path1,int path2)354 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
355                                            int path1, int path2) {
356   Init(parent);
357   AddPath(path1);
358   AddPath(path2);
359 }
360 
Init(const LocationRecorder & parent)361 void Parser::LocationRecorder::Init(const LocationRecorder& parent) {
362   parser_ = parent.parser_;
363   location_ = parser_->source_code_info_->add_location();
364   location_->mutable_path()->CopyFrom(parent.location_->path());
365 
366   location_->add_span(parser_->input_->current().line);
367   location_->add_span(parser_->input_->current().column);
368 }
369 
~LocationRecorder()370 Parser::LocationRecorder::~LocationRecorder() {
371   if (location_->span_size() <= 2) {
372     EndAt(parser_->input_->previous());
373   }
374 }
375 
AddPath(int path_component)376 void Parser::LocationRecorder::AddPath(int path_component) {
377   location_->add_path(path_component);
378 }
379 
StartAt(const io::Tokenizer::Token & token)380 void Parser::LocationRecorder::StartAt(const io::Tokenizer::Token& token) {
381   location_->set_span(0, token.line);
382   location_->set_span(1, token.column);
383 }
384 
StartAt(const LocationRecorder & other)385 void Parser::LocationRecorder::StartAt(const LocationRecorder& other) {
386   location_->set_span(0, other.location_->span(0));
387   location_->set_span(1, other.location_->span(1));
388 }
389 
EndAt(const io::Tokenizer::Token & token)390 void Parser::LocationRecorder::EndAt(const io::Tokenizer::Token& token) {
391   if (token.line != location_->span(0)) {
392     location_->add_span(token.line);
393   }
394   location_->add_span(token.end_column);
395 }
396 
RecordLegacyLocation(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location)397 void Parser::LocationRecorder::RecordLegacyLocation(const Message* descriptor,
398     DescriptorPool::ErrorCollector::ErrorLocation location) {
399   if (parser_->source_location_table_ != NULL) {
400     parser_->source_location_table_->Add(
401         descriptor, location, location_->span(0), location_->span(1));
402   }
403 }
404 
AttachComments(string * leading,string * trailing,vector<string> * detached_comments) const405 void Parser::LocationRecorder::AttachComments(
406     string* leading, string* trailing,
407     vector<string>* detached_comments) const {
408   GOOGLE_CHECK(!location_->has_leading_comments());
409   GOOGLE_CHECK(!location_->has_trailing_comments());
410 
411   if (!leading->empty()) {
412     location_->mutable_leading_comments()->swap(*leading);
413   }
414   if (!trailing->empty()) {
415     location_->mutable_trailing_comments()->swap(*trailing);
416   }
417   for (int i = 0; i < detached_comments->size(); ++i) {
418     location_->add_leading_detached_comments()->swap(
419         (*detached_comments)[i]);
420   }
421   detached_comments->clear();
422 }
423 
424 // -------------------------------------------------------------------
425 
SkipStatement()426 void Parser::SkipStatement() {
427   while (true) {
428     if (AtEnd()) {
429       return;
430     } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) {
431       if (TryConsumeEndOfDeclaration(";", NULL)) {
432         return;
433       } else if (TryConsume("{")) {
434         SkipRestOfBlock();
435         return;
436       } else if (LookingAt("}")) {
437         return;
438       }
439     }
440     input_->Next();
441   }
442 }
443 
SkipRestOfBlock()444 void Parser::SkipRestOfBlock() {
445   while (true) {
446     if (AtEnd()) {
447       return;
448     } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) {
449       if (TryConsumeEndOfDeclaration("}", NULL)) {
450         return;
451       } else if (TryConsume("{")) {
452         SkipRestOfBlock();
453       }
454     }
455     input_->Next();
456   }
457 }
458 
459 // ===================================================================
460 
ValidateEnum(const EnumDescriptorProto * proto)461 bool Parser::ValidateEnum(const EnumDescriptorProto* proto) {
462   bool has_allow_alias = false;
463   bool allow_alias = false;
464 
465   for (int i = 0; i < proto->options().uninterpreted_option_size(); i++) {
466     const UninterpretedOption option = proto->options().uninterpreted_option(i);
467     if (option.name_size() > 1) {
468       continue;
469     }
470     if (!option.name(0).is_extension() &&
471         option.name(0).name_part() == "allow_alias") {
472       has_allow_alias = true;
473       if (option.identifier_value() == "true") {
474         allow_alias = true;
475       }
476       break;
477     }
478   }
479 
480   if (has_allow_alias && !allow_alias) {
481     string error =
482         "\"" + proto->name() +
483         "\" declares 'option allow_alias = false;' which has no effect. "
484         "Please remove the declaration.";
485     // This needlessly clutters declarations with nops.
486     AddError(error);
487     return false;
488   }
489 
490   set<int> used_values;
491   bool has_duplicates = false;
492   for (int i = 0; i < proto->value_size(); ++i) {
493     const EnumValueDescriptorProto enum_value = proto->value(i);
494     if (used_values.find(enum_value.number()) != used_values.end()) {
495       has_duplicates = true;
496       break;
497     } else {
498       used_values.insert(enum_value.number());
499     }
500   }
501   if (allow_alias && !has_duplicates) {
502     string error =
503         "\"" + proto->name() +
504         "\" declares support for enum aliases but no enum values share field "
505         "numbers. Please remove the unnecessary 'option allow_alias = true;' "
506         "declaration.";
507     // Generate an error if an enum declares support for duplicate enum values
508     // and does not use it protect future authors.
509     AddError(error);
510     return false;
511   }
512 
513   return true;
514 }
515 
Parse(io::Tokenizer * input,FileDescriptorProto * file)516 bool Parser::Parse(io::Tokenizer* input, FileDescriptorProto* file) {
517   input_ = input;
518   had_errors_ = false;
519   syntax_identifier_.clear();
520 
521   // Note that |file| could be NULL at this point if
522   // stop_after_syntax_identifier_ is true.  So, we conservatively allocate
523   // SourceCodeInfo on the stack, then swap it into the FileDescriptorProto
524   // later on.
525   SourceCodeInfo source_code_info;
526   source_code_info_ = &source_code_info;
527 
528   vector<string> top_doc_comments;
529   if (LookingAtType(io::Tokenizer::TYPE_START)) {
530     // Advance to first token.
531     input_->NextWithComments(NULL, &upcoming_detached_comments_,
532                              &upcoming_doc_comments_);
533   }
534 
535   {
536     LocationRecorder root_location(this);
537 
538     if (require_syntax_identifier_ || LookingAt("syntax")) {
539       if (!ParseSyntaxIdentifier(root_location)) {
540         // Don't attempt to parse the file if we didn't recognize the syntax
541         // identifier.
542         return false;
543       }
544       // Store the syntax into the file.
545       if (file != NULL) file->set_syntax(syntax_identifier_);
546     } else if (!stop_after_syntax_identifier_) {
547       GOOGLE_LOG(WARNING) << "No syntax specified for the proto file: "
548                    << file->name() << ". Please use 'syntax = \"proto2\";' "
549                    << "or 'syntax = \"proto3\";' to specify a syntax "
550                    << "version. (Defaulted to proto2 syntax.)";
551       syntax_identifier_ = "proto2";
552     }
553 
554     if (stop_after_syntax_identifier_) return !had_errors_;
555 
556     // Repeatedly parse statements until we reach the end of the file.
557     while (!AtEnd()) {
558       if (!ParseTopLevelStatement(file, root_location)) {
559         // This statement failed to parse.  Skip it, but keep looping to parse
560         // other statements.
561         SkipStatement();
562 
563         if (LookingAt("}")) {
564           AddError("Unmatched \"}\".");
565           input_->NextWithComments(NULL, &upcoming_detached_comments_,
566                                    &upcoming_doc_comments_);
567         }
568       }
569     }
570   }
571 
572   input_ = NULL;
573   source_code_info_ = NULL;
574   source_code_info.Swap(file->mutable_source_code_info());
575   return !had_errors_;
576 }
577 
ParseSyntaxIdentifier(const LocationRecorder & parent)578 bool Parser::ParseSyntaxIdentifier(const LocationRecorder& parent) {
579   LocationRecorder syntax_location(parent,
580                                    FileDescriptorProto::kSyntaxFieldNumber);
581   DO(Consume(
582       "syntax",
583       "File must begin with a syntax statement, e.g. 'syntax = \"proto2\";'."));
584   DO(Consume("="));
585   io::Tokenizer::Token syntax_token = input_->current();
586   string syntax;
587   DO(ConsumeString(&syntax, "Expected syntax identifier."));
588   DO(ConsumeEndOfDeclaration(";", &syntax_location));
589 
590   syntax_identifier_ = syntax;
591 
592   if (syntax != "proto2" && syntax != "proto3" &&
593       !stop_after_syntax_identifier_) {
594     AddError(syntax_token.line, syntax_token.column,
595       "Unrecognized syntax identifier \"" + syntax + "\".  This parser "
596       "only recognizes \"proto2\" and \"proto3\".");
597     return false;
598   }
599 
600   return true;
601 }
602 
ParseTopLevelStatement(FileDescriptorProto * file,const LocationRecorder & root_location)603 bool Parser::ParseTopLevelStatement(FileDescriptorProto* file,
604                                     const LocationRecorder& root_location) {
605   if (TryConsumeEndOfDeclaration(";", NULL)) {
606     // empty statement; ignore
607     return true;
608   } else if (LookingAt("message")) {
609     LocationRecorder location(root_location,
610       FileDescriptorProto::kMessageTypeFieldNumber, file->message_type_size());
611     return ParseMessageDefinition(file->add_message_type(), location, file);
612   } else if (LookingAt("enum")) {
613     LocationRecorder location(root_location,
614       FileDescriptorProto::kEnumTypeFieldNumber, file->enum_type_size());
615     return ParseEnumDefinition(file->add_enum_type(), location, file);
616   } else if (LookingAt("service")) {
617     LocationRecorder location(root_location,
618       FileDescriptorProto::kServiceFieldNumber, file->service_size());
619     return ParseServiceDefinition(file->add_service(), location, file);
620   } else if (LookingAt("extend")) {
621     LocationRecorder location(root_location,
622         FileDescriptorProto::kExtensionFieldNumber);
623     return ParseExtend(file->mutable_extension(),
624                        file->mutable_message_type(),
625                        root_location,
626                        FileDescriptorProto::kMessageTypeFieldNumber,
627                        location, file);
628   } else if (LookingAt("import")) {
629     return ParseImport(file->mutable_dependency(),
630                        file->mutable_public_dependency(),
631                        file->mutable_weak_dependency(),
632                        root_location, file);
633   } else if (LookingAt("package")) {
634     return ParsePackage(file, root_location, file);
635   } else if (LookingAt("option")) {
636     LocationRecorder location(root_location,
637         FileDescriptorProto::kOptionsFieldNumber);
638     return ParseOption(file->mutable_options(), location, file,
639                        OPTION_STATEMENT);
640   } else {
641     AddError("Expected top-level statement (e.g. \"message\").");
642     return false;
643   }
644 }
645 
646 // -------------------------------------------------------------------
647 // Messages
648 
ParseMessageDefinition(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)649 bool Parser::ParseMessageDefinition(
650     DescriptorProto* message,
651     const LocationRecorder& message_location,
652     const FileDescriptorProto* containing_file) {
653   DO(Consume("message"));
654   {
655     LocationRecorder location(message_location,
656                               DescriptorProto::kNameFieldNumber);
657     location.RecordLegacyLocation(
658         message, DescriptorPool::ErrorCollector::NAME);
659     DO(ConsumeIdentifier(message->mutable_name(), "Expected message name."));
660   }
661   DO(ParseMessageBlock(message, message_location, containing_file));
662   return true;
663 }
664 
665 namespace {
666 
667 const int kMaxExtensionRangeSentinel = -1;
668 
IsMessageSetWireFormatMessage(const DescriptorProto & message)669 bool IsMessageSetWireFormatMessage(const DescriptorProto& message) {
670   const MessageOptions& options = message.options();
671   for (int i = 0; i < options.uninterpreted_option_size(); ++i) {
672     const UninterpretedOption& uninterpreted = options.uninterpreted_option(i);
673     if (uninterpreted.name_size() == 1 &&
674         uninterpreted.name(0).name_part() == "message_set_wire_format" &&
675         uninterpreted.identifier_value() == "true") {
676       return true;
677     }
678   }
679   return false;
680 }
681 
682 // Modifies any extension ranges that specified 'max' as the end of the
683 // extension range, and sets them to the type-specific maximum. The actual max
684 // tag number can only be determined after all options have been parsed.
AdjustExtensionRangesWithMaxEndNumber(DescriptorProto * message)685 void AdjustExtensionRangesWithMaxEndNumber(DescriptorProto* message) {
686   const bool is_message_set = IsMessageSetWireFormatMessage(*message);
687   const int max_extension_number = is_message_set ?
688       kint32max :
689       FieldDescriptor::kMaxNumber + 1;
690   for (int i = 0; i < message->extension_range_size(); ++i) {
691     if (message->extension_range(i).end() == kMaxExtensionRangeSentinel) {
692       message->mutable_extension_range(i)->set_end(max_extension_number);
693     }
694   }
695 }
696 
697 }  // namespace
698 
ParseMessageBlock(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)699 bool Parser::ParseMessageBlock(DescriptorProto* message,
700                                const LocationRecorder& message_location,
701                                const FileDescriptorProto* containing_file) {
702   DO(ConsumeEndOfDeclaration("{", &message_location));
703 
704   while (!TryConsumeEndOfDeclaration("}", NULL)) {
705     if (AtEnd()) {
706       AddError("Reached end of input in message definition (missing '}').");
707       return false;
708     }
709 
710     if (!ParseMessageStatement(message, message_location, containing_file)) {
711       // This statement failed to parse.  Skip it, but keep looping to parse
712       // other statements.
713       SkipStatement();
714     }
715   }
716 
717   if (message->extension_range_size() > 0) {
718     AdjustExtensionRangesWithMaxEndNumber(message);
719   }
720   return true;
721 }
722 
ParseMessageStatement(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)723 bool Parser::ParseMessageStatement(DescriptorProto* message,
724                                    const LocationRecorder& message_location,
725                                    const FileDescriptorProto* containing_file) {
726   if (TryConsumeEndOfDeclaration(";", NULL)) {
727     // empty statement; ignore
728     return true;
729   } else if (LookingAt("message")) {
730     LocationRecorder location(message_location,
731                               DescriptorProto::kNestedTypeFieldNumber,
732                               message->nested_type_size());
733     return ParseMessageDefinition(message->add_nested_type(), location,
734                                   containing_file);
735   } else if (LookingAt("enum")) {
736     LocationRecorder location(message_location,
737                               DescriptorProto::kEnumTypeFieldNumber,
738                               message->enum_type_size());
739     return ParseEnumDefinition(message->add_enum_type(), location,
740                                containing_file);
741   } else if (LookingAt("extensions")) {
742     LocationRecorder location(message_location,
743                               DescriptorProto::kExtensionRangeFieldNumber);
744     return ParseExtensions(message, location, containing_file);
745   } else if (LookingAt("reserved")) {
746     return ParseReserved(message, message_location);
747   } else if (LookingAt("extend")) {
748     LocationRecorder location(message_location,
749                               DescriptorProto::kExtensionFieldNumber);
750     return ParseExtend(message->mutable_extension(),
751                        message->mutable_nested_type(),
752                        message_location,
753                        DescriptorProto::kNestedTypeFieldNumber,
754                        location, containing_file);
755   } else if (LookingAt("option")) {
756     LocationRecorder location(message_location,
757                               DescriptorProto::kOptionsFieldNumber);
758     return ParseOption(message->mutable_options(), location,
759                        containing_file, OPTION_STATEMENT);
760   } else if (LookingAt("oneof")) {
761     int oneof_index = message->oneof_decl_size();
762     LocationRecorder oneof_location(message_location,
763                                     DescriptorProto::kOneofDeclFieldNumber,
764                                     oneof_index);
765 
766     return ParseOneof(message->add_oneof_decl(), message,
767                       oneof_index, oneof_location, message_location,
768                       containing_file);
769   } else {
770     LocationRecorder location(message_location,
771                               DescriptorProto::kFieldFieldNumber,
772                               message->field_size());
773     return ParseMessageField(message->add_field(),
774                              message->mutable_nested_type(),
775                              message_location,
776                              DescriptorProto::kNestedTypeFieldNumber,
777                              location,
778                              containing_file);
779   }
780 }
781 
ParseMessageField(FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)782 bool Parser::ParseMessageField(FieldDescriptorProto* field,
783                                RepeatedPtrField<DescriptorProto>* messages,
784                                const LocationRecorder& parent_location,
785                                int location_field_number_for_nested_type,
786                                const LocationRecorder& field_location,
787                                const FileDescriptorProto* containing_file) {
788   {
789     LocationRecorder location(field_location,
790                               FieldDescriptorProto::kLabelFieldNumber);
791     FieldDescriptorProto::Label label;
792     if (ParseLabel(&label, containing_file)) {
793       field->set_label(label);
794       if (label == FieldDescriptorProto::LABEL_OPTIONAL &&
795           syntax_identifier_ == "proto3") {
796         AddError(
797             "Explicit 'optional' labels are disallowed in the Proto3 syntax. "
798             "To define 'optional' fields in Proto3, simply remove the "
799             "'optional' label, as fields are 'optional' by default.");
800       }
801     }
802   }
803 
804   return ParseMessageFieldNoLabel(field, messages, parent_location,
805                                   location_field_number_for_nested_type,
806                                   field_location,
807                                   containing_file);
808 }
809 
ParseMessageFieldNoLabel(FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)810 bool Parser::ParseMessageFieldNoLabel(
811     FieldDescriptorProto* field,
812     RepeatedPtrField<DescriptorProto>* messages,
813     const LocationRecorder& parent_location,
814     int location_field_number_for_nested_type,
815     const LocationRecorder& field_location,
816     const FileDescriptorProto* containing_file) {
817   MapField map_field;
818   // Parse type.
819   {
820     LocationRecorder location(field_location);  // add path later
821     location.RecordLegacyLocation(field, DescriptorPool::ErrorCollector::TYPE);
822 
823     bool type_parsed = false;
824     FieldDescriptorProto::Type type = FieldDescriptorProto::TYPE_INT32;
825     string type_name;
826 
827     // Special case map field. We only treat the field as a map field if the
828     // field type name starts with the word "map" with a following "<".
829     if (TryConsume("map")) {
830       if (LookingAt("<")) {
831         map_field.is_map_field = true;
832       } else {
833         // False positive
834         type_parsed = true;
835         type_name = "map";
836       }
837     }
838     if (map_field.is_map_field) {
839       if (field->has_oneof_index()) {
840         AddError("Map fields are not allowed in oneofs.");
841         return false;
842       }
843       if (field->has_label()) {
844         AddError(
845             "Field labels (required/optional/repeated) are not allowed on "
846             "map fields.");
847         return false;
848       }
849       if (field->has_extendee()) {
850         AddError("Map fields are not allowed to be extensions.");
851         return false;
852       }
853       field->set_label(FieldDescriptorProto::LABEL_REPEATED);
854       DO(Consume("<"));
855       DO(ParseType(&map_field.key_type, &map_field.key_type_name));
856       DO(Consume(","));
857       DO(ParseType(&map_field.value_type, &map_field.value_type_name));
858       DO(Consume(">"));
859       // Defer setting of the type name of the map field until the
860       // field name is parsed. Add the source location though.
861       location.AddPath(FieldDescriptorProto::kTypeNameFieldNumber);
862     } else {
863       // Handle the case where no explicit label is given for a non-map field.
864       if (!field->has_label() && DefaultToOptionalFields()) {
865         field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
866       }
867       if (!field->has_label()) {
868         AddError("Expected \"required\", \"optional\", or \"repeated\".");
869         // We can actually reasonably recover here by just assuming the user
870         // forgot the label altogether.
871         field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
872       }
873 
874       // Handle the case where the actual type is a message or enum named "map",
875       // which we already consumed in the code above.
876       if (!type_parsed) {
877         DO(ParseType(&type, &type_name));
878       }
879       if (type_name.empty()) {
880         location.AddPath(FieldDescriptorProto::kTypeFieldNumber);
881         field->set_type(type);
882       } else {
883         location.AddPath(FieldDescriptorProto::kTypeNameFieldNumber);
884         field->set_type_name(type_name);
885       }
886     }
887   }
888 
889   // Parse name and '='.
890   io::Tokenizer::Token name_token = input_->current();
891   {
892     LocationRecorder location(field_location,
893                               FieldDescriptorProto::kNameFieldNumber);
894     location.RecordLegacyLocation(field, DescriptorPool::ErrorCollector::NAME);
895     DO(ConsumeIdentifier(field->mutable_name(), "Expected field name."));
896   }
897   DO(Consume("=", "Missing field number."));
898 
899   // Parse field number.
900   {
901     LocationRecorder location(field_location,
902                               FieldDescriptorProto::kNumberFieldNumber);
903     location.RecordLegacyLocation(
904         field, DescriptorPool::ErrorCollector::NUMBER);
905     int number;
906     DO(ConsumeInteger(&number, "Expected field number."));
907     field->set_number(number);
908   }
909 
910   // Parse options.
911   DO(ParseFieldOptions(field, field_location, containing_file));
912 
913   // Deal with groups.
914   if (field->has_type() && field->type() == FieldDescriptorProto::TYPE_GROUP) {
915     // Awkward:  Since a group declares both a message type and a field, we
916     //   have to create overlapping locations.
917     LocationRecorder group_location(parent_location);
918     group_location.StartAt(field_location);
919     group_location.AddPath(location_field_number_for_nested_type);
920     group_location.AddPath(messages->size());
921 
922     DescriptorProto* group = messages->Add();
923     group->set_name(field->name());
924 
925     // Record name location to match the field name's location.
926     {
927       LocationRecorder location(group_location,
928                                 DescriptorProto::kNameFieldNumber);
929       location.StartAt(name_token);
930       location.EndAt(name_token);
931       location.RecordLegacyLocation(
932           group, DescriptorPool::ErrorCollector::NAME);
933     }
934 
935     // The field's type_name also comes from the name.  Confusing!
936     {
937       LocationRecorder location(field_location,
938                                 FieldDescriptorProto::kTypeNameFieldNumber);
939       location.StartAt(name_token);
940       location.EndAt(name_token);
941     }
942 
943     // As a hack for backwards-compatibility, we force the group name to start
944     // with a capital letter and lower-case the field name.  New code should
945     // not use groups; it should use nested messages.
946     if (group->name()[0] < 'A' || 'Z' < group->name()[0]) {
947       AddError(name_token.line, name_token.column,
948         "Group names must start with a capital letter.");
949     }
950     LowerString(field->mutable_name());
951 
952     field->set_type_name(group->name());
953     if (LookingAt("{")) {
954       DO(ParseMessageBlock(group, group_location, containing_file));
955     } else {
956       AddError("Missing group body.");
957       return false;
958     }
959   } else {
960     DO(ConsumeEndOfDeclaration(";", &field_location));
961   }
962 
963   // Create a map entry type if this is a map field.
964   if (map_field.is_map_field) {
965     GenerateMapEntry(map_field, field, messages);
966   }
967 
968   return true;
969 }
970 
GenerateMapEntry(const MapField & map_field,FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages)971 void Parser::GenerateMapEntry(const MapField& map_field,
972                               FieldDescriptorProto* field,
973                               RepeatedPtrField<DescriptorProto>* messages) {
974   DescriptorProto* entry = messages->Add();
975   string entry_name = MapEntryName(field->name());
976   field->set_type_name(entry_name);
977   entry->set_name(entry_name);
978   entry->mutable_options()->set_map_entry(true);
979   FieldDescriptorProto* key_field = entry->add_field();
980   key_field->set_name("key");
981   key_field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
982   key_field->set_number(1);
983   if (map_field.key_type_name.empty()) {
984     key_field->set_type(map_field.key_type);
985   } else {
986     key_field->set_type_name(map_field.key_type_name);
987   }
988   FieldDescriptorProto* value_field = entry->add_field();
989   value_field->set_name("value");
990   value_field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
991   value_field->set_number(2);
992   if (map_field.value_type_name.empty()) {
993     value_field->set_type(map_field.value_type);
994   } else {
995     value_field->set_type_name(map_field.value_type_name);
996   }
997   // Propagate the "enforce_utf8" option to key and value fields if they
998   // are strings. This helps simplify the implementation of code generators
999   // and also reflection-based parsing code.
1000   //
1001   // The following definition:
1002   //   message Foo {
1003   //     map<string, string> value = 1 [enforce_utf8 = false];
1004   //   }
1005   // will be interpreted as:
1006   //   message Foo {
1007   //     message ValueEntry {
1008   //       option map_entry = true;
1009   //       string key = 1 [enforce_utf8 = false];
1010   //       string value = 2 [enforce_utf8 = false];
1011   //     }
1012   //     repeated ValueEntry value = 1 [enforce_utf8 = false];
1013   //  }
1014   //
1015   // TODO(xiaofeng): Remove this when the "enforce_utf8" option is removed
1016   // from protocol compiler.
1017   for (int i = 0; i < field->options().uninterpreted_option_size(); ++i) {
1018     const UninterpretedOption& option =
1019         field->options().uninterpreted_option(i);
1020     if (option.name_size() == 1 &&
1021         option.name(0).name_part() == "enforce_utf8" &&
1022         !option.name(0).is_extension()) {
1023       if (key_field->type() == FieldDescriptorProto::TYPE_STRING) {
1024         key_field->mutable_options()->add_uninterpreted_option()
1025             ->CopyFrom(option);
1026       }
1027       if (value_field->type() == FieldDescriptorProto::TYPE_STRING) {
1028         value_field->mutable_options()->add_uninterpreted_option()
1029             ->CopyFrom(option);
1030       }
1031     }
1032   }
1033 }
1034 
ParseFieldOptions(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1035 bool Parser::ParseFieldOptions(FieldDescriptorProto* field,
1036                                const LocationRecorder& field_location,
1037                                const FileDescriptorProto* containing_file) {
1038   if (!LookingAt("[")) return true;
1039 
1040   LocationRecorder location(field_location,
1041                             FieldDescriptorProto::kOptionsFieldNumber);
1042 
1043   DO(Consume("["));
1044 
1045   // Parse field options.
1046   do {
1047     if (LookingAt("default")) {
1048       // We intentionally pass field_location rather than location here, since
1049       // the default value is not actually an option.
1050       DO(ParseDefaultAssignment(field, field_location, containing_file));
1051     } else if (LookingAt("json_name")) {
1052       // Like default value, this "json_name" is not an actual option.
1053       DO(ParseJsonName(field, field_location, containing_file));
1054     } else {
1055       DO(ParseOption(field->mutable_options(), location,
1056                      containing_file, OPTION_ASSIGNMENT));
1057     }
1058   } while (TryConsume(","));
1059 
1060   DO(Consume("]"));
1061   return true;
1062 }
1063 
ParseDefaultAssignment(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1064 bool Parser::ParseDefaultAssignment(
1065     FieldDescriptorProto* field,
1066     const LocationRecorder& field_location,
1067     const FileDescriptorProto* containing_file) {
1068   if (field->has_default_value()) {
1069     AddError("Already set option \"default\".");
1070     field->clear_default_value();
1071   }
1072 
1073   DO(Consume("default"));
1074   DO(Consume("="));
1075 
1076   LocationRecorder location(field_location,
1077                             FieldDescriptorProto::kDefaultValueFieldNumber);
1078   location.RecordLegacyLocation(
1079       field, DescriptorPool::ErrorCollector::DEFAULT_VALUE);
1080   string* default_value = field->mutable_default_value();
1081 
1082   if (!field->has_type()) {
1083     // The field has a type name, but we don't know if it is a message or an
1084     // enum yet. (If it were a primitive type, |field| would have a type set
1085     // already.) In this case, simply take the current string as the default
1086     // value; we will catch the error later if it is not a valid enum value.
1087     // (N.B. that we do not check whether the current token is an identifier:
1088     // doing so throws strange errors when the user mistypes a primitive
1089     // typename and we assume it's an enum. E.g.: "optional int foo = 1 [default
1090     // = 42]". In such a case the fundamental error is really that "int" is not
1091     // a type, not that "42" is not an identifier. See b/12533582.)
1092     *default_value = input_->current().text;
1093     input_->Next();
1094     return true;
1095   }
1096 
1097   switch (field->type()) {
1098     case FieldDescriptorProto::TYPE_INT32:
1099     case FieldDescriptorProto::TYPE_INT64:
1100     case FieldDescriptorProto::TYPE_SINT32:
1101     case FieldDescriptorProto::TYPE_SINT64:
1102     case FieldDescriptorProto::TYPE_SFIXED32:
1103     case FieldDescriptorProto::TYPE_SFIXED64: {
1104       uint64 max_value = kint64max;
1105       if (field->type() == FieldDescriptorProto::TYPE_INT32 ||
1106           field->type() == FieldDescriptorProto::TYPE_SINT32 ||
1107           field->type() == FieldDescriptorProto::TYPE_SFIXED32) {
1108         max_value = kint32max;
1109       }
1110 
1111       // These types can be negative.
1112       if (TryConsume("-")) {
1113         default_value->append("-");
1114         // Two's complement always has one more negative value than positive.
1115         ++max_value;
1116       }
1117       // Parse the integer to verify that it is not out-of-range.
1118       uint64 value;
1119       DO(ConsumeInteger64(max_value, &value,
1120                           "Expected integer for field default value."));
1121       // And stringify it again.
1122       default_value->append(SimpleItoa(value));
1123       break;
1124     }
1125 
1126     case FieldDescriptorProto::TYPE_UINT32:
1127     case FieldDescriptorProto::TYPE_UINT64:
1128     case FieldDescriptorProto::TYPE_FIXED32:
1129     case FieldDescriptorProto::TYPE_FIXED64: {
1130       uint64 max_value = kuint64max;
1131       if (field->type() == FieldDescriptorProto::TYPE_UINT32 ||
1132           field->type() == FieldDescriptorProto::TYPE_FIXED32) {
1133         max_value = kuint32max;
1134       }
1135 
1136       // Numeric, not negative.
1137       if (TryConsume("-")) {
1138         AddError("Unsigned field can't have negative default value.");
1139       }
1140       // Parse the integer to verify that it is not out-of-range.
1141       uint64 value;
1142       DO(ConsumeInteger64(max_value, &value,
1143                           "Expected integer for field default value."));
1144       // And stringify it again.
1145       default_value->append(SimpleItoa(value));
1146       break;
1147     }
1148 
1149     case FieldDescriptorProto::TYPE_FLOAT:
1150     case FieldDescriptorProto::TYPE_DOUBLE:
1151       // These types can be negative.
1152       if (TryConsume("-")) {
1153         default_value->append("-");
1154       }
1155       // Parse the integer because we have to convert hex integers to decimal
1156       // floats.
1157       double value;
1158       DO(ConsumeNumber(&value, "Expected number."));
1159       // And stringify it again.
1160       default_value->append(SimpleDtoa(value));
1161       break;
1162 
1163     case FieldDescriptorProto::TYPE_BOOL:
1164       if (TryConsume("true")) {
1165         default_value->assign("true");
1166       } else if (TryConsume("false")) {
1167         default_value->assign("false");
1168       } else {
1169         AddError("Expected \"true\" or \"false\".");
1170         return false;
1171       }
1172       break;
1173 
1174     case FieldDescriptorProto::TYPE_STRING:
1175       // Note: When file opton java_string_check_utf8 is true, if a
1176       // non-string representation (eg byte[]) is later supported, it must
1177       // be checked for UTF-8-ness.
1178       DO(ConsumeString(default_value, "Expected string for field default "
1179                        "value."));
1180       break;
1181 
1182     case FieldDescriptorProto::TYPE_BYTES:
1183       DO(ConsumeString(default_value, "Expected string."));
1184       *default_value = CEscape(*default_value);
1185       break;
1186 
1187     case FieldDescriptorProto::TYPE_ENUM:
1188       DO(ConsumeIdentifier(default_value, "Expected enum identifier for field "
1189                                           "default value."));
1190       break;
1191 
1192     case FieldDescriptorProto::TYPE_MESSAGE:
1193     case FieldDescriptorProto::TYPE_GROUP:
1194       AddError("Messages can't have default values.");
1195       return false;
1196   }
1197 
1198   return true;
1199 }
1200 
ParseJsonName(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1201 bool Parser::ParseJsonName(
1202     FieldDescriptorProto* field,
1203     const LocationRecorder& field_location,
1204     const FileDescriptorProto* containing_file) {
1205   if (field->has_json_name()) {
1206     AddError("Already set option \"json_name\".");
1207     field->clear_json_name();
1208   }
1209 
1210   DO(Consume("json_name"));
1211   DO(Consume("="));
1212 
1213   LocationRecorder location(field_location,
1214                             FieldDescriptorProto::kJsonNameFieldNumber);
1215   location.RecordLegacyLocation(
1216       field, DescriptorPool::ErrorCollector::OPTION_VALUE);
1217   DO(ConsumeString(field->mutable_json_name(),
1218                    "Expected string for JSON name."));
1219   return true;
1220 }
1221 
1222 
ParseOptionNamePart(UninterpretedOption * uninterpreted_option,const LocationRecorder & part_location,const FileDescriptorProto * containing_file)1223 bool Parser::ParseOptionNamePart(UninterpretedOption* uninterpreted_option,
1224                                  const LocationRecorder& part_location,
1225                                  const FileDescriptorProto* containing_file) {
1226   UninterpretedOption::NamePart* name = uninterpreted_option->add_name();
1227   string identifier;  // We parse identifiers into this string.
1228   if (LookingAt("(")) {  // This is an extension.
1229     DO(Consume("("));
1230 
1231     {
1232       LocationRecorder location(
1233           part_location, UninterpretedOption::NamePart::kNamePartFieldNumber);
1234       // An extension name consists of dot-separated identifiers, and may begin
1235       // with a dot.
1236       if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
1237         DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1238         name->mutable_name_part()->append(identifier);
1239       }
1240       while (LookingAt(".")) {
1241         DO(Consume("."));
1242         name->mutable_name_part()->append(".");
1243         DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1244         name->mutable_name_part()->append(identifier);
1245       }
1246     }
1247 
1248     DO(Consume(")"));
1249     name->set_is_extension(true);
1250   } else {  // This is a regular field.
1251     LocationRecorder location(
1252         part_location, UninterpretedOption::NamePart::kNamePartFieldNumber);
1253     DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1254     name->mutable_name_part()->append(identifier);
1255     name->set_is_extension(false);
1256   }
1257   return true;
1258 }
1259 
ParseUninterpretedBlock(string * value)1260 bool Parser::ParseUninterpretedBlock(string* value) {
1261   // Note that enclosing braces are not added to *value.
1262   // We do NOT use ConsumeEndOfStatement for this brace because it's delimiting
1263   // an expression, not a block of statements.
1264   DO(Consume("{"));
1265   int brace_depth = 1;
1266   while (!AtEnd()) {
1267     if (LookingAt("{")) {
1268       brace_depth++;
1269     } else if (LookingAt("}")) {
1270       brace_depth--;
1271       if (brace_depth == 0) {
1272         input_->Next();
1273         return true;
1274       }
1275     }
1276     // TODO(sanjay): Interpret line/column numbers to preserve formatting
1277     if (!value->empty()) value->push_back(' ');
1278     value->append(input_->current().text);
1279     input_->Next();
1280   }
1281   AddError("Unexpected end of stream while parsing aggregate value.");
1282   return false;
1283 }
1284 
1285 // We don't interpret the option here. Instead we store it in an
1286 // UninterpretedOption, to be interpreted later.
ParseOption(Message * options,const LocationRecorder & options_location,const FileDescriptorProto * containing_file,OptionStyle style)1287 bool Parser::ParseOption(Message* options,
1288                          const LocationRecorder& options_location,
1289                          const FileDescriptorProto* containing_file,
1290                          OptionStyle style) {
1291   // Create an entry in the uninterpreted_option field.
1292   const FieldDescriptor* uninterpreted_option_field = options->GetDescriptor()->
1293       FindFieldByName("uninterpreted_option");
1294   GOOGLE_CHECK(uninterpreted_option_field != NULL)
1295       << "No field named \"uninterpreted_option\" in the Options proto.";
1296 
1297   const Reflection* reflection = options->GetReflection();
1298 
1299   LocationRecorder location(
1300       options_location, uninterpreted_option_field->number(),
1301       reflection->FieldSize(*options, uninterpreted_option_field));
1302 
1303   if (style == OPTION_STATEMENT) {
1304     DO(Consume("option"));
1305   }
1306 
1307   UninterpretedOption* uninterpreted_option = down_cast<UninterpretedOption*>(
1308       options->GetReflection()->AddMessage(options,
1309                                            uninterpreted_option_field));
1310 
1311   // Parse dot-separated name.
1312   {
1313     LocationRecorder name_location(location,
1314                                    UninterpretedOption::kNameFieldNumber);
1315     name_location.RecordLegacyLocation(
1316         uninterpreted_option, DescriptorPool::ErrorCollector::OPTION_NAME);
1317 
1318     {
1319       LocationRecorder part_location(name_location,
1320                                      uninterpreted_option->name_size());
1321       DO(ParseOptionNamePart(uninterpreted_option, part_location,
1322                              containing_file));
1323     }
1324 
1325     while (LookingAt(".")) {
1326       DO(Consume("."));
1327       LocationRecorder part_location(name_location,
1328                                      uninterpreted_option->name_size());
1329       DO(ParseOptionNamePart(uninterpreted_option, part_location,
1330                              containing_file));
1331     }
1332   }
1333 
1334   DO(Consume("="));
1335 
1336   {
1337     LocationRecorder value_location(location);
1338     value_location.RecordLegacyLocation(
1339         uninterpreted_option, DescriptorPool::ErrorCollector::OPTION_VALUE);
1340 
1341     // All values are a single token, except for negative numbers, which consist
1342     // of a single '-' symbol, followed by a positive number.
1343     bool is_negative = TryConsume("-");
1344 
1345     switch (input_->current().type) {
1346       case io::Tokenizer::TYPE_START:
1347         GOOGLE_LOG(FATAL) << "Trying to read value before any tokens have been read.";
1348         return false;
1349 
1350       case io::Tokenizer::TYPE_END:
1351         AddError("Unexpected end of stream while parsing option value.");
1352         return false;
1353 
1354       case io::Tokenizer::TYPE_IDENTIFIER: {
1355         value_location.AddPath(
1356             UninterpretedOption::kIdentifierValueFieldNumber);
1357         if (is_negative) {
1358           AddError("Invalid '-' symbol before identifier.");
1359           return false;
1360         }
1361         string value;
1362         DO(ConsumeIdentifier(&value, "Expected identifier."));
1363         uninterpreted_option->set_identifier_value(value);
1364         break;
1365       }
1366 
1367       case io::Tokenizer::TYPE_INTEGER: {
1368         uint64 value;
1369         uint64 max_value =
1370             is_negative ? static_cast<uint64>(kint64max) + 1 : kuint64max;
1371         DO(ConsumeInteger64(max_value, &value, "Expected integer."));
1372         if (is_negative) {
1373           value_location.AddPath(
1374               UninterpretedOption::kNegativeIntValueFieldNumber);
1375           uninterpreted_option->set_negative_int_value(
1376               -static_cast<int64>(value));
1377         } else {
1378           value_location.AddPath(
1379               UninterpretedOption::kPositiveIntValueFieldNumber);
1380           uninterpreted_option->set_positive_int_value(value);
1381         }
1382         break;
1383       }
1384 
1385       case io::Tokenizer::TYPE_FLOAT: {
1386         value_location.AddPath(UninterpretedOption::kDoubleValueFieldNumber);
1387         double value;
1388         DO(ConsumeNumber(&value, "Expected number."));
1389         uninterpreted_option->set_double_value(is_negative ? -value : value);
1390         break;
1391       }
1392 
1393       case io::Tokenizer::TYPE_STRING: {
1394         value_location.AddPath(UninterpretedOption::kStringValueFieldNumber);
1395         if (is_negative) {
1396           AddError("Invalid '-' symbol before string.");
1397           return false;
1398         }
1399         string value;
1400         DO(ConsumeString(&value, "Expected string."));
1401         uninterpreted_option->set_string_value(value);
1402         break;
1403       }
1404 
1405       case io::Tokenizer::TYPE_SYMBOL:
1406         if (LookingAt("{")) {
1407           value_location.AddPath(
1408               UninterpretedOption::kAggregateValueFieldNumber);
1409           DO(ParseUninterpretedBlock(
1410               uninterpreted_option->mutable_aggregate_value()));
1411         } else {
1412           AddError("Expected option value.");
1413           return false;
1414         }
1415         break;
1416     }
1417   }
1418 
1419   if (style == OPTION_STATEMENT) {
1420     DO(ConsumeEndOfDeclaration(";", &location));
1421   }
1422 
1423   return true;
1424 }
1425 
ParseExtensions(DescriptorProto * message,const LocationRecorder & extensions_location,const FileDescriptorProto * containing_file)1426 bool Parser::ParseExtensions(DescriptorProto* message,
1427                              const LocationRecorder& extensions_location,
1428                              const FileDescriptorProto* containing_file) {
1429   // Parse the declaration.
1430   DO(Consume("extensions"));
1431 
1432   do {
1433     // Note that kExtensionRangeFieldNumber was already pushed by the parent.
1434     LocationRecorder location(extensions_location,
1435                               message->extension_range_size());
1436 
1437     DescriptorProto::ExtensionRange* range = message->add_extension_range();
1438     location.RecordLegacyLocation(
1439         range, DescriptorPool::ErrorCollector::NUMBER);
1440 
1441     int start, end;
1442     io::Tokenizer::Token start_token;
1443 
1444     {
1445       LocationRecorder start_location(
1446           location, DescriptorProto::ExtensionRange::kStartFieldNumber);
1447       start_token = input_->current();
1448       DO(ConsumeInteger(&start, "Expected field number range."));
1449     }
1450 
1451     if (TryConsume("to")) {
1452       LocationRecorder end_location(
1453           location, DescriptorProto::ExtensionRange::kEndFieldNumber);
1454       if (TryConsume("max")) {
1455         // Set to the sentinel value - 1 since we increment the value below.
1456         // The actual value of the end of the range should be set with
1457         // AdjustExtensionRangesWithMaxEndNumber.
1458         end = kMaxExtensionRangeSentinel - 1;
1459       } else {
1460         DO(ConsumeInteger(&end, "Expected integer."));
1461       }
1462     } else {
1463       LocationRecorder end_location(
1464           location, DescriptorProto::ExtensionRange::kEndFieldNumber);
1465       end_location.StartAt(start_token);
1466       end_location.EndAt(start_token);
1467       end = start;
1468     }
1469 
1470     // Users like to specify inclusive ranges, but in code we like the end
1471     // number to be exclusive.
1472     ++end;
1473 
1474     range->set_start(start);
1475     range->set_end(end);
1476   } while (TryConsume(","));
1477 
1478   DO(ConsumeEndOfDeclaration(";", &extensions_location));
1479   return true;
1480 }
1481 
1482 // This is similar to extension range parsing, except that "max" is not
1483 // supported, and accepts field name literals.
ParseReserved(DescriptorProto * message,const LocationRecorder & message_location)1484 bool Parser::ParseReserved(DescriptorProto* message,
1485                            const LocationRecorder& message_location) {
1486   // Parse the declaration.
1487   DO(Consume("reserved"));
1488   if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
1489     LocationRecorder location(message_location,
1490                               DescriptorProto::kReservedNameFieldNumber);
1491     return ParseReservedNames(message, location);
1492   } else {
1493     LocationRecorder location(message_location,
1494                               DescriptorProto::kReservedRangeFieldNumber);
1495     return ParseReservedNumbers(message, location);
1496   }
1497 }
1498 
1499 
ParseReservedNames(DescriptorProto * message,const LocationRecorder & parent_location)1500 bool Parser::ParseReservedNames(DescriptorProto* message,
1501                                 const LocationRecorder& parent_location) {
1502   do {
1503     LocationRecorder location(parent_location, message->reserved_name_size());
1504     DO(ConsumeString(message->add_reserved_name(), "Expected field name."));
1505   } while (TryConsume(","));
1506   DO(ConsumeEndOfDeclaration(";", &parent_location));
1507   return true;
1508 }
1509 
ParseReservedNumbers(DescriptorProto * message,const LocationRecorder & parent_location)1510 bool Parser::ParseReservedNumbers(DescriptorProto* message,
1511                                   const LocationRecorder& parent_location) {
1512   bool first = true;
1513   do {
1514     LocationRecorder location(parent_location, message->reserved_range_size());
1515 
1516     DescriptorProto::ReservedRange* range = message->add_reserved_range();
1517     int start, end;
1518     io::Tokenizer::Token start_token;
1519     {
1520       LocationRecorder start_location(
1521           location, DescriptorProto::ReservedRange::kStartFieldNumber);
1522       start_token = input_->current();
1523       DO(ConsumeInteger(&start, (first ?
1524                                  "Expected field name or number range." :
1525                                  "Expected field number range.")));
1526     }
1527 
1528     if (TryConsume("to")) {
1529       LocationRecorder end_location(
1530           location, DescriptorProto::ReservedRange::kEndFieldNumber);
1531       DO(ConsumeInteger(&end, "Expected integer."));
1532     } else {
1533       LocationRecorder end_location(
1534           location, DescriptorProto::ReservedRange::kEndFieldNumber);
1535       end_location.StartAt(start_token);
1536       end_location.EndAt(start_token);
1537       end = start;
1538     }
1539 
1540     // Users like to specify inclusive ranges, but in code we like the end
1541     // number to be exclusive.
1542     ++end;
1543 
1544     range->set_start(start);
1545     range->set_end(end);
1546     first = false;
1547   } while (TryConsume(","));
1548 
1549   DO(ConsumeEndOfDeclaration(";", &parent_location));
1550   return true;
1551 }
1552 
ParseExtend(RepeatedPtrField<FieldDescriptorProto> * extensions,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & extend_location,const FileDescriptorProto * containing_file)1553 bool Parser::ParseExtend(RepeatedPtrField<FieldDescriptorProto>* extensions,
1554                          RepeatedPtrField<DescriptorProto>* messages,
1555                          const LocationRecorder& parent_location,
1556                          int location_field_number_for_nested_type,
1557                          const LocationRecorder& extend_location,
1558                          const FileDescriptorProto* containing_file) {
1559   DO(Consume("extend"));
1560 
1561   // Parse the extendee type.
1562   io::Tokenizer::Token extendee_start = input_->current();
1563   string extendee;
1564   DO(ParseUserDefinedType(&extendee));
1565   io::Tokenizer::Token extendee_end = input_->previous();
1566 
1567   // Parse the block.
1568   DO(ConsumeEndOfDeclaration("{", &extend_location));
1569 
1570   bool is_first = true;
1571 
1572   do {
1573     if (AtEnd()) {
1574       AddError("Reached end of input in extend definition (missing '}').");
1575       return false;
1576     }
1577 
1578     // Note that kExtensionFieldNumber was already pushed by the parent.
1579     LocationRecorder location(extend_location, extensions->size());
1580 
1581     FieldDescriptorProto* field = extensions->Add();
1582 
1583     {
1584       LocationRecorder extendee_location(
1585           location, FieldDescriptorProto::kExtendeeFieldNumber);
1586       extendee_location.StartAt(extendee_start);
1587       extendee_location.EndAt(extendee_end);
1588 
1589       if (is_first) {
1590         extendee_location.RecordLegacyLocation(
1591             field, DescriptorPool::ErrorCollector::EXTENDEE);
1592         is_first = false;
1593       }
1594     }
1595 
1596     field->set_extendee(extendee);
1597 
1598     if (!ParseMessageField(field, messages, parent_location,
1599                            location_field_number_for_nested_type,
1600                            location,
1601                            containing_file)) {
1602       // This statement failed to parse.  Skip it, but keep looping to parse
1603       // other statements.
1604       SkipStatement();
1605     }
1606   } while (!TryConsumeEndOfDeclaration("}", NULL));
1607 
1608   return true;
1609 }
1610 
ParseOneof(OneofDescriptorProto * oneof_decl,DescriptorProto * containing_type,int oneof_index,const LocationRecorder & oneof_location,const LocationRecorder & containing_type_location,const FileDescriptorProto * containing_file)1611 bool Parser::ParseOneof(OneofDescriptorProto* oneof_decl,
1612                         DescriptorProto* containing_type,
1613                         int oneof_index,
1614                         const LocationRecorder& oneof_location,
1615                         const LocationRecorder& containing_type_location,
1616                         const FileDescriptorProto* containing_file) {
1617   DO(Consume("oneof"));
1618 
1619   {
1620     LocationRecorder name_location(oneof_location,
1621                                    OneofDescriptorProto::kNameFieldNumber);
1622     DO(ConsumeIdentifier(oneof_decl->mutable_name(), "Expected oneof name."));
1623   }
1624 
1625   DO(ConsumeEndOfDeclaration("{", &oneof_location));
1626 
1627   do {
1628     if (AtEnd()) {
1629       AddError("Reached end of input in oneof definition (missing '}').");
1630       return false;
1631     }
1632 
1633     if (LookingAt("option")) {
1634       LocationRecorder option_location(
1635           oneof_location, OneofDescriptorProto::kOptionsFieldNumber);
1636       if (!ParseOption(oneof_decl->mutable_options(), option_location,
1637                        containing_file, OPTION_STATEMENT)) {
1638         return false;
1639       }
1640       continue;
1641     }
1642 
1643     // Print a nice error if the user accidentally tries to place a label
1644     // on an individual member of a oneof.
1645     if (LookingAt("required") ||
1646         LookingAt("optional") ||
1647         LookingAt("repeated")) {
1648       AddError("Fields in oneofs must not have labels (required / optional "
1649                "/ repeated).");
1650       // We can continue parsing here because we understand what the user
1651       // meant.  The error report will still make parsing fail overall.
1652       input_->Next();
1653     }
1654 
1655     LocationRecorder field_location(containing_type_location,
1656                                     DescriptorProto::kFieldFieldNumber,
1657                                     containing_type->field_size());
1658 
1659     FieldDescriptorProto* field = containing_type->add_field();
1660     field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1661     field->set_oneof_index(oneof_index);
1662 
1663     if (!ParseMessageFieldNoLabel(field,
1664                                   containing_type->mutable_nested_type(),
1665                                   containing_type_location,
1666                                   DescriptorProto::kNestedTypeFieldNumber,
1667                                   field_location,
1668                                   containing_file)) {
1669       // This statement failed to parse.  Skip it, but keep looping to parse
1670       // other statements.
1671       SkipStatement();
1672     }
1673   } while (!TryConsumeEndOfDeclaration("}", NULL));
1674 
1675   return true;
1676 }
1677 
1678 // -------------------------------------------------------------------
1679 // Enums
1680 
ParseEnumDefinition(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)1681 bool Parser::ParseEnumDefinition(EnumDescriptorProto* enum_type,
1682                                  const LocationRecorder& enum_location,
1683                                  const FileDescriptorProto* containing_file) {
1684   DO(Consume("enum"));
1685 
1686   {
1687     LocationRecorder location(enum_location,
1688                               EnumDescriptorProto::kNameFieldNumber);
1689     location.RecordLegacyLocation(
1690         enum_type, DescriptorPool::ErrorCollector::NAME);
1691     DO(ConsumeIdentifier(enum_type->mutable_name(), "Expected enum name."));
1692   }
1693 
1694   DO(ParseEnumBlock(enum_type, enum_location, containing_file));
1695 
1696   DO(ValidateEnum(enum_type));
1697 
1698   return true;
1699 }
1700 
ParseEnumBlock(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)1701 bool Parser::ParseEnumBlock(EnumDescriptorProto* enum_type,
1702                             const LocationRecorder& enum_location,
1703                             const FileDescriptorProto* containing_file) {
1704   DO(ConsumeEndOfDeclaration("{", &enum_location));
1705 
1706   while (!TryConsumeEndOfDeclaration("}", NULL)) {
1707     if (AtEnd()) {
1708       AddError("Reached end of input in enum definition (missing '}').");
1709       return false;
1710     }
1711 
1712     if (!ParseEnumStatement(enum_type, enum_location, containing_file)) {
1713       // This statement failed to parse.  Skip it, but keep looping to parse
1714       // other statements.
1715       SkipStatement();
1716     }
1717   }
1718 
1719   return true;
1720 }
1721 
ParseEnumStatement(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)1722 bool Parser::ParseEnumStatement(EnumDescriptorProto* enum_type,
1723                                 const LocationRecorder& enum_location,
1724                                 const FileDescriptorProto* containing_file) {
1725   if (TryConsumeEndOfDeclaration(";", NULL)) {
1726     // empty statement; ignore
1727     return true;
1728   } else if (LookingAt("option")) {
1729     LocationRecorder location(enum_location,
1730                               EnumDescriptorProto::kOptionsFieldNumber);
1731     return ParseOption(enum_type->mutable_options(), location,
1732                        containing_file, OPTION_STATEMENT);
1733   } else {
1734     LocationRecorder location(enum_location,
1735         EnumDescriptorProto::kValueFieldNumber, enum_type->value_size());
1736     return ParseEnumConstant(enum_type->add_value(), location, containing_file);
1737   }
1738 }
1739 
ParseEnumConstant(EnumValueDescriptorProto * enum_value,const LocationRecorder & enum_value_location,const FileDescriptorProto * containing_file)1740 bool Parser::ParseEnumConstant(EnumValueDescriptorProto* enum_value,
1741                                const LocationRecorder& enum_value_location,
1742                                const FileDescriptorProto* containing_file) {
1743   // Parse name.
1744   {
1745     LocationRecorder location(enum_value_location,
1746                               EnumValueDescriptorProto::kNameFieldNumber);
1747     location.RecordLegacyLocation(
1748         enum_value, DescriptorPool::ErrorCollector::NAME);
1749     DO(ConsumeIdentifier(enum_value->mutable_name(),
1750                          "Expected enum constant name."));
1751   }
1752 
1753   DO(Consume("=", "Missing numeric value for enum constant."));
1754 
1755   // Parse value.
1756   {
1757     LocationRecorder location(
1758         enum_value_location, EnumValueDescriptorProto::kNumberFieldNumber);
1759     location.RecordLegacyLocation(
1760         enum_value, DescriptorPool::ErrorCollector::NUMBER);
1761 
1762     int number;
1763     DO(ConsumeSignedInteger(&number, "Expected integer."));
1764     enum_value->set_number(number);
1765   }
1766 
1767   DO(ParseEnumConstantOptions(enum_value, enum_value_location,
1768                               containing_file));
1769 
1770   DO(ConsumeEndOfDeclaration(";", &enum_value_location));
1771 
1772   return true;
1773 }
1774 
ParseEnumConstantOptions(EnumValueDescriptorProto * value,const LocationRecorder & enum_value_location,const FileDescriptorProto * containing_file)1775 bool Parser::ParseEnumConstantOptions(
1776     EnumValueDescriptorProto* value,
1777     const LocationRecorder& enum_value_location,
1778     const FileDescriptorProto* containing_file) {
1779   if (!LookingAt("[")) return true;
1780 
1781   LocationRecorder location(
1782       enum_value_location, EnumValueDescriptorProto::kOptionsFieldNumber);
1783 
1784   DO(Consume("["));
1785 
1786   do {
1787     DO(ParseOption(value->mutable_options(), location,
1788                    containing_file, OPTION_ASSIGNMENT));
1789   } while (TryConsume(","));
1790 
1791   DO(Consume("]"));
1792   return true;
1793 }
1794 
1795 // -------------------------------------------------------------------
1796 // Services
1797 
ParseServiceDefinition(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)1798 bool Parser::ParseServiceDefinition(
1799     ServiceDescriptorProto* service,
1800     const LocationRecorder& service_location,
1801     const FileDescriptorProto* containing_file) {
1802   DO(Consume("service"));
1803 
1804   {
1805     LocationRecorder location(service_location,
1806                               ServiceDescriptorProto::kNameFieldNumber);
1807     location.RecordLegacyLocation(
1808         service, DescriptorPool::ErrorCollector::NAME);
1809     DO(ConsumeIdentifier(service->mutable_name(), "Expected service name."));
1810   }
1811 
1812   DO(ParseServiceBlock(service, service_location, containing_file));
1813   return true;
1814 }
1815 
ParseServiceBlock(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)1816 bool Parser::ParseServiceBlock(ServiceDescriptorProto* service,
1817                                const LocationRecorder& service_location,
1818                                const FileDescriptorProto* containing_file) {
1819   DO(ConsumeEndOfDeclaration("{", &service_location));
1820 
1821   while (!TryConsumeEndOfDeclaration("}", NULL)) {
1822     if (AtEnd()) {
1823       AddError("Reached end of input in service definition (missing '}').");
1824       return false;
1825     }
1826 
1827     if (!ParseServiceStatement(service, service_location, containing_file)) {
1828       // This statement failed to parse.  Skip it, but keep looping to parse
1829       // other statements.
1830       SkipStatement();
1831     }
1832   }
1833 
1834   return true;
1835 }
1836 
ParseServiceStatement(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)1837 bool Parser::ParseServiceStatement(ServiceDescriptorProto* service,
1838                                    const LocationRecorder& service_location,
1839                                    const FileDescriptorProto* containing_file) {
1840   if (TryConsumeEndOfDeclaration(";", NULL)) {
1841     // empty statement; ignore
1842     return true;
1843   } else if (LookingAt("option")) {
1844     LocationRecorder location(
1845         service_location, ServiceDescriptorProto::kOptionsFieldNumber);
1846     return ParseOption(service->mutable_options(), location,
1847                        containing_file, OPTION_STATEMENT);
1848   } else {
1849     LocationRecorder location(service_location,
1850         ServiceDescriptorProto::kMethodFieldNumber, service->method_size());
1851     return ParseServiceMethod(service->add_method(), location, containing_file);
1852   }
1853 }
1854 
ParseServiceMethod(MethodDescriptorProto * method,const LocationRecorder & method_location,const FileDescriptorProto * containing_file)1855 bool Parser::ParseServiceMethod(MethodDescriptorProto* method,
1856                                 const LocationRecorder& method_location,
1857                                 const FileDescriptorProto* containing_file) {
1858   DO(Consume("rpc"));
1859 
1860   {
1861     LocationRecorder location(method_location,
1862                               MethodDescriptorProto::kNameFieldNumber);
1863     location.RecordLegacyLocation(
1864         method, DescriptorPool::ErrorCollector::NAME);
1865     DO(ConsumeIdentifier(method->mutable_name(), "Expected method name."));
1866   }
1867 
1868   // Parse input type.
1869   DO(Consume("("));
1870   {
1871     if (LookingAt("stream")) {
1872       LocationRecorder location(
1873           method_location, MethodDescriptorProto::kClientStreamingFieldNumber);
1874       location.RecordLegacyLocation(
1875           method, DescriptorPool::ErrorCollector::OTHER);
1876       method->set_client_streaming(true);
1877       DO(Consume("stream"));
1878 
1879     }
1880     LocationRecorder location(method_location,
1881                               MethodDescriptorProto::kInputTypeFieldNumber);
1882     location.RecordLegacyLocation(
1883         method, DescriptorPool::ErrorCollector::INPUT_TYPE);
1884     DO(ParseUserDefinedType(method->mutable_input_type()));
1885   }
1886   DO(Consume(")"));
1887 
1888   // Parse output type.
1889   DO(Consume("returns"));
1890   DO(Consume("("));
1891   {
1892     if (LookingAt("stream")) {
1893       LocationRecorder location(
1894           method_location, MethodDescriptorProto::kServerStreamingFieldNumber);
1895       location.RecordLegacyLocation(
1896           method, DescriptorPool::ErrorCollector::OTHER);
1897       DO(Consume("stream"));
1898       method->set_server_streaming(true);
1899 
1900     }
1901     LocationRecorder location(method_location,
1902                               MethodDescriptorProto::kOutputTypeFieldNumber);
1903     location.RecordLegacyLocation(
1904         method, DescriptorPool::ErrorCollector::OUTPUT_TYPE);
1905     DO(ParseUserDefinedType(method->mutable_output_type()));
1906   }
1907   DO(Consume(")"));
1908 
1909   if (LookingAt("{")) {
1910     // Options!
1911     DO(ParseMethodOptions(method_location, containing_file,
1912                           MethodDescriptorProto::kOptionsFieldNumber,
1913                           method->mutable_options()));
1914   } else {
1915     DO(ConsumeEndOfDeclaration(";", &method_location));
1916   }
1917 
1918   return true;
1919 }
1920 
1921 
ParseMethodOptions(const LocationRecorder & parent_location,const FileDescriptorProto * containing_file,const int optionsFieldNumber,Message * mutable_options)1922 bool Parser::ParseMethodOptions(const LocationRecorder& parent_location,
1923                                 const FileDescriptorProto* containing_file,
1924                                 const int optionsFieldNumber,
1925                                 Message* mutable_options) {
1926   // Options!
1927   ConsumeEndOfDeclaration("{", &parent_location);
1928   while (!TryConsumeEndOfDeclaration("}", NULL)) {
1929     if (AtEnd()) {
1930       AddError("Reached end of input in method options (missing '}').");
1931       return false;
1932     }
1933 
1934     if (TryConsumeEndOfDeclaration(";", NULL)) {
1935       // empty statement; ignore
1936     } else {
1937       LocationRecorder location(parent_location,
1938                                 optionsFieldNumber);
1939       if (!ParseOption(mutable_options, location,
1940                        containing_file, OPTION_STATEMENT)) {
1941         // This statement failed to parse.  Skip it, but keep looping to
1942         // parse other statements.
1943         SkipStatement();
1944       }
1945     }
1946   }
1947 
1948   return true;
1949 }
1950 
1951 // -------------------------------------------------------------------
1952 
ParseLabel(FieldDescriptorProto::Label * label,const FileDescriptorProto * containing_file)1953 bool Parser::ParseLabel(FieldDescriptorProto::Label* label,
1954                         const FileDescriptorProto* containing_file) {
1955   if (TryConsume("optional")) {
1956     *label = FieldDescriptorProto::LABEL_OPTIONAL;
1957     return true;
1958   } else if (TryConsume("repeated")) {
1959     *label = FieldDescriptorProto::LABEL_REPEATED;
1960     return true;
1961   } else if (TryConsume("required")) {
1962     *label = FieldDescriptorProto::LABEL_REQUIRED;
1963     return true;
1964   }
1965   return false;
1966 }
1967 
ParseType(FieldDescriptorProto::Type * type,string * type_name)1968 bool Parser::ParseType(FieldDescriptorProto::Type* type,
1969                        string* type_name) {
1970   TypeNameMap::const_iterator iter = kTypeNames.find(input_->current().text);
1971   if (iter != kTypeNames.end()) {
1972     *type = iter->second;
1973     input_->Next();
1974   } else {
1975     DO(ParseUserDefinedType(type_name));
1976   }
1977   return true;
1978 }
1979 
ParseUserDefinedType(string * type_name)1980 bool Parser::ParseUserDefinedType(string* type_name) {
1981   type_name->clear();
1982 
1983   TypeNameMap::const_iterator iter = kTypeNames.find(input_->current().text);
1984   if (iter != kTypeNames.end()) {
1985     // Note:  The only place enum types are allowed is for field types, but
1986     //   if we are parsing a field type then we would not get here because
1987     //   primitives are allowed there as well.  So this error message doesn't
1988     //   need to account for enums.
1989     AddError("Expected message type.");
1990 
1991     // Pretend to accept this type so that we can go on parsing.
1992     *type_name = input_->current().text;
1993     input_->Next();
1994     return true;
1995   }
1996 
1997   // A leading "." means the name is fully-qualified.
1998   if (TryConsume(".")) type_name->append(".");
1999 
2000   // Consume the first part of the name.
2001   string identifier;
2002   DO(ConsumeIdentifier(&identifier, "Expected type name."));
2003   type_name->append(identifier);
2004 
2005   // Consume more parts.
2006   while (TryConsume(".")) {
2007     type_name->append(".");
2008     DO(ConsumeIdentifier(&identifier, "Expected identifier."));
2009     type_name->append(identifier);
2010   }
2011 
2012   return true;
2013 }
2014 
2015 // ===================================================================
2016 
ParsePackage(FileDescriptorProto * file,const LocationRecorder & root_location,const FileDescriptorProto * containing_file)2017 bool Parser::ParsePackage(FileDescriptorProto* file,
2018                           const LocationRecorder& root_location,
2019                           const FileDescriptorProto* containing_file) {
2020   if (file->has_package()) {
2021     AddError("Multiple package definitions.");
2022     // Don't append the new package to the old one.  Just replace it.  Not
2023     // that it really matters since this is an error anyway.
2024     file->clear_package();
2025   }
2026 
2027   DO(Consume("package"));
2028 
2029   {
2030     LocationRecorder location(root_location,
2031                               FileDescriptorProto::kPackageFieldNumber);
2032     location.RecordLegacyLocation(file, DescriptorPool::ErrorCollector::NAME);
2033 
2034     while (true) {
2035       string identifier;
2036       DO(ConsumeIdentifier(&identifier, "Expected identifier."));
2037       file->mutable_package()->append(identifier);
2038       if (!TryConsume(".")) break;
2039       file->mutable_package()->append(".");
2040     }
2041 
2042     location.EndAt(input_->previous());
2043 
2044     DO(ConsumeEndOfDeclaration(";", &location));
2045   }
2046 
2047   return true;
2048 }
2049 
ParseImport(RepeatedPtrField<string> * dependency,RepeatedField<int32> * public_dependency,RepeatedField<int32> * weak_dependency,const LocationRecorder & root_location,const FileDescriptorProto * containing_file)2050 bool Parser::ParseImport(RepeatedPtrField<string>* dependency,
2051                          RepeatedField<int32>* public_dependency,
2052                          RepeatedField<int32>* weak_dependency,
2053                          const LocationRecorder& root_location,
2054                          const FileDescriptorProto* containing_file) {
2055   DO(Consume("import"));
2056   if (LookingAt("public")) {
2057     LocationRecorder location(
2058         root_location, FileDescriptorProto::kPublicDependencyFieldNumber,
2059         public_dependency->size());
2060     DO(Consume("public"));
2061     *public_dependency->Add() = dependency->size();
2062   } else if (LookingAt("weak")) {
2063     LocationRecorder location(
2064         root_location, FileDescriptorProto::kWeakDependencyFieldNumber,
2065         weak_dependency->size());
2066     DO(Consume("weak"));
2067     *weak_dependency->Add() = dependency->size();
2068   }
2069   {
2070     LocationRecorder location(root_location,
2071                               FileDescriptorProto::kDependencyFieldNumber,
2072                               dependency->size());
2073     DO(ConsumeString(dependency->Add(),
2074       "Expected a string naming the file to import."));
2075 
2076     location.EndAt(input_->previous());
2077 
2078     DO(ConsumeEndOfDeclaration(";", &location));
2079   }
2080   return true;
2081 }
2082 
2083 // ===================================================================
2084 
// Constructor and destructor have empty bodies: no explicit setup or teardown
// is performed here.
SourceLocationTable::SourceLocationTable() {}
SourceLocationTable::~SourceLocationTable() {}
2087 
Find(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,int * line,int * column) const2088 bool SourceLocationTable::Find(
2089     const Message* descriptor,
2090     DescriptorPool::ErrorCollector::ErrorLocation location,
2091     int* line, int* column) const {
2092   const pair<int, int>* result =
2093       FindOrNull(location_map_, std::make_pair(descriptor, location));
2094   if (result == NULL) {
2095     *line   = -1;
2096     *column = 0;
2097     return false;
2098   } else {
2099     *line   = result->first;
2100     *column = result->second;
2101     return true;
2102   }
2103 }
2104 
Add(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,int line,int column)2105 void SourceLocationTable::Add(
2106     const Message* descriptor,
2107     DescriptorPool::ErrorCollector::ErrorLocation location,
2108     int line, int column) {
2109   location_map_[std::make_pair(descriptor, location)] =
2110       std::make_pair(line, column);
2111 }
2112 
// Removes every entry from the table.
void SourceLocationTable::Clear() {
  location_map_.clear();
}
2116 
2117 }  // namespace compiler
2118 }  // namespace protobuf
2119 }  // namespace google
2120