1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // Recursive descent FTW.
36 
37 #include <float.h>
38 #include <limits>
39 #include <unordered_map>
40 
41 
42 #include <google/protobuf/stubs/hash.h>
43 
44 #include <google/protobuf/stubs/casts.h>
45 #include <google/protobuf/stubs/logging.h>
46 #include <google/protobuf/stubs/common.h>
47 #include <google/protobuf/compiler/parser.h>
48 #include <google/protobuf/descriptor.pb.h>
49 #include <google/protobuf/io/tokenizer.h>
50 #include <google/protobuf/descriptor.h>
51 #include <google/protobuf/wire_format.h>
52 #include <google/protobuf/stubs/strutil.h>
53 #include <google/protobuf/stubs/map_util.h>
54 
55 namespace google {
56 namespace protobuf {
57 namespace compiler {
58 
59 using internal::WireFormat;
60 
61 namespace {
62 
// Maps a type keyword (e.g. "int32") to its FieldDescriptorProto::Type value.
typedef std::unordered_map<std::string, FieldDescriptorProto::Type> TypeNameMap;
64 
MakeTypeNameTable()65 TypeNameMap MakeTypeNameTable() {
66   TypeNameMap result;
67 
68   result["double"] = FieldDescriptorProto::TYPE_DOUBLE;
69   result["float"] = FieldDescriptorProto::TYPE_FLOAT;
70   result["uint64"] = FieldDescriptorProto::TYPE_UINT64;
71   result["fixed64"] = FieldDescriptorProto::TYPE_FIXED64;
72   result["fixed32"] = FieldDescriptorProto::TYPE_FIXED32;
73   result["bool"] = FieldDescriptorProto::TYPE_BOOL;
74   result["string"] = FieldDescriptorProto::TYPE_STRING;
75   result["group"] = FieldDescriptorProto::TYPE_GROUP;
76 
77   result["bytes"] = FieldDescriptorProto::TYPE_BYTES;
78   result["uint32"] = FieldDescriptorProto::TYPE_UINT32;
79   result["sfixed32"] = FieldDescriptorProto::TYPE_SFIXED32;
80   result["sfixed64"] = FieldDescriptorProto::TYPE_SFIXED64;
81   result["int32"] = FieldDescriptorProto::TYPE_INT32;
82   result["int64"] = FieldDescriptorProto::TYPE_INT64;
83   result["sint32"] = FieldDescriptorProto::TYPE_SINT32;
84   result["sint64"] = FieldDescriptorProto::TYPE_SINT64;
85 
86   return result;
87 }
88 
// Lookup table of built-in type keywords, filled in by MakeTypeNameTable().
const TypeNameMap kTypeNames = MakeTypeNameTable();
90 
// Derives the generated map-entry message name from a map field name: every
// underscore is dropped, the first character and each character that follows
// an underscore is upper-cased when it is in 'a'..'z', and "Entry" is
// appended.
// e.g. map<KeyType, ValueType> foo_map => FooMapEntry
std::string MapEntryName(const std::string& field_name) {
  static const char kSuffix[] = "Entry";
  std::string entry_name;
  entry_name.reserve(field_name.size() + sizeof(kSuffix));

  bool capitalize = true;
  for (const char ch : field_name) {
    if (ch == '_') {
      capitalize = true;
      continue;
    }
    // Note: ctype.h is deliberately avoided because it is locale-dependent.
    const bool is_lower = 'a' <= ch && ch <= 'z';
    entry_name.push_back(
        capitalize && is_lower ? static_cast<char>(ch - 'a' + 'A') : ch);
    capitalize = false;
  }

  entry_name.append(kSuffix);
  return entry_name;
}
116 
// Returns true iff |c| lies in the range 'A'..'Z'.
bool IsUppercase(char c) {
  return 'A' <= c && c <= 'Z';
}
118 
// Returns true iff |c| lies in the range 'a'..'z'.
bool IsLowercase(char c) {
  return 'a' <= c && c <= 'z';
}
120 
// Returns true iff |c| lies in the range '0'..'9'.
bool IsNumber(char c) {
  return '0' <= c && c <= '9';
}
122 
IsUpperCamelCase(const string & name)123 bool IsUpperCamelCase(const string& name) {
124   if (name.empty()) {
125     return true;
126   }
127   // Name must start with an upper case character.
128   if (!IsUppercase(name[0])) {
129     return false;
130   }
131   // Must not contains underscore.
132   for (int i = 1; i < name.length(); i++) {
133     if (name[i] == '_') {
134       return false;
135     }
136   }
137   return true;
138 }
139 
IsUpperUnderscore(const string & name)140 bool IsUpperUnderscore(const string& name) {
141   for (int i = 0; i < name.length(); i++) {
142     const char c = name[i];
143     if (!IsUppercase(c) && c != '_' && !IsNumber(c)) {
144       return false;
145     }
146   }
147   return true;
148 }
149 
IsLowerUnderscore(const string & name)150 bool IsLowerUnderscore(const string& name) {
151   for (int i = 0; i < name.length(); i++) {
152     const char c = name[i];
153     if (!IsLowercase(c) && c != '_' && !IsNumber(c)) {
154       return false;
155     }
156   }
157   return true;
158 }
159 
IsNumberFollowUnderscore(const string & name)160 bool IsNumberFollowUnderscore(const string& name) {
161   for (int i = 1; i < name.length(); i++) {
162     const char c = name[i];
163     if (IsNumber(c) && name[i - 1] == '_') {
164       return true;
165     }
166   }
167   return false;
168 }
169 
170 }  // anonymous namespace
171 
// Makes code slightly more readable.  The meaning of "DO(foo)" is
// "Execute foo and fail if it fails.", where failure is indicated by
// returning false.
//
// The macro expands to a single if/else statement with an empty "then" branch
// and deliberately has no trailing semicolon: the caller writes "DO(foo);" and
// that semicolon completes the "return false" in the else branch.  It may only
// be used inside functions that return bool.
#define DO(STATEMENT) \
  if (STATEMENT) {    \
  } else              \
    return false
179 
180 // ===================================================================
181 
Parser()182 Parser::Parser()
183     : input_(NULL),
184       error_collector_(NULL),
185       source_location_table_(NULL),
186       had_errors_(false),
187       require_syntax_identifier_(false),
188       stop_after_syntax_identifier_(false) {
189 }
190 
~Parser()191 Parser::~Parser() {}
192 
193 // ===================================================================
194 
LookingAt(const char * text)195 inline bool Parser::LookingAt(const char* text) {
196   return input_->current().text == text;
197 }
198 
LookingAtType(io::Tokenizer::TokenType token_type)199 inline bool Parser::LookingAtType(io::Tokenizer::TokenType token_type) {
200   return input_->current().type == token_type;
201 }
202 
AtEnd()203 inline bool Parser::AtEnd() { return LookingAtType(io::Tokenizer::TYPE_END); }
204 
TryConsume(const char * text)205 bool Parser::TryConsume(const char* text) {
206   if (LookingAt(text)) {
207     input_->Next();
208     return true;
209   } else {
210     return false;
211   }
212 }
213 
Consume(const char * text,const char * error)214 bool Parser::Consume(const char* text, const char* error) {
215   if (TryConsume(text)) {
216     return true;
217   } else {
218     AddError(error);
219     return false;
220   }
221 }
222 
Consume(const char * text)223 bool Parser::Consume(const char* text) {
224   if (TryConsume(text)) {
225     return true;
226   } else {
227     AddError("Expected \"" + string(text) + "\".");
228     return false;
229   }
230 }
231 
ConsumeIdentifier(std::string * output,const char * error)232 bool Parser::ConsumeIdentifier(std::string* output, const char* error) {
233   if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
234     *output = input_->current().text;
235     input_->Next();
236     return true;
237   } else {
238     AddError(error);
239     return false;
240   }
241 }
242 
ConsumeInteger(int * output,const char * error)243 bool Parser::ConsumeInteger(int* output, const char* error) {
244   if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
245     uint64 value = 0;
246     if (!io::Tokenizer::ParseInteger(input_->current().text, kint32max,
247                                      &value)) {
248       AddError("Integer out of range.");
249       // We still return true because we did, in fact, parse an integer.
250     }
251     *output = value;
252     input_->Next();
253     return true;
254   } else {
255     AddError(error);
256     return false;
257   }
258 }
259 
ConsumeSignedInteger(int * output,const char * error)260 bool Parser::ConsumeSignedInteger(int* output, const char* error) {
261   bool is_negative = false;
262   uint64 max_value = kint32max;
263   if (TryConsume("-")) {
264     is_negative = true;
265     max_value += 1;
266   }
267   uint64 value = 0;
268   DO(ConsumeInteger64(max_value, &value, error));
269   if (is_negative) value *= -1;
270   *output = value;
271   return true;
272 }
273 
ConsumeInteger64(uint64 max_value,uint64 * output,const char * error)274 bool Parser::ConsumeInteger64(uint64 max_value, uint64* output,
275                               const char* error) {
276   if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
277     if (!io::Tokenizer::ParseInteger(input_->current().text, max_value,
278                                      output)) {
279       AddError("Integer out of range.");
280       // We still return true because we did, in fact, parse an integer.
281       *output = 0;
282     }
283     input_->Next();
284     return true;
285   } else {
286     AddError(error);
287     return false;
288   }
289 }
290 
ConsumeNumber(double * output,const char * error)291 bool Parser::ConsumeNumber(double* output, const char* error) {
292   if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
293     *output = io::Tokenizer::ParseFloat(input_->current().text);
294     input_->Next();
295     return true;
296   } else if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
297     // Also accept integers.
298     uint64 value = 0;
299     if (!io::Tokenizer::ParseInteger(input_->current().text, kuint64max,
300                                      &value)) {
301       AddError("Integer out of range.");
302       // We still return true because we did, in fact, parse a number.
303     }
304     *output = value;
305     input_->Next();
306     return true;
307   } else if (LookingAt("inf")) {
308     *output = std::numeric_limits<double>::infinity();
309     input_->Next();
310     return true;
311   } else if (LookingAt("nan")) {
312     *output = std::numeric_limits<double>::quiet_NaN();
313     input_->Next();
314     return true;
315   } else {
316     AddError(error);
317     return false;
318   }
319 }
320 
ConsumeString(std::string * output,const char * error)321 bool Parser::ConsumeString(std::string* output, const char* error) {
322   if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
323     io::Tokenizer::ParseString(input_->current().text, output);
324     input_->Next();
325     // Allow C++ like concatenation of adjacent string tokens.
326     while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
327       io::Tokenizer::ParseStringAppend(input_->current().text, output);
328       input_->Next();
329     }
330     return true;
331   } else {
332     AddError(error);
333     return false;
334   }
335 }
336 
TryConsumeEndOfDeclaration(const char * text,const LocationRecorder * location)337 bool Parser::TryConsumeEndOfDeclaration(const char* text,
338                                         const LocationRecorder* location) {
339   if (LookingAt(text)) {
340     std::string leading, trailing;
341     std::vector<std::string> detached;
342     input_->NextWithComments(&trailing, &detached, &leading);
343 
344     // Save the leading comments for next time, and recall the leading comments
345     // from last time.
346     leading.swap(upcoming_doc_comments_);
347 
348     if (location != NULL) {
349       upcoming_detached_comments_.swap(detached);
350       location->AttachComments(&leading, &trailing, &detached);
351     } else if (strcmp(text, "}") == 0) {
352       // If the current location is null and we are finishing the current scope,
353       // drop pending upcoming detached comments.
354       upcoming_detached_comments_.swap(detached);
355     } else {
356       // Otherwise, append the new detached comments to the existing upcoming
357       // detached comments.
358       upcoming_detached_comments_.insert(upcoming_detached_comments_.end(),
359                                          detached.begin(), detached.end());
360     }
361 
362     return true;
363   } else {
364     return false;
365   }
366 }
367 
ConsumeEndOfDeclaration(const char * text,const LocationRecorder * location)368 bool Parser::ConsumeEndOfDeclaration(const char* text,
369                                      const LocationRecorder* location) {
370   if (TryConsumeEndOfDeclaration(text, location)) {
371     return true;
372   } else {
373     AddError("Expected \"" + string(text) + "\".");
374     return false;
375   }
376 }
377 
378 // -------------------------------------------------------------------
379 
AddError(int line,int column,const std::string & error)380 void Parser::AddError(int line, int column, const std::string& error) {
381   if (error_collector_ != NULL) {
382     error_collector_->AddError(line, column, error);
383   }
384   had_errors_ = true;
385 }
386 
AddError(const std::string & error)387 void Parser::AddError(const std::string& error) {
388   AddError(input_->current().line, input_->current().column, error);
389 }
390 
AddWarning(const string & warning)391 void Parser::AddWarning(const string& warning) {
392   if (error_collector_ != nullptr) {
393     error_collector_->AddWarning(input_->current().line,
394                                  input_->current().column, warning);
395   }
396 }
397 
398 // -------------------------------------------------------------------
399 
LocationRecorder(Parser * parser)400 Parser::LocationRecorder::LocationRecorder(Parser* parser)
401     : parser_(parser),
402       source_code_info_(parser->source_code_info_),
403       location_(parser_->source_code_info_->add_location()) {
404   location_->add_span(parser_->input_->current().line);
405   location_->add_span(parser_->input_->current().column);
406 }
407 
LocationRecorder(const LocationRecorder & parent)408 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent) {
409   Init(parent, parent.source_code_info_);
410 }
411 
LocationRecorder(const LocationRecorder & parent,int path1,SourceCodeInfo * source_code_info)412 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
413                                            int path1,
414                                            SourceCodeInfo* source_code_info) {
415   Init(parent, source_code_info);
416   AddPath(path1);
417 }
418 
LocationRecorder(const LocationRecorder & parent,int path1)419 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
420                                            int path1) {
421   Init(parent, parent.source_code_info_);
422   AddPath(path1);
423 }
424 
LocationRecorder(const LocationRecorder & parent,int path1,int path2)425 Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
426                                            int path1, int path2) {
427   Init(parent, parent.source_code_info_);
428   AddPath(path1);
429   AddPath(path2);
430 }
431 
Init(const LocationRecorder & parent,SourceCodeInfo * source_code_info)432 void Parser::LocationRecorder::Init(const LocationRecorder& parent,
433                                     SourceCodeInfo* source_code_info) {
434   parser_ = parent.parser_;
435   source_code_info_ = source_code_info;
436 
437   location_ = source_code_info_->add_location();
438   location_->mutable_path()->CopyFrom(parent.location_->path());
439 
440   location_->add_span(parser_->input_->current().line);
441   location_->add_span(parser_->input_->current().column);
442 }
443 
~LocationRecorder()444 Parser::LocationRecorder::~LocationRecorder() {
445   if (location_->span_size() <= 2) {
446     EndAt(parser_->input_->previous());
447   }
448 }
449 
AddPath(int path_component)450 void Parser::LocationRecorder::AddPath(int path_component) {
451   location_->add_path(path_component);
452 }
453 
StartAt(const io::Tokenizer::Token & token)454 void Parser::LocationRecorder::StartAt(const io::Tokenizer::Token& token) {
455   location_->set_span(0, token.line);
456   location_->set_span(1, token.column);
457 }
458 
StartAt(const LocationRecorder & other)459 void Parser::LocationRecorder::StartAt(const LocationRecorder& other) {
460   location_->set_span(0, other.location_->span(0));
461   location_->set_span(1, other.location_->span(1));
462 }
463 
EndAt(const io::Tokenizer::Token & token)464 void Parser::LocationRecorder::EndAt(const io::Tokenizer::Token& token) {
465   if (token.line != location_->span(0)) {
466     location_->add_span(token.line);
467   }
468   location_->add_span(token.end_column);
469 }
470 
RecordLegacyLocation(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location)471 void Parser::LocationRecorder::RecordLegacyLocation(
472     const Message* descriptor,
473     DescriptorPool::ErrorCollector::ErrorLocation location) {
474   if (parser_->source_location_table_ != NULL) {
475     parser_->source_location_table_->Add(
476         descriptor, location, location_->span(0), location_->span(1));
477   }
478 }
479 
RecordLegacyImportLocation(const Message * descriptor,const string & name)480 void Parser::LocationRecorder::RecordLegacyImportLocation(
481     const Message* descriptor, const string& name) {
482   if (parser_->source_location_table_ != nullptr) {
483     parser_->source_location_table_->AddImport(
484         descriptor, name, location_->span(0), location_->span(1));
485   }
486 }
487 
CurrentPathSize() const488 int Parser::LocationRecorder::CurrentPathSize() const {
489   return location_->path_size();
490 }
491 
AttachComments(std::string * leading,std::string * trailing,std::vector<std::string> * detached_comments) const492 void Parser::LocationRecorder::AttachComments(
493     std::string* leading, std::string* trailing,
494     std::vector<std::string>* detached_comments) const {
495   GOOGLE_CHECK(!location_->has_leading_comments());
496   GOOGLE_CHECK(!location_->has_trailing_comments());
497 
498   if (!leading->empty()) {
499     location_->mutable_leading_comments()->swap(*leading);
500   }
501   if (!trailing->empty()) {
502     location_->mutable_trailing_comments()->swap(*trailing);
503   }
504   for (int i = 0; i < detached_comments->size(); ++i) {
505     location_->add_leading_detached_comments()->swap((*detached_comments)[i]);
506   }
507   detached_comments->clear();
508 }
509 
510 // -------------------------------------------------------------------
511 
SkipStatement()512 void Parser::SkipStatement() {
513   while (true) {
514     if (AtEnd()) {
515       return;
516     } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) {
517       if (TryConsumeEndOfDeclaration(";", NULL)) {
518         return;
519       } else if (TryConsume("{")) {
520         SkipRestOfBlock();
521         return;
522       } else if (LookingAt("}")) {
523         return;
524       }
525     }
526     input_->Next();
527   }
528 }
529 
SkipRestOfBlock()530 void Parser::SkipRestOfBlock() {
531   while (true) {
532     if (AtEnd()) {
533       return;
534     } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) {
535       if (TryConsumeEndOfDeclaration("}", NULL)) {
536         return;
537       } else if (TryConsume("{")) {
538         SkipRestOfBlock();
539       }
540     }
541     input_->Next();
542   }
543 }
544 
545 // ===================================================================
546 
ValidateEnum(const EnumDescriptorProto * proto)547 bool Parser::ValidateEnum(const EnumDescriptorProto* proto) {
548   bool has_allow_alias = false;
549   bool allow_alias = false;
550 
551   for (int i = 0; i < proto->options().uninterpreted_option_size(); i++) {
552     const UninterpretedOption option = proto->options().uninterpreted_option(i);
553     if (option.name_size() > 1) {
554       continue;
555     }
556     if (!option.name(0).is_extension() &&
557         option.name(0).name_part() == "allow_alias") {
558       has_allow_alias = true;
559       if (option.identifier_value() == "true") {
560         allow_alias = true;
561       }
562       break;
563     }
564   }
565 
566   if (has_allow_alias && !allow_alias) {
567     std::string error =
568         "\"" + proto->name() +
569         "\" declares 'option allow_alias = false;' which has no effect. "
570         "Please remove the declaration.";
571     // This needlessly clutters declarations with nops.
572     AddError(error);
573     return false;
574   }
575 
576   std::set<int> used_values;
577   bool has_duplicates = false;
578   for (int i = 0; i < proto->value_size(); ++i) {
579     const EnumValueDescriptorProto& enum_value = proto->value(i);
580     if (used_values.find(enum_value.number()) != used_values.end()) {
581       has_duplicates = true;
582       break;
583     } else {
584       used_values.insert(enum_value.number());
585     }
586   }
587   if (allow_alias && !has_duplicates) {
588     std::string error =
589         "\"" + proto->name() +
590         "\" declares support for enum aliases but no enum values share field "
591         "numbers. Please remove the unnecessary 'option allow_alias = true;' "
592         "declaration.";
593     // Generate an error if an enum declares support for duplicate enum values
594     // and does not use it protect future authors.
595     AddError(error);
596     return false;
597   }
598 
599   // Enforce that enum constants must be UPPER_CASE except in case of
600   // enum_alias.
601   if (!allow_alias) {
602     for (const auto& enum_value : proto->value()) {
603       if (!IsUpperUnderscore(enum_value.name())) {
604         AddWarning(
605             "Enum constant should be in UPPER_CASE. Found: " +
606             enum_value.name() +
607             ". See https://developers.google.com/protocol-buffers/docs/style");
608       }
609     }
610   }
611 
612   return true;
613 }
614 
Parse(io::Tokenizer * input,FileDescriptorProto * file)615 bool Parser::Parse(io::Tokenizer* input, FileDescriptorProto* file) {
616   input_ = input;
617   had_errors_ = false;
618   syntax_identifier_.clear();
619 
620   // Note that |file| could be NULL at this point if
621   // stop_after_syntax_identifier_ is true.  So, we conservatively allocate
622   // SourceCodeInfo on the stack, then swap it into the FileDescriptorProto
623   // later on.
624   SourceCodeInfo source_code_info;
625   source_code_info_ = &source_code_info;
626 
627   if (LookingAtType(io::Tokenizer::TYPE_START)) {
628     // Advance to first token.
629     input_->NextWithComments(NULL, &upcoming_detached_comments_,
630                              &upcoming_doc_comments_);
631   }
632 
633   {
634     LocationRecorder root_location(this);
635     root_location.RecordLegacyLocation(file,
636                                        DescriptorPool::ErrorCollector::OTHER);
637 
638     if (require_syntax_identifier_ || LookingAt("syntax")) {
639       if (!ParseSyntaxIdentifier(root_location)) {
640         // Don't attempt to parse the file if we didn't recognize the syntax
641         // identifier.
642         return false;
643       }
644       // Store the syntax into the file.
645       if (file != NULL) file->set_syntax(syntax_identifier_);
646     } else if (!stop_after_syntax_identifier_) {
647       GOOGLE_LOG(WARNING) << "No syntax specified for the proto file: " << file->name()
648                    << ". Please use 'syntax = \"proto2\";' "
649                    << "or 'syntax = \"proto3\";' to specify a syntax "
650                    << "version. (Defaulted to proto2 syntax.)";
651       syntax_identifier_ = "proto2";
652     }
653 
654     if (stop_after_syntax_identifier_) return !had_errors_;
655 
656     // Repeatedly parse statements until we reach the end of the file.
657     while (!AtEnd()) {
658       if (!ParseTopLevelStatement(file, root_location)) {
659         // This statement failed to parse.  Skip it, but keep looping to parse
660         // other statements.
661         SkipStatement();
662 
663         if (LookingAt("}")) {
664           AddError("Unmatched \"}\".");
665           input_->NextWithComments(NULL, &upcoming_detached_comments_,
666                                    &upcoming_doc_comments_);
667         }
668       }
669     }
670   }
671 
672   input_ = NULL;
673   source_code_info_ = NULL;
674   assert(file != NULL);
675   source_code_info.Swap(file->mutable_source_code_info());
676   return !had_errors_;
677 }
678 
ParseSyntaxIdentifier(const LocationRecorder & parent)679 bool Parser::ParseSyntaxIdentifier(const LocationRecorder& parent) {
680   LocationRecorder syntax_location(parent,
681                                    FileDescriptorProto::kSyntaxFieldNumber);
682   DO(Consume(
683       "syntax",
684       "File must begin with a syntax statement, e.g. 'syntax = \"proto2\";'."));
685   DO(Consume("="));
686   io::Tokenizer::Token syntax_token = input_->current();
687   std::string syntax;
688   DO(ConsumeString(&syntax, "Expected syntax identifier."));
689   DO(ConsumeEndOfDeclaration(";", &syntax_location));
690 
691   syntax_identifier_ = syntax;
692 
693   if (syntax != "proto2" && syntax != "proto3" &&
694       !stop_after_syntax_identifier_) {
695     AddError(syntax_token.line, syntax_token.column,
696              "Unrecognized syntax identifier \"" + syntax +
697                  "\".  This parser "
698                  "only recognizes \"proto2\" and \"proto3\".");
699     return false;
700   }
701 
702   return true;
703 }
704 
ParseTopLevelStatement(FileDescriptorProto * file,const LocationRecorder & root_location)705 bool Parser::ParseTopLevelStatement(FileDescriptorProto* file,
706                                     const LocationRecorder& root_location) {
707   if (TryConsumeEndOfDeclaration(";", NULL)) {
708     // empty statement; ignore
709     return true;
710   } else if (LookingAt("message")) {
711     LocationRecorder location(root_location,
712                               FileDescriptorProto::kMessageTypeFieldNumber,
713                               file->message_type_size());
714     return ParseMessageDefinition(file->add_message_type(), location, file);
715   } else if (LookingAt("enum")) {
716     LocationRecorder location(root_location,
717                               FileDescriptorProto::kEnumTypeFieldNumber,
718                               file->enum_type_size());
719     return ParseEnumDefinition(file->add_enum_type(), location, file);
720   } else if (LookingAt("service")) {
721     LocationRecorder location(root_location,
722                               FileDescriptorProto::kServiceFieldNumber,
723                               file->service_size());
724     return ParseServiceDefinition(file->add_service(), location, file);
725   } else if (LookingAt("extend")) {
726     LocationRecorder location(root_location,
727                               FileDescriptorProto::kExtensionFieldNumber);
728     return ParseExtend(
729         file->mutable_extension(), file->mutable_message_type(), root_location,
730         FileDescriptorProto::kMessageTypeFieldNumber, location, file);
731   } else if (LookingAt("import")) {
732     return ParseImport(file->mutable_dependency(),
733                        file->mutable_public_dependency(),
734                        file->mutable_weak_dependency(), root_location, file);
735   } else if (LookingAt("package")) {
736     return ParsePackage(file, root_location, file);
737   } else if (LookingAt("option")) {
738     LocationRecorder location(root_location,
739                               FileDescriptorProto::kOptionsFieldNumber);
740     return ParseOption(file->mutable_options(), location, file,
741                        OPTION_STATEMENT);
742   } else {
743     AddError("Expected top-level statement (e.g. \"message\").");
744     return false;
745   }
746 }
747 
748 // -------------------------------------------------------------------
749 // Messages
750 
ParseMessageDefinition(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)751 bool Parser::ParseMessageDefinition(
752     DescriptorProto* message, const LocationRecorder& message_location,
753     const FileDescriptorProto* containing_file) {
754   DO(Consume("message"));
755   {
756     LocationRecorder location(message_location,
757                               DescriptorProto::kNameFieldNumber);
758     location.RecordLegacyLocation(message,
759                                   DescriptorPool::ErrorCollector::NAME);
760     DO(ConsumeIdentifier(message->mutable_name(), "Expected message name."));
761     if (!IsUpperCamelCase(message->name())) {
762       AddWarning(
763           "Message name should be in UpperCamelCase. Found: " +
764           message->name() +
765           ". See https://developers.google.com/protocol-buffers/docs/style");
766     }
767   }
768   DO(ParseMessageBlock(message, message_location, containing_file));
769   return true;
770 }
771 
772 namespace {
773 
// Placeholder range end. AdjustExtensionRangesWithMaxEndNumber() and
// AdjustReservedRangesWithMaxEndNumber() replace any range end equal to this
// value with the real maximum.
const int kMaxRangeSentinel = -1;
775 
IsMessageSetWireFormatMessage(const DescriptorProto & message)776 bool IsMessageSetWireFormatMessage(const DescriptorProto& message) {
777   const MessageOptions& options = message.options();
778   for (int i = 0; i < options.uninterpreted_option_size(); ++i) {
779     const UninterpretedOption& uninterpreted = options.uninterpreted_option(i);
780     if (uninterpreted.name_size() == 1 &&
781         uninterpreted.name(0).name_part() == "message_set_wire_format" &&
782         uninterpreted.identifier_value() == "true") {
783       return true;
784     }
785   }
786   return false;
787 }
788 
789 // Modifies any extension ranges that specified 'max' as the end of the
790 // extension range, and sets them to the type-specific maximum. The actual max
791 // tag number can only be determined after all options have been parsed.
AdjustExtensionRangesWithMaxEndNumber(DescriptorProto * message)792 void AdjustExtensionRangesWithMaxEndNumber(DescriptorProto* message) {
793   const bool is_message_set = IsMessageSetWireFormatMessage(*message);
794   const int max_extension_number =
795       is_message_set ? kint32max : FieldDescriptor::kMaxNumber + 1;
796   for (int i = 0; i < message->extension_range_size(); ++i) {
797     if (message->extension_range(i).end() == kMaxRangeSentinel) {
798       message->mutable_extension_range(i)->set_end(max_extension_number);
799     }
800   }
801 }
802 
803 // Modifies any reserved ranges that specified 'max' as the end of the
804 // reserved range, and sets them to the type-specific maximum. The actual max
805 // tag number can only be determined after all options have been parsed.
AdjustReservedRangesWithMaxEndNumber(DescriptorProto * message)806 void AdjustReservedRangesWithMaxEndNumber(DescriptorProto* message) {
807   const bool is_message_set = IsMessageSetWireFormatMessage(*message);
808   const int max_field_number =
809       is_message_set ? kint32max : FieldDescriptor::kMaxNumber + 1;
810   for (int i = 0; i < message->reserved_range_size(); ++i) {
811     if (message->reserved_range(i).end() == kMaxRangeSentinel) {
812       message->mutable_reserved_range(i)->set_end(max_field_number);
813     }
814   }
815 }
816 
817 }  // namespace
818 
ParseMessageBlock(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)819 bool Parser::ParseMessageBlock(DescriptorProto* message,
820                                const LocationRecorder& message_location,
821                                const FileDescriptorProto* containing_file) {
822   DO(ConsumeEndOfDeclaration("{", &message_location));
823 
824   while (!TryConsumeEndOfDeclaration("}", NULL)) {
825     if (AtEnd()) {
826       AddError("Reached end of input in message definition (missing '}').");
827       return false;
828     }
829 
830     if (!ParseMessageStatement(message, message_location, containing_file)) {
831       // This statement failed to parse.  Skip it, but keep looping to parse
832       // other statements.
833       SkipStatement();
834     }
835   }
836 
837   if (message->extension_range_size() > 0) {
838     AdjustExtensionRangesWithMaxEndNumber(message);
839   }
840   if (message->reserved_range_size() > 0) {
841     AdjustReservedRangesWithMaxEndNumber(message);
842   }
843   return true;
844 }
845 
ParseMessageStatement(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)846 bool Parser::ParseMessageStatement(DescriptorProto* message,
847                                    const LocationRecorder& message_location,
848                                    const FileDescriptorProto* containing_file) {
849   if (TryConsumeEndOfDeclaration(";", NULL)) {
850     // empty statement; ignore
851     return true;
852   } else if (LookingAt("message")) {
853     LocationRecorder location(message_location,
854                               DescriptorProto::kNestedTypeFieldNumber,
855                               message->nested_type_size());
856     return ParseMessageDefinition(message->add_nested_type(), location,
857                                   containing_file);
858   } else if (LookingAt("enum")) {
859     LocationRecorder location(message_location,
860                               DescriptorProto::kEnumTypeFieldNumber,
861                               message->enum_type_size());
862     return ParseEnumDefinition(message->add_enum_type(), location,
863                                containing_file);
864   } else if (LookingAt("extensions")) {
865     LocationRecorder location(message_location,
866                               DescriptorProto::kExtensionRangeFieldNumber);
867     return ParseExtensions(message, location, containing_file);
868   } else if (LookingAt("reserved")) {
869     return ParseReserved(message, message_location);
870   } else if (LookingAt("extend")) {
871     LocationRecorder location(message_location,
872                               DescriptorProto::kExtensionFieldNumber);
873     return ParseExtend(message->mutable_extension(),
874                        message->mutable_nested_type(), message_location,
875                        DescriptorProto::kNestedTypeFieldNumber, location,
876                        containing_file);
877   } else if (LookingAt("option")) {
878     LocationRecorder location(message_location,
879                               DescriptorProto::kOptionsFieldNumber);
880     return ParseOption(message->mutable_options(), location, containing_file,
881                        OPTION_STATEMENT);
882   } else if (LookingAt("oneof")) {
883     int oneof_index = message->oneof_decl_size();
884     LocationRecorder oneof_location(
885         message_location, DescriptorProto::kOneofDeclFieldNumber, oneof_index);
886 
887     return ParseOneof(message->add_oneof_decl(), message, oneof_index,
888                       oneof_location, message_location, containing_file);
889   } else {
890     LocationRecorder location(message_location,
891                               DescriptorProto::kFieldFieldNumber,
892                               message->field_size());
893     return ParseMessageField(
894         message->add_field(), message->mutable_nested_type(), message_location,
895         DescriptorProto::kNestedTypeFieldNumber, location, containing_file);
896   }
897 }
898 
ParseMessageField(FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)899 bool Parser::ParseMessageField(FieldDescriptorProto* field,
900                                RepeatedPtrField<DescriptorProto>* messages,
901                                const LocationRecorder& parent_location,
902                                int location_field_number_for_nested_type,
903                                const LocationRecorder& field_location,
904                                const FileDescriptorProto* containing_file) {
905   {
906     LocationRecorder location(field_location,
907                               FieldDescriptorProto::kLabelFieldNumber);
908     FieldDescriptorProto::Label label;
909     if (ParseLabel(&label, containing_file)) {
910       field->set_label(label);
911       if (label == FieldDescriptorProto::LABEL_OPTIONAL &&
912           syntax_identifier_ == "proto3") {
913         AddError(
914             "Explicit 'optional' labels are disallowed in the Proto3 syntax. "
915             "To define 'optional' fields in Proto3, simply remove the "
916             "'optional' label, as fields are 'optional' by default.");
917       }
918     }
919   }
920 
921   return ParseMessageFieldNoLabel(field, messages, parent_location,
922                                   location_field_number_for_nested_type,
923                                   field_location, containing_file);
924 }
925 
// Parses the remainder of a field declaration after any label: the type
// (including the "map<K, V>" form), the name, "=", the field number, the
// optional bracketed options, and then either a group body or the closing
// ";".
//
// |messages| receives any message type the field implicitly declares: the
// group's message, or the entry type produced by GenerateMapEntry for a map
// field. |location_field_number_for_nested_type| is the path component added
// to |parent_location|, ahead of the index of a newly added group message.
//
// Returns false on the explicit error paths below. A missing label on a
// non-map field is reported, but parsing continues with LABEL_OPTIONAL.
// NOTE(review): the DO(...) macro is defined outside this excerpt; it is
// presumably what aborts this function when a sub-parse fails -- confirm.
bool Parser::ParseMessageFieldNoLabel(
    FieldDescriptorProto* field, RepeatedPtrField<DescriptorProto>* messages,
    const LocationRecorder& parent_location,
    int location_field_number_for_nested_type,
    const LocationRecorder& field_location,
    const FileDescriptorProto* containing_file) {
  MapField map_field;
  // Parse type.
  {
    LocationRecorder location(field_location);  // add path later
    location.RecordLegacyLocation(field, DescriptorPool::ErrorCollector::TYPE);

    bool type_parsed = false;
    FieldDescriptorProto::Type type = FieldDescriptorProto::TYPE_INT32;
    std::string type_name;

    // Special case map field. We only treat the field as a map field if the
    // field type name starts with the word "map" with a following "<".
    if (TryConsume("map")) {
      if (LookingAt("<")) {
        map_field.is_map_field = true;
      } else {
        // False positive
        type_parsed = true;
        type_name = "map";
      }
    }
    if (map_field.is_map_field) {
      // Map fields cannot live in a oneof, carry a label, or be extensions.
      if (field->has_oneof_index()) {
        AddError("Map fields are not allowed in oneofs.");
        return false;
      }
      if (field->has_label()) {
        AddError(
            "Field labels (required/optional/repeated) are not allowed on "
            "map fields.");
        return false;
      }
      if (field->has_extendee()) {
        AddError("Map fields are not allowed to be extensions.");
        return false;
      }
      // A map field is recorded as a repeated field; its type name is filled
      // in later by GenerateMapEntry.
      field->set_label(FieldDescriptorProto::LABEL_REPEATED);
      DO(Consume("<"));
      DO(ParseType(&map_field.key_type, &map_field.key_type_name));
      DO(Consume(","));
      DO(ParseType(&map_field.value_type, &map_field.value_type_name));
      DO(Consume(">"));
      // Defer setting of the type name of the map field until the
      // field name is parsed. Add the source location though.
      location.AddPath(FieldDescriptorProto::kTypeNameFieldNumber);
    } else {
      // Handle the case where no explicit label is given for a non-map field.
      if (!field->has_label() && DefaultToOptionalFields()) {
        field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
      }
      if (!field->has_label()) {
        AddError("Expected \"required\", \"optional\", or \"repeated\".");
        // We can actually reasonably recover here by just assuming the user
        // forgot the label altogether.
        field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
      }

      // Handle the case where the actual type is a message or enum named "map",
      // which we already consumed in the code above.
      if (!type_parsed) {
        DO(ParseType(&type, &type_name));
      }
      // An empty type_name means a built-in type was parsed; otherwise the
      // field refers to a named type and only type_name is recorded.
      if (type_name.empty()) {
        location.AddPath(FieldDescriptorProto::kTypeFieldNumber);
        field->set_type(type);
      } else {
        location.AddPath(FieldDescriptorProto::kTypeNameFieldNumber);
        field->set_type_name(type_name);
      }
    }
  }

  // Parse name and '='.
  // The name token is captured before it is consumed so that, for groups, the
  // group name and type_name locations can be pinned to it further down.
  io::Tokenizer::Token name_token = input_->current();
  {
    LocationRecorder location(field_location,
                              FieldDescriptorProto::kNameFieldNumber);
    location.RecordLegacyLocation(field, DescriptorPool::ErrorCollector::NAME);
    DO(ConsumeIdentifier(field->mutable_name(), "Expected field name."));

    // Style problems in the name only produce warnings.
    if (!IsLowerUnderscore(field->name())) {
      AddWarning(
          "Field name should be lowercase. Found: " + field->name() +
          ". See: https://developers.google.com/protocol-buffers/docs/style");
    }
    if (IsNumberFollowUnderscore(field->name())) {
      AddWarning(
          "Number should not come right after an underscore. Found: " +
          field->name() +
          ". See: https://developers.google.com/protocol-buffers/docs/style");
    }
  }
  DO(Consume("=", "Missing field number."));

  // Parse field number.
  {
    LocationRecorder location(field_location,
                              FieldDescriptorProto::kNumberFieldNumber);
    location.RecordLegacyLocation(field,
                                  DescriptorPool::ErrorCollector::NUMBER);
    int number;
    DO(ConsumeInteger(&number, "Expected field number."));
    field->set_number(number);
  }

  // Parse options.
  DO(ParseFieldOptions(field, field_location, containing_file));

  // Deal with groups.
  if (field->has_type() && field->type() == FieldDescriptorProto::TYPE_GROUP) {
    // Awkward:  Since a group declares both a message type and a field, we
    //   have to create overlapping locations.
    LocationRecorder group_location(parent_location);
    group_location.StartAt(field_location);
    group_location.AddPath(location_field_number_for_nested_type);
    // The index is taken before messages->Add() below grows the list.
    group_location.AddPath(messages->size());

    DescriptorProto* group = messages->Add();
    group->set_name(field->name());

    // Record name location to match the field name's location.
    {
      LocationRecorder location(group_location,
                                DescriptorProto::kNameFieldNumber);
      location.StartAt(name_token);
      location.EndAt(name_token);
      location.RecordLegacyLocation(group,
                                    DescriptorPool::ErrorCollector::NAME);
    }

    // The field's type_name also comes from the name.  Confusing!
    {
      LocationRecorder location(field_location,
                                FieldDescriptorProto::kTypeNameFieldNumber);
      location.StartAt(name_token);
      location.EndAt(name_token);
    }

    // As a hack for backwards-compatibility, we force the group name to start
    // with a capital letter and lower-case the field name.  New code should
    // not use groups; it should use nested messages.
    if (group->name()[0] < 'A' || 'Z' < group->name()[0]) {
      AddError(name_token.line, name_token.column,
               "Group names must start with a capital letter.");
    }
    LowerString(field->mutable_name());

    // The group message keeps the name as written; the field refers to it.
    field->set_type_name(group->name());
    if (LookingAt("{")) {
      DO(ParseMessageBlock(group, group_location, containing_file));
    } else {
      AddError("Missing group body.");
      return false;
    }
  } else {
    DO(ConsumeEndOfDeclaration(";", &field_location));
  }

  // Create a map entry type if this is a map field.
  if (map_field.is_map_field) {
    GenerateMapEntry(map_field, field, messages);
  }

  return true;
}
1097 
GenerateMapEntry(const MapField & map_field,FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages)1098 void Parser::GenerateMapEntry(const MapField& map_field,
1099                               FieldDescriptorProto* field,
1100                               RepeatedPtrField<DescriptorProto>* messages) {
1101   DescriptorProto* entry = messages->Add();
1102   std::string entry_name = MapEntryName(field->name());
1103   field->set_type_name(entry_name);
1104   entry->set_name(entry_name);
1105   entry->mutable_options()->set_map_entry(true);
1106   FieldDescriptorProto* key_field = entry->add_field();
1107   key_field->set_name("key");
1108   key_field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1109   key_field->set_number(1);
1110   if (map_field.key_type_name.empty()) {
1111     key_field->set_type(map_field.key_type);
1112   } else {
1113     key_field->set_type_name(map_field.key_type_name);
1114   }
1115   FieldDescriptorProto* value_field = entry->add_field();
1116   value_field->set_name("value");
1117   value_field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1118   value_field->set_number(2);
1119   if (map_field.value_type_name.empty()) {
1120     value_field->set_type(map_field.value_type);
1121   } else {
1122     value_field->set_type_name(map_field.value_type_name);
1123   }
1124   // Propagate the "enforce_utf8" option to key and value fields if they
1125   // are strings. This helps simplify the implementation of code generators
1126   // and also reflection-based parsing code.
1127   //
1128   // The following definition:
1129   //   message Foo {
1130   //     map<string, string> value = 1 [enforce_utf8 = false];
1131   //   }
1132   // will be interpreted as:
1133   //   message Foo {
1134   //     message ValueEntry {
1135   //       option map_entry = true;
1136   //       string key = 1 [enforce_utf8 = false];
1137   //       string value = 2 [enforce_utf8 = false];
1138   //     }
1139   //     repeated ValueEntry value = 1 [enforce_utf8 = false];
1140   //  }
1141   //
1142   // TODO(xiaofeng): Remove this when the "enforce_utf8" option is removed
1143   // from protocol compiler.
1144   for (int i = 0; i < field->options().uninterpreted_option_size(); ++i) {
1145     const UninterpretedOption& option =
1146         field->options().uninterpreted_option(i);
1147     if (option.name_size() == 1 &&
1148         option.name(0).name_part() == "enforce_utf8" &&
1149         !option.name(0).is_extension()) {
1150       if (key_field->type() == FieldDescriptorProto::TYPE_STRING) {
1151         key_field->mutable_options()->add_uninterpreted_option()->CopyFrom(
1152             option);
1153       }
1154       if (value_field->type() == FieldDescriptorProto::TYPE_STRING) {
1155         value_field->mutable_options()->add_uninterpreted_option()->CopyFrom(
1156             option);
1157       }
1158     }
1159   }
1160 }
1161 
ParseFieldOptions(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1162 bool Parser::ParseFieldOptions(FieldDescriptorProto* field,
1163                                const LocationRecorder& field_location,
1164                                const FileDescriptorProto* containing_file) {
1165   if (!LookingAt("[")) return true;
1166 
1167   LocationRecorder location(field_location,
1168                             FieldDescriptorProto::kOptionsFieldNumber);
1169 
1170   DO(Consume("["));
1171 
1172   // Parse field options.
1173   do {
1174     if (LookingAt("default")) {
1175       // We intentionally pass field_location rather than location here, since
1176       // the default value is not actually an option.
1177       DO(ParseDefaultAssignment(field, field_location, containing_file));
1178     } else if (LookingAt("json_name")) {
1179       // Like default value, this "json_name" is not an actual option.
1180       DO(ParseJsonName(field, field_location, containing_file));
1181     } else {
1182       DO(ParseOption(field->mutable_options(), location, containing_file,
1183                      OPTION_ASSIGNMENT));
1184     }
1185   } while (TryConsume(","));
1186 
1187   DO(Consume("]"));
1188   return true;
1189 }
1190 
ParseDefaultAssignment(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1191 bool Parser::ParseDefaultAssignment(
1192     FieldDescriptorProto* field, const LocationRecorder& field_location,
1193     const FileDescriptorProto* containing_file) {
1194   if (field->has_default_value()) {
1195     AddError("Already set option \"default\".");
1196     field->clear_default_value();
1197   }
1198 
1199   DO(Consume("default"));
1200   DO(Consume("="));
1201 
1202   LocationRecorder location(field_location,
1203                             FieldDescriptorProto::kDefaultValueFieldNumber);
1204   location.RecordLegacyLocation(field,
1205                                 DescriptorPool::ErrorCollector::DEFAULT_VALUE);
1206   std::string* default_value = field->mutable_default_value();
1207 
1208   if (!field->has_type()) {
1209     // The field has a type name, but we don't know if it is a message or an
1210     // enum yet. (If it were a primitive type, |field| would have a type set
1211     // already.) In this case, simply take the current string as the default
1212     // value; we will catch the error later if it is not a valid enum value.
1213     // (N.B. that we do not check whether the current token is an identifier:
1214     // doing so throws strange errors when the user mistypes a primitive
1215     // typename and we assume it's an enum. E.g.: "optional int foo = 1 [default
1216     // = 42]". In such a case the fundamental error is really that "int" is not
1217     // a type, not that "42" is not an identifier. See b/12533582.)
1218     *default_value = input_->current().text;
1219     input_->Next();
1220     return true;
1221   }
1222 
1223   switch (field->type()) {
1224     case FieldDescriptorProto::TYPE_INT32:
1225     case FieldDescriptorProto::TYPE_INT64:
1226     case FieldDescriptorProto::TYPE_SINT32:
1227     case FieldDescriptorProto::TYPE_SINT64:
1228     case FieldDescriptorProto::TYPE_SFIXED32:
1229     case FieldDescriptorProto::TYPE_SFIXED64: {
1230       uint64 max_value = kint64max;
1231       if (field->type() == FieldDescriptorProto::TYPE_INT32 ||
1232           field->type() == FieldDescriptorProto::TYPE_SINT32 ||
1233           field->type() == FieldDescriptorProto::TYPE_SFIXED32) {
1234         max_value = kint32max;
1235       }
1236 
1237       // These types can be negative.
1238       if (TryConsume("-")) {
1239         default_value->append("-");
1240         // Two's complement always has one more negative value than positive.
1241         ++max_value;
1242       }
1243       // Parse the integer to verify that it is not out-of-range.
1244       uint64 value;
1245       DO(ConsumeInteger64(max_value, &value,
1246                           "Expected integer for field default value."));
1247       // And stringify it again.
1248       default_value->append(StrCat(value));
1249       break;
1250     }
1251 
1252     case FieldDescriptorProto::TYPE_UINT32:
1253     case FieldDescriptorProto::TYPE_UINT64:
1254     case FieldDescriptorProto::TYPE_FIXED32:
1255     case FieldDescriptorProto::TYPE_FIXED64: {
1256       uint64 max_value = kuint64max;
1257       if (field->type() == FieldDescriptorProto::TYPE_UINT32 ||
1258           field->type() == FieldDescriptorProto::TYPE_FIXED32) {
1259         max_value = kuint32max;
1260       }
1261 
1262       // Numeric, not negative.
1263       if (TryConsume("-")) {
1264         AddError("Unsigned field can't have negative default value.");
1265       }
1266       // Parse the integer to verify that it is not out-of-range.
1267       uint64 value;
1268       DO(ConsumeInteger64(max_value, &value,
1269                           "Expected integer for field default value."));
1270       // And stringify it again.
1271       default_value->append(StrCat(value));
1272       break;
1273     }
1274 
1275     case FieldDescriptorProto::TYPE_FLOAT:
1276     case FieldDescriptorProto::TYPE_DOUBLE:
1277       // These types can be negative.
1278       if (TryConsume("-")) {
1279         default_value->append("-");
1280       }
1281       // Parse the integer because we have to convert hex integers to decimal
1282       // floats.
1283       double value;
1284       DO(ConsumeNumber(&value, "Expected number."));
1285       // And stringify it again.
1286       default_value->append(SimpleDtoa(value));
1287       break;
1288 
1289     case FieldDescriptorProto::TYPE_BOOL:
1290       if (TryConsume("true")) {
1291         default_value->assign("true");
1292       } else if (TryConsume("false")) {
1293         default_value->assign("false");
1294       } else {
1295         AddError("Expected \"true\" or \"false\".");
1296         return false;
1297       }
1298       break;
1299 
1300     case FieldDescriptorProto::TYPE_STRING:
1301       // Note: When file opton java_string_check_utf8 is true, if a
1302       // non-string representation (eg byte[]) is later supported, it must
1303       // be checked for UTF-8-ness.
1304       DO(ConsumeString(default_value,
1305                        "Expected string for field default "
1306                        "value."));
1307       break;
1308 
1309     case FieldDescriptorProto::TYPE_BYTES:
1310       DO(ConsumeString(default_value, "Expected string."));
1311       *default_value = CEscape(*default_value);
1312       break;
1313 
1314     case FieldDescriptorProto::TYPE_ENUM:
1315       DO(ConsumeIdentifier(default_value,
1316                            "Expected enum identifier for field "
1317                            "default value."));
1318       break;
1319 
1320     case FieldDescriptorProto::TYPE_MESSAGE:
1321     case FieldDescriptorProto::TYPE_GROUP:
1322       AddError("Messages can't have default values.");
1323       return false;
1324   }
1325 
1326   return true;
1327 }
1328 
ParseJsonName(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)1329 bool Parser::ParseJsonName(FieldDescriptorProto* field,
1330                            const LocationRecorder& field_location,
1331                            const FileDescriptorProto* containing_file) {
1332   if (field->has_json_name()) {
1333     AddError("Already set option \"json_name\".");
1334     field->clear_json_name();
1335   }
1336 
1337   LocationRecorder location(field_location,
1338                             FieldDescriptorProto::kJsonNameFieldNumber);
1339   location.RecordLegacyLocation(field,
1340                                 DescriptorPool::ErrorCollector::OPTION_NAME);
1341 
1342   DO(Consume("json_name"));
1343   DO(Consume("="));
1344 
1345   LocationRecorder value_location(location);
1346   value_location.RecordLegacyLocation(
1347       field, DescriptorPool::ErrorCollector::OPTION_VALUE);
1348 
1349   DO(ConsumeString(field->mutable_json_name(),
1350                    "Expected string for JSON name."));
1351   return true;
1352 }
1353 
ParseOptionNamePart(UninterpretedOption * uninterpreted_option,const LocationRecorder & part_location,const FileDescriptorProto * containing_file)1354 bool Parser::ParseOptionNamePart(UninterpretedOption* uninterpreted_option,
1355                                  const LocationRecorder& part_location,
1356                                  const FileDescriptorProto* containing_file) {
1357   UninterpretedOption::NamePart* name = uninterpreted_option->add_name();
1358   std::string identifier;  // We parse identifiers into this string.
1359   if (LookingAt("(")) {    // This is an extension.
1360     DO(Consume("("));
1361 
1362     {
1363       LocationRecorder location(
1364           part_location, UninterpretedOption::NamePart::kNamePartFieldNumber);
1365       // An extension name consists of dot-separated identifiers, and may begin
1366       // with a dot.
1367       if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
1368         DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1369         name->mutable_name_part()->append(identifier);
1370       }
1371       while (LookingAt(".")) {
1372         DO(Consume("."));
1373         name->mutable_name_part()->append(".");
1374         DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1375         name->mutable_name_part()->append(identifier);
1376       }
1377     }
1378 
1379     DO(Consume(")"));
1380     name->set_is_extension(true);
1381   } else {  // This is a regular field.
1382     LocationRecorder location(
1383         part_location, UninterpretedOption::NamePart::kNamePartFieldNumber);
1384     DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1385     name->mutable_name_part()->append(identifier);
1386     name->set_is_extension(false);
1387   }
1388   return true;
1389 }
1390 
ParseUninterpretedBlock(std::string * value)1391 bool Parser::ParseUninterpretedBlock(std::string* value) {
1392   // Note that enclosing braces are not added to *value.
1393   // We do NOT use ConsumeEndOfStatement for this brace because it's delimiting
1394   // an expression, not a block of statements.
1395   DO(Consume("{"));
1396   int brace_depth = 1;
1397   while (!AtEnd()) {
1398     if (LookingAt("{")) {
1399       brace_depth++;
1400     } else if (LookingAt("}")) {
1401       brace_depth--;
1402       if (brace_depth == 0) {
1403         input_->Next();
1404         return true;
1405       }
1406     }
1407     // TODO(sanjay): Interpret line/column numbers to preserve formatting
1408     if (!value->empty()) value->push_back(' ');
1409     value->append(input_->current().text);
1410     input_->Next();
1411   }
1412   AddError("Unexpected end of stream while parsing aggregate value.");
1413   return false;
1414 }
1415 
1416 // We don't interpret the option here. Instead we store it in an
1417 // UninterpretedOption, to be interpreted later.
ParseOption(Message * options,const LocationRecorder & options_location,const FileDescriptorProto * containing_file,OptionStyle style)1418 bool Parser::ParseOption(Message* options,
1419                          const LocationRecorder& options_location,
1420                          const FileDescriptorProto* containing_file,
1421                          OptionStyle style) {
1422   // Create an entry in the uninterpreted_option field.
1423   const FieldDescriptor* uninterpreted_option_field =
1424       options->GetDescriptor()->FindFieldByName("uninterpreted_option");
1425   GOOGLE_CHECK(uninterpreted_option_field != NULL)
1426       << "No field named \"uninterpreted_option\" in the Options proto.";
1427 
1428   const Reflection* reflection = options->GetReflection();
1429 
1430   LocationRecorder location(
1431       options_location, uninterpreted_option_field->number(),
1432       reflection->FieldSize(*options, uninterpreted_option_field));
1433 
1434   if (style == OPTION_STATEMENT) {
1435     DO(Consume("option"));
1436   }
1437 
1438   UninterpretedOption* uninterpreted_option =
1439       down_cast<UninterpretedOption*>(options->GetReflection()->AddMessage(
1440           options, uninterpreted_option_field));
1441 
1442   // Parse dot-separated name.
1443   {
1444     LocationRecorder name_location(location,
1445                                    UninterpretedOption::kNameFieldNumber);
1446     name_location.RecordLegacyLocation(
1447         uninterpreted_option, DescriptorPool::ErrorCollector::OPTION_NAME);
1448 
1449     {
1450       LocationRecorder part_location(name_location,
1451                                      uninterpreted_option->name_size());
1452       DO(ParseOptionNamePart(uninterpreted_option, part_location,
1453                              containing_file));
1454     }
1455 
1456     while (LookingAt(".")) {
1457       DO(Consume("."));
1458       LocationRecorder part_location(name_location,
1459                                      uninterpreted_option->name_size());
1460       DO(ParseOptionNamePart(uninterpreted_option, part_location,
1461                              containing_file));
1462     }
1463   }
1464 
1465   DO(Consume("="));
1466 
1467   {
1468     LocationRecorder value_location(location);
1469     value_location.RecordLegacyLocation(
1470         uninterpreted_option, DescriptorPool::ErrorCollector::OPTION_VALUE);
1471 
1472     // All values are a single token, except for negative numbers, which consist
1473     // of a single '-' symbol, followed by a positive number.
1474     bool is_negative = TryConsume("-");
1475 
1476     switch (input_->current().type) {
1477       case io::Tokenizer::TYPE_START:
1478         GOOGLE_LOG(FATAL) << "Trying to read value before any tokens have been read.";
1479         return false;
1480 
1481       case io::Tokenizer::TYPE_END:
1482         AddError("Unexpected end of stream while parsing option value.");
1483         return false;
1484 
1485       case io::Tokenizer::TYPE_IDENTIFIER: {
1486         value_location.AddPath(
1487             UninterpretedOption::kIdentifierValueFieldNumber);
1488         if (is_negative) {
1489           AddError("Invalid '-' symbol before identifier.");
1490           return false;
1491         }
1492         std::string value;
1493         DO(ConsumeIdentifier(&value, "Expected identifier."));
1494         uninterpreted_option->set_identifier_value(value);
1495         break;
1496       }
1497 
1498       case io::Tokenizer::TYPE_INTEGER: {
1499         uint64 value;
1500         uint64 max_value =
1501             is_negative ? static_cast<uint64>(kint64max) + 1 : kuint64max;
1502         DO(ConsumeInteger64(max_value, &value, "Expected integer."));
1503         if (is_negative) {
1504           value_location.AddPath(
1505               UninterpretedOption::kNegativeIntValueFieldNumber);
1506           uninterpreted_option->set_negative_int_value(
1507               static_cast<int64>(-value));
1508         } else {
1509           value_location.AddPath(
1510               UninterpretedOption::kPositiveIntValueFieldNumber);
1511           uninterpreted_option->set_positive_int_value(value);
1512         }
1513         break;
1514       }
1515 
1516       case io::Tokenizer::TYPE_FLOAT: {
1517         value_location.AddPath(UninterpretedOption::kDoubleValueFieldNumber);
1518         double value;
1519         DO(ConsumeNumber(&value, "Expected number."));
1520         uninterpreted_option->set_double_value(is_negative ? -value : value);
1521         break;
1522       }
1523 
1524       case io::Tokenizer::TYPE_STRING: {
1525         value_location.AddPath(UninterpretedOption::kStringValueFieldNumber);
1526         if (is_negative) {
1527           AddError("Invalid '-' symbol before string.");
1528           return false;
1529         }
1530         std::string value;
1531         DO(ConsumeString(&value, "Expected string."));
1532         uninterpreted_option->set_string_value(value);
1533         break;
1534       }
1535 
1536       case io::Tokenizer::TYPE_SYMBOL:
1537         if (LookingAt("{")) {
1538           value_location.AddPath(
1539               UninterpretedOption::kAggregateValueFieldNumber);
1540           DO(ParseUninterpretedBlock(
1541               uninterpreted_option->mutable_aggregate_value()));
1542         } else {
1543           AddError("Expected option value.");
1544           return false;
1545         }
1546         break;
1547     }
1548   }
1549 
1550   if (style == OPTION_STATEMENT) {
1551     DO(ConsumeEndOfDeclaration(";", &location));
1552   }
1553 
1554   return true;
1555 }
1556 
ParseExtensions(DescriptorProto * message,const LocationRecorder & extensions_location,const FileDescriptorProto * containing_file)1557 bool Parser::ParseExtensions(DescriptorProto* message,
1558                              const LocationRecorder& extensions_location,
1559                              const FileDescriptorProto* containing_file) {
1560   // Parse the declaration.
1561   DO(Consume("extensions"));
1562 
1563   int old_range_size = message->extension_range_size();
1564 
1565   do {
1566     // Note that kExtensionRangeFieldNumber was already pushed by the parent.
1567     LocationRecorder location(extensions_location,
1568                               message->extension_range_size());
1569 
1570     DescriptorProto::ExtensionRange* range = message->add_extension_range();
1571     location.RecordLegacyLocation(range,
1572                                   DescriptorPool::ErrorCollector::NUMBER);
1573 
1574     int start, end;
1575     io::Tokenizer::Token start_token;
1576 
1577     {
1578       LocationRecorder start_location(
1579           location, DescriptorProto::ExtensionRange::kStartFieldNumber);
1580       start_token = input_->current();
1581       DO(ConsumeInteger(&start, "Expected field number range."));
1582     }
1583 
1584     if (TryConsume("to")) {
1585       LocationRecorder end_location(
1586           location, DescriptorProto::ExtensionRange::kEndFieldNumber);
1587       if (TryConsume("max")) {
1588         // Set to the sentinel value - 1 since we increment the value below.
1589         // The actual value of the end of the range should be set with
1590         // AdjustExtensionRangesWithMaxEndNumber.
1591         end = kMaxRangeSentinel - 1;
1592       } else {
1593         DO(ConsumeInteger(&end, "Expected integer."));
1594       }
1595     } else {
1596       LocationRecorder end_location(
1597           location, DescriptorProto::ExtensionRange::kEndFieldNumber);
1598       end_location.StartAt(start_token);
1599       end_location.EndAt(start_token);
1600       end = start;
1601     }
1602 
1603     // Users like to specify inclusive ranges, but in code we like the end
1604     // number to be exclusive.
1605     ++end;
1606 
1607     range->set_start(start);
1608     range->set_end(end);
1609   } while (TryConsume(","));
1610 
1611   if (LookingAt("[")) {
1612     int range_number_index = extensions_location.CurrentPathSize();
1613     SourceCodeInfo info;
1614 
1615     // Parse extension range options in the first range.
1616     ExtensionRangeOptions* options =
1617         message->mutable_extension_range(old_range_size)->mutable_options();
1618 
1619     {
1620       LocationRecorder index_location(
1621           extensions_location, 0 /* we fill this in w/ actual index below */,
1622           &info);
1623       LocationRecorder location(
1624           index_location, DescriptorProto::ExtensionRange::kOptionsFieldNumber);
1625       DO(Consume("["));
1626 
1627       do {
1628         DO(ParseOption(options, location, containing_file, OPTION_ASSIGNMENT));
1629       } while (TryConsume(","));
1630 
1631       DO(Consume("]"));
1632     }
1633 
1634     // Then copy the extension range options to all of the other ranges we've
1635     // parsed.
1636     for (int i = old_range_size + 1; i < message->extension_range_size(); i++) {
1637       message->mutable_extension_range(i)->mutable_options()->CopyFrom(
1638           *options);
1639     }
1640     // and copy source locations to the other ranges, too
1641     for (int i = old_range_size; i < message->extension_range_size(); i++) {
1642       for (int j = 0; j < info.location_size(); j++) {
1643         if (info.location(j).path_size() == range_number_index + 1) {
1644           // this location's path is up to the extension range index, but
1645           // doesn't include options; so it's redundant with location above
1646           continue;
1647         }
1648         SourceCodeInfo_Location* dest = source_code_info_->add_location();
1649         *dest = info.location(j);
1650         dest->set_path(range_number_index, i);
1651       }
1652     }
1653   }
1654 
1655   DO(ConsumeEndOfDeclaration(";", &extensions_location));
1656   return true;
1657 }
1658 
1659 // This is similar to extension range parsing, except that it accepts field
1660 // name literals.
ParseReserved(DescriptorProto * message,const LocationRecorder & message_location)1661 bool Parser::ParseReserved(DescriptorProto* message,
1662                            const LocationRecorder& message_location) {
1663   io::Tokenizer::Token start_token = input_->current();
1664   // Parse the declaration.
1665   DO(Consume("reserved"));
1666   if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
1667     LocationRecorder location(message_location,
1668                               DescriptorProto::kReservedNameFieldNumber);
1669     location.StartAt(start_token);
1670     return ParseReservedNames(message, location);
1671   } else {
1672     LocationRecorder location(message_location,
1673                               DescriptorProto::kReservedRangeFieldNumber);
1674     location.StartAt(start_token);
1675     return ParseReservedNumbers(message, location);
1676   }
1677 }
1678 
ParseReservedNames(DescriptorProto * message,const LocationRecorder & parent_location)1679 bool Parser::ParseReservedNames(DescriptorProto* message,
1680                                 const LocationRecorder& parent_location) {
1681   do {
1682     LocationRecorder location(parent_location, message->reserved_name_size());
1683     DO(ConsumeString(message->add_reserved_name(), "Expected field name."));
1684   } while (TryConsume(","));
1685   DO(ConsumeEndOfDeclaration(";", &parent_location));
1686   return true;
1687 }
1688 
ParseReservedNumbers(DescriptorProto * message,const LocationRecorder & parent_location)1689 bool Parser::ParseReservedNumbers(DescriptorProto* message,
1690                                   const LocationRecorder& parent_location) {
1691   bool first = true;
1692   do {
1693     LocationRecorder location(parent_location, message->reserved_range_size());
1694 
1695     DescriptorProto::ReservedRange* range = message->add_reserved_range();
1696     int start, end;
1697     io::Tokenizer::Token start_token;
1698     {
1699       LocationRecorder start_location(
1700           location, DescriptorProto::ReservedRange::kStartFieldNumber);
1701       start_token = input_->current();
1702       DO(ConsumeInteger(&start, (first ? "Expected field name or number range."
1703                                        : "Expected field number range.")));
1704     }
1705 
1706     if (TryConsume("to")) {
1707       LocationRecorder end_location(
1708           location, DescriptorProto::ReservedRange::kEndFieldNumber);
1709       if (TryConsume("max")) {
1710         // Set to the sentinel value - 1 since we increment the value below.
1711         // The actual value of the end of the range should be set with
1712         // AdjustExtensionRangesWithMaxEndNumber.
1713         end = kMaxRangeSentinel - 1;
1714       } else {
1715         DO(ConsumeInteger(&end, "Expected integer."));
1716       }
1717     } else {
1718       LocationRecorder end_location(
1719           location, DescriptorProto::ReservedRange::kEndFieldNumber);
1720       end_location.StartAt(start_token);
1721       end_location.EndAt(start_token);
1722       end = start;
1723     }
1724 
1725     // Users like to specify inclusive ranges, but in code we like the end
1726     // number to be exclusive.
1727     ++end;
1728 
1729     range->set_start(start);
1730     range->set_end(end);
1731     first = false;
1732   } while (TryConsume(","));
1733 
1734   DO(ConsumeEndOfDeclaration(";", &parent_location));
1735   return true;
1736 }
1737 
ParseReserved(EnumDescriptorProto * message,const LocationRecorder & message_location)1738 bool Parser::ParseReserved(EnumDescriptorProto* message,
1739                            const LocationRecorder& message_location) {
1740   io::Tokenizer::Token start_token = input_->current();
1741   // Parse the declaration.
1742   DO(Consume("reserved"));
1743   if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
1744     LocationRecorder location(message_location,
1745                               DescriptorProto::kReservedNameFieldNumber);
1746     location.StartAt(start_token);
1747     return ParseReservedNames(message, location);
1748   } else {
1749     LocationRecorder location(message_location,
1750                               DescriptorProto::kReservedRangeFieldNumber);
1751     location.StartAt(start_token);
1752     return ParseReservedNumbers(message, location);
1753   }
1754 }
1755 
ParseReservedNames(EnumDescriptorProto * message,const LocationRecorder & parent_location)1756 bool Parser::ParseReservedNames(EnumDescriptorProto* message,
1757                                 const LocationRecorder& parent_location) {
1758   do {
1759     LocationRecorder location(parent_location, message->reserved_name_size());
1760     DO(ConsumeString(message->add_reserved_name(), "Expected enum value."));
1761   } while (TryConsume(","));
1762   DO(ConsumeEndOfDeclaration(";", &parent_location));
1763   return true;
1764 }
1765 
ParseReservedNumbers(EnumDescriptorProto * message,const LocationRecorder & parent_location)1766 bool Parser::ParseReservedNumbers(EnumDescriptorProto* message,
1767                                   const LocationRecorder& parent_location) {
1768   bool first = true;
1769   do {
1770     LocationRecorder location(parent_location, message->reserved_range_size());
1771 
1772     EnumDescriptorProto::EnumReservedRange* range =
1773         message->add_reserved_range();
1774     int start, end;
1775     io::Tokenizer::Token start_token;
1776     {
1777       LocationRecorder start_location(
1778           location, EnumDescriptorProto::EnumReservedRange::kStartFieldNumber);
1779       start_token = input_->current();
1780       DO(ConsumeSignedInteger(&start,
1781                               (first ? "Expected enum value or number range."
1782                                      : "Expected enum number range.")));
1783     }
1784 
1785     if (TryConsume("to")) {
1786       LocationRecorder end_location(
1787           location, EnumDescriptorProto::EnumReservedRange::kEndFieldNumber);
1788       if (TryConsume("max")) {
1789         // This is in the enum descriptor path, which doesn't have the message
1790         // set duality to fix up, so it doesn't integrate with the sentinel.
1791         end = INT_MAX;
1792       } else {
1793         DO(ConsumeSignedInteger(&end, "Expected integer."));
1794       }
1795     } else {
1796       LocationRecorder end_location(
1797           location, EnumDescriptorProto::EnumReservedRange::kEndFieldNumber);
1798       end_location.StartAt(start_token);
1799       end_location.EndAt(start_token);
1800       end = start;
1801     }
1802 
1803     range->set_start(start);
1804     range->set_end(end);
1805     first = false;
1806   } while (TryConsume(","));
1807 
1808   DO(ConsumeEndOfDeclaration(";", &parent_location));
1809   return true;
1810 }
1811 
ParseExtend(RepeatedPtrField<FieldDescriptorProto> * extensions,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & extend_location,const FileDescriptorProto * containing_file)1812 bool Parser::ParseExtend(RepeatedPtrField<FieldDescriptorProto>* extensions,
1813                          RepeatedPtrField<DescriptorProto>* messages,
1814                          const LocationRecorder& parent_location,
1815                          int location_field_number_for_nested_type,
1816                          const LocationRecorder& extend_location,
1817                          const FileDescriptorProto* containing_file) {
1818   DO(Consume("extend"));
1819 
1820   // Parse the extendee type.
1821   io::Tokenizer::Token extendee_start = input_->current();
1822   std::string extendee;
1823   DO(ParseUserDefinedType(&extendee));
1824   io::Tokenizer::Token extendee_end = input_->previous();
1825 
1826   // Parse the block.
1827   DO(ConsumeEndOfDeclaration("{", &extend_location));
1828 
1829   bool is_first = true;
1830 
1831   do {
1832     if (AtEnd()) {
1833       AddError("Reached end of input in extend definition (missing '}').");
1834       return false;
1835     }
1836 
1837     // Note that kExtensionFieldNumber was already pushed by the parent.
1838     LocationRecorder location(extend_location, extensions->size());
1839 
1840     FieldDescriptorProto* field = extensions->Add();
1841 
1842     {
1843       LocationRecorder extendee_location(
1844           location, FieldDescriptorProto::kExtendeeFieldNumber);
1845       extendee_location.StartAt(extendee_start);
1846       extendee_location.EndAt(extendee_end);
1847 
1848       if (is_first) {
1849         extendee_location.RecordLegacyLocation(
1850             field, DescriptorPool::ErrorCollector::EXTENDEE);
1851         is_first = false;
1852       }
1853     }
1854 
1855     field->set_extendee(extendee);
1856 
1857     if (!ParseMessageField(field, messages, parent_location,
1858                            location_field_number_for_nested_type, location,
1859                            containing_file)) {
1860       // This statement failed to parse.  Skip it, but keep looping to parse
1861       // other statements.
1862       SkipStatement();
1863     }
1864   } while (!TryConsumeEndOfDeclaration("}", NULL));
1865 
1866   return true;
1867 }
1868 
ParseOneof(OneofDescriptorProto * oneof_decl,DescriptorProto * containing_type,int oneof_index,const LocationRecorder & oneof_location,const LocationRecorder & containing_type_location,const FileDescriptorProto * containing_file)1869 bool Parser::ParseOneof(OneofDescriptorProto* oneof_decl,
1870                         DescriptorProto* containing_type, int oneof_index,
1871                         const LocationRecorder& oneof_location,
1872                         const LocationRecorder& containing_type_location,
1873                         const FileDescriptorProto* containing_file) {
1874   DO(Consume("oneof"));
1875 
1876   {
1877     LocationRecorder name_location(oneof_location,
1878                                    OneofDescriptorProto::kNameFieldNumber);
1879     DO(ConsumeIdentifier(oneof_decl->mutable_name(), "Expected oneof name."));
1880   }
1881 
1882   DO(ConsumeEndOfDeclaration("{", &oneof_location));
1883 
1884   do {
1885     if (AtEnd()) {
1886       AddError("Reached end of input in oneof definition (missing '}').");
1887       return false;
1888     }
1889 
1890     if (LookingAt("option")) {
1891       LocationRecorder option_location(
1892           oneof_location, OneofDescriptorProto::kOptionsFieldNumber);
1893       if (!ParseOption(oneof_decl->mutable_options(), option_location,
1894                        containing_file, OPTION_STATEMENT)) {
1895         return false;
1896       }
1897       continue;
1898     }
1899 
1900     // Print a nice error if the user accidentally tries to place a label
1901     // on an individual member of a oneof.
1902     if (LookingAt("required") || LookingAt("optional") ||
1903         LookingAt("repeated")) {
1904       AddError(
1905           "Fields in oneofs must not have labels (required / optional "
1906           "/ repeated).");
1907       // We can continue parsing here because we understand what the user
1908       // meant.  The error report will still make parsing fail overall.
1909       input_->Next();
1910     }
1911 
1912     LocationRecorder field_location(containing_type_location,
1913                                     DescriptorProto::kFieldFieldNumber,
1914                                     containing_type->field_size());
1915 
1916     FieldDescriptorProto* field = containing_type->add_field();
1917     field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1918     field->set_oneof_index(oneof_index);
1919 
1920     if (!ParseMessageFieldNoLabel(field, containing_type->mutable_nested_type(),
1921                                   containing_type_location,
1922                                   DescriptorProto::kNestedTypeFieldNumber,
1923                                   field_location, containing_file)) {
1924       // This statement failed to parse.  Skip it, but keep looping to parse
1925       // other statements.
1926       SkipStatement();
1927     }
1928   } while (!TryConsumeEndOfDeclaration("}", NULL));
1929 
1930   return true;
1931 }
1932 
1933 // -------------------------------------------------------------------
1934 // Enums
1935 
ParseEnumDefinition(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)1936 bool Parser::ParseEnumDefinition(EnumDescriptorProto* enum_type,
1937                                  const LocationRecorder& enum_location,
1938                                  const FileDescriptorProto* containing_file) {
1939   DO(Consume("enum"));
1940 
1941   {
1942     LocationRecorder location(enum_location,
1943                               EnumDescriptorProto::kNameFieldNumber);
1944     location.RecordLegacyLocation(enum_type,
1945                                   DescriptorPool::ErrorCollector::NAME);
1946     DO(ConsumeIdentifier(enum_type->mutable_name(), "Expected enum name."));
1947   }
1948 
1949   DO(ParseEnumBlock(enum_type, enum_location, containing_file));
1950 
1951   DO(ValidateEnum(enum_type));
1952 
1953   return true;
1954 }
1955 
ParseEnumBlock(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)1956 bool Parser::ParseEnumBlock(EnumDescriptorProto* enum_type,
1957                             const LocationRecorder& enum_location,
1958                             const FileDescriptorProto* containing_file) {
1959   DO(ConsumeEndOfDeclaration("{", &enum_location));
1960 
1961   while (!TryConsumeEndOfDeclaration("}", NULL)) {
1962     if (AtEnd()) {
1963       AddError("Reached end of input in enum definition (missing '}').");
1964       return false;
1965     }
1966 
1967     if (!ParseEnumStatement(enum_type, enum_location, containing_file)) {
1968       // This statement failed to parse.  Skip it, but keep looping to parse
1969       // other statements.
1970       SkipStatement();
1971     }
1972   }
1973 
1974   return true;
1975 }
1976 
ParseEnumStatement(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)1977 bool Parser::ParseEnumStatement(EnumDescriptorProto* enum_type,
1978                                 const LocationRecorder& enum_location,
1979                                 const FileDescriptorProto* containing_file) {
1980   if (TryConsumeEndOfDeclaration(";", NULL)) {
1981     // empty statement; ignore
1982     return true;
1983   } else if (LookingAt("option")) {
1984     LocationRecorder location(enum_location,
1985                               EnumDescriptorProto::kOptionsFieldNumber);
1986     return ParseOption(enum_type->mutable_options(), location, containing_file,
1987                        OPTION_STATEMENT);
1988   } else if (LookingAt("reserved")) {
1989     return ParseReserved(enum_type, enum_location);
1990   } else {
1991     LocationRecorder location(enum_location,
1992                               EnumDescriptorProto::kValueFieldNumber,
1993                               enum_type->value_size());
1994     return ParseEnumConstant(enum_type->add_value(), location, containing_file);
1995   }
1996 }
1997 
ParseEnumConstant(EnumValueDescriptorProto * enum_value,const LocationRecorder & enum_value_location,const FileDescriptorProto * containing_file)1998 bool Parser::ParseEnumConstant(EnumValueDescriptorProto* enum_value,
1999                                const LocationRecorder& enum_value_location,
2000                                const FileDescriptorProto* containing_file) {
2001   // Parse name.
2002   {
2003     LocationRecorder location(enum_value_location,
2004                               EnumValueDescriptorProto::kNameFieldNumber);
2005     location.RecordLegacyLocation(enum_value,
2006                                   DescriptorPool::ErrorCollector::NAME);
2007     DO(ConsumeIdentifier(enum_value->mutable_name(),
2008                          "Expected enum constant name."));
2009   }
2010 
2011   DO(Consume("=", "Missing numeric value for enum constant."));
2012 
2013   // Parse value.
2014   {
2015     LocationRecorder location(enum_value_location,
2016                               EnumValueDescriptorProto::kNumberFieldNumber);
2017     location.RecordLegacyLocation(enum_value,
2018                                   DescriptorPool::ErrorCollector::NUMBER);
2019 
2020     int number;
2021     DO(ConsumeSignedInteger(&number, "Expected integer."));
2022     enum_value->set_number(number);
2023   }
2024 
2025   DO(ParseEnumConstantOptions(enum_value, enum_value_location,
2026                               containing_file));
2027 
2028   DO(ConsumeEndOfDeclaration(";", &enum_value_location));
2029 
2030   return true;
2031 }
2032 
ParseEnumConstantOptions(EnumValueDescriptorProto * value,const LocationRecorder & enum_value_location,const FileDescriptorProto * containing_file)2033 bool Parser::ParseEnumConstantOptions(
2034     EnumValueDescriptorProto* value,
2035     const LocationRecorder& enum_value_location,
2036     const FileDescriptorProto* containing_file) {
2037   if (!LookingAt("[")) return true;
2038 
2039   LocationRecorder location(enum_value_location,
2040                             EnumValueDescriptorProto::kOptionsFieldNumber);
2041 
2042   DO(Consume("["));
2043 
2044   do {
2045     DO(ParseOption(value->mutable_options(), location, containing_file,
2046                    OPTION_ASSIGNMENT));
2047   } while (TryConsume(","));
2048 
2049   DO(Consume("]"));
2050   return true;
2051 }
2052 
2053 // -------------------------------------------------------------------
2054 // Services
2055 
ParseServiceDefinition(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)2056 bool Parser::ParseServiceDefinition(
2057     ServiceDescriptorProto* service, const LocationRecorder& service_location,
2058     const FileDescriptorProto* containing_file) {
2059   DO(Consume("service"));
2060 
2061   {
2062     LocationRecorder location(service_location,
2063                               ServiceDescriptorProto::kNameFieldNumber);
2064     location.RecordLegacyLocation(service,
2065                                   DescriptorPool::ErrorCollector::NAME);
2066     DO(ConsumeIdentifier(service->mutable_name(), "Expected service name."));
2067   }
2068 
2069   DO(ParseServiceBlock(service, service_location, containing_file));
2070   return true;
2071 }
2072 
ParseServiceBlock(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)2073 bool Parser::ParseServiceBlock(ServiceDescriptorProto* service,
2074                                const LocationRecorder& service_location,
2075                                const FileDescriptorProto* containing_file) {
2076   DO(ConsumeEndOfDeclaration("{", &service_location));
2077 
2078   while (!TryConsumeEndOfDeclaration("}", NULL)) {
2079     if (AtEnd()) {
2080       AddError("Reached end of input in service definition (missing '}').");
2081       return false;
2082     }
2083 
2084     if (!ParseServiceStatement(service, service_location, containing_file)) {
2085       // This statement failed to parse.  Skip it, but keep looping to parse
2086       // other statements.
2087       SkipStatement();
2088     }
2089   }
2090 
2091   return true;
2092 }
2093 
ParseServiceStatement(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)2094 bool Parser::ParseServiceStatement(ServiceDescriptorProto* service,
2095                                    const LocationRecorder& service_location,
2096                                    const FileDescriptorProto* containing_file) {
2097   if (TryConsumeEndOfDeclaration(";", NULL)) {
2098     // empty statement; ignore
2099     return true;
2100   } else if (LookingAt("option")) {
2101     LocationRecorder location(service_location,
2102                               ServiceDescriptorProto::kOptionsFieldNumber);
2103     return ParseOption(service->mutable_options(), location, containing_file,
2104                        OPTION_STATEMENT);
2105   } else {
2106     LocationRecorder location(service_location,
2107                               ServiceDescriptorProto::kMethodFieldNumber,
2108                               service->method_size());
2109     return ParseServiceMethod(service->add_method(), location, containing_file);
2110   }
2111 }
2112 
ParseServiceMethod(MethodDescriptorProto * method,const LocationRecorder & method_location,const FileDescriptorProto * containing_file)2113 bool Parser::ParseServiceMethod(MethodDescriptorProto* method,
2114                                 const LocationRecorder& method_location,
2115                                 const FileDescriptorProto* containing_file) {
2116   DO(Consume("rpc"));
2117 
2118   {
2119     LocationRecorder location(method_location,
2120                               MethodDescriptorProto::kNameFieldNumber);
2121     location.RecordLegacyLocation(method, DescriptorPool::ErrorCollector::NAME);
2122     DO(ConsumeIdentifier(method->mutable_name(), "Expected method name."));
2123   }
2124 
2125   // Parse input type.
2126   DO(Consume("("));
2127   {
2128     if (LookingAt("stream")) {
2129       LocationRecorder location(
2130           method_location, MethodDescriptorProto::kClientStreamingFieldNumber);
2131       location.RecordLegacyLocation(method,
2132                                     DescriptorPool::ErrorCollector::OTHER);
2133       method->set_client_streaming(true);
2134       DO(Consume("stream"));
2135 
2136     }
2137     LocationRecorder location(method_location,
2138                               MethodDescriptorProto::kInputTypeFieldNumber);
2139     location.RecordLegacyLocation(method,
2140                                   DescriptorPool::ErrorCollector::INPUT_TYPE);
2141     DO(ParseUserDefinedType(method->mutable_input_type()));
2142   }
2143   DO(Consume(")"));
2144 
2145   // Parse output type.
2146   DO(Consume("returns"));
2147   DO(Consume("("));
2148   {
2149     if (LookingAt("stream")) {
2150       LocationRecorder location(
2151           method_location, MethodDescriptorProto::kServerStreamingFieldNumber);
2152       location.RecordLegacyLocation(method,
2153                                     DescriptorPool::ErrorCollector::OTHER);
2154       DO(Consume("stream"));
2155       method->set_server_streaming(true);
2156 
2157     }
2158     LocationRecorder location(method_location,
2159                               MethodDescriptorProto::kOutputTypeFieldNumber);
2160     location.RecordLegacyLocation(method,
2161                                   DescriptorPool::ErrorCollector::OUTPUT_TYPE);
2162     DO(ParseUserDefinedType(method->mutable_output_type()));
2163   }
2164   DO(Consume(")"));
2165 
2166   if (LookingAt("{")) {
2167     // Options!
2168     DO(ParseMethodOptions(method_location, containing_file,
2169                           MethodDescriptorProto::kOptionsFieldNumber,
2170                           method->mutable_options()));
2171   } else {
2172     DO(ConsumeEndOfDeclaration(";", &method_location));
2173   }
2174 
2175   return true;
2176 }
2177 
2178 
ParseMethodOptions(const LocationRecorder & parent_location,const FileDescriptorProto * containing_file,const int optionsFieldNumber,Message * mutable_options)2179 bool Parser::ParseMethodOptions(const LocationRecorder& parent_location,
2180                                 const FileDescriptorProto* containing_file,
2181                                 const int optionsFieldNumber,
2182                                 Message* mutable_options) {
2183   // Options!
2184   ConsumeEndOfDeclaration("{", &parent_location);
2185   while (!TryConsumeEndOfDeclaration("}", NULL)) {
2186     if (AtEnd()) {
2187       AddError("Reached end of input in method options (missing '}').");
2188       return false;
2189     }
2190 
2191     if (TryConsumeEndOfDeclaration(";", NULL)) {
2192       // empty statement; ignore
2193     } else {
2194       LocationRecorder location(parent_location, optionsFieldNumber);
2195       if (!ParseOption(mutable_options, location, containing_file,
2196                        OPTION_STATEMENT)) {
2197         // This statement failed to parse.  Skip it, but keep looping to
2198         // parse other statements.
2199         SkipStatement();
2200       }
2201     }
2202   }
2203 
2204   return true;
2205 }
2206 
2207 // -------------------------------------------------------------------
2208 
ParseLabel(FieldDescriptorProto::Label * label,const FileDescriptorProto * containing_file)2209 bool Parser::ParseLabel(FieldDescriptorProto::Label* label,
2210                         const FileDescriptorProto* containing_file) {
2211   if (TryConsume("optional")) {
2212     *label = FieldDescriptorProto::LABEL_OPTIONAL;
2213     return true;
2214   } else if (TryConsume("repeated")) {
2215     *label = FieldDescriptorProto::LABEL_REPEATED;
2216     return true;
2217   } else if (TryConsume("required")) {
2218     *label = FieldDescriptorProto::LABEL_REQUIRED;
2219     return true;
2220   }
2221   return false;
2222 }
2223 
ParseType(FieldDescriptorProto::Type * type,std::string * type_name)2224 bool Parser::ParseType(FieldDescriptorProto::Type* type,
2225                        std::string* type_name) {
2226   TypeNameMap::const_iterator iter = kTypeNames.find(input_->current().text);
2227   if (iter != kTypeNames.end()) {
2228     *type = iter->second;
2229     input_->Next();
2230   } else {
2231     DO(ParseUserDefinedType(type_name));
2232   }
2233   return true;
2234 }
2235 
ParseUserDefinedType(std::string * type_name)2236 bool Parser::ParseUserDefinedType(std::string* type_name) {
2237   type_name->clear();
2238 
2239   TypeNameMap::const_iterator iter = kTypeNames.find(input_->current().text);
2240   if (iter != kTypeNames.end()) {
2241     // Note:  The only place enum types are allowed is for field types, but
2242     //   if we are parsing a field type then we would not get here because
2243     //   primitives are allowed there as well.  So this error message doesn't
2244     //   need to account for enums.
2245     AddError("Expected message type.");
2246 
2247     // Pretend to accept this type so that we can go on parsing.
2248     *type_name = input_->current().text;
2249     input_->Next();
2250     return true;
2251   }
2252 
2253   // A leading "." means the name is fully-qualified.
2254   if (TryConsume(".")) type_name->append(".");
2255 
2256   // Consume the first part of the name.
2257   std::string identifier;
2258   DO(ConsumeIdentifier(&identifier, "Expected type name."));
2259   type_name->append(identifier);
2260 
2261   // Consume more parts.
2262   while (TryConsume(".")) {
2263     type_name->append(".");
2264     DO(ConsumeIdentifier(&identifier, "Expected identifier."));
2265     type_name->append(identifier);
2266   }
2267 
2268   return true;
2269 }
2270 
2271 // ===================================================================
2272 
ParsePackage(FileDescriptorProto * file,const LocationRecorder & root_location,const FileDescriptorProto * containing_file)2273 bool Parser::ParsePackage(FileDescriptorProto* file,
2274                           const LocationRecorder& root_location,
2275                           const FileDescriptorProto* containing_file) {
2276   if (file->has_package()) {
2277     AddError("Multiple package definitions.");
2278     // Don't append the new package to the old one.  Just replace it.  Not
2279     // that it really matters since this is an error anyway.
2280     file->clear_package();
2281   }
2282 
2283   LocationRecorder location(root_location,
2284                             FileDescriptorProto::kPackageFieldNumber);
2285   location.RecordLegacyLocation(file, DescriptorPool::ErrorCollector::NAME);
2286 
2287   DO(Consume("package"));
2288 
2289   while (true) {
2290     std::string identifier;
2291     DO(ConsumeIdentifier(&identifier, "Expected identifier."));
2292     file->mutable_package()->append(identifier);
2293     if (!TryConsume(".")) break;
2294     file->mutable_package()->append(".");
2295   }
2296 
2297   DO(ConsumeEndOfDeclaration(";", &location));
2298 
2299   return true;
2300 }
2301 
ParseImport(RepeatedPtrField<std::string> * dependency,RepeatedField<int32> * public_dependency,RepeatedField<int32> * weak_dependency,const LocationRecorder & root_location,const FileDescriptorProto * containing_file)2302 bool Parser::ParseImport(RepeatedPtrField<std::string>* dependency,
2303                          RepeatedField<int32>* public_dependency,
2304                          RepeatedField<int32>* weak_dependency,
2305                          const LocationRecorder& root_location,
2306                          const FileDescriptorProto* containing_file) {
2307   LocationRecorder location(root_location,
2308                             FileDescriptorProto::kDependencyFieldNumber,
2309                             dependency->size());
2310 
2311   DO(Consume("import"));
2312 
2313   if (LookingAt("public")) {
2314     LocationRecorder public_location(
2315         root_location, FileDescriptorProto::kPublicDependencyFieldNumber,
2316         public_dependency->size());
2317     DO(Consume("public"));
2318     *public_dependency->Add() = dependency->size();
2319   } else if (LookingAt("weak")) {
2320     LocationRecorder weak_location(
2321         root_location, FileDescriptorProto::kWeakDependencyFieldNumber,
2322         weak_dependency->size());
2323     weak_location.RecordLegacyImportLocation(containing_file, "weak");
2324     DO(Consume("weak"));
2325     *weak_dependency->Add() = dependency->size();
2326   }
2327 
2328   string import_file;
2329   DO(ConsumeString(&import_file,
2330                    "Expected a string naming the file to import."));
2331   *dependency->Add() = import_file;
2332   location.RecordLegacyImportLocation(containing_file, import_file);
2333 
2334   DO(ConsumeEndOfDeclaration(";", &location));
2335 
2336   return true;
2337 }
2338 
2339 // ===================================================================
2340 
SourceLocationTable()2341 SourceLocationTable::SourceLocationTable() {}
~SourceLocationTable()2342 SourceLocationTable::~SourceLocationTable() {}
2343 
Find(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,int * line,int * column) const2344 bool SourceLocationTable::Find(
2345     const Message* descriptor,
2346     DescriptorPool::ErrorCollector::ErrorLocation location, int* line,
2347     int* column) const {
2348   const std::pair<int, int>* result =
2349       FindOrNull(location_map_, std::make_pair(descriptor, location));
2350   if (result == NULL) {
2351     *line = -1;
2352     *column = 0;
2353     return false;
2354   } else {
2355     *line = result->first;
2356     *column = result->second;
2357     return true;
2358   }
2359 }
2360 
FindImport(const Message * descriptor,const string & name,int * line,int * column) const2361 bool SourceLocationTable::FindImport(const Message* descriptor,
2362                                      const string& name, int* line,
2363                                      int* column) const {
2364   const std::pair<int, int>* result =
2365       FindOrNull(import_location_map_, std::make_pair(descriptor, name));
2366   if (result == nullptr) {
2367     *line = -1;
2368     *column = 0;
2369     return false;
2370   } else {
2371     *line = result->first;
2372     *column = result->second;
2373     return true;
2374   }
2375 }
2376 
Add(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,int line,int column)2377 void SourceLocationTable::Add(
2378     const Message* descriptor,
2379     DescriptorPool::ErrorCollector::ErrorLocation location, int line,
2380     int column) {
2381   location_map_[std::make_pair(descriptor, location)] =
2382       std::make_pair(line, column);
2383 }
2384 
AddImport(const Message * descriptor,const string & name,int line,int column)2385 void SourceLocationTable::AddImport(const Message* descriptor,
2386                                     const string& name, int line, int column) {
2387   import_location_map_[std::make_pair(descriptor, name)] =
2388       std::make_pair(line, column);
2389 }
2390 
Clear()2391 void SourceLocationTable::Clear() { location_map_.clear(); }
2392 
2393 }  // namespace compiler
2394 }  // namespace protobuf
2395 }  // namespace google
2396