1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // Recursive descent FTW.
36
37 #include <float.h>
38 #include <google/protobuf/stubs/hash.h>
39 #include <limits>
40
41
42 #include <google/protobuf/compiler/parser.h>
43 #include <google/protobuf/descriptor.h>
44 #include <google/protobuf/descriptor.pb.h>
45 #include <google/protobuf/wire_format.h>
46 #include <google/protobuf/io/tokenizer.h>
47 #include <google/protobuf/stubs/common.h>
48 #include <google/protobuf/stubs/strutil.h>
49 #include <google/protobuf/stubs/map_util.h>
50
51 namespace google {
52 namespace protobuf {
53 namespace compiler {
54
55 using internal::WireFormat;
56
57 namespace {
58
59 typedef hash_map<string, FieldDescriptorProto::Type> TypeNameMap;
60
MakeTypeNameTable()61 TypeNameMap MakeTypeNameTable() {
62 TypeNameMap result;
63
64 result["double" ] = FieldDescriptorProto::TYPE_DOUBLE;
65 result["float" ] = FieldDescriptorProto::TYPE_FLOAT;
66 result["uint64" ] = FieldDescriptorProto::TYPE_UINT64;
67 result["fixed64" ] = FieldDescriptorProto::TYPE_FIXED64;
68 result["fixed32" ] = FieldDescriptorProto::TYPE_FIXED32;
69 result["bool" ] = FieldDescriptorProto::TYPE_BOOL;
70 result["string" ] = FieldDescriptorProto::TYPE_STRING;
71 result["group" ] = FieldDescriptorProto::TYPE_GROUP;
72
73 result["bytes" ] = FieldDescriptorProto::TYPE_BYTES;
74 result["uint32" ] = FieldDescriptorProto::TYPE_UINT32;
75 result["sfixed32"] = FieldDescriptorProto::TYPE_SFIXED32;
76 result["sfixed64"] = FieldDescriptorProto::TYPE_SFIXED64;
77 result["int32" ] = FieldDescriptorProto::TYPE_INT32;
78 result["int64" ] = FieldDescriptorProto::TYPE_INT64;
79 result["sint32" ] = FieldDescriptorProto::TYPE_SINT32;
80 result["sint64" ] = FieldDescriptorProto::TYPE_SINT64;
81
82 return result;
83 }
84
85 const TypeNameMap kTypeNames = MakeTypeNameTable();
86
87 } // anonymous namespace
88
// Makes code slightly more readable.  The meaning of "DO(foo)" is
// "Execute foo and fail if it fails.", where failure is indicated by
// returning false.
//
// The macro expands to an if/else whose "then" branch is empty, so it makes
// the *enclosing* function return false when STATEMENT evaluates to false,
// and the call site supplies the terminating semicolon.
#define DO(STATEMENT) if (STATEMENT) {} else return false
93
94 // ===================================================================
95
// Constructs a parser with no input tokenizer, error collector or source
// location table attached (all NULL), the error flag cleared, and both
// syntax-identifier flags (require / stop-after) turned off.
Parser::Parser()
  : input_(NULL),
    error_collector_(NULL),
    source_location_table_(NULL),
    had_errors_(false),
    require_syntax_identifier_(false),
    stop_after_syntax_identifier_(false) {
}
104
// The destructor body is empty; nothing is released explicitly here.
Parser::~Parser() {
}
107
108 // ===================================================================
109
LookingAt(const char * text)110 inline bool Parser::LookingAt(const char* text) {
111 return input_->current().text == text;
112 }
113
LookingAtType(io::Tokenizer::TokenType token_type)114 inline bool Parser::LookingAtType(io::Tokenizer::TokenType token_type) {
115 return input_->current().type == token_type;
116 }
117
AtEnd()118 inline bool Parser::AtEnd() {
119 return LookingAtType(io::Tokenizer::TYPE_END);
120 }
121
TryConsume(const char * text)122 bool Parser::TryConsume(const char* text) {
123 if (LookingAt(text)) {
124 input_->Next();
125 return true;
126 } else {
127 return false;
128 }
129 }
130
Consume(const char * text,const char * error)131 bool Parser::Consume(const char* text, const char* error) {
132 if (TryConsume(text)) {
133 return true;
134 } else {
135 AddError(error);
136 return false;
137 }
138 }
139
Consume(const char * text)140 bool Parser::Consume(const char* text) {
141 if (TryConsume(text)) {
142 return true;
143 } else {
144 AddError("Expected \"" + string(text) + "\".");
145 return false;
146 }
147 }
148
ConsumeIdentifier(string * output,const char * error)149 bool Parser::ConsumeIdentifier(string* output, const char* error) {
150 if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
151 *output = input_->current().text;
152 input_->Next();
153 return true;
154 } else {
155 AddError(error);
156 return false;
157 }
158 }
159
ConsumeInteger(int * output,const char * error)160 bool Parser::ConsumeInteger(int* output, const char* error) {
161 if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
162 uint64 value = 0;
163 if (!io::Tokenizer::ParseInteger(input_->current().text,
164 kint32max, &value)) {
165 AddError("Integer out of range.");
166 // We still return true because we did, in fact, parse an integer.
167 }
168 *output = value;
169 input_->Next();
170 return true;
171 } else {
172 AddError(error);
173 return false;
174 }
175 }
176
ConsumeSignedInteger(int * output,const char * error)177 bool Parser::ConsumeSignedInteger(int* output, const char* error) {
178 bool is_negative = false;
179 uint64 max_value = kint32max;
180 if (TryConsume("-")) {
181 is_negative = true;
182 max_value += 1;
183 }
184 uint64 value = 0;
185 DO(ConsumeInteger64(max_value, &value, error));
186 if (is_negative) value *= -1;
187 *output = value;
188 return true;
189 }
190
ConsumeInteger64(uint64 max_value,uint64 * output,const char * error)191 bool Parser::ConsumeInteger64(uint64 max_value, uint64* output,
192 const char* error) {
193 if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
194 if (!io::Tokenizer::ParseInteger(input_->current().text, max_value,
195 output)) {
196 AddError("Integer out of range.");
197 // We still return true because we did, in fact, parse an integer.
198 *output = 0;
199 }
200 input_->Next();
201 return true;
202 } else {
203 AddError(error);
204 return false;
205 }
206 }
207
ConsumeNumber(double * output,const char * error)208 bool Parser::ConsumeNumber(double* output, const char* error) {
209 if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
210 *output = io::Tokenizer::ParseFloat(input_->current().text);
211 input_->Next();
212 return true;
213 } else if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
214 // Also accept integers.
215 uint64 value = 0;
216 if (!io::Tokenizer::ParseInteger(input_->current().text,
217 kuint64max, &value)) {
218 AddError("Integer out of range.");
219 // We still return true because we did, in fact, parse a number.
220 }
221 *output = value;
222 input_->Next();
223 return true;
224 } else if (LookingAt("inf")) {
225 *output = numeric_limits<double>::infinity();
226 input_->Next();
227 return true;
228 } else if (LookingAt("nan")) {
229 *output = numeric_limits<double>::quiet_NaN();
230 input_->Next();
231 return true;
232 } else {
233 AddError(error);
234 return false;
235 }
236 }
237
ConsumeString(string * output,const char * error)238 bool Parser::ConsumeString(string* output, const char* error) {
239 if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
240 io::Tokenizer::ParseString(input_->current().text, output);
241 input_->Next();
242 // Allow C++ like concatenation of adjacent string tokens.
243 while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
244 io::Tokenizer::ParseStringAppend(input_->current().text, output);
245 input_->Next();
246 }
247 return true;
248 } else {
249 AddError(error);
250 return false;
251 }
252 }
253
TryConsumeEndOfDeclaration(const char * text,const LocationRecorder * location)254 bool Parser::TryConsumeEndOfDeclaration(const char* text,
255 const LocationRecorder* location) {
256 if (LookingAt(text)) {
257 string leading, trailing;
258 input_->NextWithComments(&trailing, NULL, &leading);
259
260 // Save the leading comments for next time, and recall the leading comments
261 // from last time.
262 leading.swap(upcoming_doc_comments_);
263
264 if (location != NULL) {
265 location->AttachComments(&leading, &trailing);
266 }
267 return true;
268 } else {
269 return false;
270 }
271 }
272
ConsumeEndOfDeclaration(const char * text,const LocationRecorder * location)273 bool Parser::ConsumeEndOfDeclaration(const char* text,
274 const LocationRecorder* location) {
275 if (TryConsumeEndOfDeclaration(text, location)) {
276 return true;
277 } else {
278 AddError("Expected \"" + string(text) + "\".");
279 return false;
280 }
281 }
282
283 // -------------------------------------------------------------------
284
AddError(int line,int column,const string & error)285 void Parser::AddError(int line, int column, const string& error) {
286 if (error_collector_ != NULL) {
287 error_collector_->AddError(line, column, error);
288 }
289 had_errors_ = true;
290 }
291
AddError(const string & error)292 void Parser::AddError(const string& error) {
293 AddError(input_->current().line, input_->current().column, error);
294 }
295
296 // -------------------------------------------------------------------
297
LocationRecorder(Parser * parser)298 Parser::LocationRecorder::LocationRecorder(Parser* parser)
299 : parser_(parser),
300 location_(parser_->source_code_info_->add_location()) {
301 location_->add_span(parser_->input_->current().line);
302 location_->add_span(parser_->input_->current().column);
303 }
304
// Creates a recorder for a new location whose path starts as a copy of
// |parent|'s path and whose span starts at the current token (see Init()).
Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent) {
  Init(parent);
}

// As above, then appends |path1| to the copied path.
Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
                                           int path1) {
  Init(parent);
  AddPath(path1);
}

// As above, then appends |path1| followed by |path2| to the copied path.
Parser::LocationRecorder::LocationRecorder(const LocationRecorder& parent,
                                           int path1, int path2) {
  Init(parent);
  AddPath(path1);
  AddPath(path2);
}
321
Init(const LocationRecorder & parent)322 void Parser::LocationRecorder::Init(const LocationRecorder& parent) {
323 parser_ = parent.parser_;
324 location_ = parser_->source_code_info_->add_location();
325 location_->mutable_path()->CopyFrom(parent.location_->path());
326
327 location_->add_span(parser_->input_->current().line);
328 location_->add_span(parser_->input_->current().column);
329 }
330
~LocationRecorder()331 Parser::LocationRecorder::~LocationRecorder() {
332 if (location_->span_size() <= 2) {
333 EndAt(parser_->input_->previous());
334 }
335 }
336
// Appends |path_component| to the path of the location being recorded.
void Parser::LocationRecorder::AddPath(int path_component) {
  location_->add_path(path_component);
}
340
// Overwrites the recorded span start (elements 0 and 1) with |token|'s line
// and column.
void Parser::LocationRecorder::StartAt(const io::Tokenizer::Token& token) {
  location_->set_span(0, token.line);
  location_->set_span(1, token.column);
}
345
// Overwrites the recorded span start (elements 0 and 1) with the span start
// recorded by |other|.
void Parser::LocationRecorder::StartAt(const LocationRecorder& other) {
  location_->set_span(0, other.location_->span(0));
  location_->set_span(1, other.location_->span(1));
}
350
EndAt(const io::Tokenizer::Token & token)351 void Parser::LocationRecorder::EndAt(const io::Tokenizer::Token& token) {
352 if (token.line != location_->span(0)) {
353 location_->add_span(token.line);
354 }
355 location_->add_span(token.end_column);
356 }
357
RecordLegacyLocation(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location)358 void Parser::LocationRecorder::RecordLegacyLocation(const Message* descriptor,
359 DescriptorPool::ErrorCollector::ErrorLocation location) {
360 if (parser_->source_location_table_ != NULL) {
361 parser_->source_location_table_->Add(
362 descriptor, location, location_->span(0), location_->span(1));
363 }
364 }
365
AttachComments(string * leading,string * trailing) const366 void Parser::LocationRecorder::AttachComments(
367 string* leading, string* trailing) const {
368 GOOGLE_CHECK(!location_->has_leading_comments());
369 GOOGLE_CHECK(!location_->has_trailing_comments());
370
371 if (!leading->empty()) {
372 location_->mutable_leading_comments()->swap(*leading);
373 }
374 if (!trailing->empty()) {
375 location_->mutable_trailing_comments()->swap(*trailing);
376 }
377 }
378
379 // -------------------------------------------------------------------
380
SkipStatement()381 void Parser::SkipStatement() {
382 while (true) {
383 if (AtEnd()) {
384 return;
385 } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) {
386 if (TryConsumeEndOfDeclaration(";", NULL)) {
387 return;
388 } else if (TryConsume("{")) {
389 SkipRestOfBlock();
390 return;
391 } else if (LookingAt("}")) {
392 return;
393 }
394 }
395 input_->Next();
396 }
397 }
398
SkipRestOfBlock()399 void Parser::SkipRestOfBlock() {
400 while (true) {
401 if (AtEnd()) {
402 return;
403 } else if (LookingAtType(io::Tokenizer::TYPE_SYMBOL)) {
404 if (TryConsumeEndOfDeclaration("}", NULL)) {
405 return;
406 } else if (TryConsume("{")) {
407 SkipRestOfBlock();
408 }
409 }
410 input_->Next();
411 }
412 }
413
414 // ===================================================================
415
// Parses the whole of |input| into |file|.
//
// Resets the per-parse state (error flag, syntax identifier), optionally
// parses the leading 'syntax = "...";' statement, and then parses top-level
// statements until end of input, skipping over statements that fail to
// parse.  Returns true only if no error was recorded.
//
// |file| is dereferenced unless stop_after_syntax_identifier_ is set (or the
// syntax identifier fails to parse), in which case the function returns
// before touching it.
//
// NOTE(review): the two early returns inside the inner block leave input_
// and source_code_info_ set; the latter then points at the local
// |source_code_info|, which no longer exists after returning.  Both are
// reassigned at the top of the next Parse() call.
bool Parser::Parse(io::Tokenizer* input, FileDescriptorProto* file) {
  input_ = input;
  had_errors_ = false;
  syntax_identifier_.clear();

  // Note that |file| could be NULL at this point if
  // stop_after_syntax_identifier_ is true.  So, we conservatively allocate
  // SourceCodeInfo on the stack, then swap it into the FileDescriptorProto
  // later on.
  SourceCodeInfo source_code_info;
  source_code_info_ = &source_code_info;

  if (LookingAtType(io::Tokenizer::TYPE_START)) {
    // Advance to first token.
    input_->NextWithComments(NULL, NULL, &upcoming_doc_comments_);
  }

  // The extra scope makes root_location's destructor (which records the end
  // of the span using input_) run before input_ is cleared below.
  {
    LocationRecorder root_location(this);

    if (require_syntax_identifier_ || LookingAt("syntax")) {
      if (!ParseSyntaxIdentifier()) {
        // Don't attempt to parse the file if we didn't recognize the syntax
        // identifier.
        return false;
      }
    } else if (!stop_after_syntax_identifier_) {
      // No syntax statement present and none required: default to proto2.
      syntax_identifier_ = "proto2";
    }

    if (stop_after_syntax_identifier_) return !had_errors_;

    // Repeatedly parse statements until we reach the end of the file.
    while (!AtEnd()) {
      if (!ParseTopLevelStatement(file, root_location)) {
        // This statement failed to parse.  Skip it, but keep looping to parse
        // other statements.
        SkipStatement();

        // SkipStatement() stops in front of a '}'.  At top level there is no
        // enclosing block, so report it and step over it to keep going.
        if (LookingAt("}")) {
          AddError("Unmatched \"}\".");
          input_->NextWithComments(NULL, NULL, &upcoming_doc_comments_);
        }
      }
    }
  }

  input_ = NULL;
  source_code_info_ = NULL;
  // Hand the collected locations over to the output proto.
  source_code_info.Swap(file->mutable_source_code_info());
  return !had_errors_;
}
468
ParseSyntaxIdentifier()469 bool Parser::ParseSyntaxIdentifier() {
470 DO(Consume("syntax", "File must begin with 'syntax = \"proto2\";'."));
471 DO(Consume("="));
472 io::Tokenizer::Token syntax_token = input_->current();
473 string syntax;
474 DO(ConsumeString(&syntax, "Expected syntax identifier."));
475 DO(ConsumeEndOfDeclaration(";", NULL));
476
477 syntax_identifier_ = syntax;
478
479 if (syntax != "proto2" && !stop_after_syntax_identifier_) {
480 AddError(syntax_token.line, syntax_token.column,
481 "Unrecognized syntax identifier \"" + syntax + "\". This parser "
482 "only recognizes \"proto2\".");
483 return false;
484 }
485
486 return true;
487 }
488
ParseTopLevelStatement(FileDescriptorProto * file,const LocationRecorder & root_location)489 bool Parser::ParseTopLevelStatement(FileDescriptorProto* file,
490 const LocationRecorder& root_location) {
491 if (TryConsumeEndOfDeclaration(";", NULL)) {
492 // empty statement; ignore
493 return true;
494 } else if (LookingAt("message")) {
495 LocationRecorder location(root_location,
496 FileDescriptorProto::kMessageTypeFieldNumber, file->message_type_size());
497 return ParseMessageDefinition(file->add_message_type(), location, file);
498 } else if (LookingAt("enum")) {
499 LocationRecorder location(root_location,
500 FileDescriptorProto::kEnumTypeFieldNumber, file->enum_type_size());
501 return ParseEnumDefinition(file->add_enum_type(), location, file);
502 } else if (LookingAt("service")) {
503 LocationRecorder location(root_location,
504 FileDescriptorProto::kServiceFieldNumber, file->service_size());
505 return ParseServiceDefinition(file->add_service(), location, file);
506 } else if (LookingAt("extend")) {
507 LocationRecorder location(root_location,
508 FileDescriptorProto::kExtensionFieldNumber);
509 return ParseExtend(file->mutable_extension(),
510 file->mutable_message_type(),
511 root_location,
512 FileDescriptorProto::kMessageTypeFieldNumber,
513 location, file);
514 } else if (LookingAt("import")) {
515 return ParseImport(file->mutable_dependency(),
516 file->mutable_public_dependency(),
517 file->mutable_weak_dependency(),
518 root_location, file);
519 } else if (LookingAt("package")) {
520 return ParsePackage(file, root_location, file);
521 } else if (LookingAt("option")) {
522 LocationRecorder location(root_location,
523 FileDescriptorProto::kOptionsFieldNumber);
524 return ParseOption(file->mutable_options(), location, file,
525 OPTION_STATEMENT);
526 } else {
527 AddError("Expected top-level statement (e.g. \"message\").");
528 return false;
529 }
530 }
531
532 // -------------------------------------------------------------------
533 // Messages
534
ParseMessageDefinition(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)535 bool Parser::ParseMessageDefinition(
536 DescriptorProto* message,
537 const LocationRecorder& message_location,
538 const FileDescriptorProto* containing_file) {
539 DO(Consume("message"));
540 {
541 LocationRecorder location(message_location,
542 DescriptorProto::kNameFieldNumber);
543 location.RecordLegacyLocation(
544 message, DescriptorPool::ErrorCollector::NAME);
545 DO(ConsumeIdentifier(message->mutable_name(), "Expected message name."));
546 }
547 DO(ParseMessageBlock(message, message_location, containing_file));
548 return true;
549 }
550
551 namespace {
552
// Placeholder stored as the end of an extension range that was declared with
// 'max'; AdjustExtensionRangesWithMaxEndNumber() later replaces it with the
// real maximum.
const int kMaxExtensionRangeSentinel = -1;
554
IsMessageSetWireFormatMessage(const DescriptorProto & message)555 bool IsMessageSetWireFormatMessage(const DescriptorProto& message) {
556 const MessageOptions& options = message.options();
557 for (int i = 0; i < options.uninterpreted_option_size(); ++i) {
558 const UninterpretedOption& uninterpreted = options.uninterpreted_option(i);
559 if (uninterpreted.name_size() == 1 &&
560 uninterpreted.name(0).name_part() == "message_set_wire_format" &&
561 uninterpreted.identifier_value() == "true") {
562 return true;
563 }
564 }
565 return false;
566 }
567
568 // Modifies any extension ranges that specified 'max' as the end of the
569 // extension range, and sets them to the type-specific maximum. The actual max
570 // tag number can only be determined after all options have been parsed.
AdjustExtensionRangesWithMaxEndNumber(DescriptorProto * message)571 void AdjustExtensionRangesWithMaxEndNumber(DescriptorProto* message) {
572 const bool is_message_set = IsMessageSetWireFormatMessage(*message);
573 const int max_extension_number = is_message_set ?
574 kint32max :
575 FieldDescriptor::kMaxNumber + 1;
576 for (int i = 0; i < message->extension_range_size(); ++i) {
577 if (message->extension_range(i).end() == kMaxExtensionRangeSentinel) {
578 message->mutable_extension_range(i)->set_end(max_extension_number);
579 }
580 }
581 }
582
583 } // namespace
584
ParseMessageBlock(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)585 bool Parser::ParseMessageBlock(DescriptorProto* message,
586 const LocationRecorder& message_location,
587 const FileDescriptorProto* containing_file) {
588 DO(ConsumeEndOfDeclaration("{", &message_location));
589
590 while (!TryConsumeEndOfDeclaration("}", NULL)) {
591 if (AtEnd()) {
592 AddError("Reached end of input in message definition (missing '}').");
593 return false;
594 }
595
596 if (!ParseMessageStatement(message, message_location, containing_file)) {
597 // This statement failed to parse. Skip it, but keep looping to parse
598 // other statements.
599 SkipStatement();
600 }
601 }
602
603 if (message->extension_range_size() > 0) {
604 AdjustExtensionRangesWithMaxEndNumber(message);
605 }
606 return true;
607 }
608
ParseMessageStatement(DescriptorProto * message,const LocationRecorder & message_location,const FileDescriptorProto * containing_file)609 bool Parser::ParseMessageStatement(DescriptorProto* message,
610 const LocationRecorder& message_location,
611 const FileDescriptorProto* containing_file) {
612 if (TryConsumeEndOfDeclaration(";", NULL)) {
613 // empty statement; ignore
614 return true;
615 } else if (LookingAt("message")) {
616 LocationRecorder location(message_location,
617 DescriptorProto::kNestedTypeFieldNumber,
618 message->nested_type_size());
619 return ParseMessageDefinition(message->add_nested_type(), location,
620 containing_file);
621 } else if (LookingAt("enum")) {
622 LocationRecorder location(message_location,
623 DescriptorProto::kEnumTypeFieldNumber,
624 message->enum_type_size());
625 return ParseEnumDefinition(message->add_enum_type(), location,
626 containing_file);
627 } else if (LookingAt("extensions")) {
628 LocationRecorder location(message_location,
629 DescriptorProto::kExtensionRangeFieldNumber);
630 return ParseExtensions(message, location, containing_file);
631 } else if (LookingAt("extend")) {
632 LocationRecorder location(message_location,
633 DescriptorProto::kExtensionFieldNumber);
634 return ParseExtend(message->mutable_extension(),
635 message->mutable_nested_type(),
636 message_location,
637 DescriptorProto::kNestedTypeFieldNumber,
638 location, containing_file);
639 } else if (LookingAt("option")) {
640 LocationRecorder location(message_location,
641 DescriptorProto::kOptionsFieldNumber);
642 return ParseOption(message->mutable_options(), location,
643 containing_file, OPTION_STATEMENT);
644 } else if (LookingAt("oneof")) {
645 int oneof_index = message->oneof_decl_size();
646 LocationRecorder oneof_location(message_location,
647 DescriptorProto::kOneofDeclFieldNumber,
648 oneof_index);
649
650 return ParseOneof(message->add_oneof_decl(), message,
651 oneof_index, oneof_location, message_location,
652 containing_file);
653 } else {
654 LocationRecorder location(message_location,
655 DescriptorProto::kFieldFieldNumber,
656 message->field_size());
657 return ParseMessageField(message->add_field(),
658 message->mutable_nested_type(),
659 message_location,
660 DescriptorProto::kNestedTypeFieldNumber,
661 location,
662 containing_file);
663 }
664 }
665
ParseMessageField(FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)666 bool Parser::ParseMessageField(FieldDescriptorProto* field,
667 RepeatedPtrField<DescriptorProto>* messages,
668 const LocationRecorder& parent_location,
669 int location_field_number_for_nested_type,
670 const LocationRecorder& field_location,
671 const FileDescriptorProto* containing_file) {
672 {
673 LocationRecorder location(field_location,
674 FieldDescriptorProto::kLabelFieldNumber);
675 FieldDescriptorProto::Label label;
676 DO(ParseLabel(&label, containing_file));
677 field->set_label(label);
678 }
679
680 return ParseMessageFieldNoLabel(field, messages, parent_location,
681 location_field_number_for_nested_type,
682 field_location,
683 containing_file);
684 }
685
ParseMessageFieldNoLabel(FieldDescriptorProto * field,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)686 bool Parser::ParseMessageFieldNoLabel(
687 FieldDescriptorProto* field,
688 RepeatedPtrField<DescriptorProto>* messages,
689 const LocationRecorder& parent_location,
690 int location_field_number_for_nested_type,
691 const LocationRecorder& field_location,
692 const FileDescriptorProto* containing_file) {
693 // Parse type.
694 {
695 LocationRecorder location(field_location); // add path later
696 location.RecordLegacyLocation(field, DescriptorPool::ErrorCollector::TYPE);
697
698 FieldDescriptorProto::Type type = FieldDescriptorProto::TYPE_INT32;
699 string type_name;
700 DO(ParseType(&type, &type_name));
701 if (type_name.empty()) {
702 location.AddPath(FieldDescriptorProto::kTypeFieldNumber);
703 field->set_type(type);
704 } else {
705 location.AddPath(FieldDescriptorProto::kTypeNameFieldNumber);
706 field->set_type_name(type_name);
707 }
708 }
709
710 // Parse name and '='.
711 io::Tokenizer::Token name_token = input_->current();
712 {
713 LocationRecorder location(field_location,
714 FieldDescriptorProto::kNameFieldNumber);
715 location.RecordLegacyLocation(field, DescriptorPool::ErrorCollector::NAME);
716 DO(ConsumeIdentifier(field->mutable_name(), "Expected field name."));
717 }
718 DO(Consume("=", "Missing field number."));
719
720 // Parse field number.
721 {
722 LocationRecorder location(field_location,
723 FieldDescriptorProto::kNumberFieldNumber);
724 location.RecordLegacyLocation(
725 field, DescriptorPool::ErrorCollector::NUMBER);
726 int number;
727 DO(ConsumeInteger(&number, "Expected field number."));
728 field->set_number(number);
729 }
730
731 // Parse options.
732 DO(ParseFieldOptions(field, field_location, containing_file));
733
734 // Deal with groups.
735 if (field->has_type() && field->type() == FieldDescriptorProto::TYPE_GROUP) {
736 // Awkward: Since a group declares both a message type and a field, we
737 // have to create overlapping locations.
738 LocationRecorder group_location(parent_location);
739 group_location.StartAt(field_location);
740 group_location.AddPath(location_field_number_for_nested_type);
741 group_location.AddPath(messages->size());
742
743 DescriptorProto* group = messages->Add();
744 group->set_name(field->name());
745
746 // Record name location to match the field name's location.
747 {
748 LocationRecorder location(group_location,
749 DescriptorProto::kNameFieldNumber);
750 location.StartAt(name_token);
751 location.EndAt(name_token);
752 location.RecordLegacyLocation(
753 group, DescriptorPool::ErrorCollector::NAME);
754 }
755
756 // The field's type_name also comes from the name. Confusing!
757 {
758 LocationRecorder location(field_location,
759 FieldDescriptorProto::kTypeNameFieldNumber);
760 location.StartAt(name_token);
761 location.EndAt(name_token);
762 }
763
764 // As a hack for backwards-compatibility, we force the group name to start
765 // with a capital letter and lower-case the field name. New code should
766 // not use groups; it should use nested messages.
767 if (group->name()[0] < 'A' || 'Z' < group->name()[0]) {
768 AddError(name_token.line, name_token.column,
769 "Group names must start with a capital letter.");
770 }
771 LowerString(field->mutable_name());
772
773 field->set_type_name(group->name());
774 if (LookingAt("{")) {
775 DO(ParseMessageBlock(group, group_location, containing_file));
776 } else {
777 AddError("Missing group body.");
778 return false;
779 }
780 } else {
781 DO(ConsumeEndOfDeclaration(";", &field_location));
782 }
783
784 return true;
785 }
786
ParseFieldOptions(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)787 bool Parser::ParseFieldOptions(FieldDescriptorProto* field,
788 const LocationRecorder& field_location,
789 const FileDescriptorProto* containing_file) {
790 if (!LookingAt("[")) return true;
791
792 LocationRecorder location(field_location,
793 FieldDescriptorProto::kOptionsFieldNumber);
794
795 DO(Consume("["));
796
797 // Parse field options.
798 do {
799 if (LookingAt("default")) {
800 // We intentionally pass field_location rather than location here, since
801 // the default value is not actually an option.
802 DO(ParseDefaultAssignment(field, field_location, containing_file));
803 } else {
804 DO(ParseOption(field->mutable_options(), location,
805 containing_file, OPTION_ASSIGNMENT));
806 }
807 } while (TryConsume(","));
808
809 DO(Consume("]"));
810 return true;
811 }
812
ParseDefaultAssignment(FieldDescriptorProto * field,const LocationRecorder & field_location,const FileDescriptorProto * containing_file)813 bool Parser::ParseDefaultAssignment(
814 FieldDescriptorProto* field,
815 const LocationRecorder& field_location,
816 const FileDescriptorProto* containing_file) {
817 if (field->has_default_value()) {
818 AddError("Already set option \"default\".");
819 field->clear_default_value();
820 }
821
822 DO(Consume("default"));
823 DO(Consume("="));
824
825 LocationRecorder location(field_location,
826 FieldDescriptorProto::kDefaultValueFieldNumber);
827 location.RecordLegacyLocation(
828 field, DescriptorPool::ErrorCollector::DEFAULT_VALUE);
829 string* default_value = field->mutable_default_value();
830
831 if (!field->has_type()) {
832 // The field has a type name, but we don't know if it is a message or an
833 // enum yet. (If it were a primitive type, |field| would have a type set
834 // already.) In this case, simply take the current string as the default
835 // value; we will catch the error later if it is not a valid enum value.
836 // (N.B. that we do not check whether the current token is an identifier:
837 // doing so throws strange errors when the user mistypes a primitive
838 // typename and we assume it's an enum. E.g.: "optional int foo = 1 [default
839 // = 42]". In such a case the fundamental error is really that "int" is not
840 // a type, not that "42" is not an identifier. See b/12533582.)
841 *default_value = input_->current().text;
842 input_->Next();
843 return true;
844 }
845
846 switch (field->type()) {
847 case FieldDescriptorProto::TYPE_INT32:
848 case FieldDescriptorProto::TYPE_INT64:
849 case FieldDescriptorProto::TYPE_SINT32:
850 case FieldDescriptorProto::TYPE_SINT64:
851 case FieldDescriptorProto::TYPE_SFIXED32:
852 case FieldDescriptorProto::TYPE_SFIXED64: {
853 uint64 max_value = kint64max;
854 if (field->type() == FieldDescriptorProto::TYPE_INT32 ||
855 field->type() == FieldDescriptorProto::TYPE_SINT32 ||
856 field->type() == FieldDescriptorProto::TYPE_SFIXED32) {
857 max_value = kint32max;
858 }
859
860 // These types can be negative.
861 if (TryConsume("-")) {
862 default_value->append("-");
863 // Two's complement always has one more negative value than positive.
864 ++max_value;
865 }
866 // Parse the integer to verify that it is not out-of-range.
867 uint64 value;
868 DO(ConsumeInteger64(max_value, &value,
869 "Expected integer for field default value."));
870 // And stringify it again.
871 default_value->append(SimpleItoa(value));
872 break;
873 }
874
875 case FieldDescriptorProto::TYPE_UINT32:
876 case FieldDescriptorProto::TYPE_UINT64:
877 case FieldDescriptorProto::TYPE_FIXED32:
878 case FieldDescriptorProto::TYPE_FIXED64: {
879 uint64 max_value = kuint64max;
880 if (field->type() == FieldDescriptorProto::TYPE_UINT32 ||
881 field->type() == FieldDescriptorProto::TYPE_FIXED32) {
882 max_value = kuint32max;
883 }
884
885 // Numeric, not negative.
886 if (TryConsume("-")) {
887 AddError("Unsigned field can't have negative default value.");
888 }
889 // Parse the integer to verify that it is not out-of-range.
890 uint64 value;
891 DO(ConsumeInteger64(max_value, &value,
892 "Expected integer for field default value."));
893 // And stringify it again.
894 default_value->append(SimpleItoa(value));
895 break;
896 }
897
898 case FieldDescriptorProto::TYPE_FLOAT:
899 case FieldDescriptorProto::TYPE_DOUBLE:
900 // These types can be negative.
901 if (TryConsume("-")) {
902 default_value->append("-");
903 }
904 // Parse the integer because we have to convert hex integers to decimal
905 // floats.
906 double value;
907 DO(ConsumeNumber(&value, "Expected number."));
908 // And stringify it again.
909 default_value->append(SimpleDtoa(value));
910 break;
911
912 case FieldDescriptorProto::TYPE_BOOL:
913 if (TryConsume("true")) {
914 default_value->assign("true");
915 } else if (TryConsume("false")) {
916 default_value->assign("false");
917 } else {
918 AddError("Expected \"true\" or \"false\".");
919 return false;
920 }
921 break;
922
923 case FieldDescriptorProto::TYPE_STRING:
924 // Note: When file option java_string_check_utf8 is true, if a
925 // non-string representation (eg byte[]) is later supported, it must
926 // be checked for UTF-8-ness.
927 DO(ConsumeString(default_value, "Expected string for field default "
928 "value."));
929 break;
930
931 case FieldDescriptorProto::TYPE_BYTES:
932 DO(ConsumeString(default_value, "Expected string."));
933 *default_value = CEscape(*default_value);
934 break;
935
936 case FieldDescriptorProto::TYPE_ENUM:
937 DO(ConsumeIdentifier(default_value, "Expected enum identifier for field "
938 "default value."));
939 break;
940
941 case FieldDescriptorProto::TYPE_MESSAGE:
942 case FieldDescriptorProto::TYPE_GROUP:
943 AddError("Messages can't have default values.");
944 return false;
945 }
946
947 return true;
948 }
949
ParseOptionNamePart(UninterpretedOption * uninterpreted_option,const LocationRecorder & part_location,const FileDescriptorProto * containing_file)950 bool Parser::ParseOptionNamePart(UninterpretedOption* uninterpreted_option,
951 const LocationRecorder& part_location,
952 const FileDescriptorProto* containing_file) {
953 UninterpretedOption::NamePart* name = uninterpreted_option->add_name();
954 string identifier; // We parse identifiers into this string.
955 if (LookingAt("(")) { // This is an extension.
956 DO(Consume("("));
957
958 {
959 LocationRecorder location(
960 part_location, UninterpretedOption::NamePart::kNamePartFieldNumber);
961 // An extension name consists of dot-separated identifiers, and may begin
962 // with a dot.
963 if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
964 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
965 name->mutable_name_part()->append(identifier);
966 }
967 while (LookingAt(".")) {
968 DO(Consume("."));
969 name->mutable_name_part()->append(".");
970 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
971 name->mutable_name_part()->append(identifier);
972 }
973 }
974
975 DO(Consume(")"));
976 name->set_is_extension(true);
977 } else { // This is a regular field.
978 LocationRecorder location(
979 part_location, UninterpretedOption::NamePart::kNamePartFieldNumber);
980 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
981 name->mutable_name_part()->append(identifier);
982 name->set_is_extension(false);
983 }
984 return true;
985 }
986
ParseUninterpretedBlock(string * value)987 bool Parser::ParseUninterpretedBlock(string* value) {
988 // Note that enclosing braces are not added to *value.
989 // We do NOT use ConsumeEndOfStatement for this brace because it's delimiting
990 // an expression, not a block of statements.
991 DO(Consume("{"));
992 int brace_depth = 1;
993 while (!AtEnd()) {
994 if (LookingAt("{")) {
995 brace_depth++;
996 } else if (LookingAt("}")) {
997 brace_depth--;
998 if (brace_depth == 0) {
999 input_->Next();
1000 return true;
1001 }
1002 }
1003 // TODO(sanjay): Interpret line/column numbers to preserve formatting
1004 if (!value->empty()) value->push_back(' ');
1005 value->append(input_->current().text);
1006 input_->Next();
1007 }
1008 AddError("Unexpected end of stream while parsing aggregate value.");
1009 return false;
1010 }
1011
1012 // We don't interpret the option here. Instead we store it in an
1013 // UninterpretedOption, to be interpreted later.
ParseOption(Message * options,const LocationRecorder & options_location,const FileDescriptorProto * containing_file,OptionStyle style)1014 bool Parser::ParseOption(Message* options,
1015 const LocationRecorder& options_location,
1016 const FileDescriptorProto* containing_file,
1017 OptionStyle style) {
1018 // Create an entry in the uninterpreted_option field.
1019 const FieldDescriptor* uninterpreted_option_field = options->GetDescriptor()->
1020 FindFieldByName("uninterpreted_option");
1021 GOOGLE_CHECK(uninterpreted_option_field != NULL)
1022 << "No field named \"uninterpreted_option\" in the Options proto.";
1023
1024 const Reflection* reflection = options->GetReflection();
1025
1026 LocationRecorder location(
1027 options_location, uninterpreted_option_field->number(),
1028 reflection->FieldSize(*options, uninterpreted_option_field));
1029
1030 if (style == OPTION_STATEMENT) {
1031 DO(Consume("option"));
1032 }
1033
1034 UninterpretedOption* uninterpreted_option = down_cast<UninterpretedOption*>(
1035 options->GetReflection()->AddMessage(options,
1036 uninterpreted_option_field));
1037
1038 // Parse dot-separated name.
1039 {
1040 LocationRecorder name_location(location,
1041 UninterpretedOption::kNameFieldNumber);
1042 name_location.RecordLegacyLocation(
1043 uninterpreted_option, DescriptorPool::ErrorCollector::OPTION_NAME);
1044
1045 {
1046 LocationRecorder part_location(name_location,
1047 uninterpreted_option->name_size());
1048 DO(ParseOptionNamePart(uninterpreted_option, part_location,
1049 containing_file));
1050 }
1051
1052 while (LookingAt(".")) {
1053 DO(Consume("."));
1054 LocationRecorder part_location(name_location,
1055 uninterpreted_option->name_size());
1056 DO(ParseOptionNamePart(uninterpreted_option, part_location,
1057 containing_file));
1058 }
1059 }
1060
1061 DO(Consume("="));
1062
1063 {
1064 LocationRecorder value_location(location);
1065 value_location.RecordLegacyLocation(
1066 uninterpreted_option, DescriptorPool::ErrorCollector::OPTION_VALUE);
1067
1068 // All values are a single token, except for negative numbers, which consist
1069 // of a single '-' symbol, followed by a positive number.
1070 bool is_negative = TryConsume("-");
1071
1072 switch (input_->current().type) {
1073 case io::Tokenizer::TYPE_START:
1074 GOOGLE_LOG(FATAL) << "Trying to read value before any tokens have been read.";
1075 return false;
1076
1077 case io::Tokenizer::TYPE_END:
1078 AddError("Unexpected end of stream while parsing option value.");
1079 return false;
1080
1081 case io::Tokenizer::TYPE_IDENTIFIER: {
1082 value_location.AddPath(
1083 UninterpretedOption::kIdentifierValueFieldNumber);
1084 if (is_negative) {
1085 AddError("Invalid '-' symbol before identifier.");
1086 return false;
1087 }
1088 string value;
1089 DO(ConsumeIdentifier(&value, "Expected identifier."));
1090 uninterpreted_option->set_identifier_value(value);
1091 break;
1092 }
1093
1094 case io::Tokenizer::TYPE_INTEGER: {
1095 uint64 value;
1096 uint64 max_value =
1097 is_negative ? static_cast<uint64>(kint64max) + 1 : kuint64max;
1098 DO(ConsumeInteger64(max_value, &value, "Expected integer."));
1099 if (is_negative) {
1100 value_location.AddPath(
1101 UninterpretedOption::kNegativeIntValueFieldNumber);
1102 uninterpreted_option->set_negative_int_value(
1103 -static_cast<int64>(value));
1104 } else {
1105 value_location.AddPath(
1106 UninterpretedOption::kPositiveIntValueFieldNumber);
1107 uninterpreted_option->set_positive_int_value(value);
1108 }
1109 break;
1110 }
1111
1112 case io::Tokenizer::TYPE_FLOAT: {
1113 value_location.AddPath(UninterpretedOption::kDoubleValueFieldNumber);
1114 double value;
1115 DO(ConsumeNumber(&value, "Expected number."));
1116 uninterpreted_option->set_double_value(is_negative ? -value : value);
1117 break;
1118 }
1119
1120 case io::Tokenizer::TYPE_STRING: {
1121 value_location.AddPath(UninterpretedOption::kStringValueFieldNumber);
1122 if (is_negative) {
1123 AddError("Invalid '-' symbol before string.");
1124 return false;
1125 }
1126 string value;
1127 DO(ConsumeString(&value, "Expected string."));
1128 uninterpreted_option->set_string_value(value);
1129 break;
1130 }
1131
1132 case io::Tokenizer::TYPE_SYMBOL:
1133 if (LookingAt("{")) {
1134 value_location.AddPath(
1135 UninterpretedOption::kAggregateValueFieldNumber);
1136 DO(ParseUninterpretedBlock(
1137 uninterpreted_option->mutable_aggregate_value()));
1138 } else {
1139 AddError("Expected option value.");
1140 return false;
1141 }
1142 break;
1143 }
1144 }
1145
1146 if (style == OPTION_STATEMENT) {
1147 DO(ConsumeEndOfDeclaration(";", &location));
1148 }
1149
1150
1151 return true;
1152 }
1153
ParseExtensions(DescriptorProto * message,const LocationRecorder & extensions_location,const FileDescriptorProto * containing_file)1154 bool Parser::ParseExtensions(DescriptorProto* message,
1155 const LocationRecorder& extensions_location,
1156 const FileDescriptorProto* containing_file) {
1157 // Parse the declaration.
1158 DO(Consume("extensions"));
1159
1160 do {
1161 // Note that kExtensionRangeFieldNumber was already pushed by the parent.
1162 LocationRecorder location(extensions_location,
1163 message->extension_range_size());
1164
1165 DescriptorProto::ExtensionRange* range = message->add_extension_range();
1166 location.RecordLegacyLocation(
1167 range, DescriptorPool::ErrorCollector::NUMBER);
1168
1169 int start, end;
1170 io::Tokenizer::Token start_token;
1171
1172 {
1173 LocationRecorder start_location(
1174 location, DescriptorProto::ExtensionRange::kStartFieldNumber);
1175 start_token = input_->current();
1176 DO(ConsumeInteger(&start, "Expected field number range."));
1177 }
1178
1179 if (TryConsume("to")) {
1180 LocationRecorder end_location(
1181 location, DescriptorProto::ExtensionRange::kEndFieldNumber);
1182 if (TryConsume("max")) {
1183 // Set to the sentinel value - 1 since we increment the value below.
1184 // The actual value of the end of the range should be set with
1185 // AdjustExtensionRangesWithMaxEndNumber.
1186 end = kMaxExtensionRangeSentinel - 1;
1187 } else {
1188 DO(ConsumeInteger(&end, "Expected integer."));
1189 }
1190 } else {
1191 LocationRecorder end_location(
1192 location, DescriptorProto::ExtensionRange::kEndFieldNumber);
1193 end_location.StartAt(start_token);
1194 end_location.EndAt(start_token);
1195 end = start;
1196 }
1197
1198 // Users like to specify inclusive ranges, but in code we like the end
1199 // number to be exclusive.
1200 ++end;
1201
1202 range->set_start(start);
1203 range->set_end(end);
1204 } while (TryConsume(","));
1205
1206 DO(ConsumeEndOfDeclaration(";", &extensions_location));
1207 return true;
1208 }
1209
ParseExtend(RepeatedPtrField<FieldDescriptorProto> * extensions,RepeatedPtrField<DescriptorProto> * messages,const LocationRecorder & parent_location,int location_field_number_for_nested_type,const LocationRecorder & extend_location,const FileDescriptorProto * containing_file)1210 bool Parser::ParseExtend(RepeatedPtrField<FieldDescriptorProto>* extensions,
1211 RepeatedPtrField<DescriptorProto>* messages,
1212 const LocationRecorder& parent_location,
1213 int location_field_number_for_nested_type,
1214 const LocationRecorder& extend_location,
1215 const FileDescriptorProto* containing_file) {
1216 DO(Consume("extend"));
1217
1218 // Parse the extendee type.
1219 io::Tokenizer::Token extendee_start = input_->current();
1220 string extendee;
1221 DO(ParseUserDefinedType(&extendee));
1222 io::Tokenizer::Token extendee_end = input_->previous();
1223
1224 // Parse the block.
1225 DO(ConsumeEndOfDeclaration("{", &extend_location));
1226
1227 bool is_first = true;
1228
1229 do {
1230 if (AtEnd()) {
1231 AddError("Reached end of input in extend definition (missing '}').");
1232 return false;
1233 }
1234
1235 // Note that kExtensionFieldNumber was already pushed by the parent.
1236 LocationRecorder location(extend_location, extensions->size());
1237
1238 FieldDescriptorProto* field = extensions->Add();
1239
1240 {
1241 LocationRecorder extendee_location(
1242 location, FieldDescriptorProto::kExtendeeFieldNumber);
1243 extendee_location.StartAt(extendee_start);
1244 extendee_location.EndAt(extendee_end);
1245
1246 if (is_first) {
1247 extendee_location.RecordLegacyLocation(
1248 field, DescriptorPool::ErrorCollector::EXTENDEE);
1249 is_first = false;
1250 }
1251 }
1252
1253 field->set_extendee(extendee);
1254
1255 if (!ParseMessageField(field, messages, parent_location,
1256 location_field_number_for_nested_type,
1257 location,
1258 containing_file)) {
1259 // This statement failed to parse. Skip it, but keep looping to parse
1260 // other statements.
1261 SkipStatement();
1262 }
1263 } while (!TryConsumeEndOfDeclaration("}", NULL));
1264
1265 return true;
1266 }
1267
ParseOneof(OneofDescriptorProto * oneof_decl,DescriptorProto * containing_type,int oneof_index,const LocationRecorder & oneof_location,const LocationRecorder & containing_type_location,const FileDescriptorProto * containing_file)1268 bool Parser::ParseOneof(OneofDescriptorProto* oneof_decl,
1269 DescriptorProto* containing_type,
1270 int oneof_index,
1271 const LocationRecorder& oneof_location,
1272 const LocationRecorder& containing_type_location,
1273 const FileDescriptorProto* containing_file) {
1274 DO(Consume("oneof"));
1275
1276 {
1277 LocationRecorder name_location(oneof_location,
1278 OneofDescriptorProto::kNameFieldNumber);
1279 DO(ConsumeIdentifier(oneof_decl->mutable_name(), "Expected oneof name."));
1280 }
1281
1282 DO(ConsumeEndOfDeclaration("{", &oneof_location));
1283
1284 do {
1285 if (AtEnd()) {
1286 AddError("Reached end of input in oneof definition (missing '}').");
1287 return false;
1288 }
1289
1290 // Print a nice error if the user accidentally tries to place a label
1291 // on an individual member of a oneof.
1292 if (LookingAt("required") ||
1293 LookingAt("optional") ||
1294 LookingAt("repeated")) {
1295 AddError("Fields in oneofs must not have labels (required / optional "
1296 "/ repeated).");
1297 // We can continue parsing here because we understand what the user
1298 // meant. The error report will still make parsing fail overall.
1299 input_->Next();
1300 }
1301
1302 LocationRecorder field_location(containing_type_location,
1303 DescriptorProto::kFieldFieldNumber,
1304 containing_type->field_size());
1305
1306 FieldDescriptorProto* field = containing_type->add_field();
1307 field->set_label(FieldDescriptorProto::LABEL_OPTIONAL);
1308 field->set_oneof_index(oneof_index);
1309
1310 if (!ParseMessageFieldNoLabel(field,
1311 containing_type->mutable_nested_type(),
1312 containing_type_location,
1313 DescriptorProto::kNestedTypeFieldNumber,
1314 field_location,
1315 containing_file)) {
1316 // This statement failed to parse. Skip it, but keep looping to parse
1317 // other statements.
1318 SkipStatement();
1319 }
1320 } while (!TryConsumeEndOfDeclaration("}", NULL));
1321
1322 return true;
1323 }
1324
1325 // -------------------------------------------------------------------
1326 // Enums
1327
ParseEnumDefinition(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)1328 bool Parser::ParseEnumDefinition(EnumDescriptorProto* enum_type,
1329 const LocationRecorder& enum_location,
1330 const FileDescriptorProto* containing_file) {
1331 DO(Consume("enum"));
1332
1333 {
1334 LocationRecorder location(enum_location,
1335 EnumDescriptorProto::kNameFieldNumber);
1336 location.RecordLegacyLocation(
1337 enum_type, DescriptorPool::ErrorCollector::NAME);
1338 DO(ConsumeIdentifier(enum_type->mutable_name(), "Expected enum name."));
1339 }
1340
1341 DO(ParseEnumBlock(enum_type, enum_location, containing_file));
1342 return true;
1343 }
1344
ParseEnumBlock(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)1345 bool Parser::ParseEnumBlock(EnumDescriptorProto* enum_type,
1346 const LocationRecorder& enum_location,
1347 const FileDescriptorProto* containing_file) {
1348 DO(ConsumeEndOfDeclaration("{", &enum_location));
1349
1350 while (!TryConsumeEndOfDeclaration("}", NULL)) {
1351 if (AtEnd()) {
1352 AddError("Reached end of input in enum definition (missing '}').");
1353 return false;
1354 }
1355
1356 if (!ParseEnumStatement(enum_type, enum_location, containing_file)) {
1357 // This statement failed to parse. Skip it, but keep looping to parse
1358 // other statements.
1359 SkipStatement();
1360 }
1361 }
1362
1363 return true;
1364 }
1365
ParseEnumStatement(EnumDescriptorProto * enum_type,const LocationRecorder & enum_location,const FileDescriptorProto * containing_file)1366 bool Parser::ParseEnumStatement(EnumDescriptorProto* enum_type,
1367 const LocationRecorder& enum_location,
1368 const FileDescriptorProto* containing_file) {
1369 if (TryConsumeEndOfDeclaration(";", NULL)) {
1370 // empty statement; ignore
1371 return true;
1372 } else if (LookingAt("option")) {
1373 LocationRecorder location(enum_location,
1374 EnumDescriptorProto::kOptionsFieldNumber);
1375 return ParseOption(enum_type->mutable_options(), location,
1376 containing_file, OPTION_STATEMENT);
1377 } else {
1378 LocationRecorder location(enum_location,
1379 EnumDescriptorProto::kValueFieldNumber, enum_type->value_size());
1380 return ParseEnumConstant(enum_type->add_value(), location, containing_file);
1381 }
1382 }
1383
ParseEnumConstant(EnumValueDescriptorProto * enum_value,const LocationRecorder & enum_value_location,const FileDescriptorProto * containing_file)1384 bool Parser::ParseEnumConstant(EnumValueDescriptorProto* enum_value,
1385 const LocationRecorder& enum_value_location,
1386 const FileDescriptorProto* containing_file) {
1387 // Parse name.
1388 {
1389 LocationRecorder location(enum_value_location,
1390 EnumValueDescriptorProto::kNameFieldNumber);
1391 location.RecordLegacyLocation(
1392 enum_value, DescriptorPool::ErrorCollector::NAME);
1393 DO(ConsumeIdentifier(enum_value->mutable_name(),
1394 "Expected enum constant name."));
1395 }
1396
1397 DO(Consume("=", "Missing numeric value for enum constant."));
1398
1399 // Parse value.
1400 {
1401 LocationRecorder location(
1402 enum_value_location, EnumValueDescriptorProto::kNumberFieldNumber);
1403 location.RecordLegacyLocation(
1404 enum_value, DescriptorPool::ErrorCollector::NUMBER);
1405
1406 int number;
1407 DO(ConsumeSignedInteger(&number, "Expected integer."));
1408 enum_value->set_number(number);
1409 }
1410
1411 DO(ParseEnumConstantOptions(enum_value, enum_value_location,
1412 containing_file));
1413
1414 DO(ConsumeEndOfDeclaration(";", &enum_value_location));
1415
1416 return true;
1417 }
1418
ParseEnumConstantOptions(EnumValueDescriptorProto * value,const LocationRecorder & enum_value_location,const FileDescriptorProto * containing_file)1419 bool Parser::ParseEnumConstantOptions(
1420 EnumValueDescriptorProto* value,
1421 const LocationRecorder& enum_value_location,
1422 const FileDescriptorProto* containing_file) {
1423 if (!LookingAt("[")) return true;
1424
1425 LocationRecorder location(
1426 enum_value_location, EnumValueDescriptorProto::kOptionsFieldNumber);
1427
1428 DO(Consume("["));
1429
1430 do {
1431 DO(ParseOption(value->mutable_options(), location,
1432 containing_file, OPTION_ASSIGNMENT));
1433 } while (TryConsume(","));
1434
1435 DO(Consume("]"));
1436 return true;
1437 }
1438
1439 // -------------------------------------------------------------------
1440 // Services
1441
ParseServiceDefinition(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)1442 bool Parser::ParseServiceDefinition(
1443 ServiceDescriptorProto* service,
1444 const LocationRecorder& service_location,
1445 const FileDescriptorProto* containing_file) {
1446 DO(Consume("service"));
1447
1448 {
1449 LocationRecorder location(service_location,
1450 ServiceDescriptorProto::kNameFieldNumber);
1451 location.RecordLegacyLocation(
1452 service, DescriptorPool::ErrorCollector::NAME);
1453 DO(ConsumeIdentifier(service->mutable_name(), "Expected service name."));
1454 }
1455
1456 DO(ParseServiceBlock(service, service_location, containing_file));
1457 return true;
1458 }
1459
ParseServiceBlock(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)1460 bool Parser::ParseServiceBlock(ServiceDescriptorProto* service,
1461 const LocationRecorder& service_location,
1462 const FileDescriptorProto* containing_file) {
1463 DO(ConsumeEndOfDeclaration("{", &service_location));
1464
1465 while (!TryConsumeEndOfDeclaration("}", NULL)) {
1466 if (AtEnd()) {
1467 AddError("Reached end of input in service definition (missing '}').");
1468 return false;
1469 }
1470
1471 if (!ParseServiceStatement(service, service_location, containing_file)) {
1472 // This statement failed to parse. Skip it, but keep looping to parse
1473 // other statements.
1474 SkipStatement();
1475 }
1476 }
1477
1478 return true;
1479 }
1480
ParseServiceStatement(ServiceDescriptorProto * service,const LocationRecorder & service_location,const FileDescriptorProto * containing_file)1481 bool Parser::ParseServiceStatement(ServiceDescriptorProto* service,
1482 const LocationRecorder& service_location,
1483 const FileDescriptorProto* containing_file) {
1484 if (TryConsumeEndOfDeclaration(";", NULL)) {
1485 // empty statement; ignore
1486 return true;
1487 } else if (LookingAt("option")) {
1488 LocationRecorder location(
1489 service_location, ServiceDescriptorProto::kOptionsFieldNumber);
1490 return ParseOption(service->mutable_options(), location,
1491 containing_file, OPTION_STATEMENT);
1492 } else {
1493 LocationRecorder location(service_location,
1494 ServiceDescriptorProto::kMethodFieldNumber, service->method_size());
1495 return ParseServiceMethod(service->add_method(), location, containing_file);
1496 }
1497 }
1498
ParseServiceMethod(MethodDescriptorProto * method,const LocationRecorder & method_location,const FileDescriptorProto * containing_file)1499 bool Parser::ParseServiceMethod(MethodDescriptorProto* method,
1500 const LocationRecorder& method_location,
1501 const FileDescriptorProto* containing_file) {
1502 DO(Consume("rpc"));
1503
1504 {
1505 LocationRecorder location(method_location,
1506 MethodDescriptorProto::kNameFieldNumber);
1507 location.RecordLegacyLocation(
1508 method, DescriptorPool::ErrorCollector::NAME);
1509 DO(ConsumeIdentifier(method->mutable_name(), "Expected method name."));
1510 }
1511
1512 // Parse input type.
1513 DO(Consume("("));
1514 {
1515 LocationRecorder location(method_location,
1516 MethodDescriptorProto::kInputTypeFieldNumber);
1517 location.RecordLegacyLocation(
1518 method, DescriptorPool::ErrorCollector::INPUT_TYPE);
1519 DO(ParseUserDefinedType(method->mutable_input_type()));
1520 }
1521 DO(Consume(")"));
1522
1523 // Parse output type.
1524 DO(Consume("returns"));
1525 DO(Consume("("));
1526 {
1527 LocationRecorder location(method_location,
1528 MethodDescriptorProto::kOutputTypeFieldNumber);
1529 location.RecordLegacyLocation(
1530 method, DescriptorPool::ErrorCollector::OUTPUT_TYPE);
1531 DO(ParseUserDefinedType(method->mutable_output_type()));
1532 }
1533 DO(Consume(")"));
1534
1535 if (LookingAt("{")) {
1536 // Options!
1537 DO(ParseOptions(method_location,
1538 containing_file,
1539 MethodDescriptorProto::kOptionsFieldNumber,
1540 method->mutable_options()));
1541 } else {
1542 DO(ConsumeEndOfDeclaration(";", &method_location));
1543 }
1544
1545 return true;
1546 }
1547
1548
ParseOptions(const LocationRecorder & parent_location,const FileDescriptorProto * containing_file,const int optionsFieldNumber,Message * mutable_options)1549 bool Parser::ParseOptions(const LocationRecorder& parent_location,
1550 const FileDescriptorProto* containing_file,
1551 const int optionsFieldNumber,
1552 Message* mutable_options) {
1553 // Options!
1554 ConsumeEndOfDeclaration("{", &parent_location);
1555 while (!TryConsumeEndOfDeclaration("}", NULL)) {
1556 if (AtEnd()) {
1557 AddError("Reached end of input in method options (missing '}').");
1558 return false;
1559 }
1560
1561 if (TryConsumeEndOfDeclaration(";", NULL)) {
1562 // empty statement; ignore
1563 } else {
1564 LocationRecorder location(parent_location,
1565 optionsFieldNumber);
1566 if (!ParseOption(mutable_options, location, containing_file,
1567 OPTION_STATEMENT)) {
1568 // This statement failed to parse. Skip it, but keep looping to
1569 // parse other statements.
1570 SkipStatement();
1571 }
1572 }
1573 }
1574
1575 return true;
1576 }
1577
1578 // -------------------------------------------------------------------
1579
ParseLabel(FieldDescriptorProto::Label * label,const FileDescriptorProto * containing_file)1580 bool Parser::ParseLabel(FieldDescriptorProto::Label* label,
1581 const FileDescriptorProto* containing_file) {
1582 if (TryConsume("optional")) {
1583 *label = FieldDescriptorProto::LABEL_OPTIONAL;
1584 return true;
1585 } else if (TryConsume("repeated")) {
1586 *label = FieldDescriptorProto::LABEL_REPEATED;
1587 return true;
1588 } else if (TryConsume("required")) {
1589 *label = FieldDescriptorProto::LABEL_REQUIRED;
1590 return true;
1591 } else {
1592 AddError("Expected \"required\", \"optional\", or \"repeated\".");
1593 // We can actually reasonably recover here by just assuming the user
1594 // forgot the label altogether.
1595 *label = FieldDescriptorProto::LABEL_OPTIONAL;
1596 return true;
1597 }
1598 }
1599
ParseType(FieldDescriptorProto::Type * type,string * type_name)1600 bool Parser::ParseType(FieldDescriptorProto::Type* type,
1601 string* type_name) {
1602 TypeNameMap::const_iterator iter = kTypeNames.find(input_->current().text);
1603 if (iter != kTypeNames.end()) {
1604 *type = iter->second;
1605 input_->Next();
1606 } else {
1607 DO(ParseUserDefinedType(type_name));
1608 }
1609 return true;
1610 }
1611
ParseUserDefinedType(string * type_name)1612 bool Parser::ParseUserDefinedType(string* type_name) {
1613 type_name->clear();
1614
1615 TypeNameMap::const_iterator iter = kTypeNames.find(input_->current().text);
1616 if (iter != kTypeNames.end()) {
1617 // Note: The only place enum types are allowed is for field types, but
1618 // if we are parsing a field type then we would not get here because
1619 // primitives are allowed there as well. So this error message doesn't
1620 // need to account for enums.
1621 AddError("Expected message type.");
1622
1623 // Pretend to accept this type so that we can go on parsing.
1624 *type_name = input_->current().text;
1625 input_->Next();
1626 return true;
1627 }
1628
1629 // A leading "." means the name is fully-qualified.
1630 if (TryConsume(".")) type_name->append(".");
1631
1632 // Consume the first part of the name.
1633 string identifier;
1634 DO(ConsumeIdentifier(&identifier, "Expected type name."));
1635 type_name->append(identifier);
1636
1637 // Consume more parts.
1638 while (TryConsume(".")) {
1639 type_name->append(".");
1640 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1641 type_name->append(identifier);
1642 }
1643
1644 return true;
1645 }
1646
1647 // ===================================================================
1648
ParsePackage(FileDescriptorProto * file,const LocationRecorder & root_location,const FileDescriptorProto * containing_file)1649 bool Parser::ParsePackage(FileDescriptorProto* file,
1650 const LocationRecorder& root_location,
1651 const FileDescriptorProto* containing_file) {
1652 if (file->has_package()) {
1653 AddError("Multiple package definitions.");
1654 // Don't append the new package to the old one. Just replace it. Not
1655 // that it really matters since this is an error anyway.
1656 file->clear_package();
1657 }
1658
1659 DO(Consume("package"));
1660
1661 {
1662 LocationRecorder location(root_location,
1663 FileDescriptorProto::kPackageFieldNumber);
1664 location.RecordLegacyLocation(file, DescriptorPool::ErrorCollector::NAME);
1665
1666 while (true) {
1667 string identifier;
1668 DO(ConsumeIdentifier(&identifier, "Expected identifier."));
1669 file->mutable_package()->append(identifier);
1670 if (!TryConsume(".")) break;
1671 file->mutable_package()->append(".");
1672 }
1673
1674 location.EndAt(input_->previous());
1675
1676 DO(ConsumeEndOfDeclaration(";", &location));
1677 }
1678
1679 return true;
1680 }
1681
ParseImport(RepeatedPtrField<string> * dependency,RepeatedField<int32> * public_dependency,RepeatedField<int32> * weak_dependency,const LocationRecorder & root_location,const FileDescriptorProto * containing_file)1682 bool Parser::ParseImport(RepeatedPtrField<string>* dependency,
1683 RepeatedField<int32>* public_dependency,
1684 RepeatedField<int32>* weak_dependency,
1685 const LocationRecorder& root_location,
1686 const FileDescriptorProto* containing_file) {
1687 DO(Consume("import"));
1688 if (LookingAt("public")) {
1689 LocationRecorder location(
1690 root_location, FileDescriptorProto::kPublicDependencyFieldNumber,
1691 public_dependency->size());
1692 DO(Consume("public"));
1693 *public_dependency->Add() = dependency->size();
1694 } else if (LookingAt("weak")) {
1695 LocationRecorder location(
1696 root_location, FileDescriptorProto::kWeakDependencyFieldNumber,
1697 weak_dependency->size());
1698 DO(Consume("weak"));
1699 *weak_dependency->Add() = dependency->size();
1700 }
1701 {
1702 LocationRecorder location(root_location,
1703 FileDescriptorProto::kDependencyFieldNumber,
1704 dependency->size());
1705 DO(ConsumeString(dependency->Add(),
1706 "Expected a string naming the file to import."));
1707
1708 location.EndAt(input_->previous());
1709
1710 DO(ConsumeEndOfDeclaration(";", &location));
1711 }
1712 return true;
1713 }
1714
1715 // ===================================================================
1716
SourceLocationTable()1717 SourceLocationTable::SourceLocationTable() {}
~SourceLocationTable()1718 SourceLocationTable::~SourceLocationTable() {}
1719
Find(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,int * line,int * column) const1720 bool SourceLocationTable::Find(
1721 const Message* descriptor,
1722 DescriptorPool::ErrorCollector::ErrorLocation location,
1723 int* line, int* column) const {
1724 const pair<int, int>* result =
1725 FindOrNull(location_map_, make_pair(descriptor, location));
1726 if (result == NULL) {
1727 *line = -1;
1728 *column = 0;
1729 return false;
1730 } else {
1731 *line = result->first;
1732 *column = result->second;
1733 return true;
1734 }
1735 }
1736
Add(const Message * descriptor,DescriptorPool::ErrorCollector::ErrorLocation location,int line,int column)1737 void SourceLocationTable::Add(
1738 const Message* descriptor,
1739 DescriptorPool::ErrorCollector::ErrorLocation location,
1740 int line, int column) {
1741 location_map_[make_pair(descriptor, location)] = make_pair(line, column);
1742 }
1743
Clear()1744 void SourceLocationTable::Clear() {
1745 location_map_.clear();
1746 }
1747
1748 } // namespace compiler
1749 } // namespace protobuf
1750 } // namespace google
1751