1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 #ifdef _MSC_VER
32 #include <io.h>
33 #else
34 #include <unistd.h>
35 #endif
36 #include <climits>
37 #include <errno.h>
38 #include <fcntl.h>
39 #include <fstream>
40 #include <iostream>
41 #include <sstream>
42 #include <stdlib.h>
43 #include <vector>
44 
45 #include <google/protobuf/stubs/hash.h>
46 #include <google/protobuf/compiler/objectivec/objectivec_helpers.h>
47 #include <google/protobuf/io/coded_stream.h>
48 #include <google/protobuf/io/zero_copy_stream_impl.h>
49 #include <google/protobuf/descriptor.pb.h>
50 #include <google/protobuf/stubs/common.h>
51 #include <google/protobuf/stubs/strutil.h>
52 
53 // NOTE: src/google/protobuf/compiler/plugin.cc makes use of cerr for some
54 // error cases, so it seems to be ok to use as a back door for errors.
55 
56 namespace google {
57 namespace protobuf {
58 namespace compiler {
59 namespace objectivec {
60 
Options()61 Options::Options() {
62   // Default is the value of the env for the package prefixes.
63   const char* file_path = getenv("GPB_OBJC_EXPECTED_PACKAGE_PREFIXES");
64   if (file_path) {
65     expected_prefixes_path = file_path;
66   }
67 }
68 
69 namespace {
70 
MakeWordsMap(const char * const words[],size_t num_words)71 hash_set<string> MakeWordsMap(const char* const words[], size_t num_words) {
72   hash_set<string> result;
73   for (int i = 0; i < num_words; i++) {
74     result.insert(words[i]);
75   }
76   return result;
77 }
78 
79 const char* const kUpperSegmentsList[] = {"url", "http", "https"};
80 
81 hash_set<string> kUpperSegments =
82     MakeWordsMap(kUpperSegmentsList, GOOGLE_ARRAYSIZE(kUpperSegmentsList));
83 
// Returns true when |c| is a line feed or a carriage return.
bool ascii_isnewline(char c) {
  switch (c) {
    case '\n':
    case '\r':
      return true;
    default:
      return false;
  }
}
87 
88 // Internal helper for name handing.
89 // Do not expose this outside of helpers, stick to having functions for specific
90 // cases (ClassName(), FieldName()), so there is always consistent suffix rules.
UnderscoresToCamelCase(const string & input,bool first_capitalized)91 string UnderscoresToCamelCase(const string& input, bool first_capitalized) {
92   vector<string> values;
93   string current;
94 
95   bool last_char_was_number = false;
96   bool last_char_was_lower = false;
97   bool last_char_was_upper = false;
98   for (int i = 0; i < input.size(); i++) {
99     char c = input[i];
100     if (ascii_isdigit(c)) {
101       if (!last_char_was_number) {
102         values.push_back(current);
103         current = "";
104       }
105       current += c;
106       last_char_was_number = last_char_was_lower = last_char_was_upper = false;
107       last_char_was_number = true;
108     } else if (ascii_islower(c)) {
109       // lowercase letter can follow a lowercase or uppercase letter
110       if (!last_char_was_lower && !last_char_was_upper) {
111         values.push_back(current);
112         current = "";
113       }
114       current += c;  // already lower
115       last_char_was_number = last_char_was_lower = last_char_was_upper = false;
116       last_char_was_lower = true;
117     } else if (ascii_isupper(c)) {
118       if (!last_char_was_upper) {
119         values.push_back(current);
120         current = "";
121       }
122       current += ascii_tolower(c);
123       last_char_was_number = last_char_was_lower = last_char_was_upper = false;
124       last_char_was_upper = true;
125     } else {
126       last_char_was_number = last_char_was_lower = last_char_was_upper = false;
127     }
128   }
129   values.push_back(current);
130 
131   string result;
132   bool first_segment_forces_upper = false;
133   for (vector<string>::iterator i = values.begin(); i != values.end(); ++i) {
134     string value = *i;
135     bool all_upper = (kUpperSegments.count(value) > 0);
136     if (all_upper && (result.length() == 0)) {
137       first_segment_forces_upper = true;
138     }
139     for (int j = 0; j < value.length(); j++) {
140       if (j == 0 || all_upper) {
141         value[j] = ascii_toupper(value[j]);
142       } else {
143         // Nothing, already in lower.
144       }
145     }
146     result += value;
147   }
148   if ((result.length() != 0) &&
149       !first_capitalized &&
150       !first_segment_forces_upper) {
151     result[0] = ascii_tolower(result[0]);
152   }
153   return result;
154 }
155 
// Identifiers that must not be emitted verbatim into generated Objective-C
// code. Entries are matched exactly (case-sensitive, whole string), so they
// must not carry stray whitespace.
const char* const kReservedWordList[] = {
    // Objective C "keywords" that aren't in C
    // From
    // http://stackoverflow.com/questions/1873630/reserved-keywords-in-objective-c
    "id", "_cmd", "super", "in", "out", "inout", "bycopy", "byref", "oneway",
    "self",

    // C/C++ keywords (Incl C++ 0x11)
    // From http://en.cppreference.com/w/cpp/keywords
    "and", "and_eq", "alignas", "alignof", "asm", "auto", "bitand", "bitor",
    "bool", "break", "case", "catch", "char", "char16_t", "char32_t", "class",
    "compl", "const", "constexpr", "const_cast", "continue", "decltype",
    "default", "delete", "double", "dynamic_cast", "else", "enum", "explicit",
    "export", "extern", "false", "float", "for", "friend", "goto", "if",
    "inline", "int", "long", "mutable", "namespace", "new", "noexcept", "not",
    "not_eq", "nullptr", "operator", "or", "or_eq", "private", "protected",
    "public", "register", "reinterpret_cast", "return", "short", "signed",
    "sizeof", "static", "static_assert", "static_cast", "struct", "switch",
    "template", "this", "thread_local", "throw", "true", "try", "typedef",
    "typeid", "typename", "union", "unsigned", "using", "virtual", "void",
    "volatile", "wchar_t", "while", "xor", "xor_eq",

    // C99 keywords
    // From
    // http://publib.boulder.ibm.com/infocenter/lnxpcomp/v8v101/index.jsp?topic=%2Fcom.ibm.xlcpp8l.doc%2Flanguage%2Fref%2Fkeyw.htm
    "restrict",

    // Objective-C Runtime typedefs
    // From <objc/runtime.h>
    "Category", "Ivar", "Method", "Protocol",

    // NSObject Methods
    // new is covered by C++ keywords.
    "description", "debugDescription", "finalize", "hash", "dealloc", "init",
    "class", "superclass", "retain", "release", "autorelease", "retainCount",
    "zone", "isProxy", "copy", "mutableCopy", "classForCoder",

    // GPBMessage Methods
    // Only need to add instance methods that may conflict with
    // method declared in protos. The main cases are methods
    // that take no arguments, or setFoo:/hasFoo: type methods.
    "clear", "data", "delimitedData", "descriptor", "extensionRegistry",
    "extensionsCurrentlySet", "isInitialized", "serializedSize",
    "sortedExtensionsInUse", "unknownFields",

    // MacTypes.h names
    "Fixed", "Fract", "Size", "LogicalAddress", "PhysicalAddress", "ByteCount",
    "ByteOffset", "Duration", "AbsoluteTime", "OptionBits", "ItemCount",
    "PBVersion", "ScriptCode", "LangCode", "RegionCode", "OSType",
    "ProcessSerialNumber", "Point", "Rect", "FixedPoint", "FixedRect", "Style",
    "StyleParameter", "StyleField", "TimeScale", "TimeBase", "TimeRecord",
};
208 
209 hash_set<string> kReservedWords =
210     MakeWordsMap(kReservedWordList, GOOGLE_ARRAYSIZE(kReservedWordList));
211 
SanitizeNameForObjC(const string & input,const string & extension)212 string SanitizeNameForObjC(const string& input, const string& extension) {
213   if (kReservedWords.count(input) > 0) {
214     return input + extension;
215   }
216   return input;
217 }
218 
NameFromFieldDescriptor(const FieldDescriptor * field)219 string NameFromFieldDescriptor(const FieldDescriptor* field) {
220   if (field->type() == FieldDescriptor::TYPE_GROUP) {
221     return field->message_type()->name();
222   } else {
223     return field->name();
224   }
225 }
226 
// Splits |path| at its last '/'. |directory| receives everything before that
// slash (empty when there is no slash) and |basename| everything after it
// (the whole path when there is no slash). Either output pointer may be NULL,
// in which case that part is not reported.
void PathSplit(const string& path, string* directory, string* basename) {
  const string::size_type slash_pos = path.rfind('/');
  const bool has_slash = (slash_pos != string::npos);
  if (directory != NULL) {
    *directory = has_slash ? path.substr(0, slash_pos) : string();
  }
  if (basename != NULL) {
    *basename = has_slash ? path.substr(slash_pos + 1) : path;
  }
}
245 
IsSpecialName(const string & name,const string * special_names,size_t count)246 bool IsSpecialName(const string& name, const string* special_names,
247                    size_t count) {
248   for (size_t i = 0; i < count; ++i) {
249     size_t length = special_names[i].length();
250     if (name.compare(0, length, special_names[i]) == 0) {
251       if (name.length() > length) {
252         // If name is longer than the retained_name[i] that it matches
253         // the next character must be not lower case (newton vs newTon vs
254         // new_ton).
255         return !ascii_islower(name[length]);
256       } else {
257         return true;
258       }
259     }
260   }
261   return false;
262 }
263 
264 }  // namespace
265 
266 // Escape C++ trigraphs by escaping question marks to \?
EscapeTrigraphs(const string & to_escape)267 string EscapeTrigraphs(const string& to_escape) {
268   return StringReplace(to_escape, "?", "\\?", true);
269 }
270 
StripProto(const string & filename)271 string StripProto(const string& filename) {
272   if (HasSuffixString(filename, ".protodevel")) {
273     return StripSuffixString(filename, ".protodevel");
274   } else {
275     return StripSuffixString(filename, ".proto");
276   }
277 }
278 
StringPieceTrimWhitespace(StringPiece * input)279 void StringPieceTrimWhitespace(StringPiece* input) {
280   while (!input->empty() && ascii_isspace(*input->data())) {
281     input->remove_prefix(1);
282   }
283   while (!input->empty() && ascii_isspace((*input)[input->length() - 1])) {
284     input->remove_suffix(1);
285   }
286 }
287 
288 
IsRetainedName(const string & name)289 bool IsRetainedName(const string& name) {
290   // List of prefixes from
291   // http://developer.apple.com/library/mac/#documentation/Cocoa/Conceptual/MemoryMgmt/Articles/mmRules.html
292   static const string retained_names[] = {"new", "alloc", "copy",
293                                           "mutableCopy"};
294   return IsSpecialName(name, retained_names,
295                        sizeof(retained_names) / sizeof(retained_names[0]));
296 }
297 
IsInitName(const string & name)298 bool IsInitName(const string& name) {
299   static const string init_names[] = {"init"};
300   return IsSpecialName(name, init_names,
301                        sizeof(init_names) / sizeof(init_names[0]));
302 }
303 
BaseFileName(const FileDescriptor * file)304 string BaseFileName(const FileDescriptor* file) {
305   string basename;
306   PathSplit(file->name(), NULL, &basename);
307   return basename;
308 }
309 
FilePath(const FileDescriptor * file)310 string FilePath(const FileDescriptor* file) {
311   string output;
312   string basename;
313   string directory;
314   PathSplit(file->name(), &directory, &basename);
315   if (directory.length() > 0) {
316     output = directory + "/";
317   }
318   basename = StripProto(basename);
319 
320   // CamelCase to be more ObjC friendly.
321   basename = UnderscoresToCamelCase(basename, true);
322 
323   output += basename;
324   return output;
325 }
326 
FilePathBasename(const FileDescriptor * file)327 string FilePathBasename(const FileDescriptor* file) {
328   string output;
329   string basename;
330   string directory;
331   PathSplit(file->name(), &directory, &basename);
332   basename = StripProto(basename);
333 
334   // CamelCase to be more ObjC friendly.
335   output = UnderscoresToCamelCase(basename, true);
336 
337   return output;
338 }
339 
FileClassPrefix(const FileDescriptor * file)340 string FileClassPrefix(const FileDescriptor* file) {
341   // Default is empty string, no need to check has_objc_class_prefix.
342   string result = file->options().objc_class_prefix();
343   return result;
344 }
345 
FileClassName(const FileDescriptor * file)346 string FileClassName(const FileDescriptor* file) {
347   string name = FileClassPrefix(file);
348   name += UnderscoresToCamelCase(StripProto(BaseFileName(file)), true);
349   name += "Root";
350   // There aren't really any reserved words that end in "Root", but playing
351   // it safe and checking.
352   return SanitizeNameForObjC(name, "_RootClass");
353 }
354 
ClassNameWorker(const Descriptor * descriptor)355 string ClassNameWorker(const Descriptor* descriptor) {
356   string name;
357   if (descriptor->containing_type() != NULL) {
358     name = ClassNameWorker(descriptor->containing_type());
359     name += "_";
360   }
361   return name + descriptor->name();
362 }
363 
ClassNameWorker(const EnumDescriptor * descriptor)364 string ClassNameWorker(const EnumDescriptor* descriptor) {
365   string name;
366   if (descriptor->containing_type() != NULL) {
367     name = ClassNameWorker(descriptor->containing_type());
368     name += "_";
369   }
370   return name + descriptor->name();
371 }
372 
ClassName(const Descriptor * descriptor)373 string ClassName(const Descriptor* descriptor) {
374   // 1. Message names are used as is (style calls for CamelCase, trust it).
375   // 2. Check for reserved word at the very end and then suffix things.
376   string prefix = FileClassPrefix(descriptor->file());
377   string name = ClassNameWorker(descriptor);
378   return SanitizeNameForObjC(prefix + name, "_Class");
379 }
380 
EnumName(const EnumDescriptor * descriptor)381 string EnumName(const EnumDescriptor* descriptor) {
382   // 1. Enum names are used as is (style calls for CamelCase, trust it).
383   // 2. Check for reserved word at the every end and then suffix things.
384   //      message Fixed {
385   //        message Size {...}
386   //        enum Mumble {...}
387   //      ...
388   //      }
389   //    yields Fixed_Class, Fixed_Size.
390   string name = FileClassPrefix(descriptor->file());
391   name += ClassNameWorker(descriptor);
392   return SanitizeNameForObjC(name, "_Enum");
393 }
394 
EnumValueName(const EnumValueDescriptor * descriptor)395 string EnumValueName(const EnumValueDescriptor* descriptor) {
396   // Because of the Switch enum compatibility, the name on the enum has to have
397   // the suffix handing, so it slightly diverges from how nested classes work.
398   //   enum Fixed {
399   //     FOO = 1
400   //   }
401   // yields Fixed_Enum and Fixed_Enum_Foo (not Fixed_Foo).
402   const string& class_name = EnumName(descriptor->type());
403   const string& value_str = UnderscoresToCamelCase(descriptor->name(), true);
404   const string& name = class_name + "_" + value_str;
405   // There aren't really any reserved words with an underscore and a leading
406   // capital letter, but playing it safe and checking.
407   return SanitizeNameForObjC(name, "_Value");
408 }
409 
EnumValueShortName(const EnumValueDescriptor * descriptor)410 string EnumValueShortName(const EnumValueDescriptor* descriptor) {
411   // Enum value names (EnumValueName above) are the enum name turned into
412   // a class name and then the value name is CamelCased and concatenated; the
413   // whole thing then gets sanitized for reserved words.
414   // The "short name" is intended to be the final leaf, the value name; but
415   // you can't simply send that off to sanitize as that could result in it
416   // getting modified when the full name didn't.  For example enum
417   // "StorageModes" has a value "retain".  So the full name is
418   // "StorageModes_Retain", but if we sanitize "retain" it would become
419   // "RetainValue".
420   // So the right way to get the short name is to take the full enum name
421   // and then strip off the enum name (leaving the value name and anything
422   // done by sanitize).
423   const string& class_name = EnumName(descriptor->type());
424   const string& long_name_prefix = class_name + "_";
425   const string& long_name = EnumValueName(descriptor);
426   return StripPrefixString(long_name, long_name_prefix);
427 }
428 
UnCamelCaseEnumShortName(const string & name)429 string UnCamelCaseEnumShortName(const string& name) {
430   string result;
431   for (int i = 0; i < name.size(); i++) {
432     char c = name[i];
433     if (i > 0 && ascii_isupper(c)) {
434       result += '_';
435     }
436     result += ascii_toupper(c);
437   }
438   return result;
439 }
440 
ExtensionMethodName(const FieldDescriptor * descriptor)441 string ExtensionMethodName(const FieldDescriptor* descriptor) {
442   const string& name = NameFromFieldDescriptor(descriptor);
443   const string& result = UnderscoresToCamelCase(name, false);
444   return SanitizeNameForObjC(result, "_Extension");
445 }
446 
FieldName(const FieldDescriptor * field)447 string FieldName(const FieldDescriptor* field) {
448   const string& name = NameFromFieldDescriptor(field);
449   string result = UnderscoresToCamelCase(name, false);
450   if (field->is_repeated() && !field->is_map()) {
451     // Add "Array" before do check for reserved worlds.
452     result += "Array";
453   } else {
454     // If it wasn't repeated, but ends in "Array", force on the _p suffix.
455     if (HasSuffixString(result, "Array")) {
456       result += "_p";
457     }
458   }
459   return SanitizeNameForObjC(result, "_p");
460 }
461 
FieldNameCapitalized(const FieldDescriptor * field)462 string FieldNameCapitalized(const FieldDescriptor* field) {
463   // Want the same suffix handling, so upcase the first letter of the other
464   // name.
465   string result = FieldName(field);
466   if (result.length() > 0) {
467     result[0] = ascii_toupper(result[0]);
468   }
469   return result;
470 }
471 
OneofEnumName(const OneofDescriptor * descriptor)472 string OneofEnumName(const OneofDescriptor* descriptor) {
473   const Descriptor* fieldDescriptor = descriptor->containing_type();
474   string name = ClassName(fieldDescriptor);
475   name += "_" + UnderscoresToCamelCase(descriptor->name(), true) + "_OneOfCase";
476   // No sanitize needed because the OS never has names that end in _OneOfCase.
477   return name;
478 }
479 
OneofName(const OneofDescriptor * descriptor)480 string OneofName(const OneofDescriptor* descriptor) {
481   string name = UnderscoresToCamelCase(descriptor->name(), false);
482   // No sanitize needed because it gets OneOfCase added and that shouldn't
483   // ever conflict.
484   return name;
485 }
486 
OneofNameCapitalized(const OneofDescriptor * descriptor)487 string OneofNameCapitalized(const OneofDescriptor* descriptor) {
488   // Use the common handling and then up-case the first letter.
489   string result = OneofName(descriptor);
490   if (result.length() > 0) {
491     result[0] = ascii_toupper(result[0]);
492   }
493   return result;
494 }
495 
UnCamelCaseFieldName(const string & name,const FieldDescriptor * field)496 string UnCamelCaseFieldName(const string& name, const FieldDescriptor* field) {
497   string worker(name);
498   if (HasSuffixString(worker, "_p")) {
499     worker = StripSuffixString(worker, "_p");
500   }
501   if (field->is_repeated() && HasSuffixString(worker, "Array")) {
502     worker = StripSuffixString(worker, "Array");
503   }
504   if (field->type() == FieldDescriptor::TYPE_GROUP) {
505     if (worker.length() > 0) {
506       if (ascii_islower(worker[0])) {
507         worker[0] = ascii_toupper(worker[0]);
508       }
509     }
510     return worker;
511   } else {
512     string result;
513     for (int i = 0; i < worker.size(); i++) {
514       char c = worker[i];
515       if (ascii_isupper(c)) {
516         if (i > 0) {
517           result += '_';
518         }
519         result += ascii_tolower(c);
520       } else {
521         result += c;
522       }
523     }
524     return result;
525   }
526 }
527 
GetCapitalizedType(const FieldDescriptor * field)528 string GetCapitalizedType(const FieldDescriptor* field) {
529   switch (field->type()) {
530     case FieldDescriptor::TYPE_INT32:
531       return "Int32";
532     case FieldDescriptor::TYPE_UINT32:
533       return "UInt32";
534     case FieldDescriptor::TYPE_SINT32:
535       return "SInt32";
536     case FieldDescriptor::TYPE_FIXED32:
537       return "Fixed32";
538     case FieldDescriptor::TYPE_SFIXED32:
539       return "SFixed32";
540     case FieldDescriptor::TYPE_INT64:
541       return "Int64";
542     case FieldDescriptor::TYPE_UINT64:
543       return "UInt64";
544     case FieldDescriptor::TYPE_SINT64:
545       return "SInt64";
546     case FieldDescriptor::TYPE_FIXED64:
547       return "Fixed64";
548     case FieldDescriptor::TYPE_SFIXED64:
549       return "SFixed64";
550     case FieldDescriptor::TYPE_FLOAT:
551       return "Float";
552     case FieldDescriptor::TYPE_DOUBLE:
553       return "Double";
554     case FieldDescriptor::TYPE_BOOL:
555       return "Bool";
556     case FieldDescriptor::TYPE_STRING:
557       return "String";
558     case FieldDescriptor::TYPE_BYTES:
559       return "Bytes";
560     case FieldDescriptor::TYPE_ENUM:
561       return "Enum";
562     case FieldDescriptor::TYPE_GROUP:
563       return "Group";
564     case FieldDescriptor::TYPE_MESSAGE:
565       return "Message";
566   }
567 
568   // Some compilers report reaching end of function even though all cases of
569   // the enum are handed in the switch.
570   GOOGLE_LOG(FATAL) << "Can't get here.";
571   return NULL;
572 }
573 
GetObjectiveCType(FieldDescriptor::Type field_type)574 ObjectiveCType GetObjectiveCType(FieldDescriptor::Type field_type) {
575   switch (field_type) {
576     case FieldDescriptor::TYPE_INT32:
577     case FieldDescriptor::TYPE_SINT32:
578     case FieldDescriptor::TYPE_SFIXED32:
579       return OBJECTIVECTYPE_INT32;
580 
581     case FieldDescriptor::TYPE_UINT32:
582     case FieldDescriptor::TYPE_FIXED32:
583       return OBJECTIVECTYPE_UINT32;
584 
585     case FieldDescriptor::TYPE_INT64:
586     case FieldDescriptor::TYPE_SINT64:
587     case FieldDescriptor::TYPE_SFIXED64:
588       return OBJECTIVECTYPE_INT64;
589 
590     case FieldDescriptor::TYPE_UINT64:
591     case FieldDescriptor::TYPE_FIXED64:
592       return OBJECTIVECTYPE_UINT64;
593 
594     case FieldDescriptor::TYPE_FLOAT:
595       return OBJECTIVECTYPE_FLOAT;
596 
597     case FieldDescriptor::TYPE_DOUBLE:
598       return OBJECTIVECTYPE_DOUBLE;
599 
600     case FieldDescriptor::TYPE_BOOL:
601       return OBJECTIVECTYPE_BOOLEAN;
602 
603     case FieldDescriptor::TYPE_STRING:
604       return OBJECTIVECTYPE_STRING;
605 
606     case FieldDescriptor::TYPE_BYTES:
607       return OBJECTIVECTYPE_DATA;
608 
609     case FieldDescriptor::TYPE_ENUM:
610       return OBJECTIVECTYPE_ENUM;
611 
612     case FieldDescriptor::TYPE_GROUP:
613     case FieldDescriptor::TYPE_MESSAGE:
614       return OBJECTIVECTYPE_MESSAGE;
615   }
616 
617   // Some compilers report reaching end of function even though all cases of
618   // the enum are handed in the switch.
619   GOOGLE_LOG(FATAL) << "Can't get here.";
620   return OBJECTIVECTYPE_INT32;
621 }
622 
IsPrimitiveType(const FieldDescriptor * field)623 bool IsPrimitiveType(const FieldDescriptor* field) {
624   ObjectiveCType type = GetObjectiveCType(field);
625   switch (type) {
626     case OBJECTIVECTYPE_INT32:
627     case OBJECTIVECTYPE_UINT32:
628     case OBJECTIVECTYPE_INT64:
629     case OBJECTIVECTYPE_UINT64:
630     case OBJECTIVECTYPE_FLOAT:
631     case OBJECTIVECTYPE_DOUBLE:
632     case OBJECTIVECTYPE_BOOLEAN:
633     case OBJECTIVECTYPE_ENUM:
634       return true;
635       break;
636     default:
637       return false;
638   }
639 }
640 
IsReferenceType(const FieldDescriptor * field)641 bool IsReferenceType(const FieldDescriptor* field) {
642   return !IsPrimitiveType(field);
643 }
644 
// Converts the textual form of a floating point default into source text:
// "nan", "inf" and "-inf" become the NAN / INFINITY / -INFINITY macros. Any
// other value is returned as is, with an "f" appended when
// |add_float_suffix| is set and the text contains '.', 'e' or 'E'.
static string HandleExtremeFloatingPoint(string val, bool add_float_suffix) {
  if (val == "nan") {
    return "NAN";
  }
  if (val == "inf") {
    return "INFINITY";
  }
  if (val == "-inf") {
    return "-INFINITY";
  }
  // float strings with ., e or E need to have f appended
  if (add_float_suffix && val.find_first_of(".eE") != string::npos) {
    val += "f";
  }
  return val;
}
662 
GPBGenericValueFieldName(const FieldDescriptor * field)663 string GPBGenericValueFieldName(const FieldDescriptor* field) {
664   // Returns the field within the GPBGenericValue union to use for the given
665   // field.
666   if (field->is_repeated()) {
667       return "valueMessage";
668   }
669   switch (field->cpp_type()) {
670     case FieldDescriptor::CPPTYPE_INT32:
671       return "valueInt32";
672     case FieldDescriptor::CPPTYPE_UINT32:
673       return "valueUInt32";
674     case FieldDescriptor::CPPTYPE_INT64:
675       return "valueInt64";
676     case FieldDescriptor::CPPTYPE_UINT64:
677       return "valueUInt64";
678     case FieldDescriptor::CPPTYPE_FLOAT:
679       return "valueFloat";
680     case FieldDescriptor::CPPTYPE_DOUBLE:
681       return "valueDouble";
682     case FieldDescriptor::CPPTYPE_BOOL:
683       return "valueBool";
684     case FieldDescriptor::CPPTYPE_STRING:
685       if (field->type() == FieldDescriptor::TYPE_BYTES) {
686         return "valueData";
687       } else {
688         return "valueString";
689       }
690     case FieldDescriptor::CPPTYPE_ENUM:
691       return "valueEnum";
692     case FieldDescriptor::CPPTYPE_MESSAGE:
693       return "valueMessage";
694   }
695 
696   // Some compilers report reaching end of function even though all cases of
697   // the enum are handed in the switch.
698   GOOGLE_LOG(FATAL) << "Can't get here.";
699   return NULL;
700 }
701 
702 
DefaultValue(const FieldDescriptor * field)703 string DefaultValue(const FieldDescriptor* field) {
704   // Repeated fields don't have defaults.
705   if (field->is_repeated()) {
706     return "nil";
707   }
708 
709   // Switch on cpp_type since we need to know which default_value_* method
710   // of FieldDescriptor to call.
711   switch (field->cpp_type()) {
712     case FieldDescriptor::CPPTYPE_INT32:
713       // gcc and llvm reject the decimal form of kint32min and kint64min.
714       if (field->default_value_int32() == INT_MIN) {
715         return "-0x80000000";
716       }
717       return SimpleItoa(field->default_value_int32());
718     case FieldDescriptor::CPPTYPE_UINT32:
719       return SimpleItoa(field->default_value_uint32()) + "U";
720     case FieldDescriptor::CPPTYPE_INT64:
721       // gcc and llvm reject the decimal form of kint32min and kint64min.
722       if (field->default_value_int64() == LLONG_MIN) {
723         return "-0x8000000000000000LL";
724       }
725       return SimpleItoa(field->default_value_int64()) + "LL";
726     case FieldDescriptor::CPPTYPE_UINT64:
727       return SimpleItoa(field->default_value_uint64()) + "ULL";
728     case FieldDescriptor::CPPTYPE_DOUBLE:
729       return HandleExtremeFloatingPoint(
730           SimpleDtoa(field->default_value_double()), false);
731     case FieldDescriptor::CPPTYPE_FLOAT:
732       return HandleExtremeFloatingPoint(
733           SimpleFtoa(field->default_value_float()), true);
734     case FieldDescriptor::CPPTYPE_BOOL:
735       return field->default_value_bool() ? "YES" : "NO";
736     case FieldDescriptor::CPPTYPE_STRING: {
737       const bool has_default_value = field->has_default_value();
738       const string& default_string = field->default_value_string();
739       if (!has_default_value || default_string.length() == 0) {
740         // If the field is defined as being the empty string,
741         // then we will just assign to nil, as the empty string is the
742         // default for both strings and data.
743         return "nil";
744       }
745       if (field->type() == FieldDescriptor::TYPE_BYTES) {
746         // We want constant fields in our data structures so we can
747         // declare them as static. To achieve this we cheat and stuff
748         // a escaped c string (prefixed with a length) into the data
749         // field, and cast it to an (NSData*) so it will compile.
750         // The runtime library knows how to handle it.
751 
752         // Must convert to a standard byte order for packing length into
753         // a cstring.
754         uint32 length = ghtonl(default_string.length());
755         string bytes((const char*)&length, sizeof(length));
756         bytes.append(default_string);
757         return "(NSData*)\"" + EscapeTrigraphs(CEscape(bytes)) + "\"";
758       } else {
759         return "@\"" + EscapeTrigraphs(CEscape(default_string)) + "\"";
760       }
761     }
762     case FieldDescriptor::CPPTYPE_ENUM:
763       return EnumValueName(field->default_value_enum());
764     case FieldDescriptor::CPPTYPE_MESSAGE:
765       return "nil";
766   }
767 
768   // Some compilers report reaching end of function even though all cases of
769   // the enum are handed in the switch.
770   GOOGLE_LOG(FATAL) << "Can't get here.";
771   return NULL;
772 }
773 
HasNonZeroDefaultValue(const FieldDescriptor * field)774 bool HasNonZeroDefaultValue(const FieldDescriptor* field) {
775   // Repeated fields don't have defaults.
776   if (field->is_repeated()) {
777     return false;
778   }
779 
780   // As much as checking field->has_default_value() seems useful, it isn't
781   // because of enums. proto2 syntax allows the first item in an enum (the
782   // default) to be non zero. So checking field->has_default_value() would
783   // result in missing this non zero default.  See MessageWithOneBasedEnum in
784   // objectivec/Tests/unittest_objc.proto for a test Message to confirm this.
785 
786   // Some proto file set the default to the zero value, so make sure the value
787   // isn't the zero case.
788   switch (field->cpp_type()) {
789     case FieldDescriptor::CPPTYPE_INT32:
790       return field->default_value_int32() != 0;
791     case FieldDescriptor::CPPTYPE_UINT32:
792       return field->default_value_uint32() != 0U;
793     case FieldDescriptor::CPPTYPE_INT64:
794       return field->default_value_int64() != 0LL;
795     case FieldDescriptor::CPPTYPE_UINT64:
796       return field->default_value_uint64() != 0ULL;
797     case FieldDescriptor::CPPTYPE_DOUBLE:
798       return field->default_value_double() != 0.0;
799     case FieldDescriptor::CPPTYPE_FLOAT:
800       return field->default_value_float() != 0.0f;
801     case FieldDescriptor::CPPTYPE_BOOL:
802       return field->default_value_bool();
803     case FieldDescriptor::CPPTYPE_STRING: {
804       const string& default_string = field->default_value_string();
805       return default_string.length() != 0;
806     }
807     case FieldDescriptor::CPPTYPE_ENUM:
808       return field->default_value_enum()->number() != 0;
809     case FieldDescriptor::CPPTYPE_MESSAGE:
810       return false;
811   }
812 
813   // Some compilers report reaching end of function even though all cases of
814   // the enum are handed in the switch.
815   GOOGLE_LOG(FATAL) << "Can't get here.";
816   return false;
817 }
818 
// Joins |strings| with " | " so the result can be emitted as an OR-ed flags
// expression. An empty list produces the literal "0".
string BuildFlagsString(const vector<string>& strings) {
  if (strings.empty()) {
    return "0";
  }
  string joined;
  for (vector<string>::const_iterator flag = strings.begin();
       flag != strings.end(); ++flag) {
    // Separator goes between entries only, never before the first one.
    if (flag != strings.begin()) {
      joined += " | ";
    }
    joined += *flag;
  }
  return joined;
}
832 
BuildCommentsString(const SourceLocation & location)833 string BuildCommentsString(const SourceLocation& location) {
834   const string& comments = location.leading_comments.empty()
835                                ? location.trailing_comments
836                                : location.leading_comments;
837   vector<string> lines;
838   SplitStringAllowEmpty(comments, "\n", &lines);
839   while (!lines.empty() && lines.back().empty()) {
840     lines.pop_back();
841   }
842   string prefix("///");
843   string suffix("\n");
844   string final_comments;
845   for (int i = 0; i < lines.size(); i++) {
846     // HeaderDoc uses '\' and '@' for markers; escape them.
847     const string line = StringReplace(lines[i], "\\", "\\\\", true);
848     final_comments +=
849         prefix + StringReplace(line, "@", "\\@", true) + suffix;
850   }
851   return final_comments;
852 }
853 
// Framework name used for the protobuf library itself.
//
// Whether this should become a generator option, for folks that don't use
// CocoaPods but do want to put the library in a framework, is an interesting
// question. The problem is that it would mean changing sources shipped with
// the library to actually use a different value, so it isn't as simple as
// adding an option.
const char* const ProtobufLibraryFrameworkName = "Protobuf";
859 
// Builds the symbol "GPB_USE_<FRAMEWORK_NAME>_FRAMEWORK_IMPORTS", where the
// middle part is |framework_name| passed through ToUpper().
string ProtobufFrameworkImportSymbol(const string& framework_name) {
  string symbol("GPB_USE_");
  symbol.append(ToUpper(framework_name));
  symbol.append("_FRAMEWORK_IMPORTS");
  return symbol;
}
867 
IsProtobufLibraryBundledProtoFile(const FileDescriptor * file)868 bool IsProtobufLibraryBundledProtoFile(const FileDescriptor* file) {
869   // We don't check the name prefix or proto package because some files
870   // (descriptor.proto), aren't shipped generated by the library, so this
871   // seems to be the safest way to only catch the ones shipped.
872   const string name = file->name();
873   if (name == "google/protobuf/any.proto" ||
874       name == "google/protobuf/api.proto" ||
875       name == "google/protobuf/duration.proto" ||
876       name == "google/protobuf/empty.proto" ||
877       name == "google/protobuf/field_mask.proto" ||
878       name == "google/protobuf/source_context.proto" ||
879       name == "google/protobuf/struct.proto" ||
880       name == "google/protobuf/timestamp.proto" ||
881       name == "google/protobuf/type.proto" ||
882       name == "google/protobuf/wrappers.proto") {
883     return true;
884   }
885   return false;
886 }
887 
ReadLine(StringPiece * input,StringPiece * line)888 bool ReadLine(StringPiece* input, StringPiece* line) {
889   for (int len = 0; len < input->size(); ++len) {
890     if (ascii_isnewline((*input)[len])) {
891       *line = StringPiece(input->data(), len);
892       ++len;  // advance over the newline
893       *input = StringPiece(input->data() + len, input->size() - len);
894       return true;
895     }
896   }
897   return false;  // Ran out of input with no newline.
898 }
899 
RemoveComment(StringPiece * input)900 void RemoveComment(StringPiece* input) {
901   int offset = input->find('#');
902   if (offset != StringPiece::npos) {
903     input->remove_suffix(input->length() - offset);
904   }
905 }
906 
907 namespace {
908 
909 class ExpectedPrefixesCollector : public LineConsumer {
910  public:
ExpectedPrefixesCollector(map<string,string> * inout_package_to_prefix_map)911   ExpectedPrefixesCollector(map<string, string>* inout_package_to_prefix_map)
912       : prefix_map_(inout_package_to_prefix_map) {}
913 
914   virtual bool ConsumeLine(const StringPiece& line, string* out_error);
915 
916  private:
917   map<string, string>* prefix_map_;
918 };
919 
ConsumeLine(const StringPiece & line,string * out_error)920 bool ExpectedPrefixesCollector::ConsumeLine(
921     const StringPiece& line, string* out_error) {
922   int offset = line.find('=');
923   if (offset == StringPiece::npos) {
924     *out_error =
925         string("Expected prefixes file line without equal sign: '") +
926         line.ToString() + "'.";
927     return false;
928   }
929   StringPiece package(line, 0, offset);
930   StringPiece prefix(line, offset + 1, line.length() - offset - 1);
931   StringPieceTrimWhitespace(&package);
932   StringPieceTrimWhitespace(&prefix);
933   // Don't really worry about error checking the package/prefix for
934   // being valid.  Assume the file is validated when it is created/edited.
935   (*prefix_map_)[package.ToString()] = prefix.ToString();
936   return true;
937 }
938 
LoadExpectedPackagePrefixes(const Options & generation_options,map<string,string> * prefix_map,string * out_error)939 bool LoadExpectedPackagePrefixes(const Options &generation_options,
940                                  map<string, string>* prefix_map,
941                                  string* out_error) {
942   if (generation_options.expected_prefixes_path.empty()) {
943     return true;
944   }
945 
946   ExpectedPrefixesCollector collector(prefix_map);
947   return ParseSimpleFile(
948       generation_options.expected_prefixes_path, &collector, out_error);
949 }
950 
951 }  // namespace
952 
ValidateObjCClassPrefix(const FileDescriptor * file,const Options & generation_options,string * out_error)953 bool ValidateObjCClassPrefix(const FileDescriptor* file,
954                              const Options& generation_options,
955                              string* out_error) {
956   const string prefix = file->options().objc_class_prefix();
957   const string package = file->package();
958 
959   // NOTE: src/google/protobuf/compiler/plugin.cc makes use of cerr for some
960   // error cases, so it seems to be ok to use as a back door for warnings.
961 
962   // Load any expected package prefixes to validate against those.
963   map<string, string> expected_package_prefixes;
964   if (!LoadExpectedPackagePrefixes(generation_options,
965                                    &expected_package_prefixes,
966                                    out_error)) {
967     return false;
968   }
969 
970   // Check: Error - See if there was an expected prefix for the package and
971   // report if it doesn't match (wrong or missing).
972   map<string, string>::iterator package_match =
973       expected_package_prefixes.find(package);
974   if (package_match != expected_package_prefixes.end()) {
975     // There was an entry, and...
976     if (package_match->second == prefix) {
977       // ...it matches.  All good, out of here!
978       return true;
979     } else {
980       // ...it didn't match!
981       *out_error = "error: Expected 'option objc_class_prefix = \"" +
982                    package_match->second + "\";' for package '" + package +
983                    "' in '" + file->name() + "'";
984       if (prefix.length()) {
985         *out_error += "; but found '" + prefix + "' instead";
986       }
987       *out_error += ".";
988       return false;
989     }
990   }
991 
992   // If there was no prefix option, we're done at this point.
993   if (prefix.length() == 0) {
994     // No prefix, nothing left to check.
995     return true;
996   }
997 
998   // Check: Error - Make sure the prefix wasn't expected for a different
999   // package (overlap is allowed, but it has to be listed as an expected
1000   // overlap).
1001   for (map<string, string>::iterator i = expected_package_prefixes.begin();
1002        i != expected_package_prefixes.end(); ++i) {
1003     if (i->second == prefix) {
1004       *out_error =
1005           "error: Found 'option objc_class_prefix = \"" + prefix +
1006           "\";' in '" + file->name() +
1007           "'; that prefix is already used for 'package " + i->first +
1008           ";'. It can only be reused by listing it in the expected file (" +
1009           generation_options.expected_prefixes_path + ").";
1010       return false;  // Only report first usage of the prefix.
1011     }
1012   }
1013 
1014   // Check: Warning - Make sure the prefix is is a reasonable value according
1015   // to Apple's rules (the checks above implicitly whitelist anything that
1016   // doesn't meet these rules).
1017   if (!ascii_isupper(prefix[0])) {
1018     cerr << endl
1019          << "protoc:0: warning: Invalid 'option objc_class_prefix = \""
1020          << prefix << "\";' in '" << file->name() << "';"
1021          << " it should start with a capital letter." << endl;
1022     cerr.flush();
1023   }
1024   if (prefix.length() < 3) {
1025     // Apple reserves 2 character prefixes for themselves. They do use some
1026     // 3 character prefixes, but they haven't updated the rules/docs.
1027     cerr << endl
1028          << "protoc:0: warning: Invalid 'option objc_class_prefix = \""
1029          << prefix << "\";' in '" << file->name() << "';"
1030          << " Apple recommends they should be at least 3 characters long."
1031          << endl;
1032     cerr.flush();
1033   }
1034 
1035   // Check: Warning - If the given package/prefix pair wasn't expected, issue a
1036   // warning issue a warning suggesting it gets added to the file.
1037   if (!expected_package_prefixes.empty()) {
1038     cerr << endl
1039          << "protoc:0: warning: Found unexpected 'option objc_class_prefix = \""
1040          << prefix << "\";' in '" << file->name() << "';"
1041          << " consider adding it to the expected prefixes file ("
1042          << generation_options.expected_prefixes_path << ")." << endl;
1043     cerr.flush();
1044   }
1045 
1046   return true;
1047 }
1048 
// Constructor: the body is empty, so members are default-initialized.
TextFormatDecodeData::TextFormatDecodeData() { }

// Destructor: the body is empty; there is no explicit cleanup.
TextFormatDecodeData::~TextFormatDecodeData() { }
1052 
AddString(int32 key,const string & input_for_decode,const string & desired_output)1053 void TextFormatDecodeData::AddString(int32 key,
1054                                      const string& input_for_decode,
1055                                      const string& desired_output) {
1056   for (vector<DataEntry>::const_iterator i = entries_.begin();
1057        i != entries_.end(); ++i) {
1058     if (i->first == key) {
1059       cerr << "error: duplicate key (" << key
1060            << ") making TextFormat data, input: \"" << input_for_decode
1061            << "\", desired: \"" << desired_output << "\"." << endl;
1062       cerr.flush();
1063       abort();
1064     }
1065   }
1066 
1067   const string& data = TextFormatDecodeData::DecodeDataForString(
1068       input_for_decode, desired_output);
1069   entries_.push_back(DataEntry(key, data));
1070 }
1071 
Data() const1072 string TextFormatDecodeData::Data() const {
1073   ostringstream data_stringstream;
1074 
1075   if (num_entries() > 0) {
1076     io::OstreamOutputStream data_outputstream(&data_stringstream);
1077     io::CodedOutputStream output_stream(&data_outputstream);
1078 
1079     output_stream.WriteVarint32(num_entries());
1080     for (vector<DataEntry>::const_iterator i = entries_.begin();
1081          i != entries_.end(); ++i) {
1082       output_stream.WriteVarint32(i->first);
1083       output_stream.WriteString(i->second);
1084     }
1085   }
1086 
1087   data_stringstream.flush();
1088   return data_stringstream.str();
1089 }
1090 
1091 namespace {
1092 
1093 // Helper to build up the decode data for a string.
1094 class DecodeDataBuilder {
1095  public:
DecodeDataBuilder()1096   DecodeDataBuilder() { Reset(); }
1097 
1098   bool AddCharacter(const char desired, const char input);
AddUnderscore()1099   void AddUnderscore() {
1100     Push();
1101     need_underscore_ = true;
1102   }
Finish()1103   string Finish() {
1104     Push();
1105     return decode_data_;
1106   }
1107 
1108  private:
1109   static const uint8 kAddUnderscore = 0x80;
1110 
1111   static const uint8 kOpAsIs        = 0x00;
1112   static const uint8 kOpFirstUpper  = 0x40;
1113   static const uint8 kOpFirstLower  = 0x20;
1114   static const uint8 kOpAllUpper    = 0x60;
1115 
1116   static const int kMaxSegmentLen     = 0x1f;
1117 
AddChar(const char desired)1118   void AddChar(const char desired) {
1119     ++segment_len_;
1120     is_all_upper_ &= ascii_isupper(desired);
1121   }
1122 
Push()1123   void Push() {
1124     uint8 op = (op_ | segment_len_);
1125     if (need_underscore_) op |= kAddUnderscore;
1126     if (op != 0) {
1127       decode_data_ += (char)op;
1128     }
1129     Reset();
1130   }
1131 
AddFirst(const char desired,const char input)1132   bool AddFirst(const char desired, const char input) {
1133     if (desired == input) {
1134       op_ = kOpAsIs;
1135     } else if (desired == ascii_toupper(input)) {
1136       op_ = kOpFirstUpper;
1137     } else if (desired == ascii_tolower(input)) {
1138       op_ = kOpFirstLower;
1139     } else {
1140       // Can't be transformed to match.
1141       return false;
1142     }
1143     AddChar(desired);
1144     return true;
1145   }
1146 
Reset()1147   void Reset() {
1148     need_underscore_ = false;
1149     op_ = 0;
1150     segment_len_ = 0;
1151     is_all_upper_ = true;
1152   }
1153 
1154   bool need_underscore_;
1155   bool is_all_upper_;
1156   uint8 op_;
1157   int segment_len_;
1158 
1159   string decode_data_;
1160 };
1161 
AddCharacter(const char desired,const char input)1162 bool DecodeDataBuilder::AddCharacter(const char desired, const char input) {
1163   // If we've hit the max size, push to start a new segment.
1164   if (segment_len_ == kMaxSegmentLen) {
1165     Push();
1166   }
1167   if (segment_len_ == 0) {
1168     return AddFirst(desired, input);
1169   }
1170 
1171   // Desired and input match...
1172   if (desired == input) {
1173     // If we aren't transforming it, or we're upper casing it and it is
1174     // supposed to be uppercase; just add it to the segment.
1175     if ((op_ != kOpAllUpper) || ascii_isupper(desired)) {
1176       AddChar(desired);
1177       return true;
1178     }
1179 
1180     // Add the current segment, and start the next one.
1181     Push();
1182     return AddFirst(desired, input);
1183   }
1184 
1185   // If we need to uppercase, and everything so far has been uppercase,
1186   // promote op to AllUpper.
1187   if ((desired == ascii_toupper(input)) && is_all_upper_) {
1188     op_ = kOpAllUpper;
1189     AddChar(desired);
1190     return true;
1191   }
1192 
1193   // Give up, push and start a new segment.
1194   Push();
1195   return AddFirst(desired, input);
1196 }
1197 
// Fallback for when decode data can't be generated: a directive carrying the
// raw string, framed as '\0' + |str| + '\0'.
string DirectDecodeString(const string& str) {
  string framed(1, '\0');  // Marker for full string.
  framed.append(str);
  framed.push_back('\0');  // End of string.
  return framed;
}
1207 
1208 }  // namespace
1209 
1210 // static
DecodeDataForString(const string & input_for_decode,const string & desired_output)1211 string TextFormatDecodeData::DecodeDataForString(const string& input_for_decode,
1212                                                  const string& desired_output) {
1213   if ((input_for_decode.size() == 0) || (desired_output.size() == 0)) {
1214     cerr << "error: got empty string for making TextFormat data, input: \""
1215          << input_for_decode << "\", desired: \"" << desired_output << "\"."
1216          << endl;
1217     cerr.flush();
1218     abort();
1219   }
1220   if ((input_for_decode.find('\0') != string::npos) ||
1221       (desired_output.find('\0') != string::npos)) {
1222     cerr << "error: got a null char in a string for making TextFormat data,"
1223          << " input: \"" << CEscape(input_for_decode) << "\", desired: \""
1224          << CEscape(desired_output) << "\"." << endl;
1225     cerr.flush();
1226     abort();
1227   }
1228 
1229   DecodeDataBuilder builder;
1230 
1231   // Walk the output building it from the input.
1232   int x = 0;
1233   for (int y = 0; y < desired_output.size(); y++) {
1234     const char d = desired_output[y];
1235     if (d == '_') {
1236       builder.AddUnderscore();
1237       continue;
1238     }
1239 
1240     if (x >= input_for_decode.size()) {
1241       // Out of input, no way to encode it, just return a full decode.
1242       return DirectDecodeString(desired_output);
1243     }
1244     if (builder.AddCharacter(d, input_for_decode[x])) {
1245       ++x;  // Consumed one input
1246     } else {
1247       // Couldn't transform for the next character, just return a full decode.
1248       return DirectDecodeString(desired_output);
1249     }
1250   }
1251 
1252   if (x != input_for_decode.size()) {
1253     // Extra input (suffix from name sanitizing?), just return a full decode.
1254     return DirectDecodeString(desired_output);
1255   }
1256 
1257   // Add the end marker.
1258   return builder.Finish() + (char)'\0';
1259 }
1260 
1261 namespace {
1262 
1263 class Parser {
1264  public:
Parser(LineConsumer * line_consumer)1265   Parser(LineConsumer* line_consumer)
1266       : line_consumer_(line_consumer), line_(0) {}
1267 
1268   // Parses a check of input, returning success/failure.
1269   bool ParseChunk(StringPiece chunk);
1270 
1271   // Should be called to finish parsing (after all input has been provided via
1272   // ParseChunk()).  Returns success/failure.
1273   bool Finish();
1274 
last_line() const1275   int last_line() const { return line_; }
error_str() const1276   string error_str() const { return error_str_; }
1277 
1278  private:
1279   bool ParseLoop();
1280 
1281   LineConsumer* line_consumer_;
1282   int line_;
1283   string error_str_;
1284   StringPiece p_;
1285   string leftover_;
1286 };
1287 
ParseChunk(StringPiece chunk)1288 bool Parser::ParseChunk(StringPiece chunk) {
1289   if (!leftover_.empty()) {
1290     chunk.AppendToString(&leftover_);
1291     p_ = StringPiece(leftover_);
1292   } else {
1293     p_ = chunk;
1294   }
1295   bool result = ParseLoop();
1296   if (p_.empty()) {
1297     leftover_.clear();
1298   } else {
1299     leftover_ = p_.ToString();
1300   }
1301   return result;
1302 }
1303 
Finish()1304 bool Parser::Finish() {
1305   if (leftover_.empty()) {
1306     return true;
1307   }
1308   // Force a newline onto the end to finish parsing.
1309   leftover_ += "\n";
1310   p_ = StringPiece(leftover_);
1311   if (!ParseLoop()) {
1312     return false;
1313   }
1314   return p_.empty();  // Everything used?
1315 }
1316 
ParseLoop()1317 bool Parser::ParseLoop() {
1318   StringPiece line;
1319   while (ReadLine(&p_, &line)) {
1320     ++line_;
1321     RemoveComment(&line);
1322     StringPieceTrimWhitespace(&line);
1323     if (line.size() == 0) {
1324       continue;  // Blank line.
1325     }
1326     if (!line_consumer_->ConsumeLine(line, &error_str_)) {
1327       return false;
1328     }
1329   }
1330   return true;
1331 }
1332 
1333 }  // namespace
1334 
// Constructor: intentionally empty.
LineConsumer::LineConsumer() {}

// Destructor: intentionally empty, defined out of line here.
LineConsumer::~LineConsumer() {}
1338 
ParseSimpleFile(const string & path,LineConsumer * line_consumer,string * out_error)1339 bool ParseSimpleFile(
1340     const string& path, LineConsumer* line_consumer, string* out_error) {
1341   int fd;
1342   do {
1343     fd = open(path.c_str(), O_RDONLY);
1344   } while (fd < 0 && errno == EINTR);
1345   if (fd < 0) {
1346     *out_error =
1347         string("error: Unable to open \"") + path + "\", " + strerror(errno);
1348     return false;
1349   }
1350   io::FileInputStream file_stream(fd);
1351   file_stream.SetCloseOnDelete(true);
1352 
1353   Parser parser(line_consumer);
1354   const void* buf;
1355   int buf_len;
1356   while (file_stream.Next(&buf, &buf_len)) {
1357     if (buf_len == 0) {
1358       continue;
1359     }
1360 
1361     if (!parser.ParseChunk(StringPiece(static_cast<const char*>(buf), buf_len))) {
1362       *out_error =
1363           string("error: ") + path +
1364           " Line " + SimpleItoa(parser.last_line()) + ", " + parser.error_str();
1365       return false;
1366     }
1367   }
1368   return parser.Finish();
1369 }
1370 
1371 
1372 }  // namespace objectivec
1373 }  // namespace compiler
1374 }  // namespace protobuf
1375 }  // namespace google
1376