1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 #ifndef _MSC_VER
32 #include <unistd.h>
33 #endif
34 #include <climits>
35 #include <errno.h>
36 #include <fcntl.h>
37 #include <fstream>
38 #include <iostream>
39 #include <sstream>
40 #include <stdlib.h>
41 #include <unordered_set>
42 #include <vector>
43 
44 #include <google/protobuf/compiler/objectivec/objectivec_helpers.h>
45 #include <google/protobuf/compiler/objectivec/objectivec_nsobject_methods.h>
46 #include <google/protobuf/descriptor.pb.h>
47 #include <google/protobuf/io/coded_stream.h>
48 #include <google/protobuf/io/printer.h>
49 #include <google/protobuf/io/zero_copy_stream_impl.h>
50 #include <google/protobuf/io/io_win32.h>
51 #include <google/protobuf/stubs/port.h>
52 #include <google/protobuf/stubs/strutil.h>
53 
54 // NOTE: src/google/protobuf/compiler/plugin.cc makes use of cerr for some
55 // error cases, so it seems to be ok to use as a back door for errors.
56 
57 namespace google {
58 namespace protobuf {
59 namespace compiler {
60 namespace objectivec {
61 
// <io.h> is transitively included in this file. Import the functions explicitly
// in this posix namespace to avoid ambiguous definition.
namespace posix {
#ifndef _WIN32
// Everywhere but Windows the global open() is used as is.
using ::open;
#else
// On Windows use the open() provided by protobuf's io::win32 layer.
using ::google::protobuf::io::win32::open;
#endif
}  // namespace posix
71 
Options()72 Options::Options() {
73   // Default is the value of the env for the package prefixes.
74   const char* file_path = getenv("GPB_OBJC_EXPECTED_PACKAGE_PREFIXES");
75   if (file_path) {
76     expected_prefixes_path = file_path;
77   }
78   const char* suppressions = getenv("GPB_OBJC_EXPECTED_PACKAGE_PREFIXES_SUPPRESSIONS");
79   if (suppressions) {
80     SplitStringUsing(suppressions, ";", &expected_prefixes_suppressions);
81   }
82 }
83 
84 namespace {
85 
// Builds a set containing the first `num_words` entries of `words`.
// Entries that repeat collapse into a single element; `num_words == 0` yields
// an empty set.
std::unordered_set<string> MakeWordsMap(const char* const words[], size_t num_words) {
  std::unordered_set<string> result;
  result.reserve(num_words);
  // The index is a size_t so it is compared against `num_words` without a
  // signed/unsigned conversion.
  for (size_t i = 0; i < num_words; ++i) {
    result.insert(words[i]);
  }
  return result;
}
93 
94 const char* const kUpperSegmentsList[] = {"url", "http", "https"};
95 
96 std::unordered_set<string> kUpperSegments =
97     MakeWordsMap(kUpperSegmentsList, GOOGLE_ARRAYSIZE(kUpperSegmentsList));
98 
// Returns true when `c` is a line feed ('\n') or a carriage return ('\r').
bool ascii_isnewline(char c) {
  switch (c) {
    case '\n':
    case '\r':
      return true;
    default:
      return false;
  }
}
102 
103 // Internal helper for name handing.
104 // Do not expose this outside of helpers, stick to having functions for specific
105 // cases (ClassName(), FieldName()), so there is always consistent suffix rules.
UnderscoresToCamelCase(const string & input,bool first_capitalized)106 string UnderscoresToCamelCase(const string& input, bool first_capitalized) {
107   std::vector<string> values;
108   string current;
109 
110   bool last_char_was_number = false;
111   bool last_char_was_lower = false;
112   bool last_char_was_upper = false;
113   for (int i = 0; i < input.size(); i++) {
114     char c = input[i];
115     if (ascii_isdigit(c)) {
116       if (!last_char_was_number) {
117         values.push_back(current);
118         current = "";
119       }
120       current += c;
121       last_char_was_number = last_char_was_lower = last_char_was_upper = false;
122       last_char_was_number = true;
123     } else if (ascii_islower(c)) {
124       // lowercase letter can follow a lowercase or uppercase letter
125       if (!last_char_was_lower && !last_char_was_upper) {
126         values.push_back(current);
127         current = "";
128       }
129       current += c;  // already lower
130       last_char_was_number = last_char_was_lower = last_char_was_upper = false;
131       last_char_was_lower = true;
132     } else if (ascii_isupper(c)) {
133       if (!last_char_was_upper) {
134         values.push_back(current);
135         current = "";
136       }
137       current += ascii_tolower(c);
138       last_char_was_number = last_char_was_lower = last_char_was_upper = false;
139       last_char_was_upper = true;
140     } else {
141       last_char_was_number = last_char_was_lower = last_char_was_upper = false;
142     }
143   }
144   values.push_back(current);
145 
146   string result;
147   bool first_segment_forces_upper = false;
148   for (std::vector<string>::iterator i = values.begin(); i != values.end(); ++i) {
149     string value = *i;
150     bool all_upper = (kUpperSegments.count(value) > 0);
151     if (all_upper && (result.length() == 0)) {
152       first_segment_forces_upper = true;
153     }
154     for (int j = 0; j < value.length(); j++) {
155       if (j == 0 || all_upper) {
156         value[j] = ascii_toupper(value[j]);
157       } else {
158         // Nothing, already in lower.
159       }
160     }
161     result += value;
162   }
163   if ((result.length() != 0) &&
164       !first_capitalized &&
165       !first_segment_forces_upper) {
166     result[0] = ascii_tolower(result[0]);
167   }
168   return result;
169 }
170 
// Names that must not be used verbatim as generated identifiers.
// SanitizeNameForObjC() looks names up in a set built from this list.
// Entries are matched exactly, so they must not carry stray whitespace.
const char* const kReservedWordList[] = {
  // Note NSObject Methods:
  // These are brought in from objectivec_nsobject_methods.h that is generated
  // using method_dump.sh. See kNSObjectMethods below.

  // Objective C "keywords" that aren't in C
  // From
  // http://stackoverflow.com/questions/1873630/reserved-keywords-in-objective-c
  // with some others added on.
  "id", "_cmd", "super", "in", "out", "inout", "bycopy", "byref", "oneway",
  "self", "instancetype", "nullable", "nonnull", "nil", "Nil",
  "YES", "NO", "weak",

  // C/C++ keywords (incl. C++11)
  // From http://en.cppreference.com/w/cpp/keywords
  "and", "and_eq", "alignas", "alignof", "asm", "auto", "bitand", "bitor",
  "bool", "break", "case", "catch", "char", "char16_t", "char32_t", "class",
  "compl", "const", "constexpr", "const_cast", "continue", "decltype",
  "default", "delete", "double", "dynamic_cast", "else", "enum", "explicit",
  "export", "extern", "false", "float", "for", "friend", "goto", "if",
  "inline", "int", "long", "mutable", "namespace", "new", "noexcept", "not",
  "not_eq", "nullptr", "operator", "or", "or_eq", "private", "protected",
  "public", "register", "reinterpret_cast", "return", "short", "signed",
  "sizeof", "static", "static_assert", "static_cast", "struct", "switch",
  "template", "this", "thread_local", "throw", "true", "try", "typedef",
  "typeid", "typename", "union", "unsigned", "using", "virtual", "void",
  "volatile", "wchar_t", "while", "xor", "xor_eq",

  // C99 keywords
  // From
  // http://publib.boulder.ibm.com/infocenter/lnxpcomp/v8v101/index.jsp?topic=%2Fcom.ibm.xlcpp8l.doc%2Flanguage%2Fref%2Fkeyw.htm
  "restrict",

  // GCC/Clang extension
  "typeof",

  // Not a keyword, but will break you
  "NULL",

  // Objective-C Runtime typedefs
  // From <objc/runtime.h>
  "Category", "Ivar", "Method", "Protocol",

  // GPBMessage Methods
  // Only need to add instance methods that may conflict with
  // method declared in protos. The main cases are methods
  // that take no arguments, or setFoo:/hasFoo: type methods.
  "clear", "data", "delimitedData", "descriptor", "extensionRegistry",
  "extensionsCurrentlySet", "initialized", "isInitialized", "serializedSize",
  "sortedExtensionsInUse", "unknownFields",

  // MacTypes.h names
  "Fixed", "Fract", "Size", "LogicalAddress", "PhysicalAddress", "ByteCount",
  "ByteOffset", "Duration", "AbsoluteTime", "OptionBits", "ItemCount",
  "PBVersion", "ScriptCode", "LangCode", "RegionCode", "OSType",
  "ProcessSerialNumber", "Point", "Rect", "FixedPoint", "FixedRect", "Style",
  "StyleParameter", "StyleField", "TimeScale", "TimeBase", "TimeRecord",
};
229 
// Returns true if input starts with __ or _[A-Z], which are reserved
// identifiers in C/C++. All calls should go through UnderscoresToCamelCase
// before getting here, but this verifies and allows for future expansion if we
// decide to redefine what a reserved C identifier is (for example the GNU list
// https://www.gnu.org/software/libc/manual/html_node/Reserved-Names.html )
//
// Two-character names ("__", "_A") count as reserved too. The uppercase test
// is an explicit ASCII range check so that it neither depends on the locale
// nor passes a possibly negative char to <cctype>.
bool IsReservedCIdentifier(const string& input) {
  if (input.length() < 2 || input[0] != '_') {
    return false;
  }
  const char second = input[1];
  return second == '_' || (second >= 'A' && second <= 'Z');
}
245 
SanitizeNameForObjC(const string & prefix,const string & input,const string & extension,string * out_suffix_added)246 string SanitizeNameForObjC(const string& prefix,
247                            const string& input,
248                            const string& extension,
249                            string* out_suffix_added) {
250   static const std::unordered_set<string> kReservedWords =
251       MakeWordsMap(kReservedWordList, GOOGLE_ARRAYSIZE(kReservedWordList));
252   static const std::unordered_set<string> kNSObjectMethods =
253       MakeWordsMap(kNSObjectMethodsList, GOOGLE_ARRAYSIZE(kNSObjectMethodsList));
254   string sanitized;
255   // We add the prefix in the cases where the string is missing a prefix.
256   // We define "missing a prefix" as where 'input':
257   // a) Doesn't start with the prefix or
258   // b) Isn't equivalent to the prefix or
259   // c) Has the prefix, but the letter after the prefix is lowercase
260   if (HasPrefixString(input, prefix)) {
261     if (input.length() == prefix.length() || !ascii_isupper(input[prefix.length()])) {
262       sanitized = prefix + input;
263     } else {
264       sanitized = input;
265     }
266   } else {
267     sanitized = prefix + input;
268   }
269   if (IsReservedCIdentifier(sanitized) ||
270       (kReservedWords.count(sanitized) > 0) ||
271       (kNSObjectMethods.count(sanitized) > 0)) {
272     if (out_suffix_added) *out_suffix_added = extension;
273     return sanitized + extension;
274   }
275   if (out_suffix_added) out_suffix_added->clear();
276   return sanitized;
277 }
278 
NameFromFieldDescriptor(const FieldDescriptor * field)279 string NameFromFieldDescriptor(const FieldDescriptor* field) {
280   if (field->type() == FieldDescriptor::TYPE_GROUP) {
281     return field->message_type()->name();
282   } else {
283     return field->name();
284   }
285 }
286 
// Splits `path` at its last '/'.
// `directory` receives everything before that slash and `basename` everything
// after it. Without any slash the directory is empty and the basename is the
// whole path. Either output pointer may be NULL to skip that part.
void PathSplit(const string& path, string* directory, string* basename) {
  const string::size_type slash_pos = path.rfind('/');
  const bool has_slash = (slash_pos != string::npos);

  if (directory) {
    *directory = has_slash ? path.substr(0, slash_pos) : string();
  }
  if (basename) {
    *basename = has_slash ? path.substr(slash_pos + 1) : path;
  }
}
305 
IsSpecialName(const string & name,const string * special_names,size_t count)306 bool IsSpecialName(const string& name, const string* special_names,
307                    size_t count) {
308   for (size_t i = 0; i < count; ++i) {
309     size_t length = special_names[i].length();
310     if (name.compare(0, length, special_names[i]) == 0) {
311       if (name.length() > length) {
312         // If name is longer than the retained_name[i] that it matches
313         // the next character must be not lower case (newton vs newTon vs
314         // new_ton).
315         return !ascii_islower(name[length]);
316       } else {
317         return true;
318       }
319     }
320   }
321   return false;
322 }
323 
GetZeroEnumNameForFlagType(const FlagType flag_type)324 string GetZeroEnumNameForFlagType(const FlagType flag_type) {
325   switch(flag_type) {
326     case FLAGTYPE_DESCRIPTOR_INITIALIZATION:
327       return "GPBDescriptorInitializationFlag_None";
328     case FLAGTYPE_EXTENSION:
329       return "GPBExtensionNone";
330     case FLAGTYPE_FIELD:
331       return "GPBFieldNone";
332     default:
333       GOOGLE_LOG(FATAL) << "Can't get here.";
334       return "0";
335   }
336 }
337 
GetEnumNameForFlagType(const FlagType flag_type)338 string GetEnumNameForFlagType(const FlagType flag_type) {
339   switch(flag_type) {
340     case FLAGTYPE_DESCRIPTOR_INITIALIZATION:
341       return "GPBDescriptorInitializationFlags";
342     case FLAGTYPE_EXTENSION:
343       return "GPBExtensionOptions";
344     case FLAGTYPE_FIELD:
345       return "GPBFieldFlags";
346     default:
347       GOOGLE_LOG(FATAL) << "Can't get here.";
348       return string();
349   }
350 }
351 
352 }  // namespace
353 
354 // Escape C++ trigraphs by escaping question marks to \?
EscapeTrigraphs(const string & to_escape)355 string EscapeTrigraphs(const string& to_escape) {
356   return StringReplace(to_escape, "?", "\\?", true);
357 }
358 
StripProto(const string & filename)359 string StripProto(const string& filename) {
360   if (HasSuffixString(filename, ".protodevel")) {
361     return StripSuffixString(filename, ".protodevel");
362   } else {
363     return StripSuffixString(filename, ".proto");
364   }
365 }
366 
TrimWhitespace(StringPiece * input)367 void TrimWhitespace(StringPiece* input) {
368   while (!input->empty() && ascii_isspace(*input->data())) {
369     input->remove_prefix(1);
370   }
371   while (!input->empty() && ascii_isspace((*input)[input->length() - 1])) {
372     input->remove_suffix(1);
373   }
374 }
375 
376 
IsRetainedName(const string & name)377 bool IsRetainedName(const string& name) {
378   // List of prefixes from
379   // http://developer.apple.com/library/mac/#documentation/Cocoa/Conceptual/MemoryMgmt/Articles/mmRules.html
380   static const string retained_names[] = {"new", "alloc", "copy",
381                                           "mutableCopy"};
382   return IsSpecialName(name, retained_names,
383                        sizeof(retained_names) / sizeof(retained_names[0]));
384 }
385 
IsInitName(const string & name)386 bool IsInitName(const string& name) {
387   static const string init_names[] = {"init"};
388   return IsSpecialName(name, init_names,
389                        sizeof(init_names) / sizeof(init_names[0]));
390 }
391 
BaseFileName(const FileDescriptor * file)392 string BaseFileName(const FileDescriptor* file) {
393   string basename;
394   PathSplit(file->name(), NULL, &basename);
395   return basename;
396 }
397 
FileClassPrefix(const FileDescriptor * file)398 string FileClassPrefix(const FileDescriptor* file) {
399   // Default is empty string, no need to check has_objc_class_prefix.
400   string result = file->options().objc_class_prefix();
401   return result;
402 }
403 
FilePath(const FileDescriptor * file)404 string FilePath(const FileDescriptor* file) {
405   string output;
406   string basename;
407   string directory;
408   PathSplit(file->name(), &directory, &basename);
409   if (directory.length() > 0) {
410     output = directory + "/";
411   }
412   basename = StripProto(basename);
413 
414   // CamelCase to be more ObjC friendly.
415   basename = UnderscoresToCamelCase(basename, true);
416 
417   output += basename;
418   return output;
419 }
420 
FilePathBasename(const FileDescriptor * file)421 string FilePathBasename(const FileDescriptor* file) {
422   string output;
423   string basename;
424   string directory;
425   PathSplit(file->name(), &directory, &basename);
426   basename = StripProto(basename);
427 
428   // CamelCase to be more ObjC friendly.
429   output = UnderscoresToCamelCase(basename, true);
430 
431   return output;
432 }
433 
FileClassName(const FileDescriptor * file)434 string FileClassName(const FileDescriptor* file) {
435   const string prefix = FileClassPrefix(file);
436   const string name = UnderscoresToCamelCase(StripProto(BaseFileName(file)), true) + "Root";
437   // There aren't really any reserved words that end in "Root", but playing
438   // it safe and checking.
439   return SanitizeNameForObjC(prefix, name, "_RootClass", NULL);
440 }
441 
ClassNameWorker(const Descriptor * descriptor)442 string ClassNameWorker(const Descriptor* descriptor) {
443   string name;
444   if (descriptor->containing_type() != NULL) {
445     name = ClassNameWorker(descriptor->containing_type());
446     name += "_";
447   }
448   return name + descriptor->name();
449 }
450 
ClassNameWorker(const EnumDescriptor * descriptor)451 string ClassNameWorker(const EnumDescriptor* descriptor) {
452   string name;
453   if (descriptor->containing_type() != NULL) {
454     name = ClassNameWorker(descriptor->containing_type());
455     name += "_";
456   }
457   return name + descriptor->name();
458 }
459 
ClassName(const Descriptor * descriptor)460 string ClassName(const Descriptor* descriptor) {
461   return ClassName(descriptor, NULL);
462 }
463 
ClassName(const Descriptor * descriptor,string * out_suffix_added)464 string ClassName(const Descriptor* descriptor, string* out_suffix_added) {
465   // 1. Message names are used as is (style calls for CamelCase, trust it).
466   // 2. Check for reserved word at the very end and then suffix things.
467   const string prefix = FileClassPrefix(descriptor->file());
468   const string name = ClassNameWorker(descriptor);
469   return SanitizeNameForObjC(prefix, name, "_Class", out_suffix_added);
470 }
471 
EnumName(const EnumDescriptor * descriptor)472 string EnumName(const EnumDescriptor* descriptor) {
473   // 1. Enum names are used as is (style calls for CamelCase, trust it).
474   // 2. Check for reserved word at the every end and then suffix things.
475   //      message Fixed {
476   //        message Size {...}
477   //        enum Mumble {...}
478   //      ...
479   //      }
480   //    yields Fixed_Class, Fixed_Size.
481   const string prefix = FileClassPrefix(descriptor->file());
482   const string name = ClassNameWorker(descriptor);
483   return SanitizeNameForObjC(prefix, name, "_Enum", NULL);
484 }
485 
EnumValueName(const EnumValueDescriptor * descriptor)486 string EnumValueName(const EnumValueDescriptor* descriptor) {
487   // Because of the Switch enum compatibility, the name on the enum has to have
488   // the suffix handing, so it slightly diverges from how nested classes work.
489   //   enum Fixed {
490   //     FOO = 1
491   //   }
492   // yields Fixed_Enum and Fixed_Enum_Foo (not Fixed_Foo).
493   const string class_name = EnumName(descriptor->type());
494   const string value_str = UnderscoresToCamelCase(descriptor->name(), true);
495   const string name = class_name + "_" + value_str;
496   // There aren't really any reserved words with an underscore and a leading
497   // capital letter, but playing it safe and checking.
498   return SanitizeNameForObjC("", name, "_Value", NULL);
499 }
500 
EnumValueShortName(const EnumValueDescriptor * descriptor)501 string EnumValueShortName(const EnumValueDescriptor* descriptor) {
502   // Enum value names (EnumValueName above) are the enum name turned into
503   // a class name and then the value name is CamelCased and concatenated; the
504   // whole thing then gets sanitized for reserved words.
505   // The "short name" is intended to be the final leaf, the value name; but
506   // you can't simply send that off to sanitize as that could result in it
507   // getting modified when the full name didn't.  For example enum
508   // "StorageModes" has a value "retain".  So the full name is
509   // "StorageModes_Retain", but if we sanitize "retain" it would become
510   // "RetainValue".
511   // So the right way to get the short name is to take the full enum name
512   // and then strip off the enum name (leaving the value name and anything
513   // done by sanitize).
514   const string class_name = EnumName(descriptor->type());
515   const string long_name_prefix = class_name + "_";
516   const string long_name = EnumValueName(descriptor);
517   return StripPrefixString(long_name, long_name_prefix);
518 }
519 
UnCamelCaseEnumShortName(const string & name)520 string UnCamelCaseEnumShortName(const string& name) {
521   string result;
522   for (int i = 0; i < name.size(); i++) {
523     char c = name[i];
524     if (i > 0 && ascii_isupper(c)) {
525       result += '_';
526     }
527     result += ascii_toupper(c);
528   }
529   return result;
530 }
531 
ExtensionMethodName(const FieldDescriptor * descriptor)532 string ExtensionMethodName(const FieldDescriptor* descriptor) {
533   const string name = NameFromFieldDescriptor(descriptor);
534   const string result = UnderscoresToCamelCase(name, false);
535   return SanitizeNameForObjC("", result, "_Extension", NULL);
536 }
537 
FieldName(const FieldDescriptor * field)538 string FieldName(const FieldDescriptor* field) {
539   const string name = NameFromFieldDescriptor(field);
540   string result = UnderscoresToCamelCase(name, false);
541   if (field->is_repeated() && !field->is_map()) {
542     // Add "Array" before do check for reserved worlds.
543     result += "Array";
544   } else {
545     // If it wasn't repeated, but ends in "Array", force on the _p suffix.
546     if (HasSuffixString(result, "Array")) {
547       result += "_p";
548     }
549   }
550   return SanitizeNameForObjC("", result, "_p", NULL);
551 }
552 
FieldNameCapitalized(const FieldDescriptor * field)553 string FieldNameCapitalized(const FieldDescriptor* field) {
554   // Want the same suffix handling, so upcase the first letter of the other
555   // name.
556   string result = FieldName(field);
557   if (result.length() > 0) {
558     result[0] = ascii_toupper(result[0]);
559   }
560   return result;
561 }
562 
OneofEnumName(const OneofDescriptor * descriptor)563 string OneofEnumName(const OneofDescriptor* descriptor) {
564   const Descriptor* fieldDescriptor = descriptor->containing_type();
565   string name = ClassName(fieldDescriptor);
566   name += "_" + UnderscoresToCamelCase(descriptor->name(), true) + "_OneOfCase";
567   // No sanitize needed because the OS never has names that end in _OneOfCase.
568   return name;
569 }
570 
OneofName(const OneofDescriptor * descriptor)571 string OneofName(const OneofDescriptor* descriptor) {
572   string name = UnderscoresToCamelCase(descriptor->name(), false);
573   // No sanitize needed because it gets OneOfCase added and that shouldn't
574   // ever conflict.
575   return name;
576 }
577 
OneofNameCapitalized(const OneofDescriptor * descriptor)578 string OneofNameCapitalized(const OneofDescriptor* descriptor) {
579   // Use the common handling and then up-case the first letter.
580   string result = OneofName(descriptor);
581   if (result.length() > 0) {
582     result[0] = ascii_toupper(result[0]);
583   }
584   return result;
585 }
586 
UnCamelCaseFieldName(const string & name,const FieldDescriptor * field)587 string UnCamelCaseFieldName(const string& name, const FieldDescriptor* field) {
588   string worker(name);
589   if (HasSuffixString(worker, "_p")) {
590     worker = StripSuffixString(worker, "_p");
591   }
592   if (field->is_repeated() && HasSuffixString(worker, "Array")) {
593     worker = StripSuffixString(worker, "Array");
594   }
595   if (field->type() == FieldDescriptor::TYPE_GROUP) {
596     if (worker.length() > 0) {
597       if (ascii_islower(worker[0])) {
598         worker[0] = ascii_toupper(worker[0]);
599       }
600     }
601     return worker;
602   } else {
603     string result;
604     for (int i = 0; i < worker.size(); i++) {
605       char c = worker[i];
606       if (ascii_isupper(c)) {
607         if (i > 0) {
608           result += '_';
609         }
610         result += ascii_tolower(c);
611       } else {
612         result += c;
613       }
614     }
615     return result;
616   }
617 }
618 
GetCapitalizedType(const FieldDescriptor * field)619 string GetCapitalizedType(const FieldDescriptor* field) {
620   switch (field->type()) {
621     case FieldDescriptor::TYPE_INT32:
622       return "Int32";
623     case FieldDescriptor::TYPE_UINT32:
624       return "UInt32";
625     case FieldDescriptor::TYPE_SINT32:
626       return "SInt32";
627     case FieldDescriptor::TYPE_FIXED32:
628       return "Fixed32";
629     case FieldDescriptor::TYPE_SFIXED32:
630       return "SFixed32";
631     case FieldDescriptor::TYPE_INT64:
632       return "Int64";
633     case FieldDescriptor::TYPE_UINT64:
634       return "UInt64";
635     case FieldDescriptor::TYPE_SINT64:
636       return "SInt64";
637     case FieldDescriptor::TYPE_FIXED64:
638       return "Fixed64";
639     case FieldDescriptor::TYPE_SFIXED64:
640       return "SFixed64";
641     case FieldDescriptor::TYPE_FLOAT:
642       return "Float";
643     case FieldDescriptor::TYPE_DOUBLE:
644       return "Double";
645     case FieldDescriptor::TYPE_BOOL:
646       return "Bool";
647     case FieldDescriptor::TYPE_STRING:
648       return "String";
649     case FieldDescriptor::TYPE_BYTES:
650       return "Bytes";
651     case FieldDescriptor::TYPE_ENUM:
652       return "Enum";
653     case FieldDescriptor::TYPE_GROUP:
654       return "Group";
655     case FieldDescriptor::TYPE_MESSAGE:
656       return "Message";
657   }
658 
659   // Some compilers report reaching end of function even though all cases of
660   // the enum are handed in the switch.
661   GOOGLE_LOG(FATAL) << "Can't get here.";
662   return string();
663 }
664 
GetObjectiveCType(FieldDescriptor::Type field_type)665 ObjectiveCType GetObjectiveCType(FieldDescriptor::Type field_type) {
666   switch (field_type) {
667     case FieldDescriptor::TYPE_INT32:
668     case FieldDescriptor::TYPE_SINT32:
669     case FieldDescriptor::TYPE_SFIXED32:
670       return OBJECTIVECTYPE_INT32;
671 
672     case FieldDescriptor::TYPE_UINT32:
673     case FieldDescriptor::TYPE_FIXED32:
674       return OBJECTIVECTYPE_UINT32;
675 
676     case FieldDescriptor::TYPE_INT64:
677     case FieldDescriptor::TYPE_SINT64:
678     case FieldDescriptor::TYPE_SFIXED64:
679       return OBJECTIVECTYPE_INT64;
680 
681     case FieldDescriptor::TYPE_UINT64:
682     case FieldDescriptor::TYPE_FIXED64:
683       return OBJECTIVECTYPE_UINT64;
684 
685     case FieldDescriptor::TYPE_FLOAT:
686       return OBJECTIVECTYPE_FLOAT;
687 
688     case FieldDescriptor::TYPE_DOUBLE:
689       return OBJECTIVECTYPE_DOUBLE;
690 
691     case FieldDescriptor::TYPE_BOOL:
692       return OBJECTIVECTYPE_BOOLEAN;
693 
694     case FieldDescriptor::TYPE_STRING:
695       return OBJECTIVECTYPE_STRING;
696 
697     case FieldDescriptor::TYPE_BYTES:
698       return OBJECTIVECTYPE_DATA;
699 
700     case FieldDescriptor::TYPE_ENUM:
701       return OBJECTIVECTYPE_ENUM;
702 
703     case FieldDescriptor::TYPE_GROUP:
704     case FieldDescriptor::TYPE_MESSAGE:
705       return OBJECTIVECTYPE_MESSAGE;
706   }
707 
708   // Some compilers report reaching end of function even though all cases of
709   // the enum are handed in the switch.
710   GOOGLE_LOG(FATAL) << "Can't get here.";
711   return OBJECTIVECTYPE_INT32;
712 }
713 
IsPrimitiveType(const FieldDescriptor * field)714 bool IsPrimitiveType(const FieldDescriptor* field) {
715   ObjectiveCType type = GetObjectiveCType(field);
716   switch (type) {
717     case OBJECTIVECTYPE_INT32:
718     case OBJECTIVECTYPE_UINT32:
719     case OBJECTIVECTYPE_INT64:
720     case OBJECTIVECTYPE_UINT64:
721     case OBJECTIVECTYPE_FLOAT:
722     case OBJECTIVECTYPE_DOUBLE:
723     case OBJECTIVECTYPE_BOOLEAN:
724     case OBJECTIVECTYPE_ENUM:
725       return true;
726       break;
727     default:
728       return false;
729   }
730 }
731 
IsReferenceType(const FieldDescriptor * field)732 bool IsReferenceType(const FieldDescriptor* field) {
733   return !IsPrimitiveType(field);
734 }
735 
// Turns the textual form of a floating point value into a C expression.
// "nan", "inf" and "-inf" become NAN, INFINITY and -INFINITY. Any other text
// is returned as is, except that with `add_float_suffix` set an 'f' is
// appended when the text contains '.', 'e' or 'E'.
static string HandleExtremeFloatingPoint(string val, bool add_float_suffix) {
  if (val == "nan") {
    return "NAN";
  }
  if (val == "inf") {
    return "INFINITY";
  }
  if (val == "-inf") {
    return "-INFINITY";
  }

  // float strings with ., e or E need to have f appended
  const bool has_float_marker = val.find_first_of(".eE") != string::npos;
  if (add_float_suffix && has_float_marker) {
    val += "f";
  }
  return val;
}
753 
GPBGenericValueFieldName(const FieldDescriptor * field)754 string GPBGenericValueFieldName(const FieldDescriptor* field) {
755   // Returns the field within the GPBGenericValue union to use for the given
756   // field.
757   if (field->is_repeated()) {
758       return "valueMessage";
759   }
760   switch (field->cpp_type()) {
761     case FieldDescriptor::CPPTYPE_INT32:
762       return "valueInt32";
763     case FieldDescriptor::CPPTYPE_UINT32:
764       return "valueUInt32";
765     case FieldDescriptor::CPPTYPE_INT64:
766       return "valueInt64";
767     case FieldDescriptor::CPPTYPE_UINT64:
768       return "valueUInt64";
769     case FieldDescriptor::CPPTYPE_FLOAT:
770       return "valueFloat";
771     case FieldDescriptor::CPPTYPE_DOUBLE:
772       return "valueDouble";
773     case FieldDescriptor::CPPTYPE_BOOL:
774       return "valueBool";
775     case FieldDescriptor::CPPTYPE_STRING:
776       if (field->type() == FieldDescriptor::TYPE_BYTES) {
777         return "valueData";
778       } else {
779         return "valueString";
780       }
781     case FieldDescriptor::CPPTYPE_ENUM:
782       return "valueEnum";
783     case FieldDescriptor::CPPTYPE_MESSAGE:
784       return "valueMessage";
785   }
786 
787   // Some compilers report reaching end of function even though all cases of
788   // the enum are handed in the switch.
789   GOOGLE_LOG(FATAL) << "Can't get here.";
790   return string();
791 }
792 
793 
DefaultValue(const FieldDescriptor * field)794 string DefaultValue(const FieldDescriptor* field) {
795   // Repeated fields don't have defaults.
796   if (field->is_repeated()) {
797     return "nil";
798   }
799 
800   // Switch on cpp_type since we need to know which default_value_* method
801   // of FieldDescriptor to call.
802   switch (field->cpp_type()) {
803     case FieldDescriptor::CPPTYPE_INT32:
804       // gcc and llvm reject the decimal form of kint32min and kint64min.
805       if (field->default_value_int32() == INT_MIN) {
806         return "-0x80000000";
807       }
808       return StrCat(field->default_value_int32());
809     case FieldDescriptor::CPPTYPE_UINT32:
810       return StrCat(field->default_value_uint32()) + "U";
811     case FieldDescriptor::CPPTYPE_INT64:
812       // gcc and llvm reject the decimal form of kint32min and kint64min.
813       if (field->default_value_int64() == LLONG_MIN) {
814         return "-0x8000000000000000LL";
815       }
816       return StrCat(field->default_value_int64()) + "LL";
817     case FieldDescriptor::CPPTYPE_UINT64:
818       return StrCat(field->default_value_uint64()) + "ULL";
819     case FieldDescriptor::CPPTYPE_DOUBLE:
820       return HandleExtremeFloatingPoint(
821           SimpleDtoa(field->default_value_double()), false);
822     case FieldDescriptor::CPPTYPE_FLOAT:
823       return HandleExtremeFloatingPoint(
824           SimpleFtoa(field->default_value_float()), true);
825     case FieldDescriptor::CPPTYPE_BOOL:
826       return field->default_value_bool() ? "YES" : "NO";
827     case FieldDescriptor::CPPTYPE_STRING: {
828       const bool has_default_value = field->has_default_value();
829       const string& default_string = field->default_value_string();
830       if (!has_default_value || default_string.length() == 0) {
831         // If the field is defined as being the empty string,
832         // then we will just assign to nil, as the empty string is the
833         // default for both strings and data.
834         return "nil";
835       }
836       if (field->type() == FieldDescriptor::TYPE_BYTES) {
837         // We want constant fields in our data structures so we can
838         // declare them as static. To achieve this we cheat and stuff
839         // a escaped c string (prefixed with a length) into the data
840         // field, and cast it to an (NSData*) so it will compile.
841         // The runtime library knows how to handle it.
842 
843         // Must convert to a standard byte order for packing length into
844         // a cstring.
845         uint32 length = ghtonl(default_string.length());
846         string bytes((const char*)&length, sizeof(length));
847         bytes.append(default_string);
848         return "(NSData*)\"" + EscapeTrigraphs(CEscape(bytes)) + "\"";
849       } else {
850         return "@\"" + EscapeTrigraphs(CEscape(default_string)) + "\"";
851       }
852     }
853     case FieldDescriptor::CPPTYPE_ENUM:
854       return EnumValueName(field->default_value_enum());
855     case FieldDescriptor::CPPTYPE_MESSAGE:
856       return "nil";
857   }
858 
859   // Some compilers report reaching end of function even though all cases of
860   // the enum are handed in the switch.
861   GOOGLE_LOG(FATAL) << "Can't get here.";
862   return string();
863 }
864 
HasNonZeroDefaultValue(const FieldDescriptor * field)865 bool HasNonZeroDefaultValue(const FieldDescriptor* field) {
866   // Repeated fields don't have defaults.
867   if (field->is_repeated()) {
868     return false;
869   }
870 
871   // As much as checking field->has_default_value() seems useful, it isn't
872   // because of enums. proto2 syntax allows the first item in an enum (the
873   // default) to be non zero. So checking field->has_default_value() would
874   // result in missing this non zero default.  See MessageWithOneBasedEnum in
875   // objectivec/Tests/unittest_objc.proto for a test Message to confirm this.
876 
877   // Some proto file set the default to the zero value, so make sure the value
878   // isn't the zero case.
879   switch (field->cpp_type()) {
880     case FieldDescriptor::CPPTYPE_INT32:
881       return field->default_value_int32() != 0;
882     case FieldDescriptor::CPPTYPE_UINT32:
883       return field->default_value_uint32() != 0U;
884     case FieldDescriptor::CPPTYPE_INT64:
885       return field->default_value_int64() != 0LL;
886     case FieldDescriptor::CPPTYPE_UINT64:
887       return field->default_value_uint64() != 0ULL;
888     case FieldDescriptor::CPPTYPE_DOUBLE:
889       return field->default_value_double() != 0.0;
890     case FieldDescriptor::CPPTYPE_FLOAT:
891       return field->default_value_float() != 0.0f;
892     case FieldDescriptor::CPPTYPE_BOOL:
893       return field->default_value_bool();
894     case FieldDescriptor::CPPTYPE_STRING: {
895       const string& default_string = field->default_value_string();
896       return default_string.length() != 0;
897     }
898     case FieldDescriptor::CPPTYPE_ENUM:
899       return field->default_value_enum()->number() != 0;
900     case FieldDescriptor::CPPTYPE_MESSAGE:
901       return false;
902   }
903 
904   // Some compilers report reaching end of function even though all cases of
905   // the enum are handed in the switch.
906   GOOGLE_LOG(FATAL) << "Can't get here.";
907   return false;
908 }
909 
BuildFlagsString(const FlagType flag_type,const std::vector<string> & strings)910 string BuildFlagsString(const FlagType flag_type,
911                         const std::vector<string>& strings) {
912   if (strings.size() == 0) {
913     return GetZeroEnumNameForFlagType(flag_type);
914   } else if (strings.size() == 1) {
915     return strings[0];
916   }
917   string string("(" + GetEnumNameForFlagType(flag_type) + ")(");
918   for (size_t i = 0; i != strings.size(); ++i) {
919     if (i > 0) {
920       string.append(" | ");
921     }
922     string.append(strings[i]);
923   }
924   string.append(")");
925   return string;
926 }
927 
BuildCommentsString(const SourceLocation & location,bool prefer_single_line)928 string BuildCommentsString(const SourceLocation& location,
929                            bool prefer_single_line) {
930   const string& comments = location.leading_comments.empty()
931                                ? location.trailing_comments
932                                : location.leading_comments;
933   std::vector<string> lines;
934   SplitStringAllowEmpty(comments, "\n", &lines);
935   while (!lines.empty() && lines.back().empty()) {
936     lines.pop_back();
937   }
938   // If there are no comments, just return an empty string.
939   if (lines.size() == 0) {
940     return "";
941   }
942 
943   string prefix;
944   string suffix;
945   string final_comments;
946   string epilogue;
947 
948   bool add_leading_space = false;
949 
950   if (prefer_single_line && lines.size() == 1) {
951     prefix = "/** ";
952     suffix = " */\n";
953   } else {
954     prefix = "* ";
955     suffix = "\n";
956     final_comments += "/**\n";
957     epilogue = " **/\n";
958     add_leading_space = true;
959   }
960 
961   for (int i = 0; i < lines.size(); i++) {
962     string line = StripPrefixString(lines[i], " ");
963     // HeaderDoc and appledoc use '\' and '@' for markers; escape them.
964     line = StringReplace(line, "\\", "\\\\", true);
965     line = StringReplace(line, "@", "\\@", true);
966     // Decouple / from * to not have inline comments inside comments.
967     line = StringReplace(line, "/*", "/\\*", true);
968     line = StringReplace(line, "*/", "*\\/", true);
969     line = prefix + line;
970     StripWhitespace(&line);
971     // If not a one line, need to add the first space before *, as
972     // StripWhitespace would have removed it.
973     line = (add_leading_space ? " " : "") + line;
974     final_comments += line + suffix;
975   }
976   final_comments += epilogue;
977   return final_comments;
978 }
979 
// Framework name used when emitting framework-style imports of the protobuf
// library's own headers.
//
// Making this a generator option for folks that don't use CocoaPods, but do
// want to put the library in a framework, is an interesting question. The
// problem is it means changing sources shipped with the library to actually
// use a different value; so it isn't as simple as an option.
const char* const ProtobufLibraryFrameworkName = "Protobuf";
985 
// Returns the preprocessor symbol that selects framework-style imports for
// |framework_name|: GPB_USE_[FRAMEWORK_NAME]_FRAMEWORK_IMPORTS, with the name
// passed through ToUpper().
string ProtobufFrameworkImportSymbol(const string& framework_name) {
  string symbol("GPB_USE_");
  symbol.append(ToUpper(framework_name));
  symbol.append("_FRAMEWORK_IMPORTS");
  return symbol;
}
993 
IsProtobufLibraryBundledProtoFile(const FileDescriptor * file)994 bool IsProtobufLibraryBundledProtoFile(const FileDescriptor* file) {
995   // We don't check the name prefix or proto package because some files
996   // (descriptor.proto), aren't shipped generated by the library, so this
997   // seems to be the safest way to only catch the ones shipped.
998   const string name = file->name();
999   if (name == "google/protobuf/any.proto" ||
1000       name == "google/protobuf/api.proto" ||
1001       name == "google/protobuf/duration.proto" ||
1002       name == "google/protobuf/empty.proto" ||
1003       name == "google/protobuf/field_mask.proto" ||
1004       name == "google/protobuf/source_context.proto" ||
1005       name == "google/protobuf/struct.proto" ||
1006       name == "google/protobuf/timestamp.proto" ||
1007       name == "google/protobuf/type.proto" ||
1008       name == "google/protobuf/wrappers.proto") {
1009     return true;
1010   }
1011   return false;
1012 }
1013 
ReadLine(StringPiece * input,StringPiece * line)1014 bool ReadLine(StringPiece* input, StringPiece* line) {
1015   for (int len = 0; len < input->size(); ++len) {
1016     if (ascii_isnewline((*input)[len])) {
1017       *line = StringPiece(input->data(), len);
1018       ++len;  // advance over the newline
1019       *input = StringPiece(input->data() + len, input->size() - len);
1020       return true;
1021     }
1022   }
1023   return false;  // Ran out of input with no newline.
1024 }
1025 
RemoveComment(StringPiece * input)1026 void RemoveComment(StringPiece* input) {
1027   int offset = input->find('#');
1028   if (offset != StringPiece::npos) {
1029     input->remove_suffix(input->length() - offset);
1030   }
1031 }
1032 
1033 namespace {
1034 
1035 class ExpectedPrefixesCollector : public LineConsumer {
1036  public:
ExpectedPrefixesCollector(std::map<string,string> * inout_package_to_prefix_map)1037   ExpectedPrefixesCollector(std::map<string, string>* inout_package_to_prefix_map)
1038       : prefix_map_(inout_package_to_prefix_map) {}
1039 
1040   virtual bool ConsumeLine(const StringPiece& line, string* out_error);
1041 
1042  private:
1043   std::map<string, string>* prefix_map_;
1044 };
1045 
ConsumeLine(const StringPiece & line,string * out_error)1046 bool ExpectedPrefixesCollector::ConsumeLine(
1047     const StringPiece& line, string* out_error) {
1048   int offset = line.find('=');
1049   if (offset == StringPiece::npos) {
1050     *out_error = string("Expected prefixes file line without equal sign: '") +
1051                  string(line) + "'.";
1052     return false;
1053   }
1054   StringPiece package = line.substr(0, offset);
1055   StringPiece prefix = line.substr(offset + 1);
1056   TrimWhitespace(&package);
1057   TrimWhitespace(&prefix);
1058   // Don't really worry about error checking the package/prefix for
1059   // being valid.  Assume the file is validated when it is created/edited.
1060   (*prefix_map_)[string(package)] = string(prefix);
1061   return true;
1062 }
1063 
LoadExpectedPackagePrefixes(const Options & generation_options,std::map<string,string> * prefix_map,string * out_error)1064 bool LoadExpectedPackagePrefixes(const Options &generation_options,
1065                                  std::map<string, string>* prefix_map,
1066                                  string* out_error) {
1067   if (generation_options.expected_prefixes_path.empty()) {
1068     return true;
1069   }
1070 
1071   ExpectedPrefixesCollector collector(prefix_map);
1072   return ParseSimpleFile(
1073       generation_options.expected_prefixes_path, &collector, out_error);
1074 }
1075 
ValidateObjCClassPrefix(const FileDescriptor * file,const string & expected_prefixes_path,const std::map<string,string> & expected_package_prefixes,string * out_error)1076 bool ValidateObjCClassPrefix(
1077     const FileDescriptor* file,
1078     const string& expected_prefixes_path,
1079     const std::map<string, string>& expected_package_prefixes,
1080     string* out_error) {
1081   const string prefix = file->options().objc_class_prefix();
1082   const string package = file->package();
1083 
1084   // NOTE: src/google/protobuf/compiler/plugin.cc makes use of cerr for some
1085   // error cases, so it seems to be ok to use as a back door for warnings.
1086 
1087   // Check: Error - See if there was an expected prefix for the package and
1088   // report if it doesn't match (wrong or missing).
1089   std::map<string, string>::const_iterator package_match =
1090       expected_package_prefixes.find(package);
1091   if (package_match != expected_package_prefixes.end()) {
1092     // There was an entry, and...
1093     if (package_match->second == prefix) {
1094       // ...it matches.  All good, out of here!
1095       return true;
1096     } else {
1097       // ...it didn't match!
1098       *out_error = "error: Expected 'option objc_class_prefix = \"" +
1099                    package_match->second + "\";' for package '" + package +
1100                    "' in '" + file->name() + "'";
1101       if (prefix.length()) {
1102         *out_error += "; but found '" + prefix + "' instead";
1103       }
1104       *out_error += ".";
1105       return false;
1106     }
1107   }
1108 
1109   // If there was no prefix option, we're done at this point.
1110   if (prefix.empty()) {
1111     // No prefix, nothing left to check.
1112     return true;
1113   }
1114 
1115   // Check: Warning - Make sure the prefix is is a reasonable value according
1116   // to Apple's rules (the checks above implicitly whitelist anything that
1117   // doesn't meet these rules).
1118   if (!ascii_isupper(prefix[0])) {
1119     std::cerr << std::endl
1120          << "protoc:0: warning: Invalid 'option objc_class_prefix = \""
1121          << prefix << "\";' in '" << file->name() << "';"
1122          << " it should start with a capital letter." << std::endl;
1123     std::cerr.flush();
1124   }
1125   if (prefix.length() < 3) {
1126     // Apple reserves 2 character prefixes for themselves. They do use some
1127     // 3 character prefixes, but they haven't updated the rules/docs.
1128     std::cerr << std::endl
1129          << "protoc:0: warning: Invalid 'option objc_class_prefix = \""
1130          << prefix << "\";' in '" << file->name() << "';"
1131          << " Apple recommends they should be at least 3 characters long."
1132          << std::endl;
1133     std::cerr.flush();
1134   }
1135 
1136   // Look for any other package that uses the same prefix.
1137   string other_package_for_prefix;
1138   for (std::map<string, string>::const_iterator i = expected_package_prefixes.begin();
1139        i != expected_package_prefixes.end(); ++i) {
1140     if (i->second == prefix) {
1141       other_package_for_prefix = i->first;
1142       break;
1143     }
1144   }
1145 
1146   // Check: Warning - If the file does not have a package, check whether
1147   // the prefix declared is being used by another package or not.
1148   if (package.empty()) {
1149     // The file does not have a package and ...
1150     if (other_package_for_prefix.empty()) {
1151       // ... no other package has declared that prefix.
1152       std::cerr << std::endl
1153            << "protoc:0: warning: File '" << file->name() << "' has no "
1154            << "package. Consider adding a new package to the proto and adding '"
1155            << "new.package = " << prefix << "' to the expected prefixes file ("
1156            << expected_prefixes_path << ")." << std::endl;
1157       std::cerr.flush();
1158     } else {
1159       // ... another package has declared the same prefix.
1160       std::cerr << std::endl
1161            << "protoc:0: warning: File '" << file->name() << "' has no package "
1162            << "and package '" << other_package_for_prefix << "' already uses '"
1163            << prefix << "' as its prefix. Consider either adding a new package "
1164            << "to the proto, or reusing one of the packages already using this "
1165            << "prefix in the expected prefixes file ("
1166            << expected_prefixes_path << ")." << std::endl;
1167       std::cerr.flush();
1168     }
1169     return true;
1170   }
1171 
1172   // Check: Error - Make sure the prefix wasn't expected for a different
1173   // package (overlap is allowed, but it has to be listed as an expected
1174   // overlap).
1175   if (!other_package_for_prefix.empty()) {
1176     *out_error =
1177         "error: Found 'option objc_class_prefix = \"" + prefix +
1178         "\";' in '" + file->name() +
1179         "'; that prefix is already used for 'package " +
1180         other_package_for_prefix + ";'. It can only be reused by listing " +
1181         "it in the expected file (" +
1182         expected_prefixes_path + ").";
1183     return false;  // Only report first usage of the prefix.
1184   }
1185 
1186   // Check: Warning - If the given package/prefix pair wasn't expected, issue a
1187   // warning issue a warning suggesting it gets added to the file.
1188   if (!expected_package_prefixes.empty()) {
1189     std::cerr << std::endl
1190          << "protoc:0: warning: Found unexpected 'option objc_class_prefix = \""
1191          << prefix << "\";' in '" << file->name() << "';"
1192          << " consider adding it to the expected prefixes file ("
1193          << expected_prefixes_path << ")." << std::endl;
1194     std::cerr.flush();
1195   }
1196 
1197   return true;
1198 }
1199 
1200 }  // namespace
1201 
ValidateObjCClassPrefixes(const std::vector<const FileDescriptor * > & files,const Options & generation_options,string * out_error)1202 bool ValidateObjCClassPrefixes(const std::vector<const FileDescriptor*>& files,
1203                                const Options& generation_options,
1204                                string* out_error) {
1205   // Load the expected package prefixes, if available, to validate against.
1206   std::map<string, string> expected_package_prefixes;
1207   if (!LoadExpectedPackagePrefixes(generation_options,
1208                                    &expected_package_prefixes,
1209                                    out_error)) {
1210     return false;
1211   }
1212 
1213   for (int i = 0; i < files.size(); i++) {
1214     bool should_skip =
1215       (std::find(generation_options.expected_prefixes_suppressions.begin(),
1216                  generation_options.expected_prefixes_suppressions.end(),
1217                  files[i]->name())
1218           != generation_options.expected_prefixes_suppressions.end());
1219     if (should_skip) {
1220       continue;
1221     }
1222 
1223     bool is_valid =
1224         ValidateObjCClassPrefix(files[i],
1225                                 generation_options.expected_prefixes_path,
1226                                 expected_package_prefixes,
1227                                 out_error);
1228     if (!is_valid) {
1229       return false;
1230     }
1231   }
1232   return true;
1233 }
1234 
TextFormatDecodeData()1235 TextFormatDecodeData::TextFormatDecodeData() { }
1236 
~TextFormatDecodeData()1237 TextFormatDecodeData::~TextFormatDecodeData() { }
1238 
AddString(int32 key,const string & input_for_decode,const string & desired_output)1239 void TextFormatDecodeData::AddString(int32 key,
1240                                      const string& input_for_decode,
1241                                      const string& desired_output) {
1242   for (std::vector<DataEntry>::const_iterator i = entries_.begin();
1243        i != entries_.end(); ++i) {
1244     if (i->first == key) {
1245       std::cerr << "error: duplicate key (" << key
1246            << ") making TextFormat data, input: \"" << input_for_decode
1247            << "\", desired: \"" << desired_output << "\"." << std::endl;
1248       std::cerr.flush();
1249       abort();
1250     }
1251   }
1252 
1253   const string& data = TextFormatDecodeData::DecodeDataForString(
1254       input_for_decode, desired_output);
1255   entries_.push_back(DataEntry(key, data));
1256 }
1257 
Data() const1258 string TextFormatDecodeData::Data() const {
1259   std::ostringstream data_stringstream;
1260 
1261   if (num_entries() > 0) {
1262     io::OstreamOutputStream data_outputstream(&data_stringstream);
1263     io::CodedOutputStream output_stream(&data_outputstream);
1264 
1265     output_stream.WriteVarint32(num_entries());
1266     for (std::vector<DataEntry>::const_iterator i = entries_.begin();
1267          i != entries_.end(); ++i) {
1268       output_stream.WriteVarint32(i->first);
1269       output_stream.WriteString(i->second);
1270     }
1271   }
1272 
1273   data_stringstream.flush();
1274   return data_stringstream.str();
1275 }
1276 
1277 namespace {
1278 
1279 // Helper to build up the decode data for a string.
1280 class DecodeDataBuilder {
1281  public:
DecodeDataBuilder()1282   DecodeDataBuilder() { Reset(); }
1283 
1284   bool AddCharacter(const char desired, const char input);
AddUnderscore()1285   void AddUnderscore() {
1286     Push();
1287     need_underscore_ = true;
1288   }
Finish()1289   string Finish() {
1290     Push();
1291     return decode_data_;
1292   }
1293 
1294  private:
1295   static const uint8 kAddUnderscore = 0x80;
1296 
1297   static const uint8 kOpAsIs        = 0x00;
1298   static const uint8 kOpFirstUpper  = 0x40;
1299   static const uint8 kOpFirstLower  = 0x20;
1300   static const uint8 kOpAllUpper    = 0x60;
1301 
1302   static const int kMaxSegmentLen     = 0x1f;
1303 
AddChar(const char desired)1304   void AddChar(const char desired) {
1305     ++segment_len_;
1306     is_all_upper_ &= ascii_isupper(desired);
1307   }
1308 
Push()1309   void Push() {
1310     uint8 op = (op_ | segment_len_);
1311     if (need_underscore_) op |= kAddUnderscore;
1312     if (op != 0) {
1313       decode_data_ += (char)op;
1314     }
1315     Reset();
1316   }
1317 
AddFirst(const char desired,const char input)1318   bool AddFirst(const char desired, const char input) {
1319     if (desired == input) {
1320       op_ = kOpAsIs;
1321     } else if (desired == ascii_toupper(input)) {
1322       op_ = kOpFirstUpper;
1323     } else if (desired == ascii_tolower(input)) {
1324       op_ = kOpFirstLower;
1325     } else {
1326       // Can't be transformed to match.
1327       return false;
1328     }
1329     AddChar(desired);
1330     return true;
1331   }
1332 
Reset()1333   void Reset() {
1334     need_underscore_ = false;
1335     op_ = 0;
1336     segment_len_ = 0;
1337     is_all_upper_ = true;
1338   }
1339 
1340   bool need_underscore_;
1341   bool is_all_upper_;
1342   uint8 op_;
1343   int segment_len_;
1344 
1345   string decode_data_;
1346 };
1347 
AddCharacter(const char desired,const char input)1348 bool DecodeDataBuilder::AddCharacter(const char desired, const char input) {
1349   // If we've hit the max size, push to start a new segment.
1350   if (segment_len_ == kMaxSegmentLen) {
1351     Push();
1352   }
1353   if (segment_len_ == 0) {
1354     return AddFirst(desired, input);
1355   }
1356 
1357   // Desired and input match...
1358   if (desired == input) {
1359     // If we aren't transforming it, or we're upper casing it and it is
1360     // supposed to be uppercase; just add it to the segment.
1361     if ((op_ != kOpAllUpper) || ascii_isupper(desired)) {
1362       AddChar(desired);
1363       return true;
1364     }
1365 
1366     // Add the current segment, and start the next one.
1367     Push();
1368     return AddFirst(desired, input);
1369   }
1370 
1371   // If we need to uppercase, and everything so far has been uppercase,
1372   // promote op to AllUpper.
1373   if ((desired == ascii_toupper(input)) && is_all_upper_) {
1374     op_ = kOpAllUpper;
1375     AddChar(desired);
1376     return true;
1377   }
1378 
1379   // Give up, push and start a new segment.
1380   Push();
1381   return AddFirst(desired, input);
1382 }
1383 
// If decode data can't be generated, a directive for the raw string
// is used instead: the string itself wrapped between two NUL bytes.
string DirectDecodeString(const string& str) {
  string wrapped(1, '\0');  // Marker for full string.
  wrapped.append(str);
  wrapped.push_back('\0');  // End of string.
  return wrapped;
}
1393 
1394 }  // namespace
1395 
1396 // static
DecodeDataForString(const string & input_for_decode,const string & desired_output)1397 string TextFormatDecodeData::DecodeDataForString(const string& input_for_decode,
1398                                                  const string& desired_output) {
1399   if ((input_for_decode.size() == 0) || (desired_output.size() == 0)) {
1400     std::cerr << "error: got empty string for making TextFormat data, input: \""
1401          << input_for_decode << "\", desired: \"" << desired_output << "\"."
1402          << std::endl;
1403     std::cerr.flush();
1404     abort();
1405   }
1406   if ((input_for_decode.find('\0') != string::npos) ||
1407       (desired_output.find('\0') != string::npos)) {
1408     std::cerr << "error: got a null char in a string for making TextFormat data,"
1409          << " input: \"" << CEscape(input_for_decode) << "\", desired: \""
1410          << CEscape(desired_output) << "\"." << std::endl;
1411     std::cerr.flush();
1412     abort();
1413   }
1414 
1415   DecodeDataBuilder builder;
1416 
1417   // Walk the output building it from the input.
1418   int x = 0;
1419   for (int y = 0; y < desired_output.size(); y++) {
1420     const char d = desired_output[y];
1421     if (d == '_') {
1422       builder.AddUnderscore();
1423       continue;
1424     }
1425 
1426     if (x >= input_for_decode.size()) {
1427       // Out of input, no way to encode it, just return a full decode.
1428       return DirectDecodeString(desired_output);
1429     }
1430     if (builder.AddCharacter(d, input_for_decode[x])) {
1431       ++x;  // Consumed one input
1432     } else {
1433       // Couldn't transform for the next character, just return a full decode.
1434       return DirectDecodeString(desired_output);
1435     }
1436   }
1437 
1438   if (x != input_for_decode.size()) {
1439     // Extra input (suffix from name sanitizing?), just return a full decode.
1440     return DirectDecodeString(desired_output);
1441   }
1442 
1443   // Add the end marker.
1444   return builder.Finish() + (char)'\0';
1445 }
1446 
1447 namespace {
1448 
1449 class Parser {
1450  public:
Parser(LineConsumer * line_consumer)1451   Parser(LineConsumer* line_consumer)
1452       : line_consumer_(line_consumer), line_(0) {}
1453 
1454   // Parses a check of input, returning success/failure.
1455   bool ParseChunk(StringPiece chunk);
1456 
1457   // Should be called to finish parsing (after all input has been provided via
1458   // ParseChunk()).  Returns success/failure.
1459   bool Finish();
1460 
last_line() const1461   int last_line() const { return line_; }
error_str() const1462   string error_str() const { return error_str_; }
1463 
1464  private:
1465   bool ParseLoop();
1466 
1467   LineConsumer* line_consumer_;
1468   int line_;
1469   string error_str_;
1470   StringPiece p_;
1471   string leftover_;
1472 };
1473 
ParseChunk(StringPiece chunk)1474 bool Parser::ParseChunk(StringPiece chunk) {
1475   if (!leftover_.empty()) {
1476     leftover_ += string(chunk);
1477     p_ = StringPiece(leftover_);
1478   } else {
1479     p_ = chunk;
1480   }
1481   bool result = ParseLoop();
1482   if (p_.empty()) {
1483     leftover_.clear();
1484   } else {
1485     leftover_ = string(p_);
1486   }
1487   return result;
1488 }
1489 
Finish()1490 bool Parser::Finish() {
1491   if (leftover_.empty()) {
1492     return true;
1493   }
1494   // Force a newline onto the end to finish parsing.
1495   leftover_ += "\n";
1496   p_ = StringPiece(leftover_);
1497   if (!ParseLoop()) {
1498     return false;
1499   }
1500   return p_.empty();  // Everything used?
1501 }
1502 
ParseLoop()1503 bool Parser::ParseLoop() {
1504   StringPiece line;
1505   while (ReadLine(&p_, &line)) {
1506     ++line_;
1507     RemoveComment(&line);
1508     TrimWhitespace(&line);
1509     if (line.size() == 0) {
1510       continue;  // Blank line.
1511     }
1512     if (!line_consumer_->ConsumeLine(line, &error_str_)) {
1513       return false;
1514     }
1515   }
1516   return true;
1517 }
1518 
1519 }  // namespace
1520 
LineConsumer()1521 LineConsumer::LineConsumer() {}
1522 
~LineConsumer()1523 LineConsumer::~LineConsumer() {}
1524 
ParseSimpleFile(const string & path,LineConsumer * line_consumer,string * out_error)1525 bool ParseSimpleFile(
1526     const string& path, LineConsumer* line_consumer, string* out_error) {
1527   int fd;
1528   do {
1529     fd = posix::open(path.c_str(), O_RDONLY);
1530   } while (fd < 0 && errno == EINTR);
1531   if (fd < 0) {
1532     *out_error =
1533         string("error: Unable to open \"") + path + "\", " + strerror(errno);
1534     return false;
1535   }
1536   io::FileInputStream file_stream(fd);
1537   file_stream.SetCloseOnDelete(true);
1538 
1539   Parser parser(line_consumer);
1540   const void* buf;
1541   int buf_len;
1542   while (file_stream.Next(&buf, &buf_len)) {
1543     if (buf_len == 0) {
1544       continue;
1545     }
1546 
1547     if (!parser.ParseChunk(StringPiece(static_cast<const char*>(buf), buf_len))) {
1548       *out_error =
1549           string("error: ") + path +
1550           " Line " + StrCat(parser.last_line()) + ", " + parser.error_str();
1551       return false;
1552     }
1553   }
1554   return parser.Finish();
1555 }
1556 
ImportWriter(const string & generate_for_named_framework,const string & named_framework_to_proto_path_mappings_path,bool include_wkt_imports)1557 ImportWriter::ImportWriter(
1558   const string& generate_for_named_framework,
1559   const string& named_framework_to_proto_path_mappings_path,
1560   bool include_wkt_imports)
1561     : generate_for_named_framework_(generate_for_named_framework),
1562       named_framework_to_proto_path_mappings_path_(
1563           named_framework_to_proto_path_mappings_path),
1564       include_wkt_imports_(include_wkt_imports),
1565       need_to_parse_mapping_file_(true) {
1566 }
1567 
~ImportWriter()1568 ImportWriter::~ImportWriter() {}
1569 
AddFile(const FileDescriptor * file,const string & header_extension)1570 void ImportWriter::AddFile(const FileDescriptor* file,
1571                            const string& header_extension) {
1572   const string file_path(FilePath(file));
1573 
1574   if (IsProtobufLibraryBundledProtoFile(file)) {
1575     // The imports of the WKTs are only needed within the library itself,
1576     // in other cases, they get skipped because the generated code already
1577     // import GPBProtocolBuffers.h and hence proves them.
1578     if (include_wkt_imports_) {
1579       protobuf_framework_imports_.push_back(
1580           FilePathBasename(file) + header_extension);
1581       protobuf_non_framework_imports_.push_back(file_path + header_extension);
1582     }
1583     return;
1584   }
1585 
1586   // Lazy parse any mappings.
1587   if (need_to_parse_mapping_file_) {
1588     ParseFrameworkMappings();
1589   }
1590 
1591   std::map<string, string>::iterator proto_lookup =
1592       proto_file_to_framework_name_.find(file->name());
1593   if (proto_lookup != proto_file_to_framework_name_.end()) {
1594     other_framework_imports_.push_back(
1595         proto_lookup->second + "/" +
1596         FilePathBasename(file) + header_extension);
1597     return;
1598   }
1599 
1600   if (!generate_for_named_framework_.empty()) {
1601     other_framework_imports_.push_back(
1602         generate_for_named_framework_ + "/" +
1603         FilePathBasename(file) + header_extension);
1604     return;
1605   }
1606 
1607   other_imports_.push_back(file_path + header_extension);
1608 }
1609 
Print(io::Printer * printer) const1610 void ImportWriter::Print(io::Printer* printer) const {
1611   assert(protobuf_non_framework_imports_.size() ==
1612          protobuf_framework_imports_.size());
1613 
1614   bool add_blank_line = false;
1615 
1616   if (protobuf_framework_imports_.size() > 0) {
1617     const string framework_name(ProtobufLibraryFrameworkName);
1618     const string cpp_symbol(ProtobufFrameworkImportSymbol(framework_name));
1619 
1620     printer->Print(
1621         "#if $cpp_symbol$\n",
1622         "cpp_symbol", cpp_symbol);
1623     for (std::vector<string>::const_iterator iter = protobuf_framework_imports_.begin();
1624          iter != protobuf_framework_imports_.end(); ++iter) {
1625       printer->Print(
1626           " #import <$framework_name$/$header$>\n",
1627           "framework_name", framework_name,
1628           "header", *iter);
1629     }
1630     printer->Print(
1631         "#else\n");
1632     for (std::vector<string>::const_iterator iter = protobuf_non_framework_imports_.begin();
1633          iter != protobuf_non_framework_imports_.end(); ++iter) {
1634       printer->Print(
1635           " #import \"$header$\"\n",
1636           "header", *iter);
1637     }
1638     printer->Print(
1639         "#endif\n");
1640 
1641     add_blank_line = true;
1642   }
1643 
1644   if (other_framework_imports_.size() > 0) {
1645     if (add_blank_line) {
1646       printer->Print("\n");
1647     }
1648 
1649     for (std::vector<string>::const_iterator iter = other_framework_imports_.begin();
1650          iter != other_framework_imports_.end(); ++iter) {
1651       printer->Print(
1652           "#import <$header$>\n",
1653           "header", *iter);
1654     }
1655 
1656     add_blank_line = true;
1657   }
1658 
1659   if (other_imports_.size() > 0) {
1660     if (add_blank_line) {
1661       printer->Print("\n");
1662     }
1663 
1664     for (std::vector<string>::const_iterator iter = other_imports_.begin();
1665          iter != other_imports_.end(); ++iter) {
1666       printer->Print(
1667           "#import \"$header$\"\n",
1668           "header", *iter);
1669     }
1670   }
1671 }
1672 
ParseFrameworkMappings()1673 void ImportWriter::ParseFrameworkMappings() {
1674   need_to_parse_mapping_file_ = false;
1675   if (named_framework_to_proto_path_mappings_path_.empty()) {
1676     return;  // Nothing to do.
1677   }
1678 
1679   ProtoFrameworkCollector collector(&proto_file_to_framework_name_);
1680   string parse_error;
1681   if (!ParseSimpleFile(named_framework_to_proto_path_mappings_path_,
1682                        &collector, &parse_error)) {
1683     std::cerr << "error parsing " << named_framework_to_proto_path_mappings_path_
1684          << " : " << parse_error << std::endl;
1685     std::cerr.flush();
1686   }
1687 }
1688 
ConsumeLine(const StringPiece & line,string * out_error)1689 bool ImportWriter::ProtoFrameworkCollector::ConsumeLine(
1690     const StringPiece& line, string* out_error) {
1691   int offset = line.find(':');
1692   if (offset == StringPiece::npos) {
1693     *out_error =
1694         string("Framework/proto file mapping line without colon sign: '") +
1695         string(line) + "'.";
1696     return false;
1697   }
1698   StringPiece framework_name = line.substr(0, offset);
1699   StringPiece proto_file_list = line.substr(offset + 1);
1700   TrimWhitespace(&framework_name);
1701 
1702   int start = 0;
1703   while (start < proto_file_list.length()) {
1704     offset = proto_file_list.find(',', start);
1705     if (offset == StringPiece::npos) {
1706       offset = proto_file_list.length();
1707     }
1708 
1709     StringPiece proto_file = proto_file_list.substr(start, offset - start);
1710     TrimWhitespace(&proto_file);
1711     if (proto_file.size() != 0) {
1712       std::map<string, string>::iterator existing_entry =
1713           map_->find(string(proto_file));
1714       if (existing_entry != map_->end()) {
1715         std::cerr << "warning: duplicate proto file reference, replacing "
1716                      "framework entry for '"
1717                   << string(proto_file) << "' with '" << string(framework_name)
1718                   << "' (was '" << existing_entry->second << "')." << std::endl;
1719         std::cerr.flush();
1720       }
1721 
1722       if (proto_file.find(' ') != StringPiece::npos) {
1723         std::cerr << "note: framework mapping file had a proto file with a "
1724                      "space in, hopefully that isn't a missing comma: '"
1725                   << string(proto_file) << "'" << std::endl;
1726         std::cerr.flush();
1727       }
1728 
1729       (*map_)[string(proto_file)] = string(framework_name);
1730     }
1731 
1732     start = offset + 1;
1733   }
1734 
1735   return true;
1736 }
1737 
1738 
1739 }  // namespace objectivec
1740 }  // namespace compiler
1741 }  // namespace protobuf
1742 }  // namespace google
1743