1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #include <algorithm>
36 #include <google/protobuf/stubs/hash.h>
37 #include <limits>
38 #include <vector>
39 
40 #include <google/protobuf/compiler/csharp/csharp_helpers.h>
41 #include <google/protobuf/descriptor.pb.h>
42 #include <google/protobuf/io/printer.h>
43 #include <google/protobuf/wire_format.h>
44 #include <google/protobuf/stubs/strutil.h>
45 #include <google/protobuf/stubs/substitute.h>
46 
47 #include <google/protobuf/compiler/csharp/csharp_field_base.h>
48 #include <google/protobuf/compiler/csharp/csharp_enum_field.h>
49 #include <google/protobuf/compiler/csharp/csharp_map_field.h>
50 #include <google/protobuf/compiler/csharp/csharp_message_field.h>
51 #include <google/protobuf/compiler/csharp/csharp_options.h>
52 #include <google/protobuf/compiler/csharp/csharp_primitive_field.h>
53 #include <google/protobuf/compiler/csharp/csharp_repeated_enum_field.h>
54 #include <google/protobuf/compiler/csharp/csharp_repeated_message_field.h>
55 #include <google/protobuf/compiler/csharp/csharp_repeated_primitive_field.h>
56 #include <google/protobuf/compiler/csharp/csharp_wrapper_field.h>
57 
58 namespace google {
59 namespace protobuf {
60 namespace compiler {
61 namespace csharp {
62 
GetCSharpType(FieldDescriptor::Type type)63 CSharpType GetCSharpType(FieldDescriptor::Type type) {
64   switch (type) {
65     case FieldDescriptor::TYPE_INT32:
66       return CSHARPTYPE_INT32;
67     case FieldDescriptor::TYPE_INT64:
68       return CSHARPTYPE_INT64;
69     case FieldDescriptor::TYPE_UINT32:
70       return CSHARPTYPE_UINT32;
71     case FieldDescriptor::TYPE_UINT64:
72       return CSHARPTYPE_UINT32;
73     case FieldDescriptor::TYPE_SINT32:
74       return CSHARPTYPE_INT32;
75     case FieldDescriptor::TYPE_SINT64:
76       return CSHARPTYPE_INT64;
77     case FieldDescriptor::TYPE_FIXED32:
78       return CSHARPTYPE_UINT32;
79     case FieldDescriptor::TYPE_FIXED64:
80       return CSHARPTYPE_UINT64;
81     case FieldDescriptor::TYPE_SFIXED32:
82       return CSHARPTYPE_INT32;
83     case FieldDescriptor::TYPE_SFIXED64:
84       return CSHARPTYPE_INT64;
85     case FieldDescriptor::TYPE_FLOAT:
86       return CSHARPTYPE_FLOAT;
87     case FieldDescriptor::TYPE_DOUBLE:
88       return CSHARPTYPE_DOUBLE;
89     case FieldDescriptor::TYPE_BOOL:
90       return CSHARPTYPE_BOOL;
91     case FieldDescriptor::TYPE_ENUM:
92       return CSHARPTYPE_ENUM;
93     case FieldDescriptor::TYPE_STRING:
94       return CSHARPTYPE_STRING;
95     case FieldDescriptor::TYPE_BYTES:
96       return CSHARPTYPE_BYTESTRING;
97     case FieldDescriptor::TYPE_GROUP:
98       return CSHARPTYPE_MESSAGE;
99     case FieldDescriptor::TYPE_MESSAGE:
100       return CSHARPTYPE_MESSAGE;
101 
102       // No default because we want the compiler to complain if any new
103       // types are added.
104   }
105   GOOGLE_LOG(FATAL)<< "Can't get here.";
106   return (CSharpType) -1;
107 }
108 
// Returns proto_file with its final extension removed, i.e. everything that
// precedes the last '.'. A name that contains no '.' is returned unchanged.
std::string StripDotProto(const std::string& proto_file) {
  // Keep the position as size_type: squeezing npos through an int only gave
  // the right answer via an implementation-defined narrowing round trip.
  const std::string::size_type last_dot = proto_file.rfind('.');
  if (last_dot == std::string::npos) {
    return proto_file;
  }
  return proto_file.substr(0, last_dot);
}
113 
GetFileNamespace(const FileDescriptor * descriptor)114 std::string GetFileNamespace(const FileDescriptor* descriptor) {
115   if (descriptor->options().has_csharp_namespace()) {
116     return descriptor->options().csharp_namespace();
117   }
118   return UnderscoresToCamelCase(descriptor->package(), true, true);
119 }
120 
121 // Returns the Pascal-cased last part of the proto file. For example,
122 // input of "google/protobuf/foo_bar.proto" would result in "FooBar".
GetFileNameBase(const FileDescriptor * descriptor)123 std::string GetFileNameBase(const FileDescriptor* descriptor) {
124     std::string proto_file = descriptor->name();
125     int lastslash = proto_file.find_last_of("/");
126     std::string base = proto_file.substr(lastslash + 1);
127     return UnderscoresToPascalCase(StripDotProto(base));
128 }
129 
GetReflectionClassUnqualifiedName(const FileDescriptor * descriptor)130 std::string GetReflectionClassUnqualifiedName(const FileDescriptor* descriptor) {
131   // TODO: Detect collisions with existing messages,
132   // and append an underscore if necessary.
133   return GetFileNameBase(descriptor) + "Reflection";
134 }
135 
// TODO(jtattermusch): can we reuse a utility function?
//
// Converts an identifier such as "foo_bar" to camel case.
//
//   input            - the text to convert; may be empty.
//   cap_next_letter  - whether the first letter should be upper-cased.
//   preserve_period  - whether '.' characters are copied to the output
//                      (every other non-alphanumeric character is dropped).
//
// Rules applied per character:
//   - a lower-case letter is upper-cased if a capital is pending;
//   - an upper-case letter is kept, except that the very first character is
//     lower-cased when cap_next_letter is false;
//   - a digit is copied and makes the following letter upper-case;
//   - anything else is dropped (or kept, for '.', when preserve_period is
//     true) and makes the following letter upper-case.
// If input ends in '#', a trailing '_' is appended to the result.
std::string UnderscoresToCamelCase(const std::string& input,
                                   bool cap_next_letter,
                                   bool preserve_period) {
  std::string result;
  // Note:  I distrust ctype.h due to locales.
  for (std::string::size_type i = 0; i < input.size(); ++i) {
    const char c = input[i];
    if ('a' <= c && c <= 'z') {
      result += cap_next_letter ? static_cast<char>(c + ('A' - 'a')) : c;
      cap_next_letter = false;
    } else if ('A' <= c && c <= 'Z') {
      if (i == 0 && !cap_next_letter) {
        // Force first letter to lower-case unless explicitly told to
        // capitalize it.
        result += static_cast<char>(c + ('a' - 'A'));
      } else {
        // Capital letters after the first are left as-is.
        result += c;
      }
      cap_next_letter = false;
    } else if ('0' <= c && c <= '9') {
      result += c;
      cap_next_letter = true;
    } else {
      cap_next_letter = true;
      if (c == '.' && preserve_period) {
        result += '.';
      }
    }
  }
  // Add a trailing "_" if the name should be altered. The emptiness check
  // matters: indexing size() - 1 on an empty string is out of bounds.
  if (!input.empty() && input[input.size() - 1] == '#') {
    result += '_';
  }
  return result;
}
176 
UnderscoresToPascalCase(const std::string & input)177 std::string UnderscoresToPascalCase(const std::string& input) {
178   return UnderscoresToCamelCase(input, true);
179 }
180 
181 // Convert a string which is expected to be SHOUTY_CASE (but may not be *precisely* shouty)
182 // into a PascalCase string. Precise rules implemented:
183 
184 // Previous input character      Current character         Case
185 // Any                           Non-alphanumeric          Skipped
186 // None - first char of input    Alphanumeric              Upper
187 // Non-letter (e.g. _ or 1)      Alphanumeric              Upper
188 // Numeric                       Alphanumeric              Upper
189 // Lower letter                  Alphanumeric              Same as current
190 // Upper letter                  Alphanumeric              Lower
ShoutyToPascalCase(const std::string & input)191 std::string ShoutyToPascalCase(const std::string& input) {
192   string result;
193   // Simple way of implementing "always start with upper"
194   char previous = '_';
195   for (int i = 0; i < input.size(); i++) {
196     char current = input[i];
197     if (!ascii_isalnum(current)) {
198       previous = current;
199       continue;
200     }
201     if (!ascii_isalnum(previous)) {
202       result += ascii_toupper(current);
203     } else if (ascii_isdigit(previous)) {
204       result += ascii_toupper(current);
205     } else if (ascii_islower(previous)) {
206       result += current;
207     } else {
208       result += ascii_tolower(current);
209     }
210     previous = current;
211   }
212   return result;
213 }
214 
215 // Attempt to remove a prefix from a value, ignoring casing and skipping underscores.
216 // (foo, foo_bar) => bar - underscore after prefix is skipped
217 // (FOO, foo_bar) => bar - casing is ignored
218 // (foo_bar, foobarbaz) => baz - underscore in prefix is ignored
219 // (foobar, foo_barbaz) => baz - underscore in value is ignored
220 // (foo, bar) => bar - prefix isn't matched; return original value
TryRemovePrefix(const std::string & prefix,const std::string & value)221 std::string TryRemovePrefix(const std::string& prefix, const std::string& value) {
222   // First normalize to a lower-case no-underscores prefix to match against
223   std::string prefix_to_match = "";
224   for (size_t i = 0; i < prefix.size(); i++) {
225     if (prefix[i] != '_') {
226       prefix_to_match += ascii_tolower(prefix[i]);
227     }
228   }
229 
230   // This keeps track of how much of value we've consumed
231   size_t prefix_index, value_index;
232   for (prefix_index = 0, value_index = 0;
233       prefix_index < prefix_to_match.size() && value_index < value.size();
234       value_index++) {
235     // Skip over underscores in the value
236     if (value[value_index] == '_') {
237       continue;
238     }
239     if (ascii_tolower(value[value_index]) != prefix_to_match[prefix_index++]) {
240       // Failed to match the prefix - bail out early.
241       return value;
242     }
243   }
244 
245   // If we didn't finish looking through the prefix, we can't strip it.
246   if (prefix_index < prefix_to_match.size()) {
247     return value;
248   }
249 
250   // Step over any underscores after the prefix
251   while (value_index < value.size() && value[value_index] == '_') {
252     value_index++;
253   }
254 
255   // If there's nothing left (e.g. it was a prefix with only underscores afterwards), don't strip.
256   if (value_index == value.size()) {
257     return value;
258   }
259 
260   return value.substr(value_index);
261 }
262 
263 // Format the enum value name in a pleasant way for C#:
264 // - Strip the enum name as a prefix if possible
265 // - Convert to PascalCase.
266 // For example, an enum called Color with a value of COLOR_BLUE should
267 // result in an enum value in C# called just Blue
GetEnumValueName(const std::string & enum_name,const std::string & enum_value_name)268 std::string GetEnumValueName(const std::string& enum_name, const std::string& enum_value_name) {
269   std::string stripped = TryRemovePrefix(enum_name, enum_value_name);
270   std::string result = ShoutyToPascalCase(stripped);
271   // Just in case we have an enum name of FOO and a value of FOO_2... make sure the returned
272   // string is a valid identifier.
273   if (ascii_isdigit(result[0])) {
274     result = "_" + result;
275   }
276   return result;
277 }
278 
ToCSharpName(const std::string & name,const FileDescriptor * file)279 std::string ToCSharpName(const std::string& name, const FileDescriptor* file) {
280   std::string result = GetFileNamespace(file);
281   if (result != "") {
282     result += '.';
283   }
284   string classname;
285   if (file->package().empty()) {
286     classname = name;
287   } else {
288     // Strip the proto package from full_name since we've replaced it with
289     // the C# namespace.
290     classname = name.substr(file->package().size() + 1);
291   }
292   result += StringReplace(classname, ".", ".Types.", true);
293   return "global::" + result;
294 }
295 
GetReflectionClassName(const FileDescriptor * descriptor)296 std::string GetReflectionClassName(const FileDescriptor* descriptor) {
297   std::string result = GetFileNamespace(descriptor);
298   if (!result.empty()) {
299     result += '.';
300   }
301   result += GetReflectionClassUnqualifiedName(descriptor);
302   return "global::" + result;
303 }
304 
GetClassName(const Descriptor * descriptor)305 std::string GetClassName(const Descriptor* descriptor) {
306   return ToCSharpName(descriptor->full_name(), descriptor->file());
307 }
308 
GetClassName(const EnumDescriptor * descriptor)309 std::string GetClassName(const EnumDescriptor* descriptor) {
310   return ToCSharpName(descriptor->full_name(), descriptor->file());
311 }
312 
313 // Groups are hacky:  The name of the field is just the lower-cased name
314 // of the group type.  In C#, though, we would like to retain the original
315 // capitalization of the type name.
GetFieldName(const FieldDescriptor * descriptor)316 std::string GetFieldName(const FieldDescriptor* descriptor) {
317   if (descriptor->type() == FieldDescriptor::TYPE_GROUP) {
318     return descriptor->message_type()->name();
319   } else {
320     return descriptor->name();
321   }
322 }
323 
GetFieldConstantName(const FieldDescriptor * field)324 std::string GetFieldConstantName(const FieldDescriptor* field) {
325   return GetPropertyName(field) + "FieldNumber";
326 }
327 
GetPropertyName(const FieldDescriptor * descriptor)328 std::string GetPropertyName(const FieldDescriptor* descriptor) {
329   // TODO(jtattermusch): consider introducing csharp_property_name field option
330   std::string property_name = UnderscoresToPascalCase(GetFieldName(descriptor));
331   // Avoid either our own type name or reserved names. Note that not all names
332   // are reserved - a field called to_string, write_to etc would still cause a problem.
333   // There are various ways of ending up with naming collisions, but we try to avoid obvious
334   // ones.
335   if (property_name == descriptor->containing_type()->name()
336       || property_name == "Types"
337       || property_name == "Descriptor") {
338     property_name += "_";
339   }
340   return property_name;
341 }
342 
GetOutputFile(const google::protobuf::FileDescriptor * descriptor,const std::string file_extension,const bool generate_directories,const std::string base_namespace,string * error)343 std::string GetOutputFile(
344     const google::protobuf::FileDescriptor* descriptor,
345     const std::string file_extension,
346     const bool generate_directories,
347     const std::string base_namespace,
348     string* error) {
349   string relative_filename = GetFileNameBase(descriptor) + file_extension;
350   if (!generate_directories) {
351     return relative_filename;
352   }
353   string ns = GetFileNamespace(descriptor);
354   string namespace_suffix = ns;
355   if (!base_namespace.empty()) {
356     // Check that the base_namespace is either equal to or a leading part of
357     // the file namespace. This isn't just a simple prefix; "Foo.B" shouldn't
358     // be regarded as a prefix of "Foo.Bar". The simplest option is to add "."
359     // to both.
360     string extended_ns = ns + ".";
361     if (extended_ns.find(base_namespace + ".") != 0) {
362       *error = "Namespace " + ns + " is not a prefix namespace of base namespace " + base_namespace;
363       return ""; // This will be ignored, because we've set an error.
364     }
365     namespace_suffix = ns.substr(base_namespace.length());
366     if (namespace_suffix.find(".") == 0) {
367       namespace_suffix = namespace_suffix.substr(1);
368     }
369   }
370 
371   string namespace_dir = StringReplace(namespace_suffix, ".", "/", true);
372   if (!namespace_dir.empty()) {
373     namespace_dir += "/";
374   }
375   return namespace_dir + relative_filename;
376 }
377 
378 // TODO: c&p from Java protoc plugin
379 // For encodings with fixed sizes, returns that size in bytes.  Otherwise
380 // returns -1.
GetFixedSize(FieldDescriptor::Type type)381 int GetFixedSize(FieldDescriptor::Type type) {
382   switch (type) {
383     case FieldDescriptor::TYPE_INT32   : return -1;
384     case FieldDescriptor::TYPE_INT64   : return -1;
385     case FieldDescriptor::TYPE_UINT32  : return -1;
386     case FieldDescriptor::TYPE_UINT64  : return -1;
387     case FieldDescriptor::TYPE_SINT32  : return -1;
388     case FieldDescriptor::TYPE_SINT64  : return -1;
389     case FieldDescriptor::TYPE_FIXED32 : return internal::WireFormatLite::kFixed32Size;
390     case FieldDescriptor::TYPE_FIXED64 : return internal::WireFormatLite::kFixed64Size;
391     case FieldDescriptor::TYPE_SFIXED32: return internal::WireFormatLite::kSFixed32Size;
392     case FieldDescriptor::TYPE_SFIXED64: return internal::WireFormatLite::kSFixed64Size;
393     case FieldDescriptor::TYPE_FLOAT   : return internal::WireFormatLite::kFloatSize;
394     case FieldDescriptor::TYPE_DOUBLE  : return internal::WireFormatLite::kDoubleSize;
395 
396     case FieldDescriptor::TYPE_BOOL    : return internal::WireFormatLite::kBoolSize;
397     case FieldDescriptor::TYPE_ENUM    : return -1;
398 
399     case FieldDescriptor::TYPE_STRING  : return -1;
400     case FieldDescriptor::TYPE_BYTES   : return -1;
401     case FieldDescriptor::TYPE_GROUP   : return -1;
402     case FieldDescriptor::TYPE_MESSAGE : return -1;
403 
404     // No default because we want the compiler to complain if any new
405     // types are added.
406   }
407   GOOGLE_LOG(FATAL) << "Can't get here.";
408   return -1;
409 }
410 
// The 64-symbol alphabet used by StringToBase64, indexed by 6-bit value.
static const char base64_chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Encodes the bytes of input as base64 text using base64_chars. Every group
// of three input bytes becomes four output symbols; a trailing group of one
// or two bytes is encoded and padded with '=' to four symbols. An empty
// input yields an empty string.
std::string StringToBase64(const std::string& input) {
  const unsigned char* const bytes =
      reinterpret_cast<const unsigned char*>(input.data());
  const size_t length = input.size();

  std::string encoded;
  size_t pos = 0;

  // Complete 3-byte groups: 24 bits split into four 6-bit indices.
  for (; length - pos >= 3; pos += 3) {
    const unsigned char b0 = bytes[pos];
    const unsigned char b1 = bytes[pos + 1];
    const unsigned char b2 = bytes[pos + 2];
    encoded += base64_chars[b0 >> 2];
    encoded += base64_chars[((b0 & 0x3) << 4) | (b1 >> 4)];
    encoded += base64_chars[((b1 & 0xf) << 2) | (b2 >> 6)];
    encoded += base64_chars[b2 & 0x3f];
  }

  // Trailing partial group, padded out to four symbols.
  const size_t leftover = length - pos;
  if (leftover == 2) {
    const unsigned char b0 = bytes[pos];
    const unsigned char b1 = bytes[pos + 1];
    encoded += base64_chars[b0 >> 2];
    encoded += base64_chars[((b0 & 0x3) << 4) | (b1 >> 4)];
    encoded += base64_chars[(b1 & 0xf) << 2];
    encoded += '=';
  } else if (leftover == 1) {
    const unsigned char b0 = bytes[pos];
    encoded += base64_chars[b0 >> 2];
    encoded += base64_chars[(b0 & 0x3) << 4];
    encoded += "==";
  }
  return encoded;
}
444 
FileDescriptorToBase64(const FileDescriptor * descriptor)445 std::string FileDescriptorToBase64(const FileDescriptor* descriptor) {
446   std::string fdp_bytes;
447   FileDescriptorProto fdp;
448   descriptor->CopyTo(&fdp);
449   fdp.SerializeToString(&fdp_bytes);
450   return StringToBase64(fdp_bytes);
451 }
452 
CreateFieldGenerator(const FieldDescriptor * descriptor,int fieldOrdinal,const Options * options)453 FieldGeneratorBase* CreateFieldGenerator(const FieldDescriptor* descriptor,
454                                          int fieldOrdinal,
455                                          const Options* options) {
456   switch (descriptor->type()) {
457     case FieldDescriptor::TYPE_GROUP:
458     case FieldDescriptor::TYPE_MESSAGE:
459       if (descriptor->is_repeated()) {
460         if (descriptor->is_map()) {
461           return new MapFieldGenerator(descriptor, fieldOrdinal, options);
462         } else {
463           return new RepeatedMessageFieldGenerator(descriptor, fieldOrdinal, options);
464         }
465       } else {
466         if (IsWrapperType(descriptor)) {
467           if (descriptor->containing_oneof()) {
468             return new WrapperOneofFieldGenerator(descriptor, fieldOrdinal, options);
469           } else {
470             return new WrapperFieldGenerator(descriptor, fieldOrdinal, options);
471           }
472         } else {
473           if (descriptor->containing_oneof()) {
474             return new MessageOneofFieldGenerator(descriptor, fieldOrdinal, options);
475           } else {
476             return new MessageFieldGenerator(descriptor, fieldOrdinal, options);
477           }
478         }
479       }
480     case FieldDescriptor::TYPE_ENUM:
481       if (descriptor->is_repeated()) {
482         return new RepeatedEnumFieldGenerator(descriptor, fieldOrdinal, options);
483       } else {
484         if (descriptor->containing_oneof()) {
485           return new EnumOneofFieldGenerator(descriptor, fieldOrdinal, options);
486         } else {
487           return new EnumFieldGenerator(descriptor, fieldOrdinal, options);
488         }
489       }
490     default:
491       if (descriptor->is_repeated()) {
492         return new RepeatedPrimitiveFieldGenerator(descriptor, fieldOrdinal, options);
493       } else {
494         if (descriptor->containing_oneof()) {
495           return new PrimitiveOneofFieldGenerator(descriptor, fieldOrdinal, options);
496         } else {
497           return new PrimitiveFieldGenerator(descriptor, fieldOrdinal, options);
498         }
499       }
500   }
501 }
502 
503 }  // namespace csharp
504 }  // namespace compiler
505 }  // namespace protobuf
506 }  // namespace google
507