1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
#include <algorithm>
#include <cmath>
#include <limits>
#include <unordered_set>
#include <vector>

#include <google/protobuf/stubs/stringprintf.h>
#include <google/protobuf/compiler/java/java_helpers.h>
#include <google/protobuf/compiler/java/java_name_resolver.h>
#include <google/protobuf/descriptor.pb.h>
#include <google/protobuf/wire_format.h>
#include <google/protobuf/stubs/strutil.h>
#include <google/protobuf/stubs/substitute.h>



#include <google/protobuf/stubs/hash.h>  // for hash<T *>
51 
52 namespace google {
53 namespace protobuf {
54 namespace compiler {
55 namespace java {
56 
57 using internal::WireFormat;
58 using internal::WireFormatLite;
59 
// A single "//" line comment made of a row of '=' characters, terminated by
// a newline.
const char kThickSeparator[] =
    "// ===================================================================\n";

// A single "//" line comment made of a row of '-' characters, terminated by
// a newline.  Same length as kThickSeparator.
const char kThinSeparator[] =
    "// -------------------------------------------------------------------\n";
64 
65 namespace {
66 
67 const char* kDefaultPackage = "";
68 
69 // Names that should be avoided as field names.
70 // Using them will cause the compiler to generate accessors whose names are
71 // colliding with methods defined in base classes.
72 const char* kForbiddenWordList[] = {
73     // message base class:
74     "cached_size",
75     "serialized_size",
76     // java.lang.Object:
77     "class",
78 };
79 
80 const std::unordered_set<string>* kReservedNames =
81     new std::unordered_set<string>({
82         "abstract",   "assert",       "boolean",   "break",      "byte",
83         "case",       "catch",        "char",      "class",      "const",
84         "continue",   "default",      "do",        "double",     "else",
85         "enum",       "extends",      "final",     "finally",    "float",
86         "for",        "goto",         "if",        "implements", "import",
87         "instanceof", "int",          "interface", "long",       "native",
88         "new",        "package",      "private",   "protected",  "public",
89         "return",     "short",        "static",    "strictfp",   "super",
90         "switch",     "synchronized", "this",      "throw",      "throws",
91         "transient",  "try",          "void",      "volatile",   "while",
92     });
93 
IsForbidden(const std::string & field_name)94 bool IsForbidden(const std::string& field_name) {
95   for (int i = 0; i < GOOGLE_ARRAYSIZE(kForbiddenWordList); ++i) {
96     if (field_name == kForbiddenWordList[i]) {
97       return true;
98     }
99   }
100   return false;
101 }
102 
FieldName(const FieldDescriptor * field)103 std::string FieldName(const FieldDescriptor* field) {
104   std::string field_name;
105   // Groups are hacky:  The name of the field is just the lower-cased name
106   // of the group type.  In Java, though, we would like to retain the original
107   // capitalization of the type name.
108   if (GetType(field) == FieldDescriptor::TYPE_GROUP) {
109     field_name = field->message_type()->name();
110   } else {
111     field_name = field->name();
112   }
113   if (IsForbidden(field_name)) {
114     // Append a trailing "#" to indicate that the name should be decorated to
115     // avoid collision with other names.
116     field_name += "#";
117   }
118   return field_name;
119 }
120 
121 
122 }  // namespace
123 
PrintGeneratedAnnotation(io::Printer * printer,char delimiter,const std::string & annotation_file)124 void PrintGeneratedAnnotation(io::Printer* printer, char delimiter,
125                               const std::string& annotation_file) {
126   if (annotation_file.empty()) {
127     return;
128   }
129   std::string ptemplate =
130       "@javax.annotation.Generated(value=\"protoc\", comments=\"annotations:";
131   ptemplate.push_back(delimiter);
132   ptemplate.append("annotation_file");
133   ptemplate.push_back(delimiter);
134   ptemplate.append("\")\n");
135   printer->Print(ptemplate.c_str(), "annotation_file", annotation_file);
136 }
137 
PrintEnumVerifierLogic(io::Printer * printer,const FieldDescriptor * descriptor,const std::map<std::string,std::string> & variables,const char * var_name,const char * terminating_string,bool enforce_lite)138 void PrintEnumVerifierLogic(io::Printer* printer,
139                             const FieldDescriptor* descriptor,
140                             const std::map<std::string, std::string>& variables,
141                             const char* var_name,
142                             const char* terminating_string, bool enforce_lite) {
143   std::string enum_verifier_string =
144       enforce_lite ? StrCat(var_name, ".internalGetVerifier()")
145                    : StrCat(
146                          "new com.google.protobuf.Internal.EnumVerifier() {\n"
147                          "        @java.lang.Override\n"
148                          "        public boolean isInRange(int number) {\n"
149                          "          return ",
150                          var_name,
151                          ".forNumber(number) != null;\n"
152                          "        }\n"
153                          "      }");
154   printer->Print(
155       variables,
156       StrCat(enum_verifier_string, terminating_string).c_str());
157 }
158 
UnderscoresToCamelCase(const std::string & input,bool cap_next_letter)159 std::string UnderscoresToCamelCase(const std::string& input,
160                                    bool cap_next_letter) {
161   GOOGLE_CHECK(!input.empty());
162   std::string result;
163   // Note:  I distrust ctype.h due to locales.
164   for (int i = 0; i < input.size(); i++) {
165     if ('a' <= input[i] && input[i] <= 'z') {
166       if (cap_next_letter) {
167         result += input[i] + ('A' - 'a');
168       } else {
169         result += input[i];
170       }
171       cap_next_letter = false;
172     } else if ('A' <= input[i] && input[i] <= 'Z') {
173       if (i == 0 && !cap_next_letter) {
174         // Force first letter to lower-case unless explicitly told to
175         // capitalize it.
176         result += input[i] + ('a' - 'A');
177       } else {
178         // Capital letters after the first are left as-is.
179         result += input[i];
180       }
181       cap_next_letter = false;
182     } else if ('0' <= input[i] && input[i] <= '9') {
183       result += input[i];
184       cap_next_letter = true;
185     } else {
186       cap_next_letter = true;
187     }
188   }
189   // Add a trailing "_" if the name should be altered.
190   if (input[input.size() - 1] == '#') {
191     result += '_';
192   }
193   return result;
194 }
195 
UnderscoresToCamelCase(const FieldDescriptor * field)196 std::string UnderscoresToCamelCase(const FieldDescriptor* field) {
197   return UnderscoresToCamelCase(FieldName(field), false);
198 }
199 
UnderscoresToCapitalizedCamelCase(const FieldDescriptor * field)200 std::string UnderscoresToCapitalizedCamelCase(const FieldDescriptor* field) {
201   return UnderscoresToCamelCase(FieldName(field), true);
202 }
203 
CapitalizedFieldName(const FieldDescriptor * field)204 std::string CapitalizedFieldName(const FieldDescriptor* field) {
205   return UnderscoresToCapitalizedCamelCase(field);
206 }
207 
UnderscoresToCamelCase(const MethodDescriptor * method)208 std::string UnderscoresToCamelCase(const MethodDescriptor* method) {
209   return UnderscoresToCamelCase(method->name(), false);
210 }
211 
UnderscoresToCamelCaseCheckReserved(const FieldDescriptor * field)212 std::string UnderscoresToCamelCaseCheckReserved(const FieldDescriptor* field) {
213   std::string name = UnderscoresToCamelCase(field);
214   if (kReservedNames->find(name) != kReservedNames->end()) {
215     return name + "_";
216   }
217   return name;
218 }
219 
UniqueFileScopeIdentifier(const Descriptor * descriptor)220 std::string UniqueFileScopeIdentifier(const Descriptor* descriptor) {
221   return "static_" + StringReplace(descriptor->full_name(), ".", "_", true);
222 }
223 
CamelCaseFieldName(const FieldDescriptor * field)224 std::string CamelCaseFieldName(const FieldDescriptor* field) {
225   std::string fieldName = UnderscoresToCamelCase(field);
226   if ('0' <= fieldName[0] && fieldName[0] <= '9') {
227     return '_' + fieldName;
228   }
229   return fieldName;
230 }
231 
StripProto(const std::string & filename)232 std::string StripProto(const std::string& filename) {
233   if (HasSuffixString(filename, ".protodevel")) {
234     return StripSuffixString(filename, ".protodevel");
235   } else {
236     return StripSuffixString(filename, ".proto");
237   }
238 }
239 
FileClassName(const FileDescriptor * file,bool immutable)240 std::string FileClassName(const FileDescriptor* file, bool immutable) {
241   ClassNameResolver name_resolver;
242   return name_resolver.GetFileClassName(file, immutable);
243 }
244 
FileJavaPackage(const FileDescriptor * file,bool immutable)245 std::string FileJavaPackage(const FileDescriptor* file, bool immutable) {
246   std::string result;
247 
248   if (file->options().has_java_package()) {
249     result = file->options().java_package();
250   } else {
251     result = kDefaultPackage;
252     if (!file->package().empty()) {
253       if (!result.empty()) result += '.';
254       result += file->package();
255     }
256   }
257 
258   return result;
259 }
260 
FileJavaPackage(const FileDescriptor * file)261 std::string FileJavaPackage(const FileDescriptor* file) {
262   return FileJavaPackage(file, true /* immutable */);
263 }
264 
JavaPackageToDir(std::string package_name)265 std::string JavaPackageToDir(std::string package_name) {
266   std::string package_dir = StringReplace(package_name, ".", "/", true);
267   if (!package_dir.empty()) package_dir += "/";
268   return package_dir;
269 }
270 
ClassName(const Descriptor * descriptor)271 std::string ClassName(const Descriptor* descriptor) {
272   ClassNameResolver name_resolver;
273   return name_resolver.GetClassName(descriptor, true);
274 }
275 
ClassName(const EnumDescriptor * descriptor)276 std::string ClassName(const EnumDescriptor* descriptor) {
277   ClassNameResolver name_resolver;
278   return name_resolver.GetClassName(descriptor, true);
279 }
280 
ClassName(const ServiceDescriptor * descriptor)281 std::string ClassName(const ServiceDescriptor* descriptor) {
282   ClassNameResolver name_resolver;
283   return name_resolver.GetClassName(descriptor, true);
284 }
285 
ClassName(const FileDescriptor * descriptor)286 std::string ClassName(const FileDescriptor* descriptor) {
287   ClassNameResolver name_resolver;
288   return name_resolver.GetClassName(descriptor, true);
289 }
290 
291 
ExtraMessageInterfaces(const Descriptor * descriptor)292 std::string ExtraMessageInterfaces(const Descriptor* descriptor) {
293   std::string interfaces = "// @@protoc_insertion_point(message_implements:" +
294                            descriptor->full_name() + ")";
295   return interfaces;
296 }
297 
298 
ExtraBuilderInterfaces(const Descriptor * descriptor)299 std::string ExtraBuilderInterfaces(const Descriptor* descriptor) {
300   std::string interfaces = "// @@protoc_insertion_point(builder_implements:" +
301                            descriptor->full_name() + ")";
302   return interfaces;
303 }
304 
ExtraMessageOrBuilderInterfaces(const Descriptor * descriptor)305 std::string ExtraMessageOrBuilderInterfaces(const Descriptor* descriptor) {
306   std::string interfaces = "// @@protoc_insertion_point(interface_extends:" +
307                            descriptor->full_name() + ")";
308   return interfaces;
309 }
310 
FieldConstantName(const FieldDescriptor * field)311 std::string FieldConstantName(const FieldDescriptor* field) {
312   std::string name = field->name() + "_FIELD_NUMBER";
313   UpperString(&name);
314   return name;
315 }
316 
// Returns the declared type of |field| (a thin wrapper over field->type()).
FieldDescriptor::Type GetType(const FieldDescriptor* field) {
  const FieldDescriptor::Type declared_type = field->type();
  return declared_type;
}
320 
GetJavaType(const FieldDescriptor * field)321 JavaType GetJavaType(const FieldDescriptor* field) {
322   switch (GetType(field)) {
323     case FieldDescriptor::TYPE_INT32:
324     case FieldDescriptor::TYPE_UINT32:
325     case FieldDescriptor::TYPE_SINT32:
326     case FieldDescriptor::TYPE_FIXED32:
327     case FieldDescriptor::TYPE_SFIXED32:
328       return JAVATYPE_INT;
329 
330     case FieldDescriptor::TYPE_INT64:
331     case FieldDescriptor::TYPE_UINT64:
332     case FieldDescriptor::TYPE_SINT64:
333     case FieldDescriptor::TYPE_FIXED64:
334     case FieldDescriptor::TYPE_SFIXED64:
335       return JAVATYPE_LONG;
336 
337     case FieldDescriptor::TYPE_FLOAT:
338       return JAVATYPE_FLOAT;
339 
340     case FieldDescriptor::TYPE_DOUBLE:
341       return JAVATYPE_DOUBLE;
342 
343     case FieldDescriptor::TYPE_BOOL:
344       return JAVATYPE_BOOLEAN;
345 
346     case FieldDescriptor::TYPE_STRING:
347       return JAVATYPE_STRING;
348 
349     case FieldDescriptor::TYPE_BYTES:
350       return JAVATYPE_BYTES;
351 
352     case FieldDescriptor::TYPE_ENUM:
353       return JAVATYPE_ENUM;
354 
355     case FieldDescriptor::TYPE_GROUP:
356     case FieldDescriptor::TYPE_MESSAGE:
357       return JAVATYPE_MESSAGE;
358 
359       // No default because we want the compiler to complain if any new
360       // types are added.
361   }
362 
363   GOOGLE_LOG(FATAL) << "Can't get here.";
364   return JAVATYPE_INT;
365 }
366 
PrimitiveTypeName(JavaType type)367 const char* PrimitiveTypeName(JavaType type) {
368   switch (type) {
369     case JAVATYPE_INT:
370       return "int";
371     case JAVATYPE_LONG:
372       return "long";
373     case JAVATYPE_FLOAT:
374       return "float";
375     case JAVATYPE_DOUBLE:
376       return "double";
377     case JAVATYPE_BOOLEAN:
378       return "boolean";
379     case JAVATYPE_STRING:
380       return "java.lang.String";
381     case JAVATYPE_BYTES:
382       return "com.google.protobuf.ByteString";
383     case JAVATYPE_ENUM:
384       return NULL;
385     case JAVATYPE_MESSAGE:
386       return NULL;
387 
388       // No default because we want the compiler to complain if any new
389       // JavaTypes are added.
390   }
391 
392   GOOGLE_LOG(FATAL) << "Can't get here.";
393   return NULL;
394 }
395 
PrimitiveTypeName(const FieldDescriptor * descriptor)396 const char* PrimitiveTypeName(const FieldDescriptor* descriptor) {
397   return PrimitiveTypeName(GetJavaType(descriptor));
398 }
399 
BoxedPrimitiveTypeName(JavaType type)400 const char* BoxedPrimitiveTypeName(JavaType type) {
401   switch (type) {
402     case JAVATYPE_INT:
403       return "java.lang.Integer";
404     case JAVATYPE_LONG:
405       return "java.lang.Long";
406     case JAVATYPE_FLOAT:
407       return "java.lang.Float";
408     case JAVATYPE_DOUBLE:
409       return "java.lang.Double";
410     case JAVATYPE_BOOLEAN:
411       return "java.lang.Boolean";
412     case JAVATYPE_STRING:
413       return "java.lang.String";
414     case JAVATYPE_BYTES:
415       return "com.google.protobuf.ByteString";
416     case JAVATYPE_ENUM:
417       return NULL;
418     case JAVATYPE_MESSAGE:
419       return NULL;
420 
421       // No default because we want the compiler to complain if any new
422       // JavaTypes are added.
423   }
424 
425   GOOGLE_LOG(FATAL) << "Can't get here.";
426   return NULL;
427 }
428 
BoxedPrimitiveTypeName(const FieldDescriptor * descriptor)429 const char* BoxedPrimitiveTypeName(const FieldDescriptor* descriptor) {
430   return BoxedPrimitiveTypeName(GetJavaType(descriptor));
431 }
432 
GetOneofStoredType(const FieldDescriptor * field)433 std::string GetOneofStoredType(const FieldDescriptor* field) {
434   const JavaType javaType = GetJavaType(field);
435   switch (javaType) {
436     case JAVATYPE_ENUM:
437       return "java.lang.Integer";
438     case JAVATYPE_MESSAGE:
439       return ClassName(field->message_type());
440     default:
441       return BoxedPrimitiveTypeName(javaType);
442   }
443 }
444 
FieldTypeName(FieldDescriptor::Type field_type)445 const char* FieldTypeName(FieldDescriptor::Type field_type) {
446   switch (field_type) {
447     case FieldDescriptor::TYPE_INT32:
448       return "INT32";
449     case FieldDescriptor::TYPE_UINT32:
450       return "UINT32";
451     case FieldDescriptor::TYPE_SINT32:
452       return "SINT32";
453     case FieldDescriptor::TYPE_FIXED32:
454       return "FIXED32";
455     case FieldDescriptor::TYPE_SFIXED32:
456       return "SFIXED32";
457     case FieldDescriptor::TYPE_INT64:
458       return "INT64";
459     case FieldDescriptor::TYPE_UINT64:
460       return "UINT64";
461     case FieldDescriptor::TYPE_SINT64:
462       return "SINT64";
463     case FieldDescriptor::TYPE_FIXED64:
464       return "FIXED64";
465     case FieldDescriptor::TYPE_SFIXED64:
466       return "SFIXED64";
467     case FieldDescriptor::TYPE_FLOAT:
468       return "FLOAT";
469     case FieldDescriptor::TYPE_DOUBLE:
470       return "DOUBLE";
471     case FieldDescriptor::TYPE_BOOL:
472       return "BOOL";
473     case FieldDescriptor::TYPE_STRING:
474       return "STRING";
475     case FieldDescriptor::TYPE_BYTES:
476       return "BYTES";
477     case FieldDescriptor::TYPE_ENUM:
478       return "ENUM";
479     case FieldDescriptor::TYPE_GROUP:
480       return "GROUP";
481     case FieldDescriptor::TYPE_MESSAGE:
482       return "MESSAGE";
483 
484       // No default because we want the compiler to complain if any new
485       // types are added.
486   }
487 
488   GOOGLE_LOG(FATAL) << "Can't get here.";
489   return NULL;
490 }
491 
// Returns true if every byte of |text| has its high bit clear, i.e. the
// string is pure 7-bit ASCII.  An empty string counts as all-ASCII.
bool AllAscii(const std::string& text) {
  for (const char c : text) {
    // Test the byte as unsigned so the result does not depend on whether
    // plain char is signed on this platform.
    if ((static_cast<unsigned char>(c) & 0x80) != 0) {
      return false;
    }
  }
  return true;
}
500 
DefaultValue(const FieldDescriptor * field,bool immutable,ClassNameResolver * name_resolver)501 std::string DefaultValue(const FieldDescriptor* field, bool immutable,
502                          ClassNameResolver* name_resolver) {
503   // Switch on CppType since we need to know which default_value_* method
504   // of FieldDescriptor to call.
505   switch (field->cpp_type()) {
506     case FieldDescriptor::CPPTYPE_INT32:
507       return StrCat(field->default_value_int32());
508     case FieldDescriptor::CPPTYPE_UINT32:
509       // Need to print as a signed int since Java has no unsigned.
510       return StrCat(static_cast<int32>(field->default_value_uint32()));
511     case FieldDescriptor::CPPTYPE_INT64:
512       return StrCat(field->default_value_int64()) + "L";
513     case FieldDescriptor::CPPTYPE_UINT64:
514       return StrCat(static_cast<int64>(field->default_value_uint64())) +
515              "L";
516     case FieldDescriptor::CPPTYPE_DOUBLE: {
517       double value = field->default_value_double();
518       if (value == std::numeric_limits<double>::infinity()) {
519         return "Double.POSITIVE_INFINITY";
520       } else if (value == -std::numeric_limits<double>::infinity()) {
521         return "Double.NEGATIVE_INFINITY";
522       } else if (value != value) {
523         return "Double.NaN";
524       } else {
525         return SimpleDtoa(value) + "D";
526       }
527     }
528     case FieldDescriptor::CPPTYPE_FLOAT: {
529       float value = field->default_value_float();
530       if (value == std::numeric_limits<float>::infinity()) {
531         return "Float.POSITIVE_INFINITY";
532       } else if (value == -std::numeric_limits<float>::infinity()) {
533         return "Float.NEGATIVE_INFINITY";
534       } else if (value != value) {
535         return "Float.NaN";
536       } else {
537         return SimpleFtoa(value) + "F";
538       }
539     }
540     case FieldDescriptor::CPPTYPE_BOOL:
541       return field->default_value_bool() ? "true" : "false";
542     case FieldDescriptor::CPPTYPE_STRING:
543       if (GetType(field) == FieldDescriptor::TYPE_BYTES) {
544         if (field->has_default_value()) {
545           // See comments in Internal.java for gory details.
546           return strings::Substitute(
547               "com.google.protobuf.Internal.bytesDefaultValue(\"$0\")",
548               CEscape(field->default_value_string()));
549         } else {
550           return "com.google.protobuf.ByteString.EMPTY";
551         }
552       } else {
553         if (AllAscii(field->default_value_string())) {
554           // All chars are ASCII.  In this case CEscape() works fine.
555           return "\"" + CEscape(field->default_value_string()) + "\"";
556         } else {
557           // See comments in Internal.java for gory details.
558           return strings::Substitute(
559               "com.google.protobuf.Internal.stringDefaultValue(\"$0\")",
560               CEscape(field->default_value_string()));
561         }
562       }
563 
564     case FieldDescriptor::CPPTYPE_ENUM:
565       return name_resolver->GetClassName(field->enum_type(), immutable) + "." +
566              field->default_value_enum()->name();
567 
568     case FieldDescriptor::CPPTYPE_MESSAGE:
569       return name_resolver->GetClassName(field->message_type(), immutable) +
570              ".getDefaultInstance()";
571 
572       // No default because we want the compiler to complain if any new
573       // types are added.
574   }
575 
576   GOOGLE_LOG(FATAL) << "Can't get here.";
577   return "";
578 }
579 
IsDefaultValueJavaDefault(const FieldDescriptor * field)580 bool IsDefaultValueJavaDefault(const FieldDescriptor* field) {
581   // Switch on CppType since we need to know which default_value_* method
582   // of FieldDescriptor to call.
583   switch (field->cpp_type()) {
584     case FieldDescriptor::CPPTYPE_INT32:
585       return field->default_value_int32() == 0;
586     case FieldDescriptor::CPPTYPE_UINT32:
587       return field->default_value_uint32() == 0;
588     case FieldDescriptor::CPPTYPE_INT64:
589       return field->default_value_int64() == 0L;
590     case FieldDescriptor::CPPTYPE_UINT64:
591       return field->default_value_uint64() == 0L;
592     case FieldDescriptor::CPPTYPE_DOUBLE:
593       return field->default_value_double() == 0.0;
594     case FieldDescriptor::CPPTYPE_FLOAT:
595       return field->default_value_float() == 0.0;
596     case FieldDescriptor::CPPTYPE_BOOL:
597       return field->default_value_bool() == false;
598     case FieldDescriptor::CPPTYPE_ENUM:
599       return field->default_value_enum()->number() == 0;
600     case FieldDescriptor::CPPTYPE_STRING:
601     case FieldDescriptor::CPPTYPE_MESSAGE:
602       return false;
603 
604       // No default because we want the compiler to complain if any new
605       // types are added.
606   }
607 
608   GOOGLE_LOG(FATAL) << "Can't get here.";
609   return false;
610 }
611 
IsByteStringWithCustomDefaultValue(const FieldDescriptor * field)612 bool IsByteStringWithCustomDefaultValue(const FieldDescriptor* field) {
613   return GetJavaType(field) == JAVATYPE_BYTES &&
614          field->default_value_string() != "";
615 }
616 
// Hex literals for the 32 single-bit masks: bit_masks[i] is the text of
// (1 << i) written as "0x" plus eight hex digits.
const char* bit_masks[] = {
    // bits 0-3
    "0x00000001", "0x00000002", "0x00000004", "0x00000008",
    // bits 4-7
    "0x00000010", "0x00000020", "0x00000040", "0x00000080",
    // bits 8-11
    "0x00000100", "0x00000200", "0x00000400", "0x00000800",
    // bits 12-15
    "0x00001000", "0x00002000", "0x00004000", "0x00008000",
    // bits 16-19
    "0x00010000", "0x00020000", "0x00040000", "0x00080000",
    // bits 20-23
    "0x00100000", "0x00200000", "0x00400000", "0x00800000",
    // bits 24-27
    "0x01000000", "0x02000000", "0x04000000", "0x08000000",
    // bits 28-31
    "0x10000000", "0x20000000", "0x40000000", "0x80000000",
};
630 
GetBitFieldName(int index)631 std::string GetBitFieldName(int index) {
632   std::string varName = "bitField";
633   varName += StrCat(index);
634   varName += "_";
635   return varName;
636 }
637 
GetBitFieldNameForBit(int bitIndex)638 std::string GetBitFieldNameForBit(int bitIndex) {
639   return GetBitFieldName(bitIndex / 32);
640 }
641 
642 namespace {
643 
GenerateGetBitInternal(const std::string & prefix,int bitIndex)644 std::string GenerateGetBitInternal(const std::string& prefix, int bitIndex) {
645   std::string varName = prefix + GetBitFieldNameForBit(bitIndex);
646   int bitInVarIndex = bitIndex % 32;
647 
648   std::string mask = bit_masks[bitInVarIndex];
649   std::string result = "((" + varName + " & " + mask + ") != 0)";
650   return result;
651 }
652 
GenerateSetBitInternal(const std::string & prefix,int bitIndex)653 std::string GenerateSetBitInternal(const std::string& prefix, int bitIndex) {
654   std::string varName = prefix + GetBitFieldNameForBit(bitIndex);
655   int bitInVarIndex = bitIndex % 32;
656 
657   std::string mask = bit_masks[bitInVarIndex];
658   std::string result = varName + " |= " + mask;
659   return result;
660 }
661 
662 }  // namespace
663 
GenerateGetBit(int bitIndex)664 std::string GenerateGetBit(int bitIndex) {
665   return GenerateGetBitInternal("", bitIndex);
666 }
667 
GenerateSetBit(int bitIndex)668 std::string GenerateSetBit(int bitIndex) {
669   return GenerateSetBitInternal("", bitIndex);
670 }
671 
GenerateClearBit(int bitIndex)672 std::string GenerateClearBit(int bitIndex) {
673   std::string varName = GetBitFieldNameForBit(bitIndex);
674   int bitInVarIndex = bitIndex % 32;
675 
676   std::string mask = bit_masks[bitInVarIndex];
677   std::string result = varName + " = (" + varName + " & ~" + mask + ")";
678   return result;
679 }
680 
GenerateGetBitFromLocal(int bitIndex)681 std::string GenerateGetBitFromLocal(int bitIndex) {
682   return GenerateGetBitInternal("from_", bitIndex);
683 }
684 
GenerateSetBitToLocal(int bitIndex)685 std::string GenerateSetBitToLocal(int bitIndex) {
686   return GenerateSetBitInternal("to_", bitIndex);
687 }
688 
GenerateGetBitMutableLocal(int bitIndex)689 std::string GenerateGetBitMutableLocal(int bitIndex) {
690   return GenerateGetBitInternal("mutable_", bitIndex);
691 }
692 
GenerateSetBitMutableLocal(int bitIndex)693 std::string GenerateSetBitMutableLocal(int bitIndex) {
694   return GenerateSetBitInternal("mutable_", bitIndex);
695 }
696 
IsReferenceType(JavaType type)697 bool IsReferenceType(JavaType type) {
698   switch (type) {
699     case JAVATYPE_INT:
700       return false;
701     case JAVATYPE_LONG:
702       return false;
703     case JAVATYPE_FLOAT:
704       return false;
705     case JAVATYPE_DOUBLE:
706       return false;
707     case JAVATYPE_BOOLEAN:
708       return false;
709     case JAVATYPE_STRING:
710       return true;
711     case JAVATYPE_BYTES:
712       return true;
713     case JAVATYPE_ENUM:
714       return true;
715     case JAVATYPE_MESSAGE:
716       return true;
717 
718       // No default because we want the compiler to complain if any new
719       // JavaTypes are added.
720   }
721 
722   GOOGLE_LOG(FATAL) << "Can't get here.";
723   return false;
724 }
725 
GetCapitalizedType(const FieldDescriptor * field,bool immutable)726 const char* GetCapitalizedType(const FieldDescriptor* field, bool immutable) {
727   switch (GetType(field)) {
728     case FieldDescriptor::TYPE_INT32:
729       return "Int32";
730     case FieldDescriptor::TYPE_UINT32:
731       return "UInt32";
732     case FieldDescriptor::TYPE_SINT32:
733       return "SInt32";
734     case FieldDescriptor::TYPE_FIXED32:
735       return "Fixed32";
736     case FieldDescriptor::TYPE_SFIXED32:
737       return "SFixed32";
738     case FieldDescriptor::TYPE_INT64:
739       return "Int64";
740     case FieldDescriptor::TYPE_UINT64:
741       return "UInt64";
742     case FieldDescriptor::TYPE_SINT64:
743       return "SInt64";
744     case FieldDescriptor::TYPE_FIXED64:
745       return "Fixed64";
746     case FieldDescriptor::TYPE_SFIXED64:
747       return "SFixed64";
748     case FieldDescriptor::TYPE_FLOAT:
749       return "Float";
750     case FieldDescriptor::TYPE_DOUBLE:
751       return "Double";
752     case FieldDescriptor::TYPE_BOOL:
753       return "Bool";
754     case FieldDescriptor::TYPE_STRING:
755       return "String";
756     case FieldDescriptor::TYPE_BYTES: {
757       return "Bytes";
758     }
759     case FieldDescriptor::TYPE_ENUM:
760       return "Enum";
761     case FieldDescriptor::TYPE_GROUP:
762       return "Group";
763     case FieldDescriptor::TYPE_MESSAGE:
764       return "Message";
765 
766       // No default because we want the compiler to complain if any new
767       // types are added.
768   }
769 
770   GOOGLE_LOG(FATAL) << "Can't get here.";
771   return NULL;
772 }
773 
774 // For encodings with fixed sizes, returns that size in bytes.  Otherwise
775 // returns -1.
FixedSize(FieldDescriptor::Type type)776 int FixedSize(FieldDescriptor::Type type) {
777   switch (type) {
778     case FieldDescriptor::TYPE_INT32:
779       return -1;
780     case FieldDescriptor::TYPE_INT64:
781       return -1;
782     case FieldDescriptor::TYPE_UINT32:
783       return -1;
784     case FieldDescriptor::TYPE_UINT64:
785       return -1;
786     case FieldDescriptor::TYPE_SINT32:
787       return -1;
788     case FieldDescriptor::TYPE_SINT64:
789       return -1;
790     case FieldDescriptor::TYPE_FIXED32:
791       return WireFormatLite::kFixed32Size;
792     case FieldDescriptor::TYPE_FIXED64:
793       return WireFormatLite::kFixed64Size;
794     case FieldDescriptor::TYPE_SFIXED32:
795       return WireFormatLite::kSFixed32Size;
796     case FieldDescriptor::TYPE_SFIXED64:
797       return WireFormatLite::kSFixed64Size;
798     case FieldDescriptor::TYPE_FLOAT:
799       return WireFormatLite::kFloatSize;
800     case FieldDescriptor::TYPE_DOUBLE:
801       return WireFormatLite::kDoubleSize;
802 
803     case FieldDescriptor::TYPE_BOOL:
804       return WireFormatLite::kBoolSize;
805     case FieldDescriptor::TYPE_ENUM:
806       return -1;
807 
808     case FieldDescriptor::TYPE_STRING:
809       return -1;
810     case FieldDescriptor::TYPE_BYTES:
811       return -1;
812     case FieldDescriptor::TYPE_GROUP:
813       return -1;
814     case FieldDescriptor::TYPE_MESSAGE:
815       return -1;
816 
817       // No default because we want the compiler to complain if any new
818       // types are added.
819   }
820   GOOGLE_LOG(FATAL) << "Can't get here.";
821   return -1;
822 }
823 
824 // Sort the fields of the given Descriptor by number into a new[]'d array
825 // and return it. The caller should delete the returned array.
SortFieldsByNumber(const Descriptor * descriptor)826 const FieldDescriptor** SortFieldsByNumber(const Descriptor* descriptor) {
827   const FieldDescriptor** fields =
828       new const FieldDescriptor*[descriptor->field_count()];
829   for (int i = 0; i < descriptor->field_count(); i++) {
830     fields[i] = descriptor->field(i);
831   }
832   std::sort(fields, fields + descriptor->field_count(),
833             FieldOrderingByNumber());
834   return fields;
835 }
836 
837 // Returns true if the message type has any required fields.  If it doesn't,
838 // we can optimize out calls to its isInitialized() method.
839 //
840 // already_seen is used to avoid checking the same type multiple times
841 // (and also to protect against recursion).
HasRequiredFields(const Descriptor * type,std::unordered_set<const Descriptor * > * already_seen)842 bool HasRequiredFields(const Descriptor* type,
843                        std::unordered_set<const Descriptor*>* already_seen) {
844   if (already_seen->count(type) > 0) {
845     // The type is already in cache.  This means that either:
846     // a. The type has no required fields.
847     // b. We are in the midst of checking if the type has required fields,
848     //    somewhere up the stack.  In this case, we know that if the type
849     //    has any required fields, they'll be found when we return to it,
850     //    and the whole call to HasRequiredFields() will return true.
851     //    Therefore, we don't have to check if this type has required fields
852     //    here.
853     return false;
854   }
855   already_seen->insert(type);
856 
857   // If the type has extensions, an extension with message type could contain
858   // required fields, so we have to be conservative and assume such an
859   // extension exists.
860   if (type->extension_range_count() > 0) return true;
861 
862   for (int i = 0; i < type->field_count(); i++) {
863     const FieldDescriptor* field = type->field(i);
864     if (field->is_required()) {
865       return true;
866     }
867     if (GetJavaType(field) == JAVATYPE_MESSAGE) {
868       if (HasRequiredFields(field->message_type(), already_seen)) {
869         return true;
870       }
871     }
872   }
873 
874   return false;
875 }
876 
HasRequiredFields(const Descriptor * type)877 bool HasRequiredFields(const Descriptor* type) {
878   std::unordered_set<const Descriptor*> already_seen;
879   return HasRequiredFields(type, &already_seen);
880 }
881 
HasRepeatedFields(const Descriptor * descriptor)882 bool HasRepeatedFields(const Descriptor* descriptor) {
883   for (int i = 0; i < descriptor->field_count(); ++i) {
884     const FieldDescriptor* field = descriptor->field(i);
885     if (field->is_repeated()) {
886       return true;
887     }
888   }
889   return false;
890 }
891 
892 // Encode an unsigned 32-bit value into a sequence of UTF-16 characters.
893 //
894 // If the value is in [0x0000, 0xD7FF], we encode it with a single character
895 // with the same numeric value.
896 //
897 // If the value is larger than 0xD7FF, we encode its lowest 13 bits into a
898 // character in the range [0xE000, 0xFFFF] by combining these 13 bits with
899 // 0xE000 using logic-or. Then we shift the value to the right by 13 bits, and
900 // encode the remaining value by repeating this same process until we get to
901 // a value in [0x0000, 0xD7FF] where we will encode it using a character with
902 // the same numeric value.
903 //
904 // Note that we only use code points in [0x0000, 0xD7FF] and [0xE000, 0xFFFF].
905 // There will be no surrogate pairs in the encoded character sequence.
WriteUInt32ToUtf16CharSequence(uint32 number,std::vector<uint16> * output)906 void WriteUInt32ToUtf16CharSequence(uint32 number,
907                                     std::vector<uint16>* output) {
908   // For values in [0x0000, 0xD7FF], only use one char to encode it.
909   if (number < 0xD800) {
910     output->push_back(static_cast<uint16>(number));
911     return;
912   }
913   // Encode into multiple chars. All except the last char will be in the range
914   // [0xE000, 0xFFFF], and the last char will be in the range [0x0000, 0xD7FF].
915   // Note that we don't use any value in range [0xD800, 0xDFFF] because they
916   // have to come in pairs and the encoding is just more space-efficient w/o
917   // them.
918   while (number >= 0xD800) {
919     // [0xE000, 0xFFFF] can represent 13 bits of info.
920     output->push_back(static_cast<uint16>(0xE000 | (number & 0x1FFF)));
921     number >>= 13;
922   }
923   output->push_back(static_cast<uint16>(number));
924 }
925 
GetExperimentalJavaFieldTypeForSingular(const FieldDescriptor * field)926 int GetExperimentalJavaFieldTypeForSingular(const FieldDescriptor* field) {
927   // j/c/g/protobuf/FieldType.java lists field types in a slightly different
928   // order from FieldDescriptor::Type so we can't do a simple cast.
929   //
930   // TODO(xiaofeng): Make j/c/g/protobuf/FieldType.java follow the same order.
931   int result = field->type();
932   if (result == FieldDescriptor::TYPE_GROUP) {
933     return 17;
934   } else if (result < FieldDescriptor::TYPE_GROUP) {
935     return result - 1;
936   } else {
937     return result - 2;
938   }
939 }
940 
GetExperimentalJavaFieldTypeForRepeated(const FieldDescriptor * field)941 int GetExperimentalJavaFieldTypeForRepeated(const FieldDescriptor* field) {
942   if (field->type() == FieldDescriptor::TYPE_GROUP) {
943     return 49;
944   } else {
945     return GetExperimentalJavaFieldTypeForSingular(field) + 18;
946   }
947 }
948 
GetExperimentalJavaFieldTypeForPacked(const FieldDescriptor * field)949 int GetExperimentalJavaFieldTypeForPacked(const FieldDescriptor* field) {
950   int result = field->type();
951   if (result < FieldDescriptor::TYPE_STRING) {
952     return result + 34;
953   } else if (result > FieldDescriptor::TYPE_BYTES) {
954     return result + 30;
955   } else {
956     GOOGLE_LOG(FATAL) << field->full_name() << " can't be packed.";
957     return 0;
958   }
959 }
960 
GetExperimentalJavaFieldType(const FieldDescriptor * field)961 int GetExperimentalJavaFieldType(const FieldDescriptor* field) {
962   static const int kMapFieldType = 50;
963   static const int kOneofFieldTypeOffset = 51;
964   static const int kRequiredBit = 0x100;
965   static const int kUtf8CheckBit = 0x200;
966   static const int kCheckInitialized = 0x400;
967   static const int kMapWithProto2EnumValue = 0x800;
968   int extra_bits = field->is_required() ? kRequiredBit : 0;
969   if (field->type() == FieldDescriptor::TYPE_STRING && CheckUtf8(field)) {
970     extra_bits |= kUtf8CheckBit;
971   }
972   if (field->is_required() || (GetJavaType(field) == JAVATYPE_MESSAGE &&
973                                HasRequiredFields(field->message_type()))) {
974     extra_bits |= kCheckInitialized;
975   }
976 
977   if (field->is_map()) {
978     if (SupportFieldPresence(field->file())) {
979       const FieldDescriptor* value =
980           field->message_type()->FindFieldByName("value");
981       if (GetJavaType(value) == JAVATYPE_ENUM) {
982         extra_bits |= kMapWithProto2EnumValue;
983       }
984     }
985     return kMapFieldType | extra_bits;
986   } else if (field->is_packed()) {
987     return GetExperimentalJavaFieldTypeForPacked(field);
988   } else if (field->is_repeated()) {
989     return GetExperimentalJavaFieldTypeForRepeated(field) | extra_bits;
990   } else if (field->containing_oneof() != NULL) {
991     return (GetExperimentalJavaFieldTypeForSingular(field) +
992             kOneofFieldTypeOffset) |
993            extra_bits;
994   } else {
995     return GetExperimentalJavaFieldTypeForSingular(field) | extra_bits;
996   }
997 }
998 
999 // Escape a UTF-16 character to be embedded in a Java string.
EscapeUtf16ToString(uint16 code,std::string * output)1000 void EscapeUtf16ToString(uint16 code, std::string* output) {
1001   if (code == '\t') {
1002     output->append("\\t");
1003   } else if (code == '\b') {
1004     output->append("\\b");
1005   } else if (code == '\n') {
1006     output->append("\\n");
1007   } else if (code == '\r') {
1008     output->append("\\r");
1009   } else if (code == '\f') {
1010     output->append("\\f");
1011   } else if (code == '\'') {
1012     output->append("\\'");
1013   } else if (code == '\"') {
1014     output->append("\\\"");
1015   } else if (code == '\\') {
1016     output->append("\\\\");
1017   } else if (code >= 0x20 && code <= 0x7f) {
1018     output->push_back(static_cast<char>(code));
1019   } else {
1020     output->append(StringPrintf("\\u%04x", code));
1021   }
1022 }
1023 
1024 }  // namespace java
1025 }  // namespace compiler
1026 }  // namespace protobuf
1027 }  // namespace google
1028