1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // Implements the Protocol Compiler front-end such that it may be reused by
36 // custom compilers written to support other languages.
37 
38 #ifndef GOOGLE_PROTOBUF_COMPILER_COMMAND_LINE_INTERFACE_H__
39 #define GOOGLE_PROTOBUF_COMPILER_COMMAND_LINE_INTERFACE_H__
40 
41 #include <google/protobuf/stubs/common.h>
42 #include <string>
43 #include <vector>
44 #include <map>
45 #include <set>
46 #include <utility>
47 
48 namespace google {
49 namespace protobuf {
50 
// Forward declarations of protobuf types that this header only refers to by
// pointer or reference; the trailing comment names the header declaring each.
class Descriptor;            // descriptor.h
class DescriptorPool;        // descriptor.h
class FileDescriptor;        // descriptor.h
class FileDescriptorProto;   // descriptor.pb.h
template<typename T> class RepeatedPtrField;  // repeated_field.h
56 
57 namespace compiler {
58 
// Forward declarations of compiler types that this header only refers to by
// pointer; the trailing comment names the header declaring each.
class CodeGenerator;         // code_generator.h
class GeneratorContext;      // code_generator.h
class DiskSourceTree;        // importer.h
62 
63 // This class implements the command-line interface to the protocol compiler.
64 // It is designed to make it very easy to create a custom protocol compiler
65 // supporting the languages of your choice.  For example, if you wanted to
66 // create a custom protocol compiler binary which includes both the regular
67 // C++ support plus support for your own custom output "Foo", you would
68 // write a class "FooGenerator" which implements the CodeGenerator interface,
69 // then write a main() procedure like this:
70 //
71 //   int main(int argc, char* argv[]) {
72 //     google::protobuf::compiler::CommandLineInterface cli;
73 //
74 //     // Support generation of C++ source and headers.
75 //     google::protobuf::compiler::cpp::CppGenerator cpp_generator;
76 //     cli.RegisterGenerator("--cpp_out", &cpp_generator,
77 //       "Generate C++ source and header.");
78 //
79 //     // Support generation of Foo code.
80 //     FooGenerator foo_generator;
81 //     cli.RegisterGenerator("--foo_out", &foo_generator,
82 //       "Generate Foo file.");
83 //
84 //     return cli.Run(argc, argv);
85 //   }
86 //
87 // The compiler is invoked with syntax like:
88 //   protoc --cpp_out=outdir --foo_out=outdir --proto_path=src src/foo.proto
89 //
90 // For a full description of the command-line syntax, invoke it with --help.
91 class LIBPROTOC_EXPORT CommandLineInterface {
92  public:
93   CommandLineInterface();
94   ~CommandLineInterface();
95 
96   // Register a code generator for a language.
97   //
98   // Parameters:
99   // * flag_name: The command-line flag used to specify an output file of
100   //   this type.  The name must start with a '-'.  If the name is longer
101   //   than one letter, it must start with two '-'s.
102   // * generator: The CodeGenerator which will be called to generate files
103   //   of this type.
104   // * help_text: Text describing this flag in the --help output.
105   //
106   // Some generators accept extra parameters.  You can specify this parameter
107   // on the command-line by placing it before the output directory, separated
108   // by a colon:
109   //   protoc --foo_out=enable_bar:outdir
110   // The text before the colon is passed to CodeGenerator::Generate() as the
111   // "parameter".
112   void RegisterGenerator(const string& flag_name,
113                          CodeGenerator* generator,
114                          const string& help_text);
115 
116   // Register a code generator for a language.
117   // Besides flag_name you can specify another option_flag_name that could be
118   // used to pass extra parameters to the registered code generator.
119   // Suppose you have registered a generator by calling:
120   //   command_line_interface.RegisterGenerator("--foo_out", "--foo_opt", ...)
121   // Then you could invoke the compiler with a command like:
122   //   protoc --foo_out=enable_bar:outdir --foo_opt=enable_baz
123   // This will pass "enable_bar,enable_baz" as the parameter to the generator.
124   void RegisterGenerator(const string& flag_name,
125                          const string& option_flag_name,
126                          CodeGenerator* generator,
127                          const string& help_text);
128 
129   // Enables "plugins".  In this mode, if a command-line flag ends with "_out"
130   // but does not match any registered generator, the compiler will attempt to
131   // find a "plugin" to implement the generator.  Plugins are just executables.
132   // They should live somewhere in the PATH.
133   //
134   // The compiler determines the executable name to search for by concatenating
135   // exe_name_prefix with the unrecognized flag name, removing "_out".  So, for
136   // example, if exe_name_prefix is "protoc-" and you pass the flag --foo_out,
137   // the compiler will try to run the program "protoc-foo".
138   //
139   // The plugin program should implement the following usage:
140   //   plugin [--out=OUTDIR] [--parameter=PARAMETER] PROTO_FILES < DESCRIPTORS
141   // --out indicates the output directory (as passed to the --foo_out
142   // parameter); if omitted, the current directory should be used.  --parameter
143   // gives the generator parameter, if any was provided.  The PROTO_FILES list
144   // the .proto files which were given on the compiler command-line; these are
145   // the files for which the plugin is expected to generate output code.
146   // Finally, DESCRIPTORS is an encoded FileDescriptorSet (as defined in
147   // descriptor.proto).  This is piped to the plugin's stdin.  The set will
148   // include descriptors for all the files listed in PROTO_FILES as well as
149   // all files that they import.  The plugin MUST NOT attempt to read the
150   // PROTO_FILES directly -- it must use the FileDescriptorSet.
151   //
152   // The plugin should generate whatever files are necessary, as code generators
153   // normally do.  It should write the names of all files it generates to
154   // stdout.  The names should be relative to the output directory, NOT absolute
155   // names or relative to the current directory.  If any errors occur, error
156   // messages should be written to stderr.  If an error is fatal, the plugin
157   // should exit with a non-zero exit code.
158   void AllowPlugins(const string& exe_name_prefix);
159 
160   // Run the Protocol Compiler with the given command-line parameters.
161   // Returns the error code which should be returned by main().
162   //
163   // It may not be safe to call Run() in a multi-threaded environment because
164   // it calls strerror().  I'm not sure why you'd want to do this anyway.
165   int Run(int argc, const char* const argv[]);
166 
167   // Call SetInputsAreCwdRelative(true) if the input files given on the command
168   // line should be interpreted relative to the proto import path specified
169   // using --proto_path or -I flags.  Otherwise, input file names will be
170   // interpreted relative to the current working directory (or as absolute
171   // paths if they start with '/'), though they must still reside inside
172   // a directory given by --proto_path or the compiler will fail.  The latter
173   // mode is generally more intuitive and easier to use, especially e.g. when
174   // defining implicit rules in Makefiles.
SetInputsAreProtoPathRelative(bool enable)175   void SetInputsAreProtoPathRelative(bool enable) {
176     inputs_are_proto_path_relative_ = enable;
177   }
178 
179   // Provides some text which will be printed when the --version flag is
180   // used.  The version of libprotoc will also be printed on the next line
181   // after this text.
SetVersionInfo(const string & text)182   void SetVersionInfo(const string& text) {
183     version_info_ = text;
184   }
185 
186 
187  private:
188   // -----------------------------------------------------------------
189 
190   class ErrorPrinter;
191   class GeneratorContextImpl;
192   class MemoryOutputStream;
193 
194   // Clear state from previous Run().
195   void Clear();
196 
197   // Remaps each file in input_files_ so that it is relative to one of the
198   // directories in proto_path_.  Returns false if an error occurred.  This
199   // is only used if inputs_are_proto_path_relative_ is false.
200   bool MakeInputsBeProtoPathRelative(
201     DiskSourceTree* source_tree);
202 
203   // Return status for ParseArguments() and InterpretArgument().
204   enum ParseArgumentStatus {
205     PARSE_ARGUMENT_DONE_AND_CONTINUE,
206     PARSE_ARGUMENT_DONE_AND_EXIT,
207     PARSE_ARGUMENT_FAIL
208   };
209 
210   // Parse all command-line arguments.
211   ParseArgumentStatus ParseArguments(int argc, const char* const argv[]);
212 
213   // Parses a command-line argument into a name/value pair.  Returns
214   // true if the next argument in the argv should be used as the value,
215   // false otherwise.
216   //
217   // Exmaples:
218   //   "-Isrc/protos" ->
219   //     name = "-I", value = "src/protos"
220   //   "--cpp_out=src/foo.pb2.cc" ->
221   //     name = "--cpp_out", value = "src/foo.pb2.cc"
222   //   "foo.proto" ->
223   //     name = "", value = "foo.proto"
224   bool ParseArgument(const char* arg, string* name, string* value);
225 
226   // Interprets arguments parsed with ParseArgument.
227   ParseArgumentStatus InterpretArgument(const string& name,
228                                         const string& value);
229 
230   // Print the --help text to stderr.
231   void PrintHelpText();
232 
233   // Generate the given output file from the given input.
234   struct OutputDirective;  // see below
235   bool GenerateOutput(const vector<const FileDescriptor*>& parsed_files,
236                       const OutputDirective& output_directive,
237                       GeneratorContext* generator_context);
238   bool GeneratePluginOutput(const vector<const FileDescriptor*>& parsed_files,
239                             const string& plugin_name,
240                             const string& parameter,
241                             GeneratorContext* generator_context,
242                             string* error);
243 
244   // Implements --encode and --decode.
245   bool EncodeOrDecode(const DescriptorPool* pool);
246 
247   // Implements the --descriptor_set_out option.
248   bool WriteDescriptorSet(const vector<const FileDescriptor*> parsed_files);
249 
250   // Get all transitive dependencies of the given file (including the file
251   // itself), adding them to the given list of FileDescriptorProtos.  The
252   // protos will be ordered such that every file is listed before any file that
253   // depends on it, so that you can call DescriptorPool::BuildFile() on them
254   // in order.  Any files in *already_seen will not be added, and each file
255   // added will be inserted into *already_seen.  If include_source_code_info is
256   // true then include the source code information in the FileDescriptorProtos.
257   static void GetTransitiveDependencies(
258       const FileDescriptor* file,
259       bool include_source_code_info,
260       set<const FileDescriptor*>* already_seen,
261       RepeatedPtrField<FileDescriptorProto>* output);
262 
263   // Implements the --print_free_field_numbers. This function prints free field
264   // numbers into stdout for the message and it's nested message types in
265   // post-order, i.e. nested types first. Printed range are left-right
266   // inclusive, i.e. [a, b].
267   //
268   // Groups:
269   // For historical reasons, groups are considered to share the same
270   // field number space with the parent message, thus it will not print free
271   // field numbers for groups. The field numbers used in the groups are
272   // excluded in the free field numbers of the parent message.
273   //
274   // Extension Ranges:
275   // Extension ranges are considered ocuppied field numbers and they will not be
276   // listed as free numbers in the output.
277   void PrintFreeFieldNumbers(const Descriptor* descriptor);
278 
279   // -----------------------------------------------------------------
280 
281   // The name of the executable as invoked (i.e. argv[0]).
282   string executable_name_;
283 
284   // Version info set with SetVersionInfo().
285   string version_info_;
286 
287   // Registered generators.
288   struct GeneratorInfo {
289     string flag_name;
290     string option_flag_name;
291     CodeGenerator* generator;
292     string help_text;
293   };
294   typedef map<string, GeneratorInfo> GeneratorMap;
295   GeneratorMap generators_by_flag_name_;
296   GeneratorMap generators_by_option_name_;
297   // A map from generator names to the parameters specified using the option
298   // flag. For example, if the user invokes the compiler with:
299   //   protoc --foo_out=outputdir --foo_opt=enable_bar ...
300   // Then there will be an entry ("--foo_out", "enable_bar") in this map.
301   map<string, string> generator_parameters_;
302 
303   // See AllowPlugins().  If this is empty, plugins aren't allowed.
304   string plugin_prefix_;
305 
306   // Maps specific plugin names to files.  When executing a plugin, this map
307   // is searched first to find the plugin executable.  If not found here, the
308   // PATH (or other OS-specific search strategy) is searched.
309   map<string, string> plugins_;
310 
311   // Stuff parsed from command line.
312   enum Mode {
313     MODE_COMPILE,  // Normal mode:  parse .proto files and compile them.
314     MODE_ENCODE,   // --encode:  read text from stdin, write binary to stdout.
315     MODE_DECODE,   // --decode:  read binary from stdin, write text to stdout.
316     MODE_PRINT,    // Print mode: print info of the given .proto files and exit.
317   };
318 
319   Mode mode_;
320 
321   enum PrintMode {
322     PRINT_NONE,               // Not in MODE_PRINT
323     PRINT_FREE_FIELDS,        // --print_free_fields
324   };
325 
326   PrintMode print_mode_;
327 
328   enum ErrorFormat {
329     ERROR_FORMAT_GCC,   // GCC error output format (default).
330     ERROR_FORMAT_MSVS   // Visual Studio output (--error_format=msvs).
331   };
332 
333   ErrorFormat error_format_;
334 
335   vector<pair<string, string> > proto_path_;  // Search path for proto files.
336   vector<string> input_files_;                // Names of the input proto files.
337 
338   // output_directives_ lists all the files we are supposed to output and what
339   // generator to use for each.
340   struct OutputDirective {
341     string name;                // E.g. "--foo_out"
342     CodeGenerator* generator;   // NULL for plugins
343     string parameter;
344     string output_location;
345   };
346   vector<OutputDirective> output_directives_;
347 
348   // When using --encode or --decode, this names the type we are encoding or
349   // decoding.  (Empty string indicates --decode_raw.)
350   string codec_type_;
351 
352   // If --descriptor_set_out was given, this is the filename to which the
353   // FileDescriptorSet should be written.  Otherwise, empty.
354   string descriptor_set_name_;
355 
356   // True if --include_imports was given, meaning that we should
357   // write all transitive dependencies to the DescriptorSet.  Otherwise, only
358   // the .proto files listed on the command-line are added.
359   bool imports_in_descriptor_set_;
360 
361   // True if --include_source_info was given, meaning that we should not strip
362   // SourceCodeInfo from the DescriptorSet.
363   bool source_info_in_descriptor_set_;
364 
365   // Was the --disallow_services flag used?
366   bool disallow_services_;
367 
368   // See SetInputsAreProtoPathRelative().
369   bool inputs_are_proto_path_relative_;
370 
371   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CommandLineInterface);
372 };
373 
374 }  // namespace compiler
375 }  // namespace protobuf
376 
377 }  // namespace google
378 #endif  // GOOGLE_PROTOBUF_COMPILER_COMMAND_LINE_INTERFACE_H__
379