1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #include <google/protobuf/compiler/command_line_interface.h>
36 
37 
38 #include <google/protobuf/stubs/platform_macros.h>
39 
40 #include <stdio.h>
41 #include <sys/types.h>
42 #ifdef major
43 #undef major
44 #endif
45 #ifdef minor
46 #undef minor
47 #endif
48 #include <fcntl.h>
49 #include <sys/stat.h>
50 #ifndef _MSC_VER
51 #include <unistd.h>
52 #endif
53 #include <ctype.h>
54 #include <errno.h>
55 #include <fstream>
56 #include <iostream>
57 
58 #include <limits.h>  //For PATH_MAX
59 
60 #include <memory>
61 
62 #ifdef __APPLE__
63 #include <mach-o/dyld.h>
64 #endif
65 
66 #include <google/protobuf/stubs/common.h>
67 #include <google/protobuf/stubs/logging.h>
68 #include <google/protobuf/stubs/stringprintf.h>
69 #include <google/protobuf/compiler/subprocess.h>
70 #include <google/protobuf/compiler/zip_writer.h>
71 #include <google/protobuf/compiler/plugin.pb.h>
72 #include <google/protobuf/compiler/code_generator.h>
73 #include <google/protobuf/compiler/importer.h>
74 #include <google/protobuf/io/io_win32.h>
75 #include <google/protobuf/io/coded_stream.h>
76 #include <google/protobuf/io/printer.h>
77 #include <google/protobuf/io/zero_copy_stream_impl.h>
78 #include <google/protobuf/descriptor.h>
79 #include <google/protobuf/dynamic_message.h>
80 #include <google/protobuf/text_format.h>
81 #include <google/protobuf/stubs/strutil.h>
82 #include <google/protobuf/stubs/substitute.h>
83 #include <google/protobuf/stubs/map_util.h>
84 #include <google/protobuf/stubs/stl_util.h>
85 
86 
87 #include <google/protobuf/port_def.inc>
88 
89 namespace google {
90 namespace protobuf {
91 namespace compiler {
92 
93 #ifndef O_BINARY
94 #ifdef _O_BINARY
95 #define O_BINARY _O_BINARY
96 #else
97 #define O_BINARY 0  // If this isn't defined, the platform doesn't need it.
98 #endif
99 #endif
100 
101 namespace {
102 #if defined(_WIN32)
103 // DO NOT include <io.h>, instead create functions in io_win32.{h,cc} and import
104 // them like we do below.
105 using google::protobuf::io::win32::access;
106 using google::protobuf::io::win32::close;
107 using google::protobuf::io::win32::mkdir;
108 using google::protobuf::io::win32::open;
109 using google::protobuf::io::win32::setmode;
110 using google::protobuf::io::win32::write;
111 #endif
112 
// Default message template reported when a file is imported without being
// listed in --direct_dependencies.  The "%s" placeholder stands for the
// offending file name.  Both the characters and the pointer are const so the
// default cannot be re-pointed at run time.
static const char* const kDefaultDirectDependenciesViolationMsg =
    "File is imported but not declared in --direct_dependencies: %s";
115 
// Returns true if the text looks like a Windows-style absolute path, starting
// with a drive letter.  Example:  "C:\foo".  The only ':' allowed is the one
// right after the drive letter.  Always returns false when built for anything
// other than Windows or Cygwin.
// TODO(kenton):  Share this with copy in importer.cc?
static bool IsWindowsAbsolutePath(const std::string& text) {
#if defined(_WIN32) || defined(__CYGWIN__)
  // isalpha() has undefined behavior for negative values other than EOF, so
  // the (possibly signed) char must be converted to unsigned char first.
  return text.size() >= 3 && text[1] == ':' &&
         isalpha(static_cast<unsigned char>(text[0])) &&
         (text[2] == '/' || text[2] == '\\') && text.find_last_of(':') == 1;
#else
  return false;
#endif
}
127 
// Puts |fd| into text mode.  Only Windows builds do any work here; a failure
// of setmode() is logged as a warning and otherwise ignored.
void SetFdToTextMode(int fd) {
#ifdef _WIN32
  const bool failed = (setmode(fd, _O_TEXT) == -1);
  if (failed) {
    // This should never happen, I think.
    GOOGLE_LOG(WARNING) << "setmode(" << fd << ", _O_TEXT): " << strerror(errno);
  }
#else
  // Text and binary are the same on non-Windows platforms.
  (void)fd;
#endif
}
137 
// Puts |fd| into binary mode.  Only Windows builds do any work here; a
// failure of setmode() is logged as a warning and otherwise ignored.
void SetFdToBinaryMode(int fd) {
#ifdef _WIN32
  const bool failed = (setmode(fd, _O_BINARY) == -1);
  if (failed) {
    // This should never happen, I think.
    GOOGLE_LOG(WARNING) << "setmode(" << fd << ", _O_BINARY): " << strerror(errno);
  }
#else
  // Text and binary are the same on non-Windows platforms.
  (void)fd;
#endif
}
147 
// Appends '/' to |*path| unless it is empty or already ends with '/'.
void AddTrailingSlash(std::string* path) {
  if (path->empty()) return;
  if (path->back() == '/') return;
  *path += '/';
}
153 
// Returns true if |path| is empty or names something that exists (checked
// with access(F_OK); the entry is not verified to actually be a directory).
// On failure, prints "<path>: <strerror>" to stderr and returns false.
bool VerifyDirectoryExists(const std::string& path) {
  // An empty path is accepted without consulting the file system.
  if (path.empty()) return true;

  const bool exists = access(path.c_str(), F_OK) != -1;
  if (!exists) {
    std::cerr << path << ": " << strerror(errno) << std::endl;
  }
  return exists;
}
164 
165 // Try to create the parent directory of the given file, creating the parent's
166 // parent if necessary, and so on.  The full file name is actually
167 // (prefix + filename), but we assume |prefix| already exists and only create
168 // directories listed in |filename|.
TryCreateParentDirectory(const std::string & prefix,const std::string & filename)169 bool TryCreateParentDirectory(const std::string& prefix,
170                               const std::string& filename) {
171   // Recursively create parent directories to the output file.
172   // On Windows, both '/' and '\' are valid path separators.
173   std::vector<std::string> parts =
174       Split(filename, "/\\", true);
175   std::string path_so_far = prefix;
176   for (int i = 0; i < parts.size() - 1; i++) {
177     path_so_far += parts[i];
178     if (mkdir(path_so_far.c_str(), 0777) != 0) {
179       if (errno != EEXIST) {
180         std::cerr << filename << ": while trying to create directory "
181                   << path_so_far << ": " << strerror(errno) << std::endl;
182         return false;
183       }
184     }
185     path_so_far += '/';
186   }
187 
188   return true;
189 }
190 
// Get the absolute path of this protoc binary.
//
// On success stores the path in |*path| and returns true.  Returns false and
// leaves |*path| untouched if the platform query fails or yields an empty
// result.
bool GetProtocAbsolutePath(std::string* path) {
#ifdef _WIN32
  char buffer[MAX_PATH];
  int len = GetModuleFileNameA(NULL, buffer, MAX_PATH);
#elif defined(__APPLE__)
  char buffer[PATH_MAX];
  int len = 0;

  char dirtybuffer[PATH_MAX];
  uint32_t size = sizeof(dirtybuffer);
  // realpath() returns NULL on failure and then leaves |buffer| unspecified,
  // so only measure the result when resolution actually succeeded.
  if (_NSGetExecutablePath(dirtybuffer, &size) == 0 &&
      realpath(dirtybuffer, buffer) != NULL) {
    len = strlen(buffer);
  }
#else
  char buffer[PATH_MAX];
  // readlink() does not NUL-terminate; the returned length is used instead.
  int len = readlink("/proc/self/exe", buffer, PATH_MAX);
#endif
  if (len <= 0) {
    return false;
  }
  path->assign(buffer, len);
  return true;
}
217 
// Reports whether |path| looks like the directory where the well-known type
// protos are installed.  Only the presence of
// <path>/google/protobuf/descriptor.proto is probed.
bool IsInstalledProtoPath(const std::string& path) {
  std::string probe(path);
  probe += "/google/protobuf/descriptor.proto";
  const int rc = access(probe.c_str(), F_OK);
  return rc != -1;
}
225 
226 // Add the paths where google/protobuf/descriptor.proto and other well-known
227 // type protos are installed.
AddDefaultProtoPaths(std::vector<std::pair<std::string,std::string>> * paths)228 void AddDefaultProtoPaths(
229     std::vector<std::pair<std::string, std::string> >* paths) {
230   // TODO(xiaofeng): The code currently only checks relative paths of where
231   // the protoc binary is installed. We probably should make it handle more
232   // cases than that.
233   std::string path;
234   if (!GetProtocAbsolutePath(&path)) {
235     return;
236   }
237   // Strip the binary name.
238   size_t pos = path.find_last_of("/\\");
239   if (pos == std::string::npos || pos == 0) {
240     return;
241   }
242   path = path.substr(0, pos);
243   // Check the binary's directory.
244   if (IsInstalledProtoPath(path)) {
245     paths->push_back(std::pair<std::string, std::string>("", path));
246     return;
247   }
248   // Check if there is an include subdirectory.
249   if (IsInstalledProtoPath(path + "/include")) {
250     paths->push_back(
251         std::pair<std::string, std::string>("", path + "/include"));
252     return;
253   }
254   // Check if the upper level directory has an "include" subdirectory.
255   pos = path.find_last_of("/\\");
256   if (pos == std::string::npos || pos == 0) {
257     return;
258   }
259   path = path.substr(0, pos);
260   if (IsInstalledProtoPath(path + "/include")) {
261     paths->push_back(
262         std::pair<std::string, std::string>("", path + "/include"));
263     return;
264   }
265 }
266 
// Builds the plugin executable name for a command-line directive.
// Assuming the directive starts with "--" and ends with "_out" or "_opt",
// strip the "--" and "_out/_opt" and add the plugin prefix, e.g.
// ("protoc-", "--foo_out") -> "protoc-gen-foo".
// The directive must be at least six characters long; this is not checked.
std::string PluginName(const std::string& plugin_prefix,
                       const std::string& directive) {
  return plugin_prefix + "gen-" + directive.substr(2, directive.size() - 6);
}
273 
274 }  // namespace
275 
276 // A MultiFileErrorCollector that prints errors to stderr.
277 class CommandLineInterface::ErrorPrinter
278     : public MultiFileErrorCollector,
279       public io::ErrorCollector,
280       public DescriptorPool::ErrorCollector {
281  public:
ErrorPrinter(ErrorFormat format,DiskSourceTree * tree=NULL)282   ErrorPrinter(ErrorFormat format, DiskSourceTree* tree = NULL)
283       : format_(format), tree_(tree), found_errors_(false) {}
~ErrorPrinter()284   ~ErrorPrinter() {}
285 
286   // implements MultiFileErrorCollector ------------------------------
AddError(const std::string & filename,int line,int column,const std::string & message)287   void AddError(const std::string& filename, int line, int column,
288                 const std::string& message) {
289     found_errors_ = true;
290     AddErrorOrWarning(filename, line, column, message, "error", std::cerr);
291   }
292 
AddWarning(const std::string & filename,int line,int column,const std::string & message)293   void AddWarning(const std::string& filename, int line, int column,
294                   const std::string& message) {
295     AddErrorOrWarning(filename, line, column, message, "warning", std::clog);
296   }
297 
298   // implements io::ErrorCollector -----------------------------------
AddError(int line,int column,const std::string & message)299   void AddError(int line, int column, const std::string& message) {
300     AddError("input", line, column, message);
301   }
302 
AddWarning(int line,int column,const std::string & message)303   void AddWarning(int line, int column, const std::string& message) {
304     AddErrorOrWarning("input", line, column, message, "warning", std::clog);
305   }
306 
307   // implements DescriptorPool::ErrorCollector-------------------------
AddError(const std::string & filename,const std::string & element_name,const Message * descriptor,ErrorLocation location,const std::string & message)308   void AddError(const std::string& filename, const std::string& element_name,
309                 const Message* descriptor, ErrorLocation location,
310                 const std::string& message) {
311     AddErrorOrWarning(filename, -1, -1, message, "error", std::cerr);
312   }
313 
AddWarning(const std::string & filename,const std::string & element_name,const Message * descriptor,ErrorLocation location,const std::string & message)314   void AddWarning(const std::string& filename, const std::string& element_name,
315                   const Message* descriptor, ErrorLocation location,
316                   const std::string& message) {
317     AddErrorOrWarning(filename, -1, -1, message, "warning", std::clog);
318   }
319 
FoundErrors() const320   bool FoundErrors() const { return found_errors_; }
321 
322  private:
AddErrorOrWarning(const std::string & filename,int line,int column,const std::string & message,const std::string & type,std::ostream & out)323   void AddErrorOrWarning(const std::string& filename, int line, int column,
324                          const std::string& message, const std::string& type,
325                          std::ostream& out) {
326     // Print full path when running under MSVS
327     std::string dfile;
328     if (format_ == CommandLineInterface::ERROR_FORMAT_MSVS && tree_ != NULL &&
329         tree_->VirtualFileToDiskFile(filename, &dfile)) {
330       out << dfile;
331     } else {
332       out << filename;
333     }
334 
335     // Users typically expect 1-based line/column numbers, so we add 1
336     // to each here.
337     if (line != -1) {
338       // Allow for both GCC- and Visual-Studio-compatible output.
339       switch (format_) {
340         case CommandLineInterface::ERROR_FORMAT_GCC:
341           out << ":" << (line + 1) << ":" << (column + 1);
342           break;
343         case CommandLineInterface::ERROR_FORMAT_MSVS:
344           out << "(" << (line + 1) << ") : " << type
345               << " in column=" << (column + 1);
346           break;
347       }
348     }
349 
350     if (type == "warning") {
351       out << ": warning: " << message << std::endl;
352     } else {
353       out << ": " << message << std::endl;
354     }
355   }
356 
357   const ErrorFormat format_;
358   DiskSourceTree* tree_;
359   bool found_errors_;
360 };
361 
362 // -------------------------------------------------------------------
363 
364 // A GeneratorContext implementation that buffers files in memory, then dumps
365 // them all to disk on demand.
366 class CommandLineInterface::GeneratorContextImpl : public GeneratorContext {
367  public:
368   GeneratorContextImpl(const std::vector<const FileDescriptor*>& parsed_files);
369   ~GeneratorContextImpl();
370 
371   // Write all files in the directory to disk at the given output location,
372   // which must end in a '/'.
373   bool WriteAllToDisk(const std::string& prefix);
374 
375   // Write the contents of this directory to a ZIP-format archive with the
376   // given name.
377   bool WriteAllToZip(const std::string& filename);
378 
379   // Add a boilerplate META-INF/MANIFEST.MF file as required by the Java JAR
380   // format, unless one has already been written.
381   void AddJarManifest();
382 
383   // Get name of all output files.
384   void GetOutputFilenames(std::vector<std::string>* output_filenames);
385 
386   // implements GeneratorContext --------------------------------------
387   io::ZeroCopyOutputStream* Open(const std::string& filename);
388   io::ZeroCopyOutputStream* OpenForAppend(const std::string& filename);
389   io::ZeroCopyOutputStream* OpenForInsert(const std::string& filename,
390                                           const std::string& insertion_point);
ListParsedFiles(std::vector<const FileDescriptor * > * output)391   void ListParsedFiles(std::vector<const FileDescriptor*>* output) {
392     *output = parsed_files_;
393   }
394 
395  private:
396   friend class MemoryOutputStream;
397 
398   // map instead of unordered_map so that files are written in order (good when
399   // writing zips).
400   std::map<std::string, std::string*> files_;
401   const std::vector<const FileDescriptor*>& parsed_files_;
402   bool had_error_;
403 };
404 
405 class CommandLineInterface::MemoryOutputStream
406     : public io::ZeroCopyOutputStream {
407  public:
408   MemoryOutputStream(GeneratorContextImpl* directory,
409                      const std::string& filename, bool append_mode);
410   MemoryOutputStream(GeneratorContextImpl* directory,
411                      const std::string& filename,
412                      const std::string& insertion_point);
413   virtual ~MemoryOutputStream();
414 
415   // implements ZeroCopyOutputStream ---------------------------------
Next(void ** data,int * size)416   virtual bool Next(void** data, int* size) { return inner_->Next(data, size); }
BackUp(int count)417   virtual void BackUp(int count) { inner_->BackUp(count); }
ByteCount() const418   virtual int64 ByteCount() const { return inner_->ByteCount(); }
419 
420  private:
421   // Checks to see if "filename_.meta" exists in directory_; if so, fixes the
422   // offsets in that GeneratedCodeInfo record to reflect bytes inserted in
423   // filename_ at original offset insertion_offset with length insertion_length.
424   // We assume that insertions will not occur within any given annotated span
425   // of text.
426   void UpdateMetadata(size_t insertion_offset, size_t insertion_length);
427 
428   // Where to insert the string when it's done.
429   GeneratorContextImpl* directory_;
430   std::string filename_;
431   std::string insertion_point_;
432 
433   // The string we're building.
434   std::string data_;
435 
436   // Whether we should append the output stream to the existing file.
437   bool append_mode_;
438 
439   // StringOutputStream writing to data_.
440   std::unique_ptr<io::StringOutputStream> inner_;
441 };
442 
443 // -------------------------------------------------------------------
444 
GeneratorContextImpl(const std::vector<const FileDescriptor * > & parsed_files)445 CommandLineInterface::GeneratorContextImpl::GeneratorContextImpl(
446     const std::vector<const FileDescriptor*>& parsed_files)
447     : parsed_files_(parsed_files), had_error_(false) {}
448 
~GeneratorContextImpl()449 CommandLineInterface::GeneratorContextImpl::~GeneratorContextImpl() {
450   STLDeleteValues(&files_);
451 }
452 
WriteAllToDisk(const std::string & prefix)453 bool CommandLineInterface::GeneratorContextImpl::WriteAllToDisk(
454     const std::string& prefix) {
455   if (had_error_) {
456     return false;
457   }
458 
459   if (!VerifyDirectoryExists(prefix)) {
460     return false;
461   }
462 
463   for (std::map<std::string, std::string*>::const_iterator iter =
464            files_.begin();
465        iter != files_.end(); ++iter) {
466     const std::string& relative_filename = iter->first;
467     const char* data = iter->second->data();
468     int size = iter->second->size();
469 
470     if (!TryCreateParentDirectory(prefix, relative_filename)) {
471       return false;
472     }
473     std::string filename = prefix + relative_filename;
474 
475     // Create the output file.
476     int file_descriptor;
477     do {
478       file_descriptor =
479           open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
480     } while (file_descriptor < 0 && errno == EINTR);
481 
482     if (file_descriptor < 0) {
483       int error = errno;
484       std::cerr << filename << ": " << strerror(error);
485       return false;
486     }
487 
488     // Write the file.
489     while (size > 0) {
490       int write_result;
491       do {
492         write_result = write(file_descriptor, data, size);
493       } while (write_result < 0 && errno == EINTR);
494 
495       if (write_result <= 0) {
496         // Write error.
497 
498         // FIXME(kenton):  According to the man page, if write() returns zero,
499         //   there was no error; write() simply did not write anything.  It's
500         //   unclear under what circumstances this might happen, but presumably
501         //   errno won't be set in this case.  I am confused as to how such an
502         //   event should be handled.  For now I'm treating it as an error,
503         //   since retrying seems like it could lead to an infinite loop.  I
504         //   suspect this never actually happens anyway.
505 
506         if (write_result < 0) {
507           int error = errno;
508           std::cerr << filename << ": write: " << strerror(error);
509         } else {
510           std::cerr << filename << ": write() returned zero?" << std::endl;
511         }
512         return false;
513       }
514 
515       data += write_result;
516       size -= write_result;
517     }
518 
519     if (close(file_descriptor) != 0) {
520       int error = errno;
521       std::cerr << filename << ": close: " << strerror(error);
522       return false;
523     }
524   }
525 
526   return true;
527 }
528 
WriteAllToZip(const std::string & filename)529 bool CommandLineInterface::GeneratorContextImpl::WriteAllToZip(
530     const std::string& filename) {
531   if (had_error_) {
532     return false;
533   }
534 
535   // Create the output file.
536   int file_descriptor;
537   do {
538     file_descriptor =
539         open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
540   } while (file_descriptor < 0 && errno == EINTR);
541 
542   if (file_descriptor < 0) {
543     int error = errno;
544     std::cerr << filename << ": " << strerror(error);
545     return false;
546   }
547 
548   // Create the ZipWriter
549   io::FileOutputStream stream(file_descriptor);
550   ZipWriter zip_writer(&stream);
551 
552   for (std::map<std::string, std::string*>::const_iterator iter =
553            files_.begin();
554        iter != files_.end(); ++iter) {
555     zip_writer.Write(iter->first, *iter->second);
556   }
557 
558   zip_writer.WriteDirectory();
559 
560   if (stream.GetErrno() != 0) {
561     std::cerr << filename << ": " << strerror(stream.GetErrno()) << std::endl;
562   }
563 
564   if (!stream.Close()) {
565     std::cerr << filename << ": " << strerror(stream.GetErrno()) << std::endl;
566   }
567 
568   return true;
569 }
570 
AddJarManifest()571 void CommandLineInterface::GeneratorContextImpl::AddJarManifest() {
572   std::string** map_slot = &files_["META-INF/MANIFEST.MF"];
573   if (*map_slot == NULL) {
574     *map_slot = new std::string(
575         "Manifest-Version: 1.0\n"
576         "Created-By: 1.6.0 (protoc)\n"
577         "\n");
578   }
579 }
580 
GetOutputFilenames(std::vector<std::string> * output_filenames)581 void CommandLineInterface::GeneratorContextImpl::GetOutputFilenames(
582     std::vector<std::string>* output_filenames) {
583   for (std::map<std::string, std::string*>::iterator iter = files_.begin();
584        iter != files_.end(); ++iter) {
585     output_filenames->push_back(iter->first);
586   }
587 }
588 
Open(const std::string & filename)589 io::ZeroCopyOutputStream* CommandLineInterface::GeneratorContextImpl::Open(
590     const std::string& filename) {
591   return new MemoryOutputStream(this, filename, false);
592 }
593 
594 io::ZeroCopyOutputStream*
OpenForAppend(const std::string & filename)595 CommandLineInterface::GeneratorContextImpl::OpenForAppend(
596     const std::string& filename) {
597   return new MemoryOutputStream(this, filename, true);
598 }
599 
600 io::ZeroCopyOutputStream*
OpenForInsert(const std::string & filename,const std::string & insertion_point)601 CommandLineInterface::GeneratorContextImpl::OpenForInsert(
602     const std::string& filename, const std::string& insertion_point) {
603   return new MemoryOutputStream(this, filename, insertion_point);
604 }
605 
606 // -------------------------------------------------------------------
607 
MemoryOutputStream(GeneratorContextImpl * directory,const std::string & filename,bool append_mode)608 CommandLineInterface::MemoryOutputStream::MemoryOutputStream(
609     GeneratorContextImpl* directory, const std::string& filename,
610     bool append_mode)
611     : directory_(directory),
612       filename_(filename),
613       append_mode_(append_mode),
614       inner_(new io::StringOutputStream(&data_)) {}
615 
MemoryOutputStream(GeneratorContextImpl * directory,const std::string & filename,const std::string & insertion_point)616 CommandLineInterface::MemoryOutputStream::MemoryOutputStream(
617     GeneratorContextImpl* directory, const std::string& filename,
618     const std::string& insertion_point)
619     : directory_(directory),
620       filename_(filename),
621       insertion_point_(insertion_point),
622       inner_(new io::StringOutputStream(&data_)) {}
623 
UpdateMetadata(size_t insertion_offset,size_t insertion_length)624 void CommandLineInterface::MemoryOutputStream::UpdateMetadata(
625     size_t insertion_offset, size_t insertion_length) {
626   std::map<std::string, std::string*>::iterator meta_file =
627       directory_->files_.find(filename_ + ".meta");
628   if (meta_file == directory_->files_.end() || !meta_file->second) {
629     // No metadata was recorded for this file.
630     return;
631   }
632   std::string* encoded_data = meta_file->second;
633   GeneratedCodeInfo metadata;
634   bool is_text_format = false;
635   if (!metadata.ParseFromString(*encoded_data)) {
636     if (!TextFormat::ParseFromString(*encoded_data, &metadata)) {
637       // The metadata is invalid.
638       std::cerr << filename_
639                 << ".meta: Could not parse metadata as wire or text format."
640                 << std::endl;
641       return;
642     }
643     // Generators that use the public plugin interface emit text-format
644     // metadata (because in the public plugin protocol, file content must be
645     // UTF8-encoded strings).
646     is_text_format = true;
647   }
648   for (int i = 0; i < metadata.annotation_size(); ++i) {
649     GeneratedCodeInfo::Annotation* annotation = metadata.mutable_annotation(i);
650     if (annotation->begin() >= insertion_offset) {
651       annotation->set_begin(annotation->begin() + insertion_length);
652       annotation->set_end(annotation->end() + insertion_length);
653     }
654   }
655   if (is_text_format) {
656     TextFormat::PrintToString(metadata, encoded_data);
657   } else {
658     metadata.SerializeToString(encoded_data);
659   }
660 }
661 
~MemoryOutputStream()662 CommandLineInterface::MemoryOutputStream::~MemoryOutputStream() {
663   // Make sure all data has been written.
664   inner_.reset();
665 
666   // Insert into the directory.
667   std::string** map_slot = &directory_->files_[filename_];
668 
669   if (insertion_point_.empty()) {
670     // This was just a regular Open().
671     if (*map_slot != NULL) {
672       if (append_mode_) {
673         (*map_slot)->append(data_);
674       } else {
675         std::cerr << filename_ << ": Tried to write the same file twice."
676                   << std::endl;
677         directory_->had_error_ = true;
678       }
679       return;
680     }
681 
682     *map_slot = new std::string;
683     (*map_slot)->swap(data_);
684   } else {
685     // This was an OpenForInsert().
686 
687     // If the data doesn't end with a clean line break, add one.
688     if (!data_.empty() && data_[data_.size() - 1] != '\n') {
689       data_.push_back('\n');
690     }
691 
692     // Find the file we are going to insert into.
693     if (*map_slot == NULL) {
694       std::cerr << filename_
695                 << ": Tried to insert into file that doesn't exist."
696                 << std::endl;
697       directory_->had_error_ = true;
698       return;
699     }
700     std::string* target = *map_slot;
701 
702     // Find the insertion point.
703     std::string magic_string =
704         strings::Substitute("@@protoc_insertion_point($0)", insertion_point_);
705     std::string::size_type pos = target->find(magic_string);
706 
707     if (pos == std::string::npos) {
708       std::cerr << filename_ << ": insertion point \"" << insertion_point_
709                 << "\" not found." << std::endl;
710       directory_->had_error_ = true;
711       return;
712     }
713 
714     if ((pos > 3) && (target->substr(pos - 3, 2) == "/*")) {
715       // Support for inline "/* @@protoc_insertion_point() */"
716       pos = pos - 3;
717     } else {
718       // Seek backwards to the beginning of the line, which is where we will
719       // insert the data.  Note that this has the effect of pushing the
720       // insertion point down, so the data is inserted before it.  This is
721       // intentional because it means that multiple insertions at the same point
722       // will end up in the expected order in the final output.
723       pos = target->find_last_of('\n', pos);
724       if (pos == std::string::npos) {
725         // Insertion point is on the first line.
726         pos = 0;
727       } else {
728         // Advance to character after '\n'.
729         ++pos;
730       }
731     }
732 
733     // Extract indent.
734     std::string indent_(*target, pos,
735                         target->find_first_not_of(" \t", pos) - pos);
736 
737     if (indent_.empty()) {
738       // No indent.  This makes things easier.
739       target->insert(pos, data_);
740       UpdateMetadata(pos, data_.size());
741     } else {
742       // Calculate how much space we need.
743       int indent_size = 0;
744       for (int i = 0; i < data_.size(); i++) {
745         if (data_[i] == '\n') indent_size += indent_.size();
746       }
747 
748       // Make a hole for it.
749       target->insert(pos, data_.size() + indent_size, '\0');
750       UpdateMetadata(pos, data_.size() + indent_size);
751 
752       // Now copy in the data.
753       std::string::size_type data_pos = 0;
754       char* target_ptr = ::google::protobuf::string_as_array(target) + pos;
755       while (data_pos < data_.size()) {
756         // Copy indent.
757         memcpy(target_ptr, indent_.data(), indent_.size());
758         target_ptr += indent_.size();
759 
760         // Copy line from data_.
761         // We already guaranteed that data_ ends with a newline (above), so this
762         // search can't fail.
763         std::string::size_type line_length =
764             data_.find_first_of('\n', data_pos) + 1 - data_pos;
765         memcpy(target_ptr, data_.data() + data_pos, line_length);
766         target_ptr += line_length;
767         data_pos += line_length;
768       }
769 
770       GOOGLE_CHECK_EQ(target_ptr,
771                ::google::protobuf::string_as_array(target) + pos + data_.size() + indent_size);
772     }
773   }
774 }
775 
776 // ===================================================================
777 
778 #if defined(_WIN32) && !defined(__CYGWIN__)
779 const char* const CommandLineInterface::kPathSeparator = ";";
780 #else
781 const char* const CommandLineInterface::kPathSeparator = ":";
782 #endif
783 
CommandLineInterface()784 CommandLineInterface::CommandLineInterface()
785     : mode_(MODE_COMPILE),
786       print_mode_(PRINT_NONE),
787       error_format_(ERROR_FORMAT_GCC),
788       direct_dependencies_explicitly_set_(false),
789       direct_dependencies_violation_msg_(
790           kDefaultDirectDependenciesViolationMsg),
791       imports_in_descriptor_set_(false),
792       source_info_in_descriptor_set_(false),
793       disallow_services_(false) {
794 }
~CommandLineInterface()795 CommandLineInterface::~CommandLineInterface() {}
796 
RegisterGenerator(const std::string & flag_name,CodeGenerator * generator,const std::string & help_text)797 void CommandLineInterface::RegisterGenerator(const std::string& flag_name,
798                                              CodeGenerator* generator,
799                                              const std::string& help_text) {
800   GeneratorInfo info;
801   info.flag_name = flag_name;
802   info.generator = generator;
803   info.help_text = help_text;
804   generators_by_flag_name_[flag_name] = info;
805 }
806 
RegisterGenerator(const std::string & flag_name,const std::string & option_flag_name,CodeGenerator * generator,const std::string & help_text)807 void CommandLineInterface::RegisterGenerator(
808     const std::string& flag_name, const std::string& option_flag_name,
809     CodeGenerator* generator, const std::string& help_text) {
810   GeneratorInfo info;
811   info.flag_name = flag_name;
812   info.option_flag_name = option_flag_name;
813   info.generator = generator;
814   info.help_text = help_text;
815   generators_by_flag_name_[flag_name] = info;
816   generators_by_option_name_[option_flag_name] = info;
817 }
818 
// Stores |exe_name_prefix| as plugin_prefix_.  Argument parsing consults that
// member: --plugin is rejected while it is empty, and it is passed to
// PluginName() when plugin options are matched against output directives.
void CommandLineInterface::AllowPlugins(const std::string& exe_name_prefix) {
  plugin_prefix_ = exe_name_prefix;
}
822 
Run(int argc,const char * const argv[])823 int CommandLineInterface::Run(int argc, const char* const argv[]) {
824   Clear();
825   switch (ParseArguments(argc, argv)) {
826     case PARSE_ARGUMENT_DONE_AND_EXIT:
827       return 0;
828     case PARSE_ARGUMENT_FAIL:
829       return 1;
830     case PARSE_ARGUMENT_DONE_AND_CONTINUE:
831       break;
832   }
833 
834   std::vector<const FileDescriptor*> parsed_files;
835   std::unique_ptr<DiskSourceTree> disk_source_tree;
836   std::unique_ptr<ErrorPrinter> error_collector;
837   std::unique_ptr<DescriptorPool> descriptor_pool;
838   std::unique_ptr<SimpleDescriptorDatabase> descriptor_set_in_database;
839   std::unique_ptr<SourceTreeDescriptorDatabase> source_tree_database;
840 
841   // Any --descriptor_set_in FileDescriptorSet objects will be used as a
842   // fallback to input_files on command line, so create that db first.
843   if (!descriptor_set_in_names_.empty()) {
844     descriptor_set_in_database.reset(new SimpleDescriptorDatabase());
845     if (!PopulateSimpleDescriptorDatabase(descriptor_set_in_database.get())) {
846       return 1;
847     }
848   }
849 
850   if (proto_path_.empty()) {
851     // If there are no --proto_path flags, then just look in the specified
852     // --descriptor_set_in files.  But first, verify that the input files are
853     // there.
854     if (!VerifyInputFilesInDescriptors(descriptor_set_in_database.get())) {
855       return 1;
856     }
857 
858     error_collector.reset(new ErrorPrinter(error_format_));
859     descriptor_pool.reset(new DescriptorPool(descriptor_set_in_database.get(),
860                                              error_collector.get()));
861   } else {
862     disk_source_tree.reset(new DiskSourceTree());
863     if (!InitializeDiskSourceTree(disk_source_tree.get(),
864                                   descriptor_set_in_database.get())) {
865       return 1;
866     }
867 
868     error_collector.reset(
869         new ErrorPrinter(error_format_, disk_source_tree.get()));
870 
871     source_tree_database.reset(new SourceTreeDescriptorDatabase(
872         disk_source_tree.get(), descriptor_set_in_database.get()));
873     source_tree_database->RecordErrorsTo(error_collector.get());
874 
875     descriptor_pool.reset(new DescriptorPool(
876         source_tree_database.get(),
877         source_tree_database->GetValidationErrorCollector()));
878   }
879 
880   descriptor_pool->EnforceWeakDependencies(true);
881   if (!ParseInputFiles(descriptor_pool.get(), &parsed_files)) {
882     return 1;
883   }
884 
885 
886   // We construct a separate GeneratorContext for each output location.  Note
887   // that two code generators may output to the same location, in which case
888   // they should share a single GeneratorContext so that OpenForInsert() works.
889   GeneratorContextMap output_directories;
890 
891   // Generate output.
892   if (mode_ == MODE_COMPILE) {
893     for (int i = 0; i < output_directives_.size(); i++) {
894       std::string output_location = output_directives_[i].output_location;
895       if (!HasSuffixString(output_location, ".zip") &&
896           !HasSuffixString(output_location, ".jar")) {
897         AddTrailingSlash(&output_location);
898       }
899       GeneratorContextImpl** map_slot = &output_directories[output_location];
900 
901       if (*map_slot == NULL) {
902         // First time we've seen this output location.
903         *map_slot = new GeneratorContextImpl(parsed_files);
904       }
905 
906       if (!GenerateOutput(parsed_files, output_directives_[i], *map_slot)) {
907         STLDeleteValues(&output_directories);
908         return 1;
909       }
910     }
911   }
912 
913   // Write all output to disk.
914   for (GeneratorContextMap::iterator iter = output_directories.begin();
915        iter != output_directories.end(); ++iter) {
916     const std::string& location = iter->first;
917     GeneratorContextImpl* directory = iter->second;
918     if (HasSuffixString(location, "/")) {
919       if (!directory->WriteAllToDisk(location)) {
920         STLDeleteValues(&output_directories);
921         return 1;
922       }
923     } else {
924       if (HasSuffixString(location, ".jar")) {
925         directory->AddJarManifest();
926       }
927 
928       if (!directory->WriteAllToZip(location)) {
929         STLDeleteValues(&output_directories);
930         return 1;
931       }
932     }
933   }
934 
935   if (!dependency_out_name_.empty()) {
936     GOOGLE_DCHECK(disk_source_tree.get());
937     if (!GenerateDependencyManifestFile(parsed_files, output_directories,
938                                         disk_source_tree.get())) {
939       return 1;
940     }
941   }
942 
943   STLDeleteValues(&output_directories);
944 
945   if (!descriptor_set_out_name_.empty()) {
946     if (!WriteDescriptorSet(parsed_files)) {
947       return 1;
948     }
949   }
950 
951   if (mode_ == MODE_ENCODE || mode_ == MODE_DECODE) {
952     if (codec_type_.empty()) {
953       // HACK:  Define an EmptyMessage type to use for decoding.
954       DescriptorPool pool;
955       FileDescriptorProto file;
956       file.set_name("empty_message.proto");
957       file.add_message_type()->set_name("EmptyMessage");
958       GOOGLE_CHECK(pool.BuildFile(file) != NULL);
959       codec_type_ = "EmptyMessage";
960       if (!EncodeOrDecode(&pool)) {
961         return 1;
962       }
963     } else {
964       if (!EncodeOrDecode(descriptor_pool.get())) {
965         return 1;
966       }
967     }
968   }
969 
970   if (error_collector->FoundErrors()) {
971     return 1;
972   }
973 
974   if (mode_ == MODE_PRINT) {
975     switch (print_mode_) {
976       case PRINT_FREE_FIELDS:
977         for (int i = 0; i < parsed_files.size(); ++i) {
978           const FileDescriptor* fd = parsed_files[i];
979           for (int j = 0; j < fd->message_type_count(); ++j) {
980             PrintFreeFieldNumbers(fd->message_type(j));
981           }
982         }
983         break;
984       case PRINT_NONE:
985         GOOGLE_LOG(ERROR) << "If the code reaches here, it usually means a bug of "
986                       "flag parsing in the CommandLineInterface.";
987         return 1;
988 
989         // Do not add a default case.
990     }
991   }
992 
993   return 0;
994 }
995 
InitializeDiskSourceTree(DiskSourceTree * source_tree,DescriptorDatabase * fallback_database)996 bool CommandLineInterface::InitializeDiskSourceTree(
997     DiskSourceTree* source_tree, DescriptorDatabase* fallback_database) {
998   AddDefaultProtoPaths(&proto_path_);
999 
1000   // Set up the source tree.
1001   for (int i = 0; i < proto_path_.size(); i++) {
1002     source_tree->MapPath(proto_path_[i].first, proto_path_[i].second);
1003   }
1004 
1005   // Map input files to virtual paths if possible.
1006   if (!MakeInputsBeProtoPathRelative(source_tree, fallback_database)) {
1007     return false;
1008   }
1009 
1010   return true;
1011 }
1012 
PopulateSimpleDescriptorDatabase(SimpleDescriptorDatabase * database)1013 bool CommandLineInterface::PopulateSimpleDescriptorDatabase(
1014     SimpleDescriptorDatabase* database) {
1015   for (int i = 0; i < descriptor_set_in_names_.size(); i++) {
1016     int fd;
1017     do {
1018       fd = open(descriptor_set_in_names_[i].c_str(), O_RDONLY | O_BINARY);
1019     } while (fd < 0 && errno == EINTR);
1020     if (fd < 0) {
1021       std::cerr << descriptor_set_in_names_[i] << ": " << strerror(ENOENT)
1022                 << std::endl;
1023       return false;
1024     }
1025 
1026     FileDescriptorSet file_descriptor_set;
1027     bool parsed = file_descriptor_set.ParseFromFileDescriptor(fd);
1028     if (close(fd) != 0) {
1029       std::cerr << descriptor_set_in_names_[i] << ": close: " << strerror(errno)
1030                 << std::endl;
1031       return false;
1032     }
1033 
1034     if (!parsed) {
1035       std::cerr << descriptor_set_in_names_[i] << ": Unable to parse."
1036                 << std::endl;
1037       return false;
1038     }
1039 
1040     for (int j = 0; j < file_descriptor_set.file_size(); j++) {
1041       FileDescriptorProto previously_added_file_descriptor_proto;
1042       if (database->FindFileByName(file_descriptor_set.file(j).name(),
1043                                    &previously_added_file_descriptor_proto)) {
1044         // already present - skip
1045         continue;
1046       }
1047       if (!database->Add(file_descriptor_set.file(j))) {
1048         return false;
1049       }
1050     }
1051   }
1052   return true;
1053 }
1054 
VerifyInputFilesInDescriptors(DescriptorDatabase * database)1055 bool CommandLineInterface::VerifyInputFilesInDescriptors(
1056     DescriptorDatabase* database) {
1057   for (const auto& input_file : input_files_) {
1058     FileDescriptorProto file_descriptor;
1059     if (!database->FindFileByName(input_file, &file_descriptor)) {
1060       std::cerr << input_file << ": " << strerror(ENOENT) << std::endl;
1061       return false;
1062     }
1063 
1064     // Enforce --disallow_services.
1065     if (disallow_services_ && file_descriptor.service_size() > 0) {
1066       std::cerr << file_descriptor.name()
1067                 << ": This file contains services, but "
1068                    "--disallow_services was used."
1069                 << std::endl;
1070       return false;
1071     }
1072   }
1073   return true;
1074 }
1075 
ParseInputFiles(DescriptorPool * descriptor_pool,std::vector<const FileDescriptor * > * parsed_files)1076 bool CommandLineInterface::ParseInputFiles(
1077     DescriptorPool* descriptor_pool,
1078     std::vector<const FileDescriptor*>* parsed_files) {
1079 
1080   // Parse each file.
1081   for (const auto& input_file : input_files_) {
1082     // Import the file.
1083     descriptor_pool->AddUnusedImportTrackFile(input_file);
1084     const FileDescriptor* parsed_file =
1085         descriptor_pool->FindFileByName(input_file);
1086     descriptor_pool->ClearUnusedImportTrackFiles();
1087     if (parsed_file == NULL) {
1088       return false;
1089     }
1090     parsed_files->push_back(parsed_file);
1091 
1092     // Enforce --disallow_services.
1093     if (disallow_services_ && parsed_file->service_count() > 0) {
1094       std::cerr << parsed_file->name()
1095                 << ": This file contains services, but "
1096                    "--disallow_services was used."
1097                 << std::endl;
1098       return false;
1099     }
1100 
1101     // Enforce --direct_dependencies
1102     if (direct_dependencies_explicitly_set_) {
1103       bool indirect_imports = false;
1104       for (int i = 0; i < parsed_file->dependency_count(); i++) {
1105         if (direct_dependencies_.find(parsed_file->dependency(i)->name()) ==
1106             direct_dependencies_.end()) {
1107           indirect_imports = true;
1108           std::cerr << parsed_file->name() << ": "
1109                     << StringReplace(direct_dependencies_violation_msg_, "%s",
1110                                      parsed_file->dependency(i)->name(),
1111                                      true /* replace_all */)
1112                     << std::endl;
1113         }
1114       }
1115       if (indirect_imports) {
1116         return false;
1117       }
1118     }
1119   }
1120   return true;
1121 }
1122 
Clear()1123 void CommandLineInterface::Clear() {
1124   // Clear all members that are set by Run().  Note that we must not clear
1125   // members which are set by other methods before Run() is called.
1126   executable_name_.clear();
1127   proto_path_.clear();
1128   input_files_.clear();
1129   direct_dependencies_.clear();
1130   direct_dependencies_violation_msg_ = kDefaultDirectDependenciesViolationMsg;
1131   output_directives_.clear();
1132   codec_type_.clear();
1133   descriptor_set_in_names_.clear();
1134   descriptor_set_out_name_.clear();
1135   dependency_out_name_.clear();
1136 
1137 
1138   mode_ = MODE_COMPILE;
1139   print_mode_ = PRINT_NONE;
1140   imports_in_descriptor_set_ = false;
1141   source_info_in_descriptor_set_ = false;
1142   disallow_services_ = false;
1143   direct_dependencies_explicitly_set_ = false;
1144 }
1145 
MakeProtoProtoPathRelative(DiskSourceTree * source_tree,std::string * proto,DescriptorDatabase * fallback_database)1146 bool CommandLineInterface::MakeProtoProtoPathRelative(
1147     DiskSourceTree* source_tree, std::string* proto,
1148     DescriptorDatabase* fallback_database) {
1149   // If it's in the fallback db, don't report non-existent file errors.
1150   FileDescriptorProto fallback_file;
1151   bool in_fallback_database =
1152       fallback_database != nullptr &&
1153       fallback_database->FindFileByName(*proto, &fallback_file);
1154 
1155   // If the input file path is not a physical file path, it must be a virtual
1156   // path.
1157   if (access(proto->c_str(), F_OK) < 0) {
1158     std::string disk_file;
1159     if (source_tree->VirtualFileToDiskFile(*proto, &disk_file) ||
1160         in_fallback_database) {
1161       return true;
1162     } else {
1163       std::cerr << *proto << ": " << strerror(ENOENT) << std::endl;
1164       return false;
1165     }
1166   }
1167 
1168   std::string virtual_file, shadowing_disk_file;
1169   switch (source_tree->DiskFileToVirtualFile(*proto, &virtual_file,
1170                                              &shadowing_disk_file)) {
1171     case DiskSourceTree::SUCCESS:
1172       *proto = virtual_file;
1173       break;
1174     case DiskSourceTree::SHADOWED:
1175       std::cerr << *proto << ": Input is shadowed in the --proto_path by \""
1176                 << shadowing_disk_file
1177                 << "\".  Either use the latter file as your input or reorder "
1178                    "the --proto_path so that the former file's location "
1179                    "comes first."
1180                 << std::endl;
1181       return false;
1182     case DiskSourceTree::CANNOT_OPEN:
1183       if (in_fallback_database) {
1184         return true;
1185       }
1186       std::cerr << *proto << ": " << strerror(errno) << std::endl;
1187       return false;
1188     case DiskSourceTree::NO_MAPPING: {
1189       // Try to interpret the path as a virtual path.
1190       std::string disk_file;
1191       if (source_tree->VirtualFileToDiskFile(*proto, &disk_file) ||
1192           in_fallback_database) {
1193         return true;
1194       } else {
1195         // The input file path can't be mapped to any --proto_path and it also
1196         // can't be interpreted as a virtual path.
1197         std::cerr
1198             << *proto
1199             << ": File does not reside within any path "
1200                "specified using --proto_path (or -I).  You must specify a "
1201                "--proto_path which encompasses this file.  Note that the "
1202                "proto_path must be an exact prefix of the .proto file "
1203                "names -- protoc is too dumb to figure out when two paths "
1204                "(e.g. absolute and relative) are equivalent (it's harder "
1205                "than you think)."
1206             << std::endl;
1207         return false;
1208       }
1209     }
1210   }
1211   return true;
1212 }
1213 
MakeInputsBeProtoPathRelative(DiskSourceTree * source_tree,DescriptorDatabase * fallback_database)1214 bool CommandLineInterface::MakeInputsBeProtoPathRelative(
1215     DiskSourceTree* source_tree, DescriptorDatabase* fallback_database) {
1216   for (auto& input_file : input_files_) {
1217     if (!MakeProtoProtoPathRelative(source_tree, &input_file,
1218                                     fallback_database)) {
1219       return false;
1220     }
1221   }
1222 
1223   return true;
1224 }
1225 
1226 
ExpandArgumentFile(const std::string & file,std::vector<std::string> * arguments)1227 bool CommandLineInterface::ExpandArgumentFile(
1228     const std::string& file, std::vector<std::string>* arguments) {
1229   // The argument file is searched in the working directory only. We don't
1230   // use the proto import path here.
1231   std::ifstream file_stream(file.c_str());
1232   if (!file_stream.is_open()) {
1233     return false;
1234   }
1235   std::string argument;
1236   // We don't support any kind of shell expansion right now.
1237   while (std::getline(file_stream, argument)) {
1238     arguments->push_back(argument);
1239   }
1240   return true;
1241 }
1242 
ParseArguments(int argc,const char * const argv[])1243 CommandLineInterface::ParseArgumentStatus CommandLineInterface::ParseArguments(
1244     int argc, const char* const argv[]) {
1245   executable_name_ = argv[0];
1246 
1247   std::vector<std::string> arguments;
1248   for (int i = 1; i < argc; ++i) {
1249     if (argv[i][0] == '@') {
1250       if (!ExpandArgumentFile(argv[i] + 1, &arguments)) {
1251         std::cerr << "Failed to open argument file: " << (argv[i] + 1)
1252                   << std::endl;
1253         return PARSE_ARGUMENT_FAIL;
1254       }
1255       continue;
1256     }
1257     arguments.push_back(argv[i]);
1258   }
1259 
1260   // if no arguments are given, show help
1261   if (arguments.empty()) {
1262     PrintHelpText();
1263     return PARSE_ARGUMENT_DONE_AND_EXIT;  // Exit without running compiler.
1264   }
1265 
1266   // Iterate through all arguments and parse them.
1267   for (int i = 0; i < arguments.size(); ++i) {
1268     std::string name, value;
1269 
1270     if (ParseArgument(arguments[i].c_str(), &name, &value)) {
1271       // Returned true => Use the next argument as the flag value.
1272       if (i + 1 == arguments.size() || arguments[i + 1][0] == '-') {
1273         std::cerr << "Missing value for flag: " << name << std::endl;
1274         if (name == "--decode") {
1275           std::cerr << "To decode an unknown message, use --decode_raw."
1276                     << std::endl;
1277         }
1278         return PARSE_ARGUMENT_FAIL;
1279       } else {
1280         ++i;
1281         value = arguments[i];
1282       }
1283     }
1284 
1285     ParseArgumentStatus status = InterpretArgument(name, value);
1286     if (status != PARSE_ARGUMENT_DONE_AND_CONTINUE) return status;
1287   }
1288 
1289   // Make sure each plugin option has a matching plugin output.
1290   bool foundUnknownPluginOption = false;
1291   for (std::map<std::string, std::string>::const_iterator i =
1292            plugin_parameters_.begin();
1293        i != plugin_parameters_.end(); ++i) {
1294     if (plugins_.find(i->first) != plugins_.end()) {
1295       continue;
1296     }
1297     bool foundImplicitPlugin = false;
1298     for (std::vector<OutputDirective>::const_iterator j =
1299              output_directives_.begin();
1300          j != output_directives_.end(); ++j) {
1301       if (j->generator == NULL) {
1302         std::string plugin_name = PluginName(plugin_prefix_, j->name);
1303         if (plugin_name == i->first) {
1304           foundImplicitPlugin = true;
1305           break;
1306         }
1307       }
1308     }
1309     if (!foundImplicitPlugin) {
1310       std::cerr << "Unknown flag: "
1311                 // strip prefix + "gen-" and add back "_opt"
1312                 << "--" + i->first.substr(plugin_prefix_.size() + 4) + "_opt"
1313                 << std::endl;
1314       foundUnknownPluginOption = true;
1315     }
1316   }
1317   if (foundUnknownPluginOption) {
1318     return PARSE_ARGUMENT_FAIL;
1319   }
1320 
1321   // The --proto_path & --descriptor_set_in flags both specify places to look
1322   // for proto files. If neither were given, use the current working directory.
1323   if (proto_path_.empty() && descriptor_set_in_names_.empty()) {
1324     // Don't use make_pair as the old/default standard library on Solaris
1325     // doesn't support it without explicit template parameters, which are
1326     // incompatible with C++0x's make_pair.
1327     proto_path_.push_back(std::pair<std::string, std::string>("", "."));
1328   }
1329 
1330   // Check some error cases.
1331   bool decoding_raw = (mode_ == MODE_DECODE) && codec_type_.empty();
1332   if (decoding_raw && !input_files_.empty()) {
1333     std::cerr << "When using --decode_raw, no input files should be given."
1334               << std::endl;
1335     return PARSE_ARGUMENT_FAIL;
1336   } else if (!decoding_raw && input_files_.empty()) {
1337     std::cerr << "Missing input file." << std::endl;
1338     return PARSE_ARGUMENT_FAIL;
1339   }
1340   if (mode_ == MODE_COMPILE && output_directives_.empty() &&
1341       descriptor_set_out_name_.empty()) {
1342     std::cerr << "Missing output directives." << std::endl;
1343     return PARSE_ARGUMENT_FAIL;
1344   }
1345   if (mode_ != MODE_COMPILE && !dependency_out_name_.empty()) {
1346     std::cerr << "Can only use --dependency_out=FILE when generating code."
1347               << std::endl;
1348     return PARSE_ARGUMENT_FAIL;
1349   }
1350   if (!dependency_out_name_.empty() && input_files_.size() > 1) {
1351     std::cerr
1352         << "Can only process one input file when using --dependency_out=FILE."
1353         << std::endl;
1354     return PARSE_ARGUMENT_FAIL;
1355   }
1356   if (imports_in_descriptor_set_ && descriptor_set_out_name_.empty()) {
1357     std::cerr << "--include_imports only makes sense when combined with "
1358                  "--descriptor_set_out."
1359               << std::endl;
1360   }
1361   if (source_info_in_descriptor_set_ && descriptor_set_out_name_.empty()) {
1362     std::cerr << "--include_source_info only makes sense when combined with "
1363                  "--descriptor_set_out."
1364               << std::endl;
1365   }
1366 
1367   return PARSE_ARGUMENT_DONE_AND_CONTINUE;
1368 }
1369 
ParseArgument(const char * arg,std::string * name,std::string * value)1370 bool CommandLineInterface::ParseArgument(const char* arg, std::string* name,
1371                                          std::string* value) {
1372   bool parsed_value = false;
1373 
1374   if (arg[0] != '-') {
1375     // Not a flag.
1376     name->clear();
1377     parsed_value = true;
1378     *value = arg;
1379   } else if (arg[1] == '-') {
1380     // Two dashes:  Multi-character name, with '=' separating name and
1381     //   value.
1382     const char* equals_pos = strchr(arg, '=');
1383     if (equals_pos != NULL) {
1384       *name = std::string(arg, equals_pos - arg);
1385       *value = equals_pos + 1;
1386       parsed_value = true;
1387     } else {
1388       *name = arg;
1389     }
1390   } else {
1391     // One dash:  One-character name, all subsequent characters are the
1392     //   value.
1393     if (arg[1] == '\0') {
1394       // arg is just "-".  We treat this as an input file, except that at
1395       // present this will just lead to a "file not found" error.
1396       name->clear();
1397       *value = arg;
1398       parsed_value = true;
1399     } else {
1400       *name = std::string(arg, 2);
1401       *value = arg + 2;
1402       parsed_value = !value->empty();
1403     }
1404   }
1405 
1406   // Need to return true iff the next arg should be used as the value for this
1407   // one, false otherwise.
1408 
1409   if (parsed_value) {
1410     // We already parsed a value for this flag.
1411     return false;
1412   }
1413 
1414   if (*name == "-h" || *name == "--help" || *name == "--disallow_services" ||
1415       *name == "--include_imports" || *name == "--include_source_info" ||
1416       *name == "--version" || *name == "--decode_raw" ||
1417       *name == "--print_free_field_numbers") {
1418     // HACK:  These are the only flags that don't take a value.
1419     //   They probably should not be hard-coded like this but for now it's
1420     //   not worth doing better.
1421     return false;
1422   }
1423 
1424   // Next argument is the flag value.
1425   return true;
1426 }
1427 
1428 CommandLineInterface::ParseArgumentStatus
InterpretArgument(const std::string & name,const std::string & value)1429 CommandLineInterface::InterpretArgument(const std::string& name,
1430                                         const std::string& value) {
1431   if (name.empty()) {
1432     // Not a flag.  Just a filename.
1433     if (value.empty()) {
1434       std::cerr
1435           << "You seem to have passed an empty string as one of the "
1436              "arguments to "
1437           << executable_name_
1438           << ".  This is actually "
1439              "sort of hard to do.  Congrats.  Unfortunately it is not valid "
1440              "input so the program is going to die now."
1441           << std::endl;
1442       return PARSE_ARGUMENT_FAIL;
1443     }
1444 
1445     input_files_.push_back(value);
1446 
1447   } else if (name == "-I" || name == "--proto_path") {
1448     // Java's -classpath (and some other languages) delimits path components
1449     // with colons.  Let's accept that syntax too just to make things more
1450     // intuitive.
1451     std::vector<std::string> parts = Split(
1452         value, CommandLineInterface::kPathSeparator,
1453         true);
1454 
1455     for (int i = 0; i < parts.size(); i++) {
1456       std::string virtual_path;
1457       std::string disk_path;
1458 
1459       std::string::size_type equals_pos = parts[i].find_first_of('=');
1460       if (equals_pos == std::string::npos) {
1461         virtual_path = "";
1462         disk_path = parts[i];
1463       } else {
1464         virtual_path = parts[i].substr(0, equals_pos);
1465         disk_path = parts[i].substr(equals_pos + 1);
1466       }
1467 
1468       if (disk_path.empty()) {
1469         std::cerr
1470             << "--proto_path passed empty directory name.  (Use \".\" for "
1471                "current directory.)"
1472             << std::endl;
1473         return PARSE_ARGUMENT_FAIL;
1474       }
1475 
1476       // Make sure disk path exists, warn otherwise.
1477       if (access(disk_path.c_str(), F_OK) < 0) {
1478         // Try the original path; it may have just happened to have a '=' in it.
1479         if (access(parts[i].c_str(), F_OK) < 0) {
1480           std::cerr << disk_path << ": warning: directory does not exist."
1481                     << std::endl;
1482         } else {
1483           virtual_path = "";
1484           disk_path = parts[i];
1485         }
1486       }
1487 
1488       // Don't use make_pair as the old/default standard library on Solaris
1489       // doesn't support it without explicit template parameters, which are
1490       // incompatible with C++0x's make_pair.
1491       proto_path_.push_back(
1492           std::pair<std::string, std::string>(virtual_path, disk_path));
1493     }
1494 
1495   } else if (name == "--direct_dependencies") {
1496     if (direct_dependencies_explicitly_set_) {
1497       std::cerr << name
1498                 << " may only be passed once. To specify multiple "
1499                    "direct dependencies, pass them all as a single "
1500                    "parameter separated by ':'."
1501                 << std::endl;
1502       return PARSE_ARGUMENT_FAIL;
1503     }
1504 
1505     direct_dependencies_explicitly_set_ = true;
1506     std::vector<std::string> direct =
1507         Split(value, ":", true);
1508     GOOGLE_DCHECK(direct_dependencies_.empty());
1509     direct_dependencies_.insert(direct.begin(), direct.end());
1510 
1511   } else if (name == "--direct_dependencies_violation_msg") {
1512     direct_dependencies_violation_msg_ = value;
1513 
1514   } else if (name == "--descriptor_set_in") {
1515     if (!descriptor_set_in_names_.empty()) {
1516       std::cerr << name
1517                 << " may only be passed once. To specify multiple "
1518                    "descriptor sets, pass them all as a single "
1519                    "parameter separated by '"
1520                 << CommandLineInterface::kPathSeparator << "'." << std::endl;
1521       return PARSE_ARGUMENT_FAIL;
1522     }
1523     if (value.empty()) {
1524       std::cerr << name << " requires a non-empty value." << std::endl;
1525       return PARSE_ARGUMENT_FAIL;
1526     }
1527     if (!dependency_out_name_.empty()) {
1528       std::cerr << name << " cannot be used with --dependency_out."
1529                 << std::endl;
1530       return PARSE_ARGUMENT_FAIL;
1531     }
1532 
1533     descriptor_set_in_names_ = Split(
1534         value, CommandLineInterface::kPathSeparator,
1535         true);
1536 
1537   } else if (name == "-o" || name == "--descriptor_set_out") {
1538     if (!descriptor_set_out_name_.empty()) {
1539       std::cerr << name << " may only be passed once." << std::endl;
1540       return PARSE_ARGUMENT_FAIL;
1541     }
1542     if (value.empty()) {
1543       std::cerr << name << " requires a non-empty value." << std::endl;
1544       return PARSE_ARGUMENT_FAIL;
1545     }
1546     if (mode_ != MODE_COMPILE) {
1547       std::cerr
1548           << "Cannot use --encode or --decode and generate descriptors at the "
1549              "same time."
1550           << std::endl;
1551       return PARSE_ARGUMENT_FAIL;
1552     }
1553     descriptor_set_out_name_ = value;
1554 
1555   } else if (name == "--dependency_out") {
1556     if (!dependency_out_name_.empty()) {
1557       std::cerr << name << " may only be passed once." << std::endl;
1558       return PARSE_ARGUMENT_FAIL;
1559     }
1560     if (value.empty()) {
1561       std::cerr << name << " requires a non-empty value." << std::endl;
1562       return PARSE_ARGUMENT_FAIL;
1563     }
1564     if (!descriptor_set_in_names_.empty()) {
1565       std::cerr << name << " cannot be used with --descriptor_set_in."
1566                 << std::endl;
1567       return PARSE_ARGUMENT_FAIL;
1568     }
1569     dependency_out_name_ = value;
1570 
1571   } else if (name == "--include_imports") {
1572     if (imports_in_descriptor_set_) {
1573       std::cerr << name << " may only be passed once." << std::endl;
1574       return PARSE_ARGUMENT_FAIL;
1575     }
1576     imports_in_descriptor_set_ = true;
1577 
1578   } else if (name == "--include_source_info") {
1579     if (source_info_in_descriptor_set_) {
1580       std::cerr << name << " may only be passed once." << std::endl;
1581       return PARSE_ARGUMENT_FAIL;
1582     }
1583     source_info_in_descriptor_set_ = true;
1584 
1585   } else if (name == "-h" || name == "--help") {
1586     PrintHelpText();
1587     return PARSE_ARGUMENT_DONE_AND_EXIT;  // Exit without running compiler.
1588 
1589   } else if (name == "--version") {
1590     if (!version_info_.empty()) {
1591       std::cout << version_info_ << std::endl;
1592     }
1593     std::cout << "libprotoc " << internal::VersionString(PROTOBUF_VERSION)
1594               << std::endl;
1595     return PARSE_ARGUMENT_DONE_AND_EXIT;  // Exit without running compiler.
1596 
1597   } else if (name == "--disallow_services") {
1598     disallow_services_ = true;
1599 
1600   } else if (name == "--encode" || name == "--decode" ||
1601              name == "--decode_raw") {
1602     if (mode_ != MODE_COMPILE) {
1603       std::cerr << "Only one of --encode and --decode can be specified."
1604                 << std::endl;
1605       return PARSE_ARGUMENT_FAIL;
1606     }
1607     if (!output_directives_.empty() || !descriptor_set_out_name_.empty()) {
1608       std::cerr << "Cannot use " << name
1609                 << " and generate code or descriptors at the same time."
1610                 << std::endl;
1611       return PARSE_ARGUMENT_FAIL;
1612     }
1613 
1614     mode_ = (name == "--encode") ? MODE_ENCODE : MODE_DECODE;
1615 
1616     if (value.empty() && name != "--decode_raw") {
1617       std::cerr << "Type name for " << name << " cannot be blank." << std::endl;
1618       if (name == "--decode") {
1619         std::cerr << "To decode an unknown message, use --decode_raw."
1620                   << std::endl;
1621       }
1622       return PARSE_ARGUMENT_FAIL;
1623     } else if (!value.empty() && name == "--decode_raw") {
1624       std::cerr << "--decode_raw does not take a parameter." << std::endl;
1625       return PARSE_ARGUMENT_FAIL;
1626     }
1627 
1628     codec_type_ = value;
1629 
1630   } else if (name == "--error_format") {
1631     if (value == "gcc") {
1632       error_format_ = ERROR_FORMAT_GCC;
1633     } else if (value == "msvs") {
1634       error_format_ = ERROR_FORMAT_MSVS;
1635     } else {
1636       std::cerr << "Unknown error format: " << value << std::endl;
1637       return PARSE_ARGUMENT_FAIL;
1638     }
1639 
1640   } else if (name == "--plugin") {
1641     if (plugin_prefix_.empty()) {
1642       std::cerr << "This compiler does not support plugins." << std::endl;
1643       return PARSE_ARGUMENT_FAIL;
1644     }
1645 
1646     std::string plugin_name;
1647     std::string path;
1648 
1649     std::string::size_type equals_pos = value.find_first_of('=');
1650     if (equals_pos == std::string::npos) {
1651       // Use the basename of the file.
1652       std::string::size_type slash_pos = value.find_last_of('/');
1653       if (slash_pos == std::string::npos) {
1654         plugin_name = value;
1655       } else {
1656         plugin_name = value.substr(slash_pos + 1);
1657       }
1658       path = value;
1659     } else {
1660       plugin_name = value.substr(0, equals_pos);
1661       path = value.substr(equals_pos + 1);
1662     }
1663 
1664     plugins_[plugin_name] = path;
1665 
1666   } else if (name == "--print_free_field_numbers") {
1667     if (mode_ != MODE_COMPILE) {
1668       std::cerr << "Cannot use " << name
1669                 << " and use --encode, --decode or print "
1670                 << "other info at the same time." << std::endl;
1671       return PARSE_ARGUMENT_FAIL;
1672     }
1673     if (!output_directives_.empty() || !descriptor_set_out_name_.empty()) {
1674       std::cerr << "Cannot use " << name
1675                 << " and generate code or descriptors at the same time."
1676                 << std::endl;
1677       return PARSE_ARGUMENT_FAIL;
1678     }
1679     mode_ = MODE_PRINT;
1680     print_mode_ = PRINT_FREE_FIELDS;
1681   } else {
1682     // Some other flag.  Look it up in the generators list.
1683     const GeneratorInfo* generator_info =
1684         FindOrNull(generators_by_flag_name_, name);
1685     if (generator_info == NULL &&
1686         (plugin_prefix_.empty() || !HasSuffixString(name, "_out"))) {
1687       // Check if it's a generator option flag.
1688       generator_info = FindOrNull(generators_by_option_name_, name);
1689       if (generator_info != NULL) {
1690         std::string* parameters =
1691             &generator_parameters_[generator_info->flag_name];
1692         if (!parameters->empty()) {
1693           parameters->append(",");
1694         }
1695         parameters->append(value);
1696       } else if (HasPrefixString(name, "--") && HasSuffixString(name, "_opt")) {
1697         std::string* parameters =
1698             &plugin_parameters_[PluginName(plugin_prefix_, name)];
1699         if (!parameters->empty()) {
1700           parameters->append(",");
1701         }
1702         parameters->append(value);
1703       } else {
1704         std::cerr << "Unknown flag: " << name << std::endl;
1705         return PARSE_ARGUMENT_FAIL;
1706       }
1707     } else {
1708       // It's an output flag.  Add it to the output directives.
1709       if (mode_ != MODE_COMPILE) {
1710         std::cerr << "Cannot use --encode, --decode or print .proto info and "
1711                      "generate code at the same time."
1712                   << std::endl;
1713         return PARSE_ARGUMENT_FAIL;
1714       }
1715 
1716       OutputDirective directive;
1717       directive.name = name;
1718       if (generator_info == NULL) {
1719         directive.generator = NULL;
1720       } else {
1721         directive.generator = generator_info->generator;
1722       }
1723 
1724       // Split value at ':' to separate the generator parameter from the
1725       // filename.  However, avoid doing this if the colon is part of a valid
1726       // Windows-style absolute path.
1727       std::string::size_type colon_pos = value.find_first_of(':');
1728       if (colon_pos == std::string::npos || IsWindowsAbsolutePath(value)) {
1729         directive.output_location = value;
1730       } else {
1731         directive.parameter = value.substr(0, colon_pos);
1732         directive.output_location = value.substr(colon_pos + 1);
1733       }
1734 
1735       output_directives_.push_back(directive);
1736     }
1737   }
1738 
1739   return PARSE_ARGUMENT_DONE_AND_CONTINUE;
1740 }
1741 
PrintHelpText()1742 void CommandLineInterface::PrintHelpText() {
1743   // Sorry for indentation here; line wrapping would be uglier.
1744   std::cout
1745       <<
1746       "Usage: " << executable_name_
1747       << " [OPTION] PROTO_FILES\n"
1748          "Parse PROTO_FILES and generate output based on the options given:\n"
1749          "  -IPATH, --proto_path=PATH   Specify the directory in which to "
1750          "search for\n"
1751          "                              imports.  May be specified multiple "
1752          "times;\n"
1753          "                              directories will be searched in order. "
1754          " If not\n"
1755          "                              given, the current working directory "
1756          "is used.\n"
1757          "                              If not found in any of the these "
1758          "directories,\n"
1759          "                              the --descriptor_set_in descriptors "
1760          "will be\n"
1761          "                              checked for required proto file.\n"
1762          "  --version                   Show version info and exit.\n"
1763          "  -h, --help                  Show this text and exit.\n"
1764          "  --encode=MESSAGE_TYPE       Read a text-format message of the "
1765          "given type\n"
1766          "                              from standard input and write it in "
1767          "binary\n"
1768          "                              to standard output.  The message type "
1769          "must\n"
1770          "                              be defined in PROTO_FILES or their "
1771          "imports.\n"
1772          "  --decode=MESSAGE_TYPE       Read a binary message of the given "
1773          "type from\n"
1774          "                              standard input and write it in text "
1775          "format\n"
1776          "                              to standard output.  The message type "
1777          "must\n"
1778          "                              be defined in PROTO_FILES or their "
1779          "imports.\n"
1780          "  --decode_raw                Read an arbitrary protocol message "
1781          "from\n"
1782          "                              standard input and write the raw "
1783          "tag/value\n"
1784          "                              pairs in text format to standard "
1785          "output.  No\n"
1786          "                              PROTO_FILES should be given when using "
1787          "this\n"
1788          "                              flag.\n"
1789          "  --descriptor_set_in=FILES   Specifies a delimited list of FILES\n"
1790          "                              each containing a FileDescriptorSet "
1791          "(a\n"
1792          "                              protocol buffer defined in "
1793          "descriptor.proto).\n"
1794          "                              The FileDescriptor for each of the "
1795          "PROTO_FILES\n"
1796          "                              provided will be loaded from these\n"
1797          "                              FileDescriptorSets. If a "
1798          "FileDescriptor\n"
1799          "                              appears multiple times, the first "
1800          "occurrence\n"
1801          "                              will be used.\n"
1802          "  -oFILE,                     Writes a FileDescriptorSet (a protocol "
1803          "buffer,\n"
1804          "    --descriptor_set_out=FILE defined in descriptor.proto) "
1805          "containing all of\n"
1806          "                              the input files to FILE.\n"
1807          "  --include_imports           When using --descriptor_set_out, also "
1808          "include\n"
1809          "                              all dependencies of the input files in "
1810          "the\n"
1811          "                              set, so that the set is "
1812          "self-contained.\n"
1813          "  --include_source_info       When using --descriptor_set_out, do "
1814          "not strip\n"
1815          "                              SourceCodeInfo from the "
1816          "FileDescriptorProto.\n"
1817          "                              This results in vastly larger "
1818          "descriptors that\n"
1819          "                              include information about the "
1820          "original\n"
1821          "                              location of each decl in the source "
1822          "file as\n"
1823          "                              well as surrounding comments.\n"
1824          "  --dependency_out=FILE       Write a dependency output file in the "
1825          "format\n"
1826          "                              expected by make. This writes the "
1827          "transitive\n"
1828          "                              set of input file paths to FILE\n"
1829          "  --error_format=FORMAT       Set the format in which to print "
1830          "errors.\n"
1831          "                              FORMAT may be 'gcc' (the default) or "
1832          "'msvs'\n"
1833          "                              (Microsoft Visual Studio format).\n"
1834          "  --print_free_field_numbers  Print the free field numbers of the "
1835          "messages\n"
1836          "                              defined in the given proto files. "
1837          "Groups share\n"
1838          "                              the same field number space with the "
1839          "parent \n"
1840          "                              message. Extension ranges are counted "
1841          "as \n"
1842          "                              occupied fields numbers.\n"
1843       << std::endl;
1844   if (!plugin_prefix_.empty()) {
1845     std::cout
1846         << "  --plugin=EXECUTABLE         Specifies a plugin executable to "
1847            "use.\n"
1848            "                              Normally, protoc searches the PATH "
1849            "for\n"
1850            "                              plugins, but you may specify "
1851            "additional\n"
1852            "                              executables not in the path using "
1853            "this flag.\n"
1854            "                              Additionally, EXECUTABLE may be of "
1855            "the form\n"
1856            "                              NAME=PATH, in which case the given "
1857            "plugin name\n"
1858            "                              is mapped to the given executable "
1859            "even if\n"
1860            "                              the executable's own name differs."
1861         << std::endl;
1862   }
1863 
1864   for (GeneratorMap::iterator iter = generators_by_flag_name_.begin();
1865        iter != generators_by_flag_name_.end(); ++iter) {
1866     // FIXME(kenton):  If the text is long enough it will wrap, which is ugly,
1867     //   but fixing this nicely (e.g. splitting on spaces) is probably more
1868     //   trouble than it's worth.
1869     std::cout << "  " << iter->first << "=OUT_DIR "
1870               << std::string(19 - iter->first.size(),
1871                              ' ')  // Spaces for alignment.
1872               << iter->second.help_text << std::endl;
1873   }
1874   std::cout << "  @<filename>                 Read options and filenames from "
1875                "file. If a\n"
1876                "                              relative file path is specified, "
1877                "the file\n"
1878                "                              will be searched in the working "
1879                "directory.\n"
1880                "                              The --proto_path option will not "
1881                "affect how\n"
1882                "                              this argument file is searched. "
1883                "Content of\n"
1884                "                              the file will be expanded in the "
1885                "position of\n"
1886                "                              @<filename> as in the argument "
1887                "list. Note\n"
1888                "                              that shell expansion is not "
1889                "applied to the\n"
1890                "                              content of the file (i.e., you "
1891                "cannot use\n"
1892                "                              quotes, wildcards, escapes, "
1893                "commands, etc.).\n"
1894                "                              Each line corresponds to a "
1895                "single argument,\n"
1896                "                              even if it contains spaces."
1897             << std::endl;
1898 }
1899 
GenerateOutput(const std::vector<const FileDescriptor * > & parsed_files,const OutputDirective & output_directive,GeneratorContext * generator_context)1900 bool CommandLineInterface::GenerateOutput(
1901     const std::vector<const FileDescriptor*>& parsed_files,
1902     const OutputDirective& output_directive,
1903     GeneratorContext* generator_context) {
1904   // Call the generator.
1905   std::string error;
1906   if (output_directive.generator == NULL) {
1907     // This is a plugin.
1908     GOOGLE_CHECK(HasPrefixString(output_directive.name, "--") &&
1909           HasSuffixString(output_directive.name, "_out"))
1910         << "Bad name for plugin generator: " << output_directive.name;
1911 
1912     std::string plugin_name = PluginName(plugin_prefix_, output_directive.name);
1913     std::string parameters = output_directive.parameter;
1914     if (!plugin_parameters_[plugin_name].empty()) {
1915       if (!parameters.empty()) {
1916         parameters.append(",");
1917       }
1918       parameters.append(plugin_parameters_[plugin_name]);
1919     }
1920     if (!GeneratePluginOutput(parsed_files, plugin_name, parameters,
1921                               generator_context, &error)) {
1922       std::cerr << output_directive.name << ": " << error << std::endl;
1923       return false;
1924     }
1925   } else {
1926     // Regular generator.
1927     std::string parameters = output_directive.parameter;
1928     if (!generator_parameters_[output_directive.name].empty()) {
1929       if (!parameters.empty()) {
1930         parameters.append(",");
1931       }
1932       parameters.append(generator_parameters_[output_directive.name]);
1933     }
1934     if (!output_directive.generator->GenerateAll(parsed_files, parameters,
1935                                                  generator_context, &error)) {
1936       // Generator returned an error.
1937       std::cerr << output_directive.name << ": " << error << std::endl;
1938       return false;
1939     }
1940   }
1941 
1942   return true;
1943 }
1944 
GenerateDependencyManifestFile(const std::vector<const FileDescriptor * > & parsed_files,const GeneratorContextMap & output_directories,DiskSourceTree * source_tree)1945 bool CommandLineInterface::GenerateDependencyManifestFile(
1946     const std::vector<const FileDescriptor*>& parsed_files,
1947     const GeneratorContextMap& output_directories,
1948     DiskSourceTree* source_tree) {
1949   FileDescriptorSet file_set;
1950 
1951   std::set<const FileDescriptor*> already_seen;
1952   for (int i = 0; i < parsed_files.size(); i++) {
1953     GetTransitiveDependencies(parsed_files[i], false, false, &already_seen,
1954                               file_set.mutable_file());
1955   }
1956 
1957   std::vector<std::string> output_filenames;
1958   for (GeneratorContextMap::const_iterator iter = output_directories.begin();
1959        iter != output_directories.end(); ++iter) {
1960     const std::string& location = iter->first;
1961     GeneratorContextImpl* directory = iter->second;
1962     std::vector<std::string> relative_output_filenames;
1963     directory->GetOutputFilenames(&relative_output_filenames);
1964     for (int i = 0; i < relative_output_filenames.size(); i++) {
1965       std::string output_filename = location + relative_output_filenames[i];
1966       if (output_filename.compare(0, 2, "./") == 0) {
1967         output_filename = output_filename.substr(2);
1968       }
1969       output_filenames.push_back(output_filename);
1970     }
1971   }
1972 
1973   int fd;
1974   do {
1975     fd = open(dependency_out_name_.c_str(),
1976               O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
1977   } while (fd < 0 && errno == EINTR);
1978 
1979   if (fd < 0) {
1980     perror(dependency_out_name_.c_str());
1981     return false;
1982   }
1983 
1984   io::FileOutputStream out(fd);
1985   io::Printer printer(&out, '$');
1986 
1987   for (int i = 0; i < output_filenames.size(); i++) {
1988     printer.Print(output_filenames[i].c_str());
1989     if (i == output_filenames.size() - 1) {
1990       printer.Print(":");
1991     } else {
1992       printer.Print(" \\\n");
1993     }
1994   }
1995 
1996   for (int i = 0; i < file_set.file_size(); i++) {
1997     const FileDescriptorProto& file = file_set.file(i);
1998     const std::string& virtual_file = file.name();
1999     std::string disk_file;
2000     if (source_tree &&
2001         source_tree->VirtualFileToDiskFile(virtual_file, &disk_file)) {
2002       printer.Print(" $disk_file$", "disk_file", disk_file);
2003       if (i < file_set.file_size() - 1) printer.Print("\\\n");
2004     } else {
2005       std::cerr << "Unable to identify path for file " << virtual_file
2006                 << std::endl;
2007       return false;
2008     }
2009   }
2010 
2011   return true;
2012 }
2013 
GeneratePluginOutput(const std::vector<const FileDescriptor * > & parsed_files,const std::string & plugin_name,const std::string & parameter,GeneratorContext * generator_context,std::string * error)2014 bool CommandLineInterface::GeneratePluginOutput(
2015     const std::vector<const FileDescriptor*>& parsed_files,
2016     const std::string& plugin_name, const std::string& parameter,
2017     GeneratorContext* generator_context, std::string* error) {
2018   CodeGeneratorRequest request;
2019   CodeGeneratorResponse response;
2020   std::string processed_parameter = parameter;
2021 
2022 
2023   // Build the request.
2024   if (!processed_parameter.empty()) {
2025     request.set_parameter(processed_parameter);
2026   }
2027 
2028 
2029   std::set<const FileDescriptor*> already_seen;
2030   for (int i = 0; i < parsed_files.size(); i++) {
2031     request.add_file_to_generate(parsed_files[i]->name());
2032     GetTransitiveDependencies(parsed_files[i],
2033                               true,  // Include json_name for plugins.
2034                               true,  // Include source code info.
2035                               &already_seen, request.mutable_proto_file());
2036   }
2037 
2038   google::protobuf::compiler::Version* version =
2039       request.mutable_compiler_version();
2040   version->set_major(PROTOBUF_VERSION / 1000000);
2041   version->set_minor(PROTOBUF_VERSION / 1000 % 1000);
2042   version->set_patch(PROTOBUF_VERSION % 1000);
2043   version->set_suffix(PROTOBUF_VERSION_SUFFIX);
2044 
2045   // Invoke the plugin.
2046   Subprocess subprocess;
2047 
2048   if (plugins_.count(plugin_name) > 0) {
2049     subprocess.Start(plugins_[plugin_name], Subprocess::EXACT_NAME);
2050   } else {
2051     subprocess.Start(plugin_name, Subprocess::SEARCH_PATH);
2052   }
2053 
2054   std::string communicate_error;
2055   if (!subprocess.Communicate(request, &response, &communicate_error)) {
2056     *error = strings::Substitute("$0: $1", plugin_name, communicate_error);
2057     return false;
2058   }
2059 
2060   // Write the files.  We do this even if there was a generator error in order
2061   // to match the behavior of a compiled-in generator.
2062   std::unique_ptr<io::ZeroCopyOutputStream> current_output;
2063   for (int i = 0; i < response.file_size(); i++) {
2064     const CodeGeneratorResponse::File& output_file = response.file(i);
2065 
2066     if (!output_file.insertion_point().empty()) {
2067       std::string filename = output_file.name();
2068       // Open a file for insert.
2069       // We reset current_output to NULL first so that the old file is closed
2070       // before the new one is opened.
2071       current_output.reset();
2072       current_output.reset(generator_context->OpenForInsert(
2073           filename, output_file.insertion_point()));
2074     } else if (!output_file.name().empty()) {
2075       // Starting a new file.  Open it.
2076       // We reset current_output to NULL first so that the old file is closed
2077       // before the new one is opened.
2078       current_output.reset();
2079       current_output.reset(generator_context->Open(output_file.name()));
2080     } else if (current_output == NULL) {
2081       *error = strings::Substitute(
2082           "$0: First file chunk returned by plugin did not specify a file "
2083           "name.",
2084           plugin_name);
2085       return false;
2086     }
2087 
2088     // Use CodedOutputStream for convenience; otherwise we'd need to provide
2089     // our own buffer-copying loop.
2090     io::CodedOutputStream writer(current_output.get());
2091     writer.WriteString(output_file.content());
2092   }
2093 
2094   // Check for errors.
2095   if (!response.error().empty()) {
2096     // Generator returned an error.
2097     *error = response.error();
2098     return false;
2099   }
2100 
2101   return true;
2102 }
2103 
EncodeOrDecode(const DescriptorPool * pool)2104 bool CommandLineInterface::EncodeOrDecode(const DescriptorPool* pool) {
2105   // Look up the type.
2106   const Descriptor* type = pool->FindMessageTypeByName(codec_type_);
2107   if (type == NULL) {
2108     std::cerr << "Type not defined: " << codec_type_ << std::endl;
2109     return false;
2110   }
2111 
2112   DynamicMessageFactory dynamic_factory(pool);
2113   std::unique_ptr<Message> message(dynamic_factory.GetPrototype(type)->New());
2114 
2115   if (mode_ == MODE_ENCODE) {
2116     SetFdToTextMode(STDIN_FILENO);
2117     SetFdToBinaryMode(STDOUT_FILENO);
2118   } else {
2119     SetFdToBinaryMode(STDIN_FILENO);
2120     SetFdToTextMode(STDOUT_FILENO);
2121   }
2122 
2123   io::FileInputStream in(STDIN_FILENO);
2124   io::FileOutputStream out(STDOUT_FILENO);
2125 
2126   if (mode_ == MODE_ENCODE) {
2127     // Input is text.
2128     ErrorPrinter error_collector(error_format_);
2129     TextFormat::Parser parser;
2130     parser.RecordErrorsTo(&error_collector);
2131     parser.AllowPartialMessage(true);
2132 
2133     if (!parser.Parse(&in, message.get())) {
2134       std::cerr << "Failed to parse input." << std::endl;
2135       return false;
2136     }
2137   } else {
2138     // Input is binary.
2139     if (!message->ParsePartialFromZeroCopyStream(&in)) {
2140       std::cerr << "Failed to parse input." << std::endl;
2141       return false;
2142     }
2143   }
2144 
2145   if (!message->IsInitialized()) {
2146     std::cerr << "warning:  Input message is missing required fields:  "
2147               << message->InitializationErrorString() << std::endl;
2148   }
2149 
2150   if (mode_ == MODE_ENCODE) {
2151     // Output is binary.
2152     if (!message->SerializePartialToZeroCopyStream(&out)) {
2153       std::cerr << "output: I/O error." << std::endl;
2154       return false;
2155     }
2156   } else {
2157     // Output is text.
2158     if (!TextFormat::Print(*message, &out)) {
2159       std::cerr << "output: I/O error." << std::endl;
2160       return false;
2161     }
2162   }
2163 
2164   return true;
2165 }
2166 
WriteDescriptorSet(const std::vector<const FileDescriptor * > & parsed_files)2167 bool CommandLineInterface::WriteDescriptorSet(
2168     const std::vector<const FileDescriptor*>& parsed_files) {
2169   FileDescriptorSet file_set;
2170 
2171   std::set<const FileDescriptor*> already_seen;
2172   if (!imports_in_descriptor_set_) {
2173     // Since we don't want to output transitive dependencies, but we do want
2174     // things to be in dependency order, add all dependencies that aren't in
2175     // parsed_files to already_seen.  This will short circuit the recursion
2176     // in GetTransitiveDependencies.
2177     std::set<const FileDescriptor*> to_output;
2178     to_output.insert(parsed_files.begin(), parsed_files.end());
2179     for (int i = 0; i < parsed_files.size(); i++) {
2180       const FileDescriptor* file = parsed_files[i];
2181       for (int i = 0; i < file->dependency_count(); i++) {
2182         const FileDescriptor* dependency = file->dependency(i);
2183         // if the dependency isn't in parsed files, mark it as already seen
2184         if (to_output.find(dependency) == to_output.end()) {
2185           already_seen.insert(dependency);
2186         }
2187       }
2188     }
2189   }
2190   for (int i = 0; i < parsed_files.size(); i++) {
2191     GetTransitiveDependencies(parsed_files[i],
2192                               true,  // Include json_name
2193                               source_info_in_descriptor_set_, &already_seen,
2194                               file_set.mutable_file());
2195   }
2196 
2197   int fd;
2198   do {
2199     fd = open(descriptor_set_out_name_.c_str(),
2200               O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
2201   } while (fd < 0 && errno == EINTR);
2202 
2203   if (fd < 0) {
2204     perror(descriptor_set_out_name_.c_str());
2205     return false;
2206   }
2207 
2208   io::FileOutputStream out(fd);
2209   if (!file_set.SerializeToZeroCopyStream(&out)) {
2210     std::cerr << descriptor_set_out_name_ << ": " << strerror(out.GetErrno())
2211               << std::endl;
2212     out.Close();
2213     return false;
2214   }
2215   if (!out.Close()) {
2216     std::cerr << descriptor_set_out_name_ << ": " << strerror(out.GetErrno())
2217               << std::endl;
2218     return false;
2219   }
2220 
2221   return true;
2222 }
2223 
GetTransitiveDependencies(const FileDescriptor * file,bool include_json_name,bool include_source_code_info,std::set<const FileDescriptor * > * already_seen,RepeatedPtrField<FileDescriptorProto> * output)2224 void CommandLineInterface::GetTransitiveDependencies(
2225     const FileDescriptor* file, bool include_json_name,
2226     bool include_source_code_info,
2227     std::set<const FileDescriptor*>* already_seen,
2228     RepeatedPtrField<FileDescriptorProto>* output) {
2229   if (!already_seen->insert(file).second) {
2230     // Already saw this file.  Skip.
2231     return;
2232   }
2233 
2234   // Add all dependencies.
2235   for (int i = 0; i < file->dependency_count(); i++) {
2236     GetTransitiveDependencies(file->dependency(i), include_json_name,
2237                               include_source_code_info, already_seen, output);
2238   }
2239 
2240   // Add this file.
2241   FileDescriptorProto* new_descriptor = output->Add();
2242   file->CopyTo(new_descriptor);
2243   if (include_json_name) {
2244     file->CopyJsonNameTo(new_descriptor);
2245   }
2246   if (include_source_code_info) {
2247     file->CopySourceCodeInfoTo(new_descriptor);
2248   }
2249 }
2250 
2251 namespace {
2252 
2253 // Utility function for PrintFreeFieldNumbers.
2254 // Stores occupied ranges into the ranges parameter, and next level of sub
2255 // message types into the nested_messages parameter.  The FieldRange is left
2256 // inclusive, right exclusive. i.e. [a, b).
2257 //
2258 // Nested Messages:
2259 // Note that it only stores the nested message type, iff the nested type is
2260 // either a direct child of the given descriptor, or the nested type is a
2261 // decendent of the given descriptor and all the nodes between the
2262 // nested type and the given descriptor are group types. e.g.
2263 //
2264 // message Foo {
2265 //   message Bar {
2266 //     message NestedBar {}
2267 //   }
2268 //   group Baz = 1 {
2269 //     group NestedBazGroup = 2 {
2270 //       message Quz {
2271 //         message NestedQuz {}
2272 //       }
2273 //     }
2274 //     message NestedBaz {}
2275 //   }
2276 // }
2277 //
2278 // In this case, Bar, Quz and NestedBaz will be added into the nested types.
2279 // Since free field numbers of group types will not be printed, this makes sure
2280 // the nested message types in groups will not be dropped. The nested_messages
2281 // parameter will contain the direct children (when groups are ignored in the
2282 // tree) of the given descriptor for the caller to traverse. The declaration
2283 // order of the nested messages is also preserved.
2284 typedef std::pair<int, int> FieldRange;
GatherOccupiedFieldRanges(const Descriptor * descriptor,std::set<FieldRange> * ranges,std::vector<const Descriptor * > * nested_messages)2285 void GatherOccupiedFieldRanges(
2286     const Descriptor* descriptor, std::set<FieldRange>* ranges,
2287     std::vector<const Descriptor*>* nested_messages) {
2288   std::set<const Descriptor*> groups;
2289   for (int i = 0; i < descriptor->field_count(); ++i) {
2290     const FieldDescriptor* fd = descriptor->field(i);
2291     ranges->insert(FieldRange(fd->number(), fd->number() + 1));
2292     if (fd->type() == FieldDescriptor::TYPE_GROUP) {
2293       groups.insert(fd->message_type());
2294     }
2295   }
2296   for (int i = 0; i < descriptor->extension_range_count(); ++i) {
2297     ranges->insert(FieldRange(descriptor->extension_range(i)->start,
2298                               descriptor->extension_range(i)->end));
2299   }
2300   for (int i = 0; i < descriptor->reserved_range_count(); ++i) {
2301     ranges->insert(FieldRange(descriptor->reserved_range(i)->start,
2302                               descriptor->reserved_range(i)->end));
2303   }
2304   // Handle the nested messages/groups in declaration order to make it
2305   // post-order strict.
2306   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
2307     const Descriptor* nested_desc = descriptor->nested_type(i);
2308     if (groups.find(nested_desc) != groups.end()) {
2309       GatherOccupiedFieldRanges(nested_desc, ranges, nested_messages);
2310     } else {
2311       nested_messages->push_back(nested_desc);
2312     }
2313   }
2314 }
2315 
2316 // Utility function for PrintFreeFieldNumbers.
2317 // Actually prints the formatted free field numbers for given message name and
2318 // occupied ranges.
FormatFreeFieldNumbers(const std::string & name,const std::set<FieldRange> & ranges)2319 void FormatFreeFieldNumbers(const std::string& name,
2320                             const std::set<FieldRange>& ranges) {
2321   std::string output;
2322   StringAppendF(&output, "%-35s free:", name.c_str());
2323   int next_free_number = 1;
2324   for (std::set<FieldRange>::const_iterator i = ranges.begin();
2325        i != ranges.end(); ++i) {
2326     // This happens when groups re-use parent field numbers, in which
2327     // case we skip the FieldRange entirely.
2328     if (next_free_number >= i->second) continue;
2329 
2330     if (next_free_number < i->first) {
2331       if (next_free_number + 1 == i->first) {
2332         // Singleton
2333         StringAppendF(&output, " %d", next_free_number);
2334       } else {
2335         // Range
2336         StringAppendF(&output, " %d-%d", next_free_number, i->first - 1);
2337       }
2338     }
2339     next_free_number = i->second;
2340   }
2341   if (next_free_number <= FieldDescriptor::kMaxNumber) {
2342     StringAppendF(&output, " %d-INF", next_free_number);
2343   }
2344   std::cout << output << std::endl;
2345 }
2346 
2347 }  // namespace
2348 
PrintFreeFieldNumbers(const Descriptor * descriptor)2349 void CommandLineInterface::PrintFreeFieldNumbers(const Descriptor* descriptor) {
2350   std::set<FieldRange> ranges;
2351   std::vector<const Descriptor*> nested_messages;
2352   GatherOccupiedFieldRanges(descriptor, &ranges, &nested_messages);
2353 
2354   for (int i = 0; i < nested_messages.size(); ++i) {
2355     PrintFreeFieldNumbers(nested_messages[i]);
2356   }
2357   FormatFreeFieldNumbers(descriptor->full_name(), ranges);
2358 }
2359 
2360 
2361 }  // namespace compiler
2362 }  // namespace protobuf
2363 }  // namespace google
2364