1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #include <google/protobuf/compiler/command_line_interface.h>
36 #include <google/protobuf/stubs/platform_macros.h>
37 
38 #include <stdio.h>
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <fcntl.h>
42 #ifdef _MSC_VER
43 #include <io.h>
44 #include <direct.h>
45 #else
46 #include <unistd.h>
47 #endif
48 #include <errno.h>
49 #include <fstream>
50 #include <iostream>
51 #include <ctype.h>
52 
53 #ifdef GOOGLE_PROTOBUF_ARCH_SPARC
54 #include <limits.h> //For PATH_MAX
55 #endif
56 
57 #include <memory>
58 #ifndef _SHARED_PTR_H
59 #include <google/protobuf/stubs/shared_ptr.h>
60 #endif
61 
62 #ifdef __APPLE__
63 #include <mach-o/dyld.h>
64 #endif
65 
66 #include <google/protobuf/stubs/common.h>
67 #include <google/protobuf/stubs/stringprintf.h>
68 #include <google/protobuf/compiler/importer.h>
69 #include <google/protobuf/compiler/code_generator.h>
70 #include <google/protobuf/compiler/plugin.pb.h>
71 #include <google/protobuf/compiler/subprocess.h>
72 #include <google/protobuf/compiler/zip_writer.h>
73 #include <google/protobuf/descriptor.h>
74 #include <google/protobuf/text_format.h>
75 #include <google/protobuf/dynamic_message.h>
76 #include <google/protobuf/io/coded_stream.h>
77 #include <google/protobuf/io/zero_copy_stream_impl.h>
78 #include <google/protobuf/io/printer.h>
79 #include <google/protobuf/stubs/logging.h>
80 #include <google/protobuf/stubs/strutil.h>
81 #include <google/protobuf/stubs/substitute.h>
82 #include <google/protobuf/stubs/map_util.h>
83 #include <google/protobuf/stubs/stl_util.h>
84 
85 
86 namespace google {
87 namespace protobuf {
88 namespace compiler {
89 
// Portability shims for Windows builds: each block below only supplies a
// definition when the platform headers have not already provided one.
#if defined(_WIN32)
// Callers in this file pass a permission mode to mkdir(); this macro drops the
// mode and forwards to the one-argument form.
#define mkdir(name, mode) mkdir(name)
#ifndef W_OK
#define W_OK 02  // not defined by MSVC for whatever reason
#endif
#ifndef F_OK
#define F_OK 00  // not defined by MSVC for whatever reason
#endif
#ifndef STDIN_FILENO
#define STDIN_FILENO 0
#endif
#ifndef STDOUT_FILENO
#define STDOUT_FILENO 1
#endif
#endif

// O_BINARY is OR-ed into open() flags in this file.  Fall back to _O_BINARY
// when only that spelling exists, and to 0 (no effect) otherwise.
#ifndef O_BINARY
#ifdef _O_BINARY
#define O_BINARY _O_BINARY
#else
#define O_BINARY 0     // If this isn't defined, the platform doesn't need it.
#endif
#endif
113 
114 namespace {
// Separator string: ";" on native Windows builds (not Cygwin), ":" everywhere
// else.  NOTE(review): presumably used to split multi-entry path arguments;
// the use site is outside this part of the file -- confirm.
#if defined(_WIN32) && !defined(__CYGWIN__)
static const char* kPathSeparator = ";";
#else
static const char* kPathSeparator = ":";
#endif
120 
// Returns true if |text| looks like a Windows-style absolute path: a drive
// letter, then ':', then '/' or '\', with no other ':' anywhere in the
// string.  Example:  "C:\foo".  Always returns false when not built for
// Windows or Cygwin.  TODO(kenton):  Share this with copy in importer.cc?
static bool IsWindowsAbsolutePath(const string& text) {
#if defined(_WIN32) || defined(__CYGWIN__)
  // isalpha() has undefined behavior for negative arguments other than EOF,
  // and plain char may be signed, so convert to unsigned char first.
  return text.size() >= 3 && text[1] == ':' &&
         isalpha(static_cast<unsigned char>(text[0])) &&
         (text[2] == '/' || text[2] == '\\') &&
         text.find_last_of(':') == 1;
#else
  return false;
#endif
}
134 
// Switches the file descriptor |fd| to text mode.  Only the Windows build does
// any work; a failing _setmode() is logged as a warning and otherwise ignored.
void SetFdToTextMode(int fd) {
  // (Text and binary are the same on non-Windows platforms, so there is
  // nothing to do there.)
#ifdef _WIN32
  const int setmode_result = _setmode(fd, _O_TEXT);
  if (setmode_result != -1) return;
  // This should never happen, I think.
  GOOGLE_LOG(WARNING) << "_setmode(" << fd << ", _O_TEXT): " << strerror(errno);
#endif
}
144 
// Switches the file descriptor |fd| to binary mode.  Only the Windows build
// does any work; a failing _setmode() is logged as a warning and otherwise
// ignored.
void SetFdToBinaryMode(int fd) {
  // (Text and binary are the same on non-Windows platforms, so there is
  // nothing to do there.)
#ifdef _WIN32
  const int setmode_result = _setmode(fd, _O_BINARY);
  if (setmode_result != -1) return;
  // This should never happen, I think.
  GOOGLE_LOG(WARNING) << "_setmode(" << fd << ", _O_BINARY): " << strerror(errno);
#endif
}
154 
// Appends '/' to |*path| unless the string is empty or already ends in '/'.
void AddTrailingSlash(string* path) {
  if (path->empty()) return;

  const char last_char = (*path)[path->size() - 1];
  if (last_char == '/') return;

  path->push_back('/');
}
160 
// Returns true when |path| is empty or access(path, F_OK) succeeds.  Otherwise
// prints "<path>: <strerror(errno)>" to stderr and returns false.
bool VerifyDirectoryExists(const string& path) {
  if (path.empty()) return true;

  const bool exists = access(path.c_str(), F_OK) != -1;
  if (!exists) {
    std::cerr << path << ": " << strerror(errno) << std::endl;
  }
  return exists;
}
171 
172 // Try to create the parent directory of the given file, creating the parent's
173 // parent if necessary, and so on.  The full file name is actually
174 // (prefix + filename), but we assume |prefix| already exists and only create
175 // directories listed in |filename|.
TryCreateParentDirectory(const string & prefix,const string & filename)176 bool TryCreateParentDirectory(const string& prefix, const string& filename) {
177   // Recursively create parent directories to the output file.
178   vector<string> parts = Split(filename, "/", true);
179   string path_so_far = prefix;
180   for (int i = 0; i < parts.size() - 1; i++) {
181     path_so_far += parts[i];
182     if (mkdir(path_so_far.c_str(), 0777) != 0) {
183       if (errno != EEXIST) {
184         std::cerr << filename << ": while trying to create directory "
185                   << path_so_far << ": " << strerror(errno) << std::endl;
186         return false;
187       }
188     }
189     path_so_far += '/';
190   }
191 
192   return true;
193 }
194 
// Get the absolute path of this protoc binary.
//
// On success the path is stored in |*path| and true is returned.  When the
// platform-specific lookup fails, |*path| is left untouched and false is
// returned.
bool GetProtocAbsolutePath(string* path) {
#ifdef _WIN32
  char buffer[MAX_PATH];
  int len = GetModuleFileNameA(NULL, buffer, MAX_PATH);
#elif __APPLE__
  char buffer[PATH_MAX];
  int len = 0;

  char dirtybuffer[PATH_MAX];
  uint32_t size = sizeof(dirtybuffer);
  // Only measure |buffer| once realpath() has reported success; after a
  // failure its contents cannot be relied on, so calling strlen() on it
  // would read indeterminate memory.
  if (_NSGetExecutablePath(dirtybuffer, &size) == 0 &&
      realpath(dirtybuffer, buffer) != NULL) {
    len = strlen(buffer);
  }
#else
  char buffer[PATH_MAX];
  // readlink() does not NUL-terminate |buffer|; the returned length is what
  // delimits the result below.  A negative return value means failure.
  int len = readlink("/proc/self/exe", buffer, PATH_MAX);
#endif
  if (len > 0) {
    path->assign(buffer, len);
    return true;
  } else {
    return false;
  }
}
221 
// Reports whether |path| looks like the directory where
// google/protobuf/descriptor.proto and the other well-known type protos are
// installed.
bool IsInstalledProtoPath(const string& path) {
  // Checking the descriptor.proto file should be good enough.
  string candidate(path);
  candidate += "/google/protobuf/descriptor.proto";
  const int access_result = access(candidate.c_str(), F_OK);
  return access_result != -1;
}
229 
230 // Add the paths where google/protobuf/descritor.proto and other well-known
231 // type protos are installed.
AddDefaultProtoPaths(vector<pair<string,string>> * paths)232 void AddDefaultProtoPaths(vector<pair<string, string> >* paths) {
233   // TODO(xiaofeng): The code currently only checks relative paths of where
234   // the protoc binary is installed. We probably should make it handle more
235   // cases than that.
236   string path;
237   if (!GetProtocAbsolutePath(&path)) {
238     return;
239   }
240   // Strip the binary name.
241   size_t pos = path.find_last_of("/\\");
242   if (pos == string::npos || pos == 0) {
243     return;
244   }
245   path = path.substr(0, pos);
246   // Check the binary's directory.
247   if (IsInstalledProtoPath(path)) {
248     paths->push_back(pair<string, string>("", path));
249     return;
250   }
251   // Check if there is an include subdirectory.
252   if (IsInstalledProtoPath(path + "/include")) {
253     paths->push_back(pair<string, string>("", path + "/include"));
254     return;
255   }
256   // Check if the upper level directory has an "include" subdirectory.
257   pos = path.find_last_of("/\\");
258   if (pos == string::npos || pos == 0) {
259     return;
260   }
261   path = path.substr(0, pos);
262   if (IsInstalledProtoPath(path + "/include")) {
263     paths->push_back(pair<string, string>("", path + "/include"));
264     return;
265   }
266 }
267 }  // namespace
268 
269 // A MultiFileErrorCollector that prints errors to stderr.
270 class CommandLineInterface::ErrorPrinter : public MultiFileErrorCollector,
271                                            public io::ErrorCollector {
272  public:
ErrorPrinter(ErrorFormat format,DiskSourceTree * tree=NULL)273   ErrorPrinter(ErrorFormat format, DiskSourceTree *tree = NULL)
274     : format_(format), tree_(tree) {}
~ErrorPrinter()275   ~ErrorPrinter() {}
276 
277   // implements MultiFileErrorCollector ------------------------------
AddError(const string & filename,int line,int column,const string & message)278   void AddError(const string& filename, int line, int column,
279                 const string& message) {
280     AddErrorOrWarning(filename, line, column, message, "error", std::cerr);
281   }
282 
AddWarning(const string & filename,int line,int column,const string & message)283   void AddWarning(const string& filename, int line, int column,
284                   const string& message) {
285     AddErrorOrWarning(filename, line, column, message, "warning", std::clog);
286   }
287 
288   // implements io::ErrorCollector -----------------------------------
AddError(int line,int column,const string & message)289   void AddError(int line, int column, const string& message) {
290     AddError("input", line, column, message);
291   }
292 
AddWarning(int line,int column,const string & message)293   void AddWarning(int line, int column, const string& message) {
294     AddErrorOrWarning("input", line, column, message, "warning", std::clog);
295   }
296 
297  private:
AddErrorOrWarning(const string & filename,int line,int column,const string & message,const string & type,ostream & out)298   void AddErrorOrWarning(
299       const string& filename, int line, int column,
300       const string& message, const string& type, ostream& out) {
301     // Print full path when running under MSVS
302     string dfile;
303     if (format_ == CommandLineInterface::ERROR_FORMAT_MSVS &&
304         tree_ != NULL &&
305         tree_->VirtualFileToDiskFile(filename, &dfile)) {
306       out << dfile;
307     } else {
308       out << filename;
309     }
310 
311     // Users typically expect 1-based line/column numbers, so we add 1
312     // to each here.
313     if (line != -1) {
314       // Allow for both GCC- and Visual-Studio-compatible output.
315       switch (format_) {
316         case CommandLineInterface::ERROR_FORMAT_GCC:
317           out << ":" << (line + 1) << ":" << (column + 1);
318           break;
319         case CommandLineInterface::ERROR_FORMAT_MSVS:
320           out << "(" << (line + 1) << ") : "
321               << type << " in column=" << (column + 1);
322           break;
323       }
324     }
325 
326     if (type == "warning") {
327       out << ": warning: " << message << std::endl;
328     } else {
329       out << ": " << message << std::endl;
330     }
331   }
332 
333   const ErrorFormat format_;
334   DiskSourceTree *tree_;
335 };
336 
337 // -------------------------------------------------------------------
338 
// A GeneratorContext implementation that buffers files in memory, then dumps
// them all to disk on demand.
//
// The streams handed out by Open(), OpenForAppend() and OpenForInsert() are
// MemoryOutputStream objects; each one commits its data into files_ when it
// is destroyed.
class CommandLineInterface::GeneratorContextImpl : public GeneratorContext {
 public:
  // |parsed_files| is kept by reference (see parsed_files_), so the vector
  // must outlive this object.
  GeneratorContextImpl(const vector<const FileDescriptor*>& parsed_files);
  // Deletes the buffered file contents held in files_.
  ~GeneratorContextImpl();

  // Write all files in the directory to disk at the given output location,
  // which must end in a '/'.
  bool WriteAllToDisk(const string& prefix);

  // Write the contents of this directory to a ZIP-format archive with the
  // given name.
  bool WriteAllToZip(const string& filename);

  // Add a boilerplate META-INF/MANIFEST.MF file as required by the Java JAR
  // format, unless one has already been written.
  void AddJarManifest();

  // Get name of all output files.  The names are appended to
  // |output_filenames| in files_ order.
  void GetOutputFilenames(vector<string>* output_filenames);

  // implements GeneratorContext --------------------------------------
  io::ZeroCopyOutputStream* Open(const string& filename);
  io::ZeroCopyOutputStream* OpenForAppend(const string& filename);
  io::ZeroCopyOutputStream* OpenForInsert(
      const string& filename, const string& insertion_point);
  // Copies the list of parsed files given at construction into |*output|.
  void ListParsedFiles(vector<const FileDescriptor*>* output) {
    *output = parsed_files_;
  }

 private:
  // MemoryOutputStream writes into files_ and had_error_ from its destructor.
  friend class MemoryOutputStream;

  // Maps each output file name to its heap-allocated contents.
  // map instead of hash_map so that files are written in order (good when
  // writing zips).
  map<string, string*> files_;
  // The files passed to the constructor, held by reference.
  const vector<const FileDescriptor*>& parsed_files_;
  // Set by MemoryOutputStream when committing a stream fails.  Once set,
  // WriteAllToDisk() and WriteAllToZip() return false without writing.
  bool had_error_;
};
379 
// A ZeroCopyOutputStream that accumulates everything written to it in memory
// and, on destruction, stores, appends or inserts that data into the owning
// GeneratorContextImpl's file map.
class CommandLineInterface::MemoryOutputStream
    : public io::ZeroCopyOutputStream {
 public:
  // Stream for a whole file: |append_mode| selects appending to an existing
  // entry instead of treating a second write to the same name as an error.
  MemoryOutputStream(GeneratorContextImpl* directory, const string& filename,
                     bool append_mode);
  // Stream whose contents are inserted into an existing file at the
  // insertion point named |insertion_point|.
  MemoryOutputStream(GeneratorContextImpl* directory, const string& filename,
                     const string& insertion_point);
  // Commits the accumulated data into |directory_|; failures are reported on
  // stderr and recorded in the directory's had_error_ flag.
  virtual ~MemoryOutputStream();

  // implements ZeroCopyOutputStream ---------------------------------
  // All three simply forward to the StringOutputStream in inner_.
  virtual bool Next(void** data, int* size) { return inner_->Next(data, size); }
  virtual void BackUp(int count)            {        inner_->BackUp(count);    }
  virtual int64 ByteCount() const           { return inner_->ByteCount();      }

 private:
  // Where to insert the string when it's done.
  GeneratorContextImpl* directory_;
  string filename_;
  // Empty for streams created for a whole file.
  string insertion_point_;

  // The string we're building.
  string data_;

  // Whether we should append the output stream to the existing file.  The
  // destructor consults this only when insertion_point_ is empty.
  bool append_mode_;

  // StringOutputStream writing to data_.
  google::protobuf::scoped_ptr<io::StringOutputStream> inner_;
};
409 
410 // -------------------------------------------------------------------
411 
// Creates an empty context with no buffered files and no recorded error.
// |parsed_files| is bound to the reference member parsed_files_, not copied,
// so the caller's vector has to stay alive as long as this object does.
CommandLineInterface::GeneratorContextImpl::GeneratorContextImpl(
    const vector<const FileDescriptor*>& parsed_files)
    : parsed_files_(parsed_files),
      had_error_(false) {
}
417 
// Releases the heap-allocated file contents stored as the values of files_.
CommandLineInterface::GeneratorContextImpl::~GeneratorContextImpl() {
  STLDeleteValues(&files_);
}
421 
WriteAllToDisk(const string & prefix)422 bool CommandLineInterface::GeneratorContextImpl::WriteAllToDisk(
423     const string& prefix) {
424   if (had_error_) {
425     return false;
426   }
427 
428   if (!VerifyDirectoryExists(prefix)) {
429     return false;
430   }
431 
432   for (map<string, string*>::const_iterator iter = files_.begin();
433        iter != files_.end(); ++iter) {
434     const string& relative_filename = iter->first;
435     const char* data = iter->second->data();
436     int size = iter->second->size();
437 
438     if (!TryCreateParentDirectory(prefix, relative_filename)) {
439       return false;
440     }
441     string filename = prefix + relative_filename;
442 
443     // Create the output file.
444     int file_descriptor;
445     do {
446       file_descriptor =
447         open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
448     } while (file_descriptor < 0 && errno == EINTR);
449 
450     if (file_descriptor < 0) {
451       int error = errno;
452       std::cerr << filename << ": " << strerror(error);
453       return false;
454     }
455 
456     // Write the file.
457     while (size > 0) {
458       int write_result;
459       do {
460         write_result = write(file_descriptor, data, size);
461       } while (write_result < 0 && errno == EINTR);
462 
463       if (write_result <= 0) {
464         // Write error.
465 
466         // FIXME(kenton):  According to the man page, if write() returns zero,
467         //   there was no error; write() simply did not write anything.  It's
468         //   unclear under what circumstances this might happen, but presumably
469         //   errno won't be set in this case.  I am confused as to how such an
470         //   event should be handled.  For now I'm treating it as an error,
471         //   since retrying seems like it could lead to an infinite loop.  I
472         //   suspect this never actually happens anyway.
473 
474         if (write_result < 0) {
475           int error = errno;
476           std::cerr << filename << ": write: " << strerror(error);
477         } else {
478           std::cerr << filename << ": write() returned zero?" << std::endl;
479         }
480         return false;
481       }
482 
483       data += write_result;
484       size -= write_result;
485     }
486 
487     if (close(file_descriptor) != 0) {
488       int error = errno;
489       std::cerr << filename << ": close: " << strerror(error);
490       return false;
491     }
492   }
493 
494   return true;
495 }
496 
WriteAllToZip(const string & filename)497 bool CommandLineInterface::GeneratorContextImpl::WriteAllToZip(
498     const string& filename) {
499   if (had_error_) {
500     return false;
501   }
502 
503   // Create the output file.
504   int file_descriptor;
505   do {
506     file_descriptor =
507       open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
508   } while (file_descriptor < 0 && errno == EINTR);
509 
510   if (file_descriptor < 0) {
511     int error = errno;
512     std::cerr << filename << ": " << strerror(error);
513     return false;
514   }
515 
516   // Create the ZipWriter
517   io::FileOutputStream stream(file_descriptor);
518   ZipWriter zip_writer(&stream);
519 
520   for (map<string, string*>::const_iterator iter = files_.begin();
521        iter != files_.end(); ++iter) {
522     zip_writer.Write(iter->first, *iter->second);
523   }
524 
525   zip_writer.WriteDirectory();
526 
527   if (stream.GetErrno() != 0) {
528     std::cerr << filename << ": " << strerror(stream.GetErrno()) << std::endl;
529   }
530 
531   if (!stream.Close()) {
532     std::cerr << filename << ": " << strerror(stream.GetErrno()) << std::endl;
533   }
534 
535   return true;
536 }
537 
AddJarManifest()538 void CommandLineInterface::GeneratorContextImpl::AddJarManifest() {
539   string** map_slot = &files_["META-INF/MANIFEST.MF"];
540   if (*map_slot == NULL) {
541     *map_slot = new string(
542         "Manifest-Version: 1.0\n"
543         "Created-By: 1.6.0 (protoc)\n"
544         "\n");
545   }
546 }
547 
GetOutputFilenames(vector<string> * output_filenames)548 void CommandLineInterface::GeneratorContextImpl::GetOutputFilenames(
549     vector<string>* output_filenames) {
550   for (map<string, string*>::iterator iter = files_.begin();
551        iter != files_.end(); ++iter) {
552     output_filenames->push_back(iter->first);
553   }
554 }
555 
// Returns a newly allocated MemoryOutputStream for |filename| with append
// mode off.  The data is committed to this context when the stream is
// destroyed.  NOTE(review): the caller presumably owns the returned stream --
// confirm against the GeneratorContext contract.
io::ZeroCopyOutputStream* CommandLineInterface::GeneratorContextImpl::Open(
    const string& filename) {
  return new MemoryOutputStream(this, filename, false);
}
560 
// Returns a newly allocated MemoryOutputStream for |filename| with append
// mode on, so its data is appended if the file was already written.  The
// data is committed to this context when the stream is destroyed.
io::ZeroCopyOutputStream*
CommandLineInterface::GeneratorContextImpl::OpenForAppend(
    const string& filename) {
  return new MemoryOutputStream(this, filename, true);
}
566 
// Returns a newly allocated MemoryOutputStream whose data will be inserted
// into the already-buffered file |filename| at the insertion point named
// |insertion_point|.  The insertion happens when the stream is destroyed.
io::ZeroCopyOutputStream*
CommandLineInterface::GeneratorContextImpl::OpenForInsert(
    const string& filename, const string& insertion_point) {
  return new MemoryOutputStream(this, filename, insertion_point);
}
572 
573 // -------------------------------------------------------------------
574 
// Creates a stream for a whole file.  insertion_point_ is left empty, which
// is how the destructor recognizes this kind of stream.  inner_ writes into
// data_ until the stream is destroyed.
CommandLineInterface::MemoryOutputStream::MemoryOutputStream(
    GeneratorContextImpl* directory, const string& filename, bool append_mode)
    : directory_(directory),
      filename_(filename),
      append_mode_(append_mode),
      inner_(new io::StringOutputStream(&data_)) {
}
582 
MemoryOutputStream(GeneratorContextImpl * directory,const string & filename,const string & insertion_point)583 CommandLineInterface::MemoryOutputStream::MemoryOutputStream(
584     GeneratorContextImpl* directory, const string& filename,
585     const string& insertion_point)
586     : directory_(directory),
587       filename_(filename),
588       insertion_point_(insertion_point),
589       inner_(new io::StringOutputStream(&data_)) {
590 }
591 
// Commits the accumulated data into the owning directory's file map.
//
// With an empty insertion point the data becomes the file's contents, is
// appended to the existing contents (append mode), or -- if the file already
// exists and append mode is off -- an error is reported.  With a non-empty
// insertion point the data is spliced into the existing file just before the
// line holding the matching "@@protoc_insertion_point(...)" marker, with
// every inserted line given that line's leading indentation.  Every failure
// is printed to stderr and recorded in directory_->had_error_.
CommandLineInterface::MemoryOutputStream::~MemoryOutputStream() {
  // Make sure all data has been written.
  inner_.reset();

  // Insert into the directory.  operator[] creates a NULL entry when the file
  // has not been seen before.
  string** map_slot = &directory_->files_[filename_];

  if (insertion_point_.empty()) {
    // This was just a regular Open().
    if (*map_slot != NULL) {
      if (append_mode_) {
        (*map_slot)->append(data_);
      } else {
        std::cerr << filename_ << ": Tried to write the same file twice."
                  << std::endl;
        directory_->had_error_ = true;
      }
      return;
    }

    // First write to this file: hand the buffer over without copying it.
    *map_slot = new string;
    (*map_slot)->swap(data_);
  } else {
    // This was an OpenForInsert().

    // If the data doesn't end with a clean line break, add one.
    if (!data_.empty() && data_[data_.size() - 1] != '\n') {
      data_.push_back('\n');
    }

    // Find the file we are going to insert into.
    if (*map_slot == NULL) {
      std::cerr << filename_
                << ": Tried to insert into file that doesn't exist."
                << std::endl;
      directory_->had_error_ = true;
      return;
    }
    string* target = *map_slot;

    // Find the insertion point.
    string magic_string = strings::Substitute(
        "@@protoc_insertion_point($0)", insertion_point_);
    string::size_type pos = target->find(magic_string);

    if (pos == string::npos) {
      std::cerr << filename_ << ": insertion point \"" << insertion_point_
                << "\" not found." << std::endl;
      directory_->had_error_ = true;
      return;
    }

    // Seek backwards to the beginning of the line, which is where we will
    // insert the data.  Note that this has the effect of pushing the insertion
    // point down, so the data is inserted before it.  This is intentional
    // because it means that multiple insertions at the same point will end
    // up in the expected order in the final output.
    pos = target->find_last_of('\n', pos);
    if (pos == string::npos) {
      // Insertion point is on the first line.
      pos = 0;
    } else {
      // Advance to character after '\n'.
      ++pos;
    }

    // Extract indent: the run of spaces and tabs at the start of the
    // insertion point's line.
    string indent_(*target, pos, target->find_first_not_of(" \t", pos) - pos);

    if (indent_.empty()) {
      // No indent.  This makes things easier.
      target->insert(pos, data_);
    } else {
      // Calculate how much space we need: one copy of the indent for every
      // line (i.e. every '\n') in data_.
      int indent_size = 0;
      for (int i = 0; i < data_.size(); i++) {
        if (data_[i] == '\n') indent_size += indent_.size();
      }

      // Make a hole for it.
      target->insert(pos, data_.size() + indent_size, '\0');

      // Now copy in the data.
      string::size_type data_pos = 0;
      char* target_ptr = string_as_array(target) + pos;
      while (data_pos < data_.size()) {
        // Copy indent.
        memcpy(target_ptr, indent_.data(), indent_.size());
        target_ptr += indent_.size();

        // Copy line from data_.
        // We already guaranteed that data_ ends with a newline (above), so this
        // search can't fail.
        string::size_type line_length =
            data_.find_first_of('\n', data_pos) + 1 - data_pos;
        memcpy(target_ptr, data_.data() + data_pos, line_length);
        target_ptr += line_length;
        data_pos += line_length;
      }

      // Sanity check: the hole must have been filled exactly.
      GOOGLE_CHECK_EQ(target_ptr,
          string_as_array(target) + pos + data_.size() + indent_size);
    }
  }
}
697 
698 // ===================================================================
699 
// Starts in compile mode with GCC-style error output, no print mode, and
// every boolean option switched off.
CommandLineInterface::CommandLineInterface()
  : mode_(MODE_COMPILE),
    print_mode_(PRINT_NONE),
    error_format_(ERROR_FORMAT_GCC),
    imports_in_descriptor_set_(false),
    source_info_in_descriptor_set_(false),
    disallow_services_(false),
    inputs_are_proto_path_relative_(false) {}
// Nothing to release explicitly here; the body is empty.
CommandLineInterface::~CommandLineInterface() {}
709 
RegisterGenerator(const string & flag_name,CodeGenerator * generator,const string & help_text)710 void CommandLineInterface::RegisterGenerator(const string& flag_name,
711                                              CodeGenerator* generator,
712                                              const string& help_text) {
713   GeneratorInfo info;
714   info.flag_name = flag_name;
715   info.generator = generator;
716   info.help_text = help_text;
717   generators_by_flag_name_[flag_name] = info;
718 }
719 
RegisterGenerator(const string & flag_name,const string & option_flag_name,CodeGenerator * generator,const string & help_text)720 void CommandLineInterface::RegisterGenerator(const string& flag_name,
721                                              const string& option_flag_name,
722                                              CodeGenerator* generator,
723                                              const string& help_text) {
724   GeneratorInfo info;
725   info.flag_name = flag_name;
726   info.option_flag_name = option_flag_name;
727   info.generator = generator;
728   info.help_text = help_text;
729   generators_by_flag_name_[flag_name] = info;
730   generators_by_option_name_[option_flag_name] = info;
731 }
732 
// Records |exe_name_prefix| in plugin_prefix_.  NOTE(review): presumably this
// is the executable-name prefix used to locate plugin binaries; the code that
// reads plugin_prefix_ is outside this part of the file -- confirm.
void CommandLineInterface::AllowPlugins(const string& exe_name_prefix) {
  plugin_prefix_ = exe_name_prefix;
}
736 
Run(int argc,const char * const argv[])737 int CommandLineInterface::Run(int argc, const char* const argv[]) {
738   Clear();
739   switch (ParseArguments(argc, argv)) {
740     case PARSE_ARGUMENT_DONE_AND_EXIT:
741       return 0;
742     case PARSE_ARGUMENT_FAIL:
743       return 1;
744     case PARSE_ARGUMENT_DONE_AND_CONTINUE:
745       break;
746   }
747 
748   AddDefaultProtoPaths(&proto_path_);
749 
750   // Set up the source tree.
751   DiskSourceTree source_tree;
752   for (int i = 0; i < proto_path_.size(); i++) {
753     source_tree.MapPath(proto_path_[i].first, proto_path_[i].second);
754   }
755 
756   // Map input files to virtual paths if necessary.
757   if (!inputs_are_proto_path_relative_) {
758     if (!MakeInputsBeProtoPathRelative(&source_tree)) {
759       return 1;
760     }
761   }
762 
763   // Allocate the Importer.
764   ErrorPrinter error_collector(error_format_, &source_tree);
765   Importer importer(&source_tree, &error_collector);
766 
767   vector<const FileDescriptor*> parsed_files;
768 
769   // Parse each file.
770   for (int i = 0; i < input_files_.size(); i++) {
771     // Import the file.
772     importer.AddUnusedImportTrackFile(input_files_[i]);
773     const FileDescriptor* parsed_file = importer.Import(input_files_[i]);
774     importer.ClearUnusedImportTrackFiles();
775     if (parsed_file == NULL) return 1;
776     parsed_files.push_back(parsed_file);
777 
778     // Enforce --disallow_services.
779     if (disallow_services_ && parsed_file->service_count() > 0) {
780       cerr << parsed_file->name() << ": This file contains services, but "
781               "--disallow_services was used." << endl;
782       return 1;
783     }
784   }
785 
786   // We construct a separate GeneratorContext for each output location.  Note
787   // that two code generators may output to the same location, in which case
788   // they should share a single GeneratorContext so that OpenForInsert() works.
789   GeneratorContextMap output_directories;
790 
791   // Generate output.
792   if (mode_ == MODE_COMPILE) {
793     for (int i = 0; i < output_directives_.size(); i++) {
794       string output_location = output_directives_[i].output_location;
795       if (!HasSuffixString(output_location, ".zip") &&
796           !HasSuffixString(output_location, ".jar")) {
797         AddTrailingSlash(&output_location);
798       }
799       GeneratorContextImpl** map_slot = &output_directories[output_location];
800 
801       if (*map_slot == NULL) {
802         // First time we've seen this output location.
803         *map_slot = new GeneratorContextImpl(parsed_files);
804       }
805 
806       if (!GenerateOutput(parsed_files, output_directives_[i], *map_slot)) {
807         STLDeleteValues(&output_directories);
808         return 1;
809       }
810     }
811   }
812 
813   // Write all output to disk.
814   for (GeneratorContextMap::iterator iter = output_directories.begin();
815        iter != output_directories.end(); ++iter) {
816     const string& location = iter->first;
817     GeneratorContextImpl* directory = iter->second;
818     if (HasSuffixString(location, "/")) {
819       if (!directory->WriteAllToDisk(location)) {
820         STLDeleteValues(&output_directories);
821         return 1;
822       }
823     } else {
824       if (HasSuffixString(location, ".jar")) {
825         directory->AddJarManifest();
826       }
827 
828       if (!directory->WriteAllToZip(location)) {
829         STLDeleteValues(&output_directories);
830         return 1;
831       }
832     }
833   }
834 
835   if (!dependency_out_name_.empty()) {
836     if (!GenerateDependencyManifestFile(parsed_files, output_directories,
837                                         &source_tree)) {
838       return 1;
839     }
840   }
841 
842   STLDeleteValues(&output_directories);
843 
844   if (!descriptor_set_name_.empty()) {
845     if (!WriteDescriptorSet(parsed_files)) {
846       return 1;
847     }
848   }
849 
850   if (mode_ == MODE_ENCODE || mode_ == MODE_DECODE) {
851     if (codec_type_.empty()) {
852       // HACK:  Define an EmptyMessage type to use for decoding.
853       DescriptorPool pool;
854       FileDescriptorProto file;
855       file.set_name("empty_message.proto");
856       file.add_message_type()->set_name("EmptyMessage");
857       GOOGLE_CHECK(pool.BuildFile(file) != NULL);
858       codec_type_ = "EmptyMessage";
859       if (!EncodeOrDecode(&pool)) {
860         return 1;
861       }
862     } else {
863       if (!EncodeOrDecode(importer.pool())) {
864         return 1;
865       }
866     }
867   }
868 
869   if (mode_ == MODE_PRINT) {
870     switch (print_mode_) {
871       case PRINT_FREE_FIELDS:
872         for (int i = 0; i < parsed_files.size(); ++i) {
873           const FileDescriptor* fd = parsed_files[i];
874           for (int j = 0; j < fd->message_type_count(); ++j) {
875             PrintFreeFieldNumbers(fd->message_type(j));
876           }
877         }
878         break;
879       case PRINT_NONE:
880         GOOGLE_LOG(ERROR) << "If the code reaches here, it usually means a bug of "
881                      "flag parsing in the CommonadLineInterface.";
882         return 1;
883 
884       // Do not add a default case.
885     }
886   }
887 
888   return 0;
889 }
890 
Clear()891 void CommandLineInterface::Clear() {
892   // Clear all members that are set by Run().  Note that we must not clear
893   // members which are set by other methods before Run() is called.
894   executable_name_.clear();
895   proto_path_.clear();
896   input_files_.clear();
897   output_directives_.clear();
898   codec_type_.clear();
899   descriptor_set_name_.clear();
900   dependency_out_name_.clear();
901 
902   mode_ = MODE_COMPILE;
903   print_mode_ = PRINT_NONE;
904   imports_in_descriptor_set_ = false;
905   source_info_in_descriptor_set_ = false;
906   disallow_services_ = false;
907 }
908 
MakeInputsBeProtoPathRelative(DiskSourceTree * source_tree)909 bool CommandLineInterface::MakeInputsBeProtoPathRelative(
910     DiskSourceTree* source_tree) {
911   for (int i = 0; i < input_files_.size(); i++) {
912     string virtual_file, shadowing_disk_file;
913     switch (source_tree->DiskFileToVirtualFile(
914         input_files_[i], &virtual_file, &shadowing_disk_file)) {
915       case DiskSourceTree::SUCCESS:
916         input_files_[i] = virtual_file;
917         break;
918       case DiskSourceTree::SHADOWED:
919         std::cerr << input_files_[i]
920                   << ": Input is shadowed in the --proto_path by \""
921                   << shadowing_disk_file
922                   << "\".  Either use the latter file as your input or reorder "
923                      "the --proto_path so that the former file's location "
924                      "comes first." << std::endl;
925         return false;
926       case DiskSourceTree::CANNOT_OPEN:
927         std::cerr << input_files_[i] << ": " << strerror(errno) << std::endl;
928         return false;
929       case DiskSourceTree::NO_MAPPING:
930         // First check if the file exists at all.
931         if (access(input_files_[i].c_str(), F_OK) < 0) {
932           // File does not even exist.
933           std::cerr << input_files_[i] << ": " << strerror(ENOENT) << std::endl;
934         } else {
935           std::cerr
936               << input_files_[i]
937               << ": File does not reside within any path "
938                  "specified using --proto_path (or -I).  You must specify a "
939                  "--proto_path which encompasses this file.  Note that the "
940                  "proto_path must be an exact prefix of the .proto file "
941                  "names -- protoc is too dumb to figure out when two paths "
942                  "(e.g. absolute and relative) are equivalent (it's harder "
943                  "than you think)." << std::endl;
944         }
945         return false;
946     }
947   }
948 
949   return true;
950 }
951 
952 
953 CommandLineInterface::ParseArgumentStatus
ParseArguments(int argc,const char * const argv[])954 CommandLineInterface::ParseArguments(int argc, const char* const argv[]) {
955   executable_name_ = argv[0];
956 
957   vector<string> arguments;
958   for (int i = 1; i < argc; ++i) {
959     arguments.push_back(argv[i]);
960   }
961 
962   // Iterate through all arguments and parse them.
963   for (int i = 0; i < arguments.size(); ++i) {
964     string name, value;
965 
966     if (ParseArgument(arguments[i].c_str(), &name, &value)) {
967       // Returned true => Use the next argument as the flag value.
968       if (i + 1 == arguments.size() || arguments[i + 1][0] == '-') {
969         std::cerr << "Missing value for flag: " << name << std::endl;
970         if (name == "--decode") {
971           std::cerr << "To decode an unknown message, use --decode_raw."
972                     << std::endl;
973         }
974         return PARSE_ARGUMENT_FAIL;
975       } else {
976         ++i;
977         value = arguments[i];
978       }
979     }
980 
981     ParseArgumentStatus status = InterpretArgument(name, value);
982     if (status != PARSE_ARGUMENT_DONE_AND_CONTINUE)
983       return status;
984   }
985 
986   // If no --proto_path was given, use the current working directory.
987   if (proto_path_.empty()) {
988     // Don't use make_pair as the old/default standard library on Solaris
989     // doesn't support it without explicit template parameters, which are
990     // incompatible with C++0x's make_pair.
991     proto_path_.push_back(pair<string, string>("", "."));
992   }
993 
994   // Check some errror cases.
995   bool decoding_raw = (mode_ == MODE_DECODE) && codec_type_.empty();
996   if (decoding_raw && !input_files_.empty()) {
997     std::cerr << "When using --decode_raw, no input files should be given."
998               << std::endl;
999     return PARSE_ARGUMENT_FAIL;
1000   } else if (!decoding_raw && input_files_.empty()) {
1001     std::cerr << "Missing input file." << std::endl;
1002     return PARSE_ARGUMENT_FAIL;
1003   }
1004   if (mode_ == MODE_COMPILE && output_directives_.empty() &&
1005       descriptor_set_name_.empty()) {
1006     std::cerr << "Missing output directives." << std::endl;
1007     return PARSE_ARGUMENT_FAIL;
1008   }
1009   if (mode_ != MODE_COMPILE && !dependency_out_name_.empty()) {
1010     std::cerr << "Can only use --dependency_out=FILE when generating code."
1011               << std::endl;
1012     return PARSE_ARGUMENT_FAIL;
1013   }
1014   if (!dependency_out_name_.empty() && input_files_.size() > 1) {
1015     std::cerr
1016         << "Can only process one input file when using --dependency_out=FILE."
1017         << std::endl;
1018     return PARSE_ARGUMENT_FAIL;
1019   }
1020   if (imports_in_descriptor_set_ && descriptor_set_name_.empty()) {
1021     std::cerr << "--include_imports only makes sense when combined with "
1022                  "--descriptor_set_out." << std::endl;
1023   }
1024   if (source_info_in_descriptor_set_ && descriptor_set_name_.empty()) {
1025     std::cerr << "--include_source_info only makes sense when combined with "
1026                  "--descriptor_set_out." << std::endl;
1027   }
1028 
1029   return PARSE_ARGUMENT_DONE_AND_CONTINUE;
1030 }
1031 
ParseArgument(const char * arg,string * name,string * value)1032 bool CommandLineInterface::ParseArgument(const char* arg,
1033                                          string* name, string* value) {
1034   bool parsed_value = false;
1035 
1036   if (arg[0] != '-') {
1037     // Not a flag.
1038     name->clear();
1039     parsed_value = true;
1040     *value = arg;
1041   } else if (arg[1] == '-') {
1042     // Two dashes:  Multi-character name, with '=' separating name and
1043     //   value.
1044     const char* equals_pos = strchr(arg, '=');
1045     if (equals_pos != NULL) {
1046       *name = string(arg, equals_pos - arg);
1047       *value = equals_pos + 1;
1048       parsed_value = true;
1049     } else {
1050       *name = arg;
1051     }
1052   } else {
1053     // One dash:  One-character name, all subsequent characters are the
1054     //   value.
1055     if (arg[1] == '\0') {
1056       // arg is just "-".  We treat this as an input file, except that at
1057       // present this will just lead to a "file not found" error.
1058       name->clear();
1059       *value = arg;
1060       parsed_value = true;
1061     } else {
1062       *name = string(arg, 2);
1063       *value = arg + 2;
1064       parsed_value = !value->empty();
1065     }
1066   }
1067 
1068   // Need to return true iff the next arg should be used as the value for this
1069   // one, false otherwise.
1070 
1071   if (parsed_value) {
1072     // We already parsed a value for this flag.
1073     return false;
1074   }
1075 
1076   if (*name == "-h" || *name == "--help" ||
1077       *name == "--disallow_services" ||
1078       *name == "--include_imports" ||
1079       *name == "--include_source_info" ||
1080       *name == "--version" ||
1081       *name == "--decode_raw" ||
1082       *name == "--print_free_field_numbers") {
1083     // HACK:  These are the only flags that don't take a value.
1084     //   They probably should not be hard-coded like this but for now it's
1085     //   not worth doing better.
1086     return false;
1087   }
1088 
1089   // Next argument is the flag value.
1090   return true;
1091 }
1092 
1093 CommandLineInterface::ParseArgumentStatus
InterpretArgument(const string & name,const string & value)1094 CommandLineInterface::InterpretArgument(const string& name,
1095                                         const string& value) {
1096   if (name.empty()) {
1097     // Not a flag.  Just a filename.
1098     if (value.empty()) {
1099       std::cerr
1100           << "You seem to have passed an empty string as one of the "
1101              "arguments to " << executable_name_
1102           << ".  This is actually "
1103              "sort of hard to do.  Congrats.  Unfortunately it is not valid "
1104              "input so the program is going to die now." << std::endl;
1105       return PARSE_ARGUMENT_FAIL;
1106     }
1107 
1108     input_files_.push_back(value);
1109 
1110   } else if (name == "-I" || name == "--proto_path") {
1111     // Java's -classpath (and some other languages) delimits path components
1112     // with colons.  Let's accept that syntax too just to make things more
1113     // intuitive.
1114     vector<string> parts = Split(
1115         value, kPathSeparator, true);
1116 
1117     for (int i = 0; i < parts.size(); i++) {
1118       string virtual_path;
1119       string disk_path;
1120 
1121       string::size_type equals_pos = parts[i].find_first_of('=');
1122       if (equals_pos == string::npos) {
1123         virtual_path = "";
1124         disk_path = parts[i];
1125       } else {
1126         virtual_path = parts[i].substr(0, equals_pos);
1127         disk_path = parts[i].substr(equals_pos + 1);
1128       }
1129 
1130       if (disk_path.empty()) {
1131         std::cerr
1132             << "--proto_path passed empty directory name.  (Use \".\" for "
1133                "current directory.)" << std::endl;
1134         return PARSE_ARGUMENT_FAIL;
1135       }
1136 
1137       // Make sure disk path exists, warn otherwise.
1138       if (access(disk_path.c_str(), F_OK) < 0) {
1139         std::cerr << disk_path << ": warning: directory does not exist."
1140                   << std::endl;
1141       }
1142 
1143       // Don't use make_pair as the old/default standard library on Solaris
1144       // doesn't support it without explicit template parameters, which are
1145       // incompatible with C++0x's make_pair.
1146       proto_path_.push_back(pair<string, string>(virtual_path, disk_path));
1147     }
1148 
1149   } else if (name == "-o" || name == "--descriptor_set_out") {
1150     if (!descriptor_set_name_.empty()) {
1151       std::cerr << name << " may only be passed once." << std::endl;
1152       return PARSE_ARGUMENT_FAIL;
1153     }
1154     if (value.empty()) {
1155       std::cerr << name << " requires a non-empty value." << std::endl;
1156       return PARSE_ARGUMENT_FAIL;
1157     }
1158     if (mode_ != MODE_COMPILE) {
1159       std::cerr
1160           << "Cannot use --encode or --decode and generate descriptors at the "
1161              "same time." << std::endl;
1162       return PARSE_ARGUMENT_FAIL;
1163     }
1164     descriptor_set_name_ = value;
1165 
1166   } else if (name == "--dependency_out") {
1167     if (!dependency_out_name_.empty()) {
1168       std::cerr << name << " may only be passed once." << std::endl;
1169       return PARSE_ARGUMENT_FAIL;
1170     }
1171     if (value.empty()) {
1172       std::cerr << name << " requires a non-empty value." << std::endl;
1173       return PARSE_ARGUMENT_FAIL;
1174     }
1175     dependency_out_name_ = value;
1176 
1177   } else if (name == "--include_imports") {
1178     if (imports_in_descriptor_set_) {
1179       std::cerr << name << " may only be passed once." << std::endl;
1180       return PARSE_ARGUMENT_FAIL;
1181     }
1182     imports_in_descriptor_set_ = true;
1183 
1184   } else if (name == "--include_source_info") {
1185     if (source_info_in_descriptor_set_) {
1186       std::cerr << name << " may only be passed once." << std::endl;
1187       return PARSE_ARGUMENT_FAIL;
1188     }
1189     source_info_in_descriptor_set_ = true;
1190 
1191   } else if (name == "-h" || name == "--help") {
1192     PrintHelpText();
1193     return PARSE_ARGUMENT_DONE_AND_EXIT;  // Exit without running compiler.
1194 
1195   } else if (name == "--version") {
1196     if (!version_info_.empty()) {
1197       std::cout << version_info_ << std::endl;
1198     }
1199     cout << "libprotoc "
1200          << protobuf::internal::VersionString(GOOGLE_PROTOBUF_VERSION)
1201          << endl;
1202     return PARSE_ARGUMENT_DONE_AND_EXIT;  // Exit without running compiler.
1203 
1204   } else if (name == "--disallow_services") {
1205     disallow_services_ = true;
1206 
1207   } else if (name == "--encode" || name == "--decode" ||
1208              name == "--decode_raw") {
1209     if (mode_ != MODE_COMPILE) {
1210       std::cerr << "Only one of --encode and --decode can be specified."
1211                 << std::endl;
1212       return PARSE_ARGUMENT_FAIL;
1213     }
1214     if (!output_directives_.empty() || !descriptor_set_name_.empty()) {
1215       std::cerr << "Cannot use " << name
1216                 << " and generate code or descriptors at the same time."
1217                 << std::endl;
1218       return PARSE_ARGUMENT_FAIL;
1219     }
1220 
1221     mode_ = (name == "--encode") ? MODE_ENCODE : MODE_DECODE;
1222 
1223     if (value.empty() && name != "--decode_raw") {
1224       std::cerr << "Type name for " << name << " cannot be blank." << std::endl;
1225       if (name == "--decode") {
1226         std::cerr << "To decode an unknown message, use --decode_raw."
1227                   << std::endl;
1228       }
1229       return PARSE_ARGUMENT_FAIL;
1230     } else if (!value.empty() && name == "--decode_raw") {
1231       std::cerr << "--decode_raw does not take a parameter." << std::endl;
1232       return PARSE_ARGUMENT_FAIL;
1233     }
1234 
1235     codec_type_ = value;
1236 
1237   } else if (name == "--error_format") {
1238     if (value == "gcc") {
1239       error_format_ = ERROR_FORMAT_GCC;
1240     } else if (value == "msvs") {
1241       error_format_ = ERROR_FORMAT_MSVS;
1242     } else {
1243       std::cerr << "Unknown error format: " << value << std::endl;
1244       return PARSE_ARGUMENT_FAIL;
1245     }
1246 
1247   } else if (name == "--plugin") {
1248     if (plugin_prefix_.empty()) {
1249       std::cerr << "This compiler does not support plugins." << std::endl;
1250       return PARSE_ARGUMENT_FAIL;
1251     }
1252 
1253     string plugin_name;
1254     string path;
1255 
1256     string::size_type equals_pos = value.find_first_of('=');
1257     if (equals_pos == string::npos) {
1258       // Use the basename of the file.
1259       string::size_type slash_pos = value.find_last_of('/');
1260       if (slash_pos == string::npos) {
1261         plugin_name = value;
1262       } else {
1263         plugin_name = value.substr(slash_pos + 1);
1264       }
1265       path = value;
1266     } else {
1267       plugin_name = value.substr(0, equals_pos);
1268       path = value.substr(equals_pos + 1);
1269     }
1270 
1271     plugins_[plugin_name] = path;
1272 
1273   } else if (name == "--print_free_field_numbers") {
1274     if (mode_ != MODE_COMPILE) {
1275       std::cerr << "Cannot use " << name
1276                 << " and use --encode, --decode or print "
1277                 << "other info at the same time." << std::endl;
1278       return PARSE_ARGUMENT_FAIL;
1279     }
1280     if (!output_directives_.empty() || !descriptor_set_name_.empty()) {
1281       std::cerr << "Cannot use " << name
1282                 << " and generate code or descriptors at the same time."
1283                 << std::endl;
1284       return PARSE_ARGUMENT_FAIL;
1285     }
1286     mode_ = MODE_PRINT;
1287     print_mode_ = PRINT_FREE_FIELDS;
1288   } else {
1289     // Some other flag.  Look it up in the generators list.
1290     const GeneratorInfo* generator_info =
1291         FindOrNull(generators_by_flag_name_, name);
1292     if (generator_info == NULL &&
1293         (plugin_prefix_.empty() || !HasSuffixString(name, "_out"))) {
1294       // Check if it's a generator option flag.
1295       generator_info = FindOrNull(generators_by_option_name_, name);
1296       if (generator_info == NULL) {
1297         std::cerr << "Unknown flag: " << name << std::endl;
1298         return PARSE_ARGUMENT_FAIL;
1299       } else {
1300         string* parameters = &generator_parameters_[generator_info->flag_name];
1301         if (!parameters->empty()) {
1302           parameters->append(",");
1303         }
1304         parameters->append(value);
1305       }
1306     } else {
1307       // It's an output flag.  Add it to the output directives.
1308       if (mode_ != MODE_COMPILE) {
1309         std::cerr << "Cannot use --encode, --decode or print .proto info and "
1310                      "generate code at the same time." << std::endl;
1311         return PARSE_ARGUMENT_FAIL;
1312       }
1313 
1314       OutputDirective directive;
1315       directive.name = name;
1316       if (generator_info == NULL) {
1317         directive.generator = NULL;
1318       } else {
1319         directive.generator = generator_info->generator;
1320       }
1321 
1322       // Split value at ':' to separate the generator parameter from the
1323       // filename.  However, avoid doing this if the colon is part of a valid
1324       // Windows-style absolute path.
1325       string::size_type colon_pos = value.find_first_of(':');
1326       if (colon_pos == string::npos || IsWindowsAbsolutePath(value)) {
1327         directive.output_location = value;
1328       } else {
1329         directive.parameter = value.substr(0, colon_pos);
1330         directive.output_location = value.substr(colon_pos + 1);
1331       }
1332 
1333       output_directives_.push_back(directive);
1334     }
1335   }
1336 
1337   return PARSE_ARGUMENT_DONE_AND_CONTINUE;
1338 }
1339 
PrintHelpText()1340 void CommandLineInterface::PrintHelpText() {
1341   // Sorry for indentation here; line wrapping would be uglier.
1342   std::cerr <<
1343 "Usage: " << executable_name_ << " [OPTION] PROTO_FILES\n"
1344 "Parse PROTO_FILES and generate output based on the options given:\n"
1345 "  -IPATH, --proto_path=PATH   Specify the directory in which to search for\n"
1346 "                              imports.  May be specified multiple times;\n"
1347 "                              directories will be searched in order.  If not\n"
1348 "                              given, the current working directory is used.\n"
1349 "  --version                   Show version info and exit.\n"
1350 "  -h, --help                  Show this text and exit.\n"
1351 "  --encode=MESSAGE_TYPE       Read a text-format message of the given type\n"
1352 "                              from standard input and write it in binary\n"
1353 "                              to standard output.  The message type must\n"
1354 "                              be defined in PROTO_FILES or their imports.\n"
1355 "  --decode=MESSAGE_TYPE       Read a binary message of the given type from\n"
1356 "                              standard input and write it in text format\n"
1357 "                              to standard output.  The message type must\n"
1358 "                              be defined in PROTO_FILES or their imports.\n"
1359 "  --decode_raw                Read an arbitrary protocol message from\n"
1360 "                              standard input and write the raw tag/value\n"
1361 "                              pairs in text format to standard output.  No\n"
1362 "                              PROTO_FILES should be given when using this\n"
1363 "                              flag.\n"
1364 "  -oFILE,                     Writes a FileDescriptorSet (a protocol buffer,\n"
1365 "    --descriptor_set_out=FILE defined in descriptor.proto) containing all of\n"
1366 "                              the input files to FILE.\n"
1367 "  --include_imports           When using --descriptor_set_out, also include\n"
1368 "                              all dependencies of the input files in the\n"
1369 "                              set, so that the set is self-contained.\n"
1370 "  --include_source_info       When using --descriptor_set_out, do not strip\n"
1371 "                              SourceCodeInfo from the FileDescriptorProto.\n"
1372 "                              This results in vastly larger descriptors that\n"
1373 "                              include information about the original\n"
1374 "                              location of each decl in the source file as\n"
1375 "                              well as surrounding comments.\n"
1376 "  --dependency_out=FILE       Write a dependency output file in the format\n"
1377 "                              expected by make. This writes the transitive\n"
1378 "                              set of input file paths to FILE\n"
1379 "  --error_format=FORMAT       Set the format in which to print errors.\n"
1380 "                              FORMAT may be 'gcc' (the default) or 'msvs'\n"
1381 "                              (Microsoft Visual Studio format).\n"
1382 "  --print_free_field_numbers  Print the free field numbers of the messages\n"
1383 "                              defined in the given proto files. Groups share\n"
1384 "                              the same field number space with the parent \n"
1385 "                              message. Extension ranges are counted as \n"
1386 "                              occupied fields numbers.\n"
1387       << std::endl;
1388   if (!plugin_prefix_.empty()) {
1389     std::cerr <<
1390 "  --plugin=EXECUTABLE         Specifies a plugin executable to use.\n"
1391 "                              Normally, protoc searches the PATH for\n"
1392 "                              plugins, but you may specify additional\n"
1393 "                              executables not in the path using this flag.\n"
1394 "                              Additionally, EXECUTABLE may be of the form\n"
1395 "                              NAME=PATH, in which case the given plugin name\n"
1396 "                              is mapped to the given executable even if\n"
1397 "                              the executable's own name differs." << std::endl;
1398   }
1399 
1400   for (GeneratorMap::iterator iter = generators_by_flag_name_.begin();
1401        iter != generators_by_flag_name_.end(); ++iter) {
1402     // FIXME(kenton):  If the text is long enough it will wrap, which is ugly,
1403     //   but fixing this nicely (e.g. splitting on spaces) is probably more
1404     //   trouble than it's worth.
1405     std::cerr << "  " << iter->first << "=OUT_DIR "
1406               << string(19 - iter->first.size(), ' ')  // Spaces for alignment.
1407               << iter->second.help_text << std::endl;
1408   }
1409 }
1410 
GenerateOutput(const vector<const FileDescriptor * > & parsed_files,const OutputDirective & output_directive,GeneratorContext * generator_context)1411 bool CommandLineInterface::GenerateOutput(
1412     const vector<const FileDescriptor*>& parsed_files,
1413     const OutputDirective& output_directive,
1414     GeneratorContext* generator_context) {
1415   // Call the generator.
1416   string error;
1417   if (output_directive.generator == NULL) {
1418     // This is a plugin.
1419     GOOGLE_CHECK(HasPrefixString(output_directive.name, "--") &&
1420           HasSuffixString(output_directive.name, "_out"))
1421         << "Bad name for plugin generator: " << output_directive.name;
1422 
1423     // Strip the "--" and "_out" and add the plugin prefix.
1424     string plugin_name = plugin_prefix_ + "gen-" +
1425         output_directive.name.substr(2, output_directive.name.size() - 6);
1426 
1427     if (!GeneratePluginOutput(parsed_files, plugin_name,
1428                               output_directive.parameter,
1429                               generator_context, &error)) {
1430       std::cerr << output_directive.name << ": " << error << std::endl;
1431       return false;
1432     }
1433   } else {
1434     // Regular generator.
1435     string parameters = output_directive.parameter;
1436     if (!generator_parameters_[output_directive.name].empty()) {
1437       if (!parameters.empty()) {
1438         parameters.append(",");
1439       }
1440       parameters.append(generator_parameters_[output_directive.name]);
1441     }
1442     if (output_directive.generator->HasGenerateAll()) {
1443       if (!output_directive.generator->GenerateAll(
1444           parsed_files, parameters, generator_context, &error)) {
1445           // Generator returned an error.
1446           std::cerr << output_directive.name << ": "
1447                     << ": " << error << std::endl;
1448           return false;
1449       }
1450     } else {
1451       for (int i = 0; i < parsed_files.size(); i++) {
1452         if (!output_directive.generator->Generate(parsed_files[i], parameters,
1453                                                   generator_context, &error)) {
1454           // Generator returned an error.
1455           std::cerr << output_directive.name << ": " << parsed_files[i]->name()
1456                     << ": " << error << std::endl;
1457           return false;
1458         }
1459       }
1460     }
1461   }
1462 
1463   return true;
1464 }
1465 
GenerateDependencyManifestFile(const vector<const FileDescriptor * > & parsed_files,const GeneratorContextMap & output_directories,DiskSourceTree * source_tree)1466 bool CommandLineInterface::GenerateDependencyManifestFile(
1467     const vector<const FileDescriptor*>& parsed_files,
1468     const GeneratorContextMap& output_directories,
1469     DiskSourceTree* source_tree) {
1470   FileDescriptorSet file_set;
1471 
1472   set<const FileDescriptor*> already_seen;
1473   for (int i = 0; i < parsed_files.size(); i++) {
1474     GetTransitiveDependencies(parsed_files[i],
1475                               false,
1476                               false,
1477                               &already_seen,
1478                               file_set.mutable_file());
1479   }
1480 
1481   vector<string> output_filenames;
1482   for (GeneratorContextMap::const_iterator iter = output_directories.begin();
1483        iter != output_directories.end(); ++iter) {
1484     const string& location = iter->first;
1485     GeneratorContextImpl* directory = iter->second;
1486     vector<string> relative_output_filenames;
1487     directory->GetOutputFilenames(&relative_output_filenames);
1488     for (int i = 0; i < relative_output_filenames.size(); i++) {
1489       string output_filename = location + relative_output_filenames[i];
1490       if (output_filename.compare(0, 2, "./") == 0) {
1491         output_filename = output_filename.substr(2);
1492       }
1493       output_filenames.push_back(output_filename);
1494     }
1495   }
1496 
1497   int fd;
1498   do {
1499     fd = open(dependency_out_name_.c_str(),
1500               O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
1501   } while (fd < 0 && errno == EINTR);
1502 
1503   if (fd < 0) {
1504     perror(dependency_out_name_.c_str());
1505     return false;
1506   }
1507 
1508   io::FileOutputStream out(fd);
1509   io::Printer printer(&out, '$');
1510 
1511   for (int i = 0; i < output_filenames.size(); i++) {
1512     printer.Print(output_filenames[i].c_str());
1513     if (i == output_filenames.size() - 1) {
1514       printer.Print(":");
1515     } else {
1516       printer.Print(" \\\n");
1517     }
1518   }
1519 
1520   for (int i = 0; i < file_set.file_size(); i++) {
1521     const FileDescriptorProto& file = file_set.file(i);
1522     const string& virtual_file = file.name();
1523     string disk_file;
1524     if (source_tree &&
1525         source_tree->VirtualFileToDiskFile(virtual_file, &disk_file)) {
1526       printer.Print(" $disk_file$", "disk_file", disk_file);
1527       if (i < file_set.file_size() - 1) printer.Print("\\\n");
1528     } else {
1529       std::cerr << "Unable to identify path for file " << virtual_file
1530                 << std::endl;
1531       return false;
1532     }
1533   }
1534 
1535   return true;
1536 }
1537 
GeneratePluginOutput(const vector<const FileDescriptor * > & parsed_files,const string & plugin_name,const string & parameter,GeneratorContext * generator_context,string * error)1538 bool CommandLineInterface::GeneratePluginOutput(
1539     const vector<const FileDescriptor*>& parsed_files,
1540     const string& plugin_name,
1541     const string& parameter,
1542     GeneratorContext* generator_context,
1543     string* error) {
1544   CodeGeneratorRequest request;
1545   CodeGeneratorResponse response;
1546 
1547   // Build the request.
1548   if (!parameter.empty()) {
1549     request.set_parameter(parameter);
1550   }
1551 
1552   set<const FileDescriptor*> already_seen;
1553   for (int i = 0; i < parsed_files.size(); i++) {
1554     request.add_file_to_generate(parsed_files[i]->name());
1555     GetTransitiveDependencies(parsed_files[i],
1556                               true,  // Include json_name for plugins.
1557                               true,  // Include source code info.
1558                               &already_seen, request.mutable_proto_file());
1559   }
1560 
1561   // Invoke the plugin.
1562   Subprocess subprocess;
1563 
1564   if (plugins_.count(plugin_name) > 0) {
1565     subprocess.Start(plugins_[plugin_name], Subprocess::EXACT_NAME);
1566   } else {
1567     subprocess.Start(plugin_name, Subprocess::SEARCH_PATH);
1568   }
1569 
1570   string communicate_error;
1571   if (!subprocess.Communicate(request, &response, &communicate_error)) {
1572     *error = strings::Substitute("$0: $1", plugin_name, communicate_error);
1573     return false;
1574   }
1575 
1576   // Write the files.  We do this even if there was a generator error in order
1577   // to match the behavior of a compiled-in generator.
1578   google::protobuf::scoped_ptr<io::ZeroCopyOutputStream> current_output;
1579   for (int i = 0; i < response.file_size(); i++) {
1580     const CodeGeneratorResponse::File& output_file = response.file(i);
1581 
1582     if (!output_file.insertion_point().empty()) {
1583       // Open a file for insert.
1584       // We reset current_output to NULL first so that the old file is closed
1585       // before the new one is opened.
1586       current_output.reset();
1587       current_output.reset(generator_context->OpenForInsert(
1588           output_file.name(), output_file.insertion_point()));
1589     } else if (!output_file.name().empty()) {
1590       // Starting a new file.  Open it.
1591       // We reset current_output to NULL first so that the old file is closed
1592       // before the new one is opened.
1593       current_output.reset();
1594       current_output.reset(generator_context->Open(output_file.name()));
1595     } else if (current_output == NULL) {
1596       *error = strings::Substitute(
1597         "$0: First file chunk returned by plugin did not specify a file name.",
1598         plugin_name);
1599       return false;
1600     }
1601 
1602     // Use CodedOutputStream for convenience; otherwise we'd need to provide
1603     // our own buffer-copying loop.
1604     io::CodedOutputStream writer(current_output.get());
1605     writer.WriteString(output_file.content());
1606   }
1607 
1608   // Check for errors.
1609   if (!response.error().empty()) {
1610     // Generator returned an error.
1611     *error = response.error();
1612     return false;
1613   }
1614 
1615   return true;
1616 }
1617 
EncodeOrDecode(const DescriptorPool * pool)1618 bool CommandLineInterface::EncodeOrDecode(const DescriptorPool* pool) {
1619   // Look up the type.
1620   const Descriptor* type = pool->FindMessageTypeByName(codec_type_);
1621   if (type == NULL) {
1622     std::cerr << "Type not defined: " << codec_type_ << std::endl;
1623     return false;
1624   }
1625 
1626   DynamicMessageFactory dynamic_factory(pool);
1627   google::protobuf::scoped_ptr<Message> message(dynamic_factory.GetPrototype(type)->New());
1628 
1629   if (mode_ == MODE_ENCODE) {
1630     SetFdToTextMode(STDIN_FILENO);
1631     SetFdToBinaryMode(STDOUT_FILENO);
1632   } else {
1633     SetFdToBinaryMode(STDIN_FILENO);
1634     SetFdToTextMode(STDOUT_FILENO);
1635   }
1636 
1637   io::FileInputStream in(STDIN_FILENO);
1638   io::FileOutputStream out(STDOUT_FILENO);
1639 
1640   if (mode_ == MODE_ENCODE) {
1641     // Input is text.
1642     ErrorPrinter error_collector(error_format_);
1643     TextFormat::Parser parser;
1644     parser.RecordErrorsTo(&error_collector);
1645     parser.AllowPartialMessage(true);
1646 
1647     if (!parser.Parse(&in, message.get())) {
1648       std::cerr << "Failed to parse input." << std::endl;
1649       return false;
1650     }
1651   } else {
1652     // Input is binary.
1653     if (!message->ParsePartialFromZeroCopyStream(&in)) {
1654       std::cerr << "Failed to parse input." << std::endl;
1655       return false;
1656     }
1657   }
1658 
1659   if (!message->IsInitialized()) {
1660     std::cerr << "warning:  Input message is missing required fields:  "
1661               << message->InitializationErrorString() << std::endl;
1662   }
1663 
1664   if (mode_ == MODE_ENCODE) {
1665     // Output is binary.
1666     if (!message->SerializePartialToZeroCopyStream(&out)) {
1667       std::cerr << "output: I/O error." << std::endl;
1668       return false;
1669     }
1670   } else {
1671     // Output is text.
1672     if (!TextFormat::Print(*message, &out)) {
1673       std::cerr << "output: I/O error." << std::endl;
1674       return false;
1675     }
1676   }
1677 
1678   return true;
1679 }
1680 
WriteDescriptorSet(const vector<const FileDescriptor * > parsed_files)1681 bool CommandLineInterface::WriteDescriptorSet(
1682     const vector<const FileDescriptor*> parsed_files) {
1683   FileDescriptorSet file_set;
1684 
1685   if (imports_in_descriptor_set_) {
1686     set<const FileDescriptor*> already_seen;
1687     for (int i = 0; i < parsed_files.size(); i++) {
1688       GetTransitiveDependencies(parsed_files[i],
1689                                 true,  // Include json_name
1690                                 source_info_in_descriptor_set_,
1691                                 &already_seen, file_set.mutable_file());
1692     }
1693   } else {
1694     set<const FileDescriptor*> already_seen;
1695     for (int i = 0; i < parsed_files.size(); i++) {
1696       if (!already_seen.insert(parsed_files[i]).second) {
1697         continue;
1698       }
1699       FileDescriptorProto* file_proto = file_set.add_file();
1700       parsed_files[i]->CopyTo(file_proto);
1701       parsed_files[i]->CopyJsonNameTo(file_proto);
1702       if (source_info_in_descriptor_set_) {
1703         parsed_files[i]->CopySourceCodeInfoTo(file_proto);
1704       }
1705     }
1706   }
1707 
1708   int fd;
1709   do {
1710     fd = open(descriptor_set_name_.c_str(),
1711               O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
1712   } while (fd < 0 && errno == EINTR);
1713 
1714   if (fd < 0) {
1715     perror(descriptor_set_name_.c_str());
1716     return false;
1717   }
1718 
1719   io::FileOutputStream out(fd);
1720   if (!file_set.SerializeToZeroCopyStream(&out)) {
1721     std::cerr << descriptor_set_name_ << ": " << strerror(out.GetErrno())
1722               << std::endl;
1723     out.Close();
1724     return false;
1725   }
1726   if (!out.Close()) {
1727     std::cerr << descriptor_set_name_ << ": " << strerror(out.GetErrno())
1728               << std::endl;
1729     return false;
1730   }
1731 
1732   return true;
1733 }
1734 
GetTransitiveDependencies(const FileDescriptor * file,bool include_json_name,bool include_source_code_info,set<const FileDescriptor * > * already_seen,RepeatedPtrField<FileDescriptorProto> * output)1735 void CommandLineInterface::GetTransitiveDependencies(
1736     const FileDescriptor* file,
1737     bool include_json_name,
1738     bool include_source_code_info,
1739     set<const FileDescriptor*>* already_seen,
1740     RepeatedPtrField<FileDescriptorProto>* output) {
1741   if (!already_seen->insert(file).second) {
1742     // Already saw this file.  Skip.
1743     return;
1744   }
1745 
1746   // Add all dependencies.
1747   for (int i = 0; i < file->dependency_count(); i++) {
1748     GetTransitiveDependencies(file->dependency(i),
1749                               include_json_name,
1750                               include_source_code_info,
1751                               already_seen, output);
1752   }
1753 
1754   // Add this file.
1755   FileDescriptorProto* new_descriptor = output->Add();
1756   file->CopyTo(new_descriptor);
1757   if (include_json_name) {
1758     file->CopyJsonNameTo(new_descriptor);
1759   }
1760   if (include_source_code_info) {
1761     file->CopySourceCodeInfoTo(new_descriptor);
1762   }
1763 }
1764 
1765 namespace {
1766 
1767 // Utility function for PrintFreeFieldNumbers.
1768 // Stores occupied ranges into the ranges parameter, and next level of sub
1769 // message types into the nested_messages parameter.  The FieldRange is left
1770 // inclusive, right exclusive. i.e. [a, b).
1771 //
1772 // Nested Messages:
1773 // Note that it only stores the nested message type, iff the nested type is
1774 // either a direct child of the given descriptor, or the nested type is a
// descendant of the given descriptor and all the nodes between the
1776 // nested type and the given descriptor are group types. e.g.
1777 //
1778 // message Foo {
1779 //   message Bar {
1780 //     message NestedBar {}
1781 //   }
1782 //   group Baz = 1 {
1783 //     group NestedBazGroup = 2 {
1784 //       message Quz {
1785 //         message NestedQuz {}
1786 //       }
1787 //     }
1788 //     message NestedBaz {}
1789 //   }
1790 // }
1791 //
1792 // In this case, Bar, Quz and NestedBaz will be added into the nested types.
1793 // Since free field numbers of group types will not be printed, this makes sure
1794 // the nested message types in groups will not be dropped. The nested_messages
1795 // parameter will contain the direct children (when groups are ignored in the
1796 // tree) of the given descriptor for the caller to traverse. The declaration
1797 // order of the nested messages is also preserved.
1798 typedef pair<int, int> FieldRange;
GatherOccupiedFieldRanges(const Descriptor * descriptor,set<FieldRange> * ranges,vector<const Descriptor * > * nested_messages)1799 void GatherOccupiedFieldRanges(const Descriptor* descriptor,
1800                                set<FieldRange>* ranges,
1801                                vector<const Descriptor*>* nested_messages) {
1802   set<const Descriptor*> groups;
1803   for (int i = 0; i < descriptor->field_count(); ++i) {
1804     const FieldDescriptor* fd = descriptor->field(i);
1805     ranges->insert(FieldRange(fd->number(), fd->number() + 1));
1806     if (fd->type() == FieldDescriptor::TYPE_GROUP) {
1807       groups.insert(fd->message_type());
1808     }
1809   }
1810   for (int i = 0; i < descriptor->extension_range_count(); ++i) {
1811     ranges->insert(FieldRange(descriptor->extension_range(i)->start,
1812                               descriptor->extension_range(i)->end));
1813   }
1814   for (int i = 0; i < descriptor->reserved_range_count(); ++i) {
1815     ranges->insert(FieldRange(descriptor->reserved_range(i)->start,
1816                               descriptor->reserved_range(i)->end));
1817   }
1818   // Handle the nested messages/groups in declaration order to make it
1819   // post-order strict.
1820   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
1821     const Descriptor* nested_desc = descriptor->nested_type(i);
1822     if (groups.find(nested_desc) != groups.end()) {
1823       GatherOccupiedFieldRanges(nested_desc, ranges, nested_messages);
1824     } else {
1825       nested_messages->push_back(nested_desc);
1826     }
1827   }
1828 }
1829 
1830 // Utility function for PrintFreeFieldNumbers.
1831 // Actually prints the formatted free field numbers for given message name and
1832 // occupied ranges.
FormatFreeFieldNumbers(const string & name,const set<FieldRange> & ranges)1833 void FormatFreeFieldNumbers(const string& name,
1834                             const set<FieldRange>& ranges) {
1835   string output;
1836   StringAppendF(&output, "%-35s free:", name.c_str());
1837   int next_free_number = 1;
1838   for (set<FieldRange>::const_iterator i = ranges.begin();
1839        i != ranges.end(); ++i) {
1840     // This happens when groups re-use parent field numbers, in which
1841     // case we skip the FieldRange entirely.
1842     if (next_free_number >= i->second) continue;
1843 
1844     if (next_free_number < i->first) {
1845       if (next_free_number + 1 == i->first) {
1846         // Singleton
1847         StringAppendF(&output, " %d", next_free_number);
1848       } else {
1849         // Range
1850         StringAppendF(&output, " %d-%d", next_free_number, i->first - 1);
1851       }
1852     }
1853     next_free_number = i->second;
1854   }
1855   if (next_free_number <= FieldDescriptor::kMaxNumber) {
1856     StringAppendF(&output, " %d-INF", next_free_number);
1857   }
1858   std::cout << output << std::endl;
1859 }
1860 
1861 }  // namespace
1862 
PrintFreeFieldNumbers(const Descriptor * descriptor)1863 void CommandLineInterface::PrintFreeFieldNumbers(
1864     const Descriptor* descriptor) {
1865   set<FieldRange> ranges;
1866   vector<const Descriptor*> nested_messages;
1867   GatherOccupiedFieldRanges(descriptor, &ranges, &nested_messages);
1868 
1869   for (int i = 0; i < nested_messages.size(); ++i) {
1870     PrintFreeFieldNumbers(nested_messages[i]);
1871   }
1872   FormatFreeFieldNumbers(descriptor->full_name(), ranges);
1873 }
1874 
1875 
1876 
1877 }  // namespace compiler
1878 }  // namespace protobuf
1879 }  // namespace google
1880