1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: kenton@google.com (Kenton Varda)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #include <google/protobuf/compiler/command_line_interface.h>
36 #include <google/protobuf/stubs/platform_macros.h>
37 
38 #include <stdio.h>
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <fcntl.h>
42 #ifdef _MSC_VER
43 #include <io.h>
44 #include <direct.h>
45 #else
46 #include <unistd.h>
47 #endif
48 #include <errno.h>
49 #include <fstream>
50 #include <iostream>
51 #include <ctype.h>
52 
53 #include <limits.h> //For PATH_MAX
54 
55 #include <memory>
56 #ifndef _SHARED_PTR_H
57 #include <google/protobuf/stubs/shared_ptr.h>
58 #endif
59 
60 #ifdef __APPLE__
61 #include <mach-o/dyld.h>
62 #endif
63 
64 #include <google/protobuf/stubs/common.h>
65 #include <google/protobuf/stubs/stringprintf.h>
66 #include <google/protobuf/compiler/importer.h>
67 #include <google/protobuf/compiler/code_generator.h>
68 #include <google/protobuf/compiler/plugin.pb.h>
69 #include <google/protobuf/compiler/subprocess.h>
70 #include <google/protobuf/compiler/zip_writer.h>
71 #include <google/protobuf/descriptor.h>
72 #include <google/protobuf/text_format.h>
73 #include <google/protobuf/dynamic_message.h>
74 #include <google/protobuf/io/coded_stream.h>
75 #include <google/protobuf/io/zero_copy_stream_impl.h>
76 #include <google/protobuf/io/printer.h>
77 #include <google/protobuf/stubs/logging.h>
78 #include <google/protobuf/stubs/strutil.h>
79 #include <google/protobuf/stubs/substitute.h>
80 #include <google/protobuf/stubs/map_util.h>
81 #include <google/protobuf/stubs/stl_util.h>
82 
83 
84 namespace google {
85 namespace protobuf {
86 namespace compiler {
87 
// Compatibility shims for Windows builds: supply POSIX names that the code
// below uses but that may be missing there.
#if defined(_WIN32)
// Discards the POSIX mode argument so that calls written as
// mkdir(path, mode) expand to the one-argument form.
#define mkdir(name, mode) mkdir(name)
#ifndef W_OK
#define W_OK 02  // not defined by MSVC for whatever reason
#endif
#ifndef F_OK
#define F_OK 00  // not defined by MSVC for whatever reason
#endif
#ifndef STDIN_FILENO
#define STDIN_FILENO 0
#endif
#ifndef STDOUT_FILENO
#define STDOUT_FILENO 1
#endif
#endif
103 
// O_BINARY is OR'ed into the open() flags used below, so make sure it has a
// value on every platform: the underscore-prefixed spelling when that exists,
// otherwise 0 (a no-op flag).
#ifndef O_BINARY
#ifdef _O_BINARY
#define O_BINARY _O_BINARY
#else
#define O_BINARY 0     // If this isn't defined, the platform doesn't need it.
#endif
#endif
111 
112 namespace {
// kPathSeparator is ";" on native Windows builds and ":" on every other
// platform (Cygwin included).
static const char* kPathSeparator =
#if defined(_WIN32) && !defined(__CYGWIN__)
    ";";
#else
    ":";
#endif
118 
// Returns true if the text looks like a Windows-style absolute path, starting
// with a drive letter.  Example:  "C:\foo".  The path must contain exactly one
// ':' (the one after the drive letter).  Always returns false when not built
// for Windows or Cygwin.  TODO(kenton):  Share this with copy in importer.cc?
static bool IsWindowsAbsolutePath(const string& text) {
#if defined(_WIN32) || defined(__CYGWIN__)
  // isalpha() has undefined behavior for negative arguments other than EOF,
  // and plain char may be signed, so convert to unsigned char first.
  return text.size() >= 3 && text[1] == ':' &&
         isalpha(static_cast<unsigned char>(text[0])) &&
         (text[2] == '/' || text[2] == '\\') &&
         text.find_last_of(':') == 1;
#else
  return false;
#endif
}
132 
// Switches the file descriptor |fd| to text mode.  Only Windows builds do
// anything here; elsewhere the function is a no-op.
void SetFdToTextMode(int fd) {
#ifdef _WIN32
  const int previous_mode = _setmode(fd, _O_TEXT);
  if (previous_mode == -1) {
    // Not expected to happen; warn and carry on.
    GOOGLE_LOG(WARNING) << "_setmode(" << fd << ", _O_TEXT): " << strerror(errno);
  }
#else
  (void)fd;  // Text and binary are the same on non-Windows platforms.
#endif
}
142 
// Switches the file descriptor |fd| to binary mode.  Only Windows builds do
// anything here; elsewhere the function is a no-op.
void SetFdToBinaryMode(int fd) {
#ifdef _WIN32
  const int previous_mode = _setmode(fd, _O_BINARY);
  if (previous_mode == -1) {
    // Not expected to happen; warn and carry on.
    GOOGLE_LOG(WARNING) << "_setmode(" << fd << ", _O_BINARY): " << strerror(errno);
  }
#else
  (void)fd;  // Text and binary are the same on non-Windows platforms.
#endif
}
152 
// Appends '/' to |*path| unless the path is empty or already ends with '/'.
void AddTrailingSlash(string* path) {
  if (path->empty()) return;
  if (*path->rbegin() == '/') return;
  *path += '/';
}
158 
// Returns true if |path| is empty or access(path, F_OK) succeeds.  Otherwise
// prints "<path>: <system error text>" to stderr and returns false.
bool VerifyDirectoryExists(const string& path) {
  const bool exists = path.empty() || access(path.c_str(), F_OK) != -1;
  if (!exists) {
    std::cerr << path << ": " << strerror(errno) << std::endl;
  }
  return exists;
}
169 
170 // Try to create the parent directory of the given file, creating the parent's
171 // parent if necessary, and so on.  The full file name is actually
172 // (prefix + filename), but we assume |prefix| already exists and only create
173 // directories listed in |filename|.
TryCreateParentDirectory(const string & prefix,const string & filename)174 bool TryCreateParentDirectory(const string& prefix, const string& filename) {
175   // Recursively create parent directories to the output file.
176   vector<string> parts = Split(filename, "/", true);
177   string path_so_far = prefix;
178   for (int i = 0; i < parts.size() - 1; i++) {
179     path_so_far += parts[i];
180     if (mkdir(path_so_far.c_str(), 0777) != 0) {
181       if (errno != EEXIST) {
182         std::cerr << filename << ": while trying to create directory "
183                   << path_so_far << ": " << strerror(errno) << std::endl;
184         return false;
185       }
186     }
187     path_so_far += '/';
188   }
189 
190   return true;
191 }
192 
// Get the absolute path of this protoc binary.
//
// On success stores the path in |*path| and returns true.  Returns false and
// leaves |*path| untouched if the platform-specific lookup fails or yields an
// empty result.
bool GetProtocAbsolutePath(string* path) {
#ifdef _WIN32
  char buffer[MAX_PATH];
  int len = GetModuleFileNameA(NULL, buffer, MAX_PATH);
#elif __APPLE__
  char buffer[PATH_MAX];
  int len = 0;

  char dirtybuffer[PATH_MAX];
  uint32_t size = sizeof(dirtybuffer);
  // realpath() returns NULL on failure and then leaves |buffer| unspecified,
  // so only measure |buffer| when both calls succeeded.
  if (_NSGetExecutablePath(dirtybuffer, &size) == 0 &&
      realpath(dirtybuffer, buffer) != NULL) {
    len = strlen(buffer);
  }
#else
  char buffer[PATH_MAX];
  // readlink() does not NUL-terminate |buffer|; it returns the number of
  // bytes stored, or -1 on error.
  int len = readlink("/proc/self/exe", buffer, PATH_MAX);
#endif
  if (len <= 0) {
    return false;
  }
  path->assign(buffer, len);
  return true;
}
219 
// Whether |path| is a place where the well-known type protos are installed,
// judged by whether <path>/google/protobuf/descriptor.proto exists.
bool IsInstalledProtoPath(const string& path) {
  // descriptor.proto is used as the marker for the whole set.
  const string descriptor_proto = path + "/google/protobuf/descriptor.proto";
  if (access(descriptor_proto.c_str(), F_OK) == -1) {
    return false;
  }
  return true;
}
227 
228 // Add the paths where google/protobuf/descritor.proto and other well-known
229 // type protos are installed.
AddDefaultProtoPaths(vector<pair<string,string>> * paths)230 void AddDefaultProtoPaths(vector<pair<string, string> >* paths) {
231   // TODO(xiaofeng): The code currently only checks relative paths of where
232   // the protoc binary is installed. We probably should make it handle more
233   // cases than that.
234   string path;
235   if (!GetProtocAbsolutePath(&path)) {
236     return;
237   }
238   // Strip the binary name.
239   size_t pos = path.find_last_of("/\\");
240   if (pos == string::npos || pos == 0) {
241     return;
242   }
243   path = path.substr(0, pos);
244   // Check the binary's directory.
245   if (IsInstalledProtoPath(path)) {
246     paths->push_back(pair<string, string>("", path));
247     return;
248   }
249   // Check if there is an include subdirectory.
250   if (IsInstalledProtoPath(path + "/include")) {
251     paths->push_back(pair<string, string>("", path + "/include"));
252     return;
253   }
254   // Check if the upper level directory has an "include" subdirectory.
255   pos = path.find_last_of("/\\");
256   if (pos == string::npos || pos == 0) {
257     return;
258   }
259   path = path.substr(0, pos);
260   if (IsInstalledProtoPath(path + "/include")) {
261     paths->push_back(pair<string, string>("", path + "/include"));
262     return;
263   }
264 }
265 }  // namespace
266 
267 // A MultiFileErrorCollector that prints errors to stderr.
268 class CommandLineInterface::ErrorPrinter : public MultiFileErrorCollector,
269                                            public io::ErrorCollector {
270  public:
ErrorPrinter(ErrorFormat format,DiskSourceTree * tree=NULL)271   ErrorPrinter(ErrorFormat format, DiskSourceTree *tree = NULL)
272     : format_(format), tree_(tree) {}
~ErrorPrinter()273   ~ErrorPrinter() {}
274 
275   // implements MultiFileErrorCollector ------------------------------
AddError(const string & filename,int line,int column,const string & message)276   void AddError(const string& filename, int line, int column,
277                 const string& message) {
278     AddErrorOrWarning(filename, line, column, message, "error", std::cerr);
279   }
280 
AddWarning(const string & filename,int line,int column,const string & message)281   void AddWarning(const string& filename, int line, int column,
282                   const string& message) {
283     AddErrorOrWarning(filename, line, column, message, "warning", std::clog);
284   }
285 
286   // implements io::ErrorCollector -----------------------------------
AddError(int line,int column,const string & message)287   void AddError(int line, int column, const string& message) {
288     AddError("input", line, column, message);
289   }
290 
AddWarning(int line,int column,const string & message)291   void AddWarning(int line, int column, const string& message) {
292     AddErrorOrWarning("input", line, column, message, "warning", std::clog);
293   }
294 
295  private:
AddErrorOrWarning(const string & filename,int line,int column,const string & message,const string & type,ostream & out)296   void AddErrorOrWarning(
297       const string& filename, int line, int column,
298       const string& message, const string& type, ostream& out) {
299     // Print full path when running under MSVS
300     string dfile;
301     if (format_ == CommandLineInterface::ERROR_FORMAT_MSVS &&
302         tree_ != NULL &&
303         tree_->VirtualFileToDiskFile(filename, &dfile)) {
304       out << dfile;
305     } else {
306       out << filename;
307     }
308 
309     // Users typically expect 1-based line/column numbers, so we add 1
310     // to each here.
311     if (line != -1) {
312       // Allow for both GCC- and Visual-Studio-compatible output.
313       switch (format_) {
314         case CommandLineInterface::ERROR_FORMAT_GCC:
315           out << ":" << (line + 1) << ":" << (column + 1);
316           break;
317         case CommandLineInterface::ERROR_FORMAT_MSVS:
318           out << "(" << (line + 1) << ") : "
319               << type << " in column=" << (column + 1);
320           break;
321       }
322     }
323 
324     if (type == "warning") {
325       out << ": warning: " << message << std::endl;
326     } else {
327       out << ": " << message << std::endl;
328     }
329   }
330 
331   const ErrorFormat format_;
332   DiskSourceTree *tree_;
333 };
334 
335 // -------------------------------------------------------------------
336 
// A GeneratorContext implementation that buffers files in memory, then dumps
// them all to disk on demand.
class CommandLineInterface::GeneratorContextImpl : public GeneratorContext {
 public:
  // |parsed_files| is stored by reference (see parsed_files_), so the vector
  // must outlive this object.
  GeneratorContextImpl(const vector<const FileDescriptor*>& parsed_files);
  ~GeneratorContextImpl();

  // Write all files in the directory to disk at the given output location,
  // which must end in a '/'.
  // Returns false if an error was recorded earlier or a write fails.
  bool WriteAllToDisk(const string& prefix);

  // Write the contents of this directory to a ZIP-format archive with the
  // given name.
  bool WriteAllToZip(const string& filename);

  // Add a boilerplate META-INF/MANIFEST.MF file as required by the Java JAR
  // format, unless one has already been written.
  void AddJarManifest();

  // Get name of all output files.
  // The names are appended to |*output_filenames|.
  void GetOutputFilenames(vector<string>* output_filenames);

  // implements GeneratorContext --------------------------------------
  // Each Open*() call returns a newly allocated in-memory stream.
  io::ZeroCopyOutputStream* Open(const string& filename);
  io::ZeroCopyOutputStream* OpenForAppend(const string& filename);
  io::ZeroCopyOutputStream* OpenForInsert(
      const string& filename, const string& insertion_point);
  // Copies the list of parsed files given at construction into |*output|.
  void ListParsedFiles(vector<const FileDescriptor*>* output) {
    *output = parsed_files_;
  }

 private:
  // MemoryOutputStream stores its data into files_ and sets had_error_.
  friend class MemoryOutputStream;

  // map instead of hash_map so that files are written in order (good when
  // writing zips).
  // Keys are output file names; values are heap-allocated file contents.
  map<string, string*> files_;
  // Reference to the caller's vector; not owned.
  const vector<const FileDescriptor*>& parsed_files_;
  // Set when a stream reports a problem; makes the WriteAllTo*() methods
  // fail without writing anything.
  bool had_error_;
};
377 
// A ZeroCopyOutputStream that accumulates its output in a private string and,
// when destroyed, stores that string in the owning GeneratorContextImpl's
// file map -- as a new file, as an append to an existing file, or spliced in
// at an insertion point, depending on how the stream was constructed.
class CommandLineInterface::MemoryOutputStream
    : public io::ZeroCopyOutputStream {
 public:
  // Stream for a regular Open() (append_mode == false) or OpenForAppend()
  // (append_mode == true).
  MemoryOutputStream(GeneratorContextImpl* directory, const string& filename,
                     bool append_mode);
  // Stream for an OpenForInsert() at |insertion_point| in |filename|.
  MemoryOutputStream(GeneratorContextImpl* directory, const string& filename,
                     const string& insertion_point);
  virtual ~MemoryOutputStream();

  // implements ZeroCopyOutputStream ---------------------------------
  // All three calls are forwarded to the inner StringOutputStream.
  virtual bool Next(void** data, int* size) { return inner_->Next(data, size); }
  virtual void BackUp(int count)            {        inner_->BackUp(count);    }
  virtual int64 ByteCount() const           { return inner_->ByteCount();      }

 private:
  // Where to insert the string when it's done.
  GeneratorContextImpl* directory_;
  string filename_;
  // Empty unless this stream was created for an insertion.
  string insertion_point_;

  // The string we're building.
  string data_;

  // Whether we should append the output stream to the existing file.
  bool append_mode_;

  // StringOutputStream writing to data_.
  google::protobuf::scoped_ptr<io::StringOutputStream> inner_;
};
407 
408 // -------------------------------------------------------------------
409 
// Creates a context with no buffered files and no recorded error.
// |parsed_files| is bound to a reference member rather than copied, so the
// caller's vector must stay alive for as long as this context is used.
CommandLineInterface::GeneratorContextImpl::GeneratorContextImpl(
    const vector<const FileDescriptor*>& parsed_files)
    : parsed_files_(parsed_files),
      had_error_(false) {
}
415 
// Releases the buffered files.  The mapped values in files_ are allocated
// with new by this class and by MemoryOutputStream, and are handed to
// STLDeleteValues() here for cleanup.
CommandLineInterface::GeneratorContextImpl::~GeneratorContextImpl() {
  STLDeleteValues(&files_);
}
419 
WriteAllToDisk(const string & prefix)420 bool CommandLineInterface::GeneratorContextImpl::WriteAllToDisk(
421     const string& prefix) {
422   if (had_error_) {
423     return false;
424   }
425 
426   if (!VerifyDirectoryExists(prefix)) {
427     return false;
428   }
429 
430   for (map<string, string*>::const_iterator iter = files_.begin();
431        iter != files_.end(); ++iter) {
432     const string& relative_filename = iter->first;
433     const char* data = iter->second->data();
434     int size = iter->second->size();
435 
436     if (!TryCreateParentDirectory(prefix, relative_filename)) {
437       return false;
438     }
439     string filename = prefix + relative_filename;
440 
441     // Create the output file.
442     int file_descriptor;
443     do {
444       file_descriptor =
445         open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
446     } while (file_descriptor < 0 && errno == EINTR);
447 
448     if (file_descriptor < 0) {
449       int error = errno;
450       std::cerr << filename << ": " << strerror(error);
451       return false;
452     }
453 
454     // Write the file.
455     while (size > 0) {
456       int write_result;
457       do {
458         write_result = write(file_descriptor, data, size);
459       } while (write_result < 0 && errno == EINTR);
460 
461       if (write_result <= 0) {
462         // Write error.
463 
464         // FIXME(kenton):  According to the man page, if write() returns zero,
465         //   there was no error; write() simply did not write anything.  It's
466         //   unclear under what circumstances this might happen, but presumably
467         //   errno won't be set in this case.  I am confused as to how such an
468         //   event should be handled.  For now I'm treating it as an error,
469         //   since retrying seems like it could lead to an infinite loop.  I
470         //   suspect this never actually happens anyway.
471 
472         if (write_result < 0) {
473           int error = errno;
474           std::cerr << filename << ": write: " << strerror(error);
475         } else {
476           std::cerr << filename << ": write() returned zero?" << std::endl;
477         }
478         return false;
479       }
480 
481       data += write_result;
482       size -= write_result;
483     }
484 
485     if (close(file_descriptor) != 0) {
486       int error = errno;
487       std::cerr << filename << ": close: " << strerror(error);
488       return false;
489     }
490   }
491 
492   return true;
493 }
494 
WriteAllToZip(const string & filename)495 bool CommandLineInterface::GeneratorContextImpl::WriteAllToZip(
496     const string& filename) {
497   if (had_error_) {
498     return false;
499   }
500 
501   // Create the output file.
502   int file_descriptor;
503   do {
504     file_descriptor =
505       open(filename.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
506   } while (file_descriptor < 0 && errno == EINTR);
507 
508   if (file_descriptor < 0) {
509     int error = errno;
510     std::cerr << filename << ": " << strerror(error);
511     return false;
512   }
513 
514   // Create the ZipWriter
515   io::FileOutputStream stream(file_descriptor);
516   ZipWriter zip_writer(&stream);
517 
518   for (map<string, string*>::const_iterator iter = files_.begin();
519        iter != files_.end(); ++iter) {
520     zip_writer.Write(iter->first, *iter->second);
521   }
522 
523   zip_writer.WriteDirectory();
524 
525   if (stream.GetErrno() != 0) {
526     std::cerr << filename << ": " << strerror(stream.GetErrno()) << std::endl;
527   }
528 
529   if (!stream.Close()) {
530     std::cerr << filename << ": " << strerror(stream.GetErrno()) << std::endl;
531   }
532 
533   return true;
534 }
535 
AddJarManifest()536 void CommandLineInterface::GeneratorContextImpl::AddJarManifest() {
537   string** map_slot = &files_["META-INF/MANIFEST.MF"];
538   if (*map_slot == NULL) {
539     *map_slot = new string(
540         "Manifest-Version: 1.0\n"
541         "Created-By: 1.6.0 (protoc)\n"
542         "\n");
543   }
544 }
545 
GetOutputFilenames(vector<string> * output_filenames)546 void CommandLineInterface::GeneratorContextImpl::GetOutputFilenames(
547     vector<string>* output_filenames) {
548   for (map<string, string*>::iterator iter = files_.begin();
549        iter != files_.end(); ++iter) {
550     output_filenames->push_back(iter->first);
551   }
552 }
553 
Open(const string & filename)554 io::ZeroCopyOutputStream* CommandLineInterface::GeneratorContextImpl::Open(
555     const string& filename) {
556   return new MemoryOutputStream(this, filename, false);
557 }
558 
559 io::ZeroCopyOutputStream*
OpenForAppend(const string & filename)560 CommandLineInterface::GeneratorContextImpl::OpenForAppend(
561     const string& filename) {
562   return new MemoryOutputStream(this, filename, true);
563 }
564 
565 io::ZeroCopyOutputStream*
OpenForInsert(const string & filename,const string & insertion_point)566 CommandLineInterface::GeneratorContextImpl::OpenForInsert(
567     const string& filename, const string& insertion_point) {
568   return new MemoryOutputStream(this, filename, insertion_point);
569 }
570 
571 // -------------------------------------------------------------------
572 
// Creates a stream for a regular write (append_mode == false) or an append
// (append_mode == true) to |filename| in |directory|.  insertion_point_ is
// left empty, which is how the destructor tells this apart from an insertion.
CommandLineInterface::MemoryOutputStream::MemoryOutputStream(
    GeneratorContextImpl* directory, const string& filename, bool append_mode)
    : directory_(directory),
      filename_(filename),
      append_mode_(append_mode),
      inner_(new io::StringOutputStream(&data_)) {
}
580 
MemoryOutputStream(GeneratorContextImpl * directory,const string & filename,const string & insertion_point)581 CommandLineInterface::MemoryOutputStream::MemoryOutputStream(
582     GeneratorContextImpl* directory, const string& filename,
583     const string& insertion_point)
584     : directory_(directory),
585       filename_(filename),
586       insertion_point_(insertion_point),
587       inner_(new io::StringOutputStream(&data_)) {
588 }
589 
// Commits the buffered data to the owning GeneratorContextImpl.
//
// With an empty insertion_point_ the data becomes the contents of filename_
// (or is appended to the existing contents in append mode); writing an
// existing file without append mode is an error.  With a non-empty
// insertion_point_ the data is spliced into the existing file just before the
// "@@protoc_insertion_point(NAME)" marker, re-indented to match the marker's
// line.  Errors are printed to stderr and recorded in directory_->had_error_.
CommandLineInterface::MemoryOutputStream::~MemoryOutputStream() {
  // Make sure all data has been written.
  inner_.reset();

  // Insert into the directory.
  // Note that operator[] creates a NULL entry if the file is not known yet.
  string** map_slot = &directory_->files_[filename_];

  if (insertion_point_.empty()) {
    // This was just a regular Open().
    if (*map_slot != NULL) {
      if (append_mode_) {
        (*map_slot)->append(data_);
      } else {
        std::cerr << filename_ << ": Tried to write the same file twice."
                  << std::endl;
        directory_->had_error_ = true;
      }
      return;
    }

    // First write of this file: move the buffer into a new map entry.
    *map_slot = new string;
    (*map_slot)->swap(data_);
  } else {
    // This was an OpenForInsert().

    // If the data doesn't end with a clean line break, add one.
    if (!data_.empty() && data_[data_.size() - 1] != '\n') {
      data_.push_back('\n');
    }

    // Find the file we are going to insert into.
    // (On failure the NULL entry created by operator[] above stays in the
    // map.)
    if (*map_slot == NULL) {
      std::cerr << filename_
                << ": Tried to insert into file that doesn't exist."
                << std::endl;
      directory_->had_error_ = true;
      return;
    }
    string* target = *map_slot;

    // Find the insertion point.
    string magic_string = strings::Substitute(
        "@@protoc_insertion_point($0)", insertion_point_);
    string::size_type pos = target->find(magic_string);

    if (pos == string::npos) {
      std::cerr << filename_ << ": insertion point \"" << insertion_point_
                << "\" not found." << std::endl;
      directory_->had_error_ = true;
      return;
    }

    // An inline marker has "/*" starting three characters before the marker
    // text, i.e. "/*", one more character, then the marker.
    if ((pos > 3) && (target->substr(pos - 3, 2) == "/*")) {
      // Support for inline "/* @@protoc_insertion_point() */"
      pos = pos - 3;
    } else {
      // Seek backwards to the beginning of the line, which is where we will
      // insert the data.  Note that this has the effect of pushing the
      // insertion point down, so the data is inserted before it.  This is
      // intentional because it means that multiple insertions at the same point
      // will end up in the expected order in the final output.
      pos = target->find_last_of('\n', pos);
      if (pos == string::npos) {
        // Insertion point is on the first line.
        pos = 0;
      } else {
        // Advance to character after '\n'.
        ++pos;
      }
    }

    // Extract indent.
    // This is the run of spaces and tabs starting at |pos|; it is empty for
    // an inline marker, because |pos| then points at "/*".
    string indent_(*target, pos, target->find_first_not_of(" \t", pos) - pos);

    if (indent_.empty()) {
      // No indent.  This makes things easier.
      target->insert(pos, data_);
    } else {
      // Calculate how much space we need.
      // Every line of data_ ends in '\n' and gets one copy of the indent.
      int indent_size = 0;
      for (int i = 0; i < data_.size(); i++) {
        if (data_[i] == '\n') indent_size += indent_.size();
      }

      // Make a hole for it.
      target->insert(pos, data_.size() + indent_size, '\0');

      // Now copy in the data.
      string::size_type data_pos = 0;
      char* target_ptr = string_as_array(target) + pos;
      while (data_pos < data_.size()) {
        // Copy indent.
        memcpy(target_ptr, indent_.data(), indent_.size());
        target_ptr += indent_.size();

        // Copy line from data_.
        // We already guaranteed that data_ ends with a newline (above), so this
        // search can't fail.
        string::size_type line_length =
            data_.find_first_of('\n', data_pos) + 1 - data_pos;
        memcpy(target_ptr, data_.data() + data_pos, line_length);
        target_ptr += line_length;
        data_pos += line_length;
      }

      // Sanity check: the hole must have been filled exactly.
      GOOGLE_CHECK_EQ(target_ptr,
          string_as_array(target) + pos + data_.size() + indent_size);
    }
  }
}
700 
701 // ===================================================================
702 
// Sets the defaults in effect before any arguments are parsed: compile mode,
// no print mode, GCC-style error messages, and every boolean option off.
CommandLineInterface::CommandLineInterface()
  : mode_(MODE_COMPILE),
    print_mode_(PRINT_NONE),
    error_format_(ERROR_FORMAT_GCC),
    imports_in_descriptor_set_(false),
    source_info_in_descriptor_set_(false),
    disallow_services_(false),
    inputs_are_proto_path_relative_(false) {}
// Nothing to release explicitly; the body is intentionally empty.
CommandLineInterface::~CommandLineInterface() {}
712 
RegisterGenerator(const string & flag_name,CodeGenerator * generator,const string & help_text)713 void CommandLineInterface::RegisterGenerator(const string& flag_name,
714                                              CodeGenerator* generator,
715                                              const string& help_text) {
716   GeneratorInfo info;
717   info.flag_name = flag_name;
718   info.generator = generator;
719   info.help_text = help_text;
720   generators_by_flag_name_[flag_name] = info;
721 }
722 
RegisterGenerator(const string & flag_name,const string & option_flag_name,CodeGenerator * generator,const string & help_text)723 void CommandLineInterface::RegisterGenerator(const string& flag_name,
724                                              const string& option_flag_name,
725                                              CodeGenerator* generator,
726                                              const string& help_text) {
727   GeneratorInfo info;
728   info.flag_name = flag_name;
729   info.option_flag_name = option_flag_name;
730   info.generator = generator;
731   info.help_text = help_text;
732   generators_by_flag_name_[flag_name] = info;
733   generators_by_option_name_[option_flag_name] = info;
734 }
735 
// Records |exe_name_prefix| as the plugin prefix (plugin_prefix_).
// NOTE(review): presumably a non-empty prefix is what enables plugin lookup
// elsewhere in this class -- confirm against the code that reads
// plugin_prefix_.
void CommandLineInterface::AllowPlugins(const string& exe_name_prefix) {
  plugin_prefix_ = exe_name_prefix;
}
739 
Run(int argc,const char * const argv[])740 int CommandLineInterface::Run(int argc, const char* const argv[]) {
741   Clear();
742   switch (ParseArguments(argc, argv)) {
743     case PARSE_ARGUMENT_DONE_AND_EXIT:
744       return 0;
745     case PARSE_ARGUMENT_FAIL:
746       return 1;
747     case PARSE_ARGUMENT_DONE_AND_CONTINUE:
748       break;
749   }
750 
751   AddDefaultProtoPaths(&proto_path_);
752 
753   // Set up the source tree.
754   DiskSourceTree source_tree;
755   for (int i = 0; i < proto_path_.size(); i++) {
756     source_tree.MapPath(proto_path_[i].first, proto_path_[i].second);
757   }
758 
759   // Map input files to virtual paths if necessary.
760   if (!inputs_are_proto_path_relative_) {
761     if (!MakeInputsBeProtoPathRelative(&source_tree)) {
762       return 1;
763     }
764   }
765 
766   // Allocate the Importer.
767   ErrorPrinter error_collector(error_format_, &source_tree);
768   Importer importer(&source_tree, &error_collector);
769 
770   vector<const FileDescriptor*> parsed_files;
771 
772   // Parse each file.
773   for (int i = 0; i < input_files_.size(); i++) {
774     // Import the file.
775     importer.AddUnusedImportTrackFile(input_files_[i]);
776     const FileDescriptor* parsed_file = importer.Import(input_files_[i]);
777     importer.ClearUnusedImportTrackFiles();
778     if (parsed_file == NULL) return 1;
779     parsed_files.push_back(parsed_file);
780 
781     // Enforce --disallow_services.
782     if (disallow_services_ && parsed_file->service_count() > 0) {
783       cerr << parsed_file->name() << ": This file contains services, but "
784               "--disallow_services was used." << endl;
785       return 1;
786     }
787   }
788 
789   // We construct a separate GeneratorContext for each output location.  Note
790   // that two code generators may output to the same location, in which case
791   // they should share a single GeneratorContext so that OpenForInsert() works.
792   GeneratorContextMap output_directories;
793 
794   // Generate output.
795   if (mode_ == MODE_COMPILE) {
796     for (int i = 0; i < output_directives_.size(); i++) {
797       string output_location = output_directives_[i].output_location;
798       if (!HasSuffixString(output_location, ".zip") &&
799           !HasSuffixString(output_location, ".jar")) {
800         AddTrailingSlash(&output_location);
801       }
802       GeneratorContextImpl** map_slot = &output_directories[output_location];
803 
804       if (*map_slot == NULL) {
805         // First time we've seen this output location.
806         *map_slot = new GeneratorContextImpl(parsed_files);
807       }
808 
809       if (!GenerateOutput(parsed_files, output_directives_[i], *map_slot)) {
810         STLDeleteValues(&output_directories);
811         return 1;
812       }
813     }
814   }
815 
816   // Write all output to disk.
817   for (GeneratorContextMap::iterator iter = output_directories.begin();
818        iter != output_directories.end(); ++iter) {
819     const string& location = iter->first;
820     GeneratorContextImpl* directory = iter->second;
821     if (HasSuffixString(location, "/")) {
822       if (!directory->WriteAllToDisk(location)) {
823         STLDeleteValues(&output_directories);
824         return 1;
825       }
826     } else {
827       if (HasSuffixString(location, ".jar")) {
828         directory->AddJarManifest();
829       }
830 
831       if (!directory->WriteAllToZip(location)) {
832         STLDeleteValues(&output_directories);
833         return 1;
834       }
835     }
836   }
837 
838   if (!dependency_out_name_.empty()) {
839     if (!GenerateDependencyManifestFile(parsed_files, output_directories,
840                                         &source_tree)) {
841       return 1;
842     }
843   }
844 
845   STLDeleteValues(&output_directories);
846 
847   if (!descriptor_set_name_.empty()) {
848     if (!WriteDescriptorSet(parsed_files)) {
849       return 1;
850     }
851   }
852 
853   if (mode_ == MODE_ENCODE || mode_ == MODE_DECODE) {
854     if (codec_type_.empty()) {
855       // HACK:  Define an EmptyMessage type to use for decoding.
856       DescriptorPool pool;
857       FileDescriptorProto file;
858       file.set_name("empty_message.proto");
859       file.add_message_type()->set_name("EmptyMessage");
860       GOOGLE_CHECK(pool.BuildFile(file) != NULL);
861       codec_type_ = "EmptyMessage";
862       if (!EncodeOrDecode(&pool)) {
863         return 1;
864       }
865     } else {
866       if (!EncodeOrDecode(importer.pool())) {
867         return 1;
868       }
869     }
870   }
871 
872   if (mode_ == MODE_PRINT) {
873     switch (print_mode_) {
874       case PRINT_FREE_FIELDS:
875         for (int i = 0; i < parsed_files.size(); ++i) {
876           const FileDescriptor* fd = parsed_files[i];
877           for (int j = 0; j < fd->message_type_count(); ++j) {
878             PrintFreeFieldNumbers(fd->message_type(j));
879           }
880         }
881         break;
882       case PRINT_NONE:
883         GOOGLE_LOG(ERROR) << "If the code reaches here, it usually means a bug of "
884                      "flag parsing in the CommonadLineInterface.";
885         return 1;
886 
887       // Do not add a default case.
888     }
889   }
890 
891   return 0;
892 }
893 
Clear()894 void CommandLineInterface::Clear() {
895   // Clear all members that are set by Run().  Note that we must not clear
896   // members which are set by other methods before Run() is called.
897   executable_name_.clear();
898   proto_path_.clear();
899   input_files_.clear();
900   output_directives_.clear();
901   codec_type_.clear();
902   descriptor_set_name_.clear();
903   dependency_out_name_.clear();
904 
905   mode_ = MODE_COMPILE;
906   print_mode_ = PRINT_NONE;
907   imports_in_descriptor_set_ = false;
908   source_info_in_descriptor_set_ = false;
909   disallow_services_ = false;
910 }
911 
MakeInputsBeProtoPathRelative(DiskSourceTree * source_tree)912 bool CommandLineInterface::MakeInputsBeProtoPathRelative(
913     DiskSourceTree* source_tree) {
914   for (int i = 0; i < input_files_.size(); i++) {
915     string virtual_file, shadowing_disk_file;
916     switch (source_tree->DiskFileToVirtualFile(
917         input_files_[i], &virtual_file, &shadowing_disk_file)) {
918       case DiskSourceTree::SUCCESS:
919         input_files_[i] = virtual_file;
920         break;
921       case DiskSourceTree::SHADOWED:
922         std::cerr << input_files_[i]
923                   << ": Input is shadowed in the --proto_path by \""
924                   << shadowing_disk_file
925                   << "\".  Either use the latter file as your input or reorder "
926                      "the --proto_path so that the former file's location "
927                      "comes first." << std::endl;
928         return false;
929       case DiskSourceTree::CANNOT_OPEN:
930         std::cerr << input_files_[i] << ": " << strerror(errno) << std::endl;
931         return false;
932       case DiskSourceTree::NO_MAPPING:
933         // First check if the file exists at all.
934         if (access(input_files_[i].c_str(), F_OK) < 0) {
935           // File does not even exist.
936           std::cerr << input_files_[i] << ": " << strerror(ENOENT) << std::endl;
937         } else {
938           std::cerr
939               << input_files_[i]
940               << ": File does not reside within any path "
941                  "specified using --proto_path (or -I).  You must specify a "
942                  "--proto_path which encompasses this file.  Note that the "
943                  "proto_path must be an exact prefix of the .proto file "
944                  "names -- protoc is too dumb to figure out when two paths "
945                  "(e.g. absolute and relative) are equivalent (it's harder "
946                  "than you think)." << std::endl;
947         }
948         return false;
949     }
950   }
951 
952   return true;
953 }
954 
955 
956 CommandLineInterface::ParseArgumentStatus
ParseArguments(int argc,const char * const argv[])957 CommandLineInterface::ParseArguments(int argc, const char* const argv[]) {
958   executable_name_ = argv[0];
959 
960   vector<string> arguments;
961   for (int i = 1; i < argc; ++i) {
962     arguments.push_back(argv[i]);
963   }
964 
965   // Iterate through all arguments and parse them.
966   for (int i = 0; i < arguments.size(); ++i) {
967     string name, value;
968 
969     if (ParseArgument(arguments[i].c_str(), &name, &value)) {
970       // Returned true => Use the next argument as the flag value.
971       if (i + 1 == arguments.size() || arguments[i + 1][0] == '-') {
972         std::cerr << "Missing value for flag: " << name << std::endl;
973         if (name == "--decode") {
974           std::cerr << "To decode an unknown message, use --decode_raw."
975                     << std::endl;
976         }
977         return PARSE_ARGUMENT_FAIL;
978       } else {
979         ++i;
980         value = arguments[i];
981       }
982     }
983 
984     ParseArgumentStatus status = InterpretArgument(name, value);
985     if (status != PARSE_ARGUMENT_DONE_AND_CONTINUE)
986       return status;
987   }
988 
989   // If no --proto_path was given, use the current working directory.
990   if (proto_path_.empty()) {
991     // Don't use make_pair as the old/default standard library on Solaris
992     // doesn't support it without explicit template parameters, which are
993     // incompatible with C++0x's make_pair.
994     proto_path_.push_back(pair<string, string>("", "."));
995   }
996 
997   // Check some errror cases.
998   bool decoding_raw = (mode_ == MODE_DECODE) && codec_type_.empty();
999   if (decoding_raw && !input_files_.empty()) {
1000     std::cerr << "When using --decode_raw, no input files should be given."
1001               << std::endl;
1002     return PARSE_ARGUMENT_FAIL;
1003   } else if (!decoding_raw && input_files_.empty()) {
1004     std::cerr << "Missing input file." << std::endl;
1005     return PARSE_ARGUMENT_FAIL;
1006   }
1007   if (mode_ == MODE_COMPILE && output_directives_.empty() &&
1008       descriptor_set_name_.empty()) {
1009     std::cerr << "Missing output directives." << std::endl;
1010     return PARSE_ARGUMENT_FAIL;
1011   }
1012   if (mode_ != MODE_COMPILE && !dependency_out_name_.empty()) {
1013     std::cerr << "Can only use --dependency_out=FILE when generating code."
1014               << std::endl;
1015     return PARSE_ARGUMENT_FAIL;
1016   }
1017   if (!dependency_out_name_.empty() && input_files_.size() > 1) {
1018     std::cerr
1019         << "Can only process one input file when using --dependency_out=FILE."
1020         << std::endl;
1021     return PARSE_ARGUMENT_FAIL;
1022   }
1023   if (imports_in_descriptor_set_ && descriptor_set_name_.empty()) {
1024     std::cerr << "--include_imports only makes sense when combined with "
1025                  "--descriptor_set_out." << std::endl;
1026   }
1027   if (source_info_in_descriptor_set_ && descriptor_set_name_.empty()) {
1028     std::cerr << "--include_source_info only makes sense when combined with "
1029                  "--descriptor_set_out." << std::endl;
1030   }
1031 
1032   return PARSE_ARGUMENT_DONE_AND_CONTINUE;
1033 }
1034 
ParseArgument(const char * arg,string * name,string * value)1035 bool CommandLineInterface::ParseArgument(const char* arg,
1036                                          string* name, string* value) {
1037   bool parsed_value = false;
1038 
1039   if (arg[0] != '-') {
1040     // Not a flag.
1041     name->clear();
1042     parsed_value = true;
1043     *value = arg;
1044   } else if (arg[1] == '-') {
1045     // Two dashes:  Multi-character name, with '=' separating name and
1046     //   value.
1047     const char* equals_pos = strchr(arg, '=');
1048     if (equals_pos != NULL) {
1049       *name = string(arg, equals_pos - arg);
1050       *value = equals_pos + 1;
1051       parsed_value = true;
1052     } else {
1053       *name = arg;
1054     }
1055   } else {
1056     // One dash:  One-character name, all subsequent characters are the
1057     //   value.
1058     if (arg[1] == '\0') {
1059       // arg is just "-".  We treat this as an input file, except that at
1060       // present this will just lead to a "file not found" error.
1061       name->clear();
1062       *value = arg;
1063       parsed_value = true;
1064     } else {
1065       *name = string(arg, 2);
1066       *value = arg + 2;
1067       parsed_value = !value->empty();
1068     }
1069   }
1070 
1071   // Need to return true iff the next arg should be used as the value for this
1072   // one, false otherwise.
1073 
1074   if (parsed_value) {
1075     // We already parsed a value for this flag.
1076     return false;
1077   }
1078 
1079   if (*name == "-h" || *name == "--help" ||
1080       *name == "--disallow_services" ||
1081       *name == "--include_imports" ||
1082       *name == "--include_source_info" ||
1083       *name == "--version" ||
1084       *name == "--decode_raw" ||
1085       *name == "--print_free_field_numbers") {
1086     // HACK:  These are the only flags that don't take a value.
1087     //   They probably should not be hard-coded like this but for now it's
1088     //   not worth doing better.
1089     return false;
1090   }
1091 
1092   // Next argument is the flag value.
1093   return true;
1094 }
1095 
1096 CommandLineInterface::ParseArgumentStatus
InterpretArgument(const string & name,const string & value)1097 CommandLineInterface::InterpretArgument(const string& name,
1098                                         const string& value) {
1099   if (name.empty()) {
1100     // Not a flag.  Just a filename.
1101     if (value.empty()) {
1102       std::cerr
1103           << "You seem to have passed an empty string as one of the "
1104              "arguments to " << executable_name_
1105           << ".  This is actually "
1106              "sort of hard to do.  Congrats.  Unfortunately it is not valid "
1107              "input so the program is going to die now." << std::endl;
1108       return PARSE_ARGUMENT_FAIL;
1109     }
1110 
1111     input_files_.push_back(value);
1112 
1113   } else if (name == "-I" || name == "--proto_path") {
1114     // Java's -classpath (and some other languages) delimits path components
1115     // with colons.  Let's accept that syntax too just to make things more
1116     // intuitive.
1117     vector<string> parts = Split(
1118         value, kPathSeparator, true);
1119 
1120     for (int i = 0; i < parts.size(); i++) {
1121       string virtual_path;
1122       string disk_path;
1123 
1124       string::size_type equals_pos = parts[i].find_first_of('=');
1125       if (equals_pos == string::npos) {
1126         virtual_path = "";
1127         disk_path = parts[i];
1128       } else {
1129         virtual_path = parts[i].substr(0, equals_pos);
1130         disk_path = parts[i].substr(equals_pos + 1);
1131       }
1132 
1133       if (disk_path.empty()) {
1134         std::cerr
1135             << "--proto_path passed empty directory name.  (Use \".\" for "
1136                "current directory.)" << std::endl;
1137         return PARSE_ARGUMENT_FAIL;
1138       }
1139 
1140       // Make sure disk path exists, warn otherwise.
1141       if (access(disk_path.c_str(), F_OK) < 0) {
1142         // Try the original path; it may have just happed to have a '=' in it.
1143         if (access(parts[i].c_str(), F_OK) < 0) {
1144           cerr << disk_path << ": warning: directory does not exist." << endl;
1145         } else {
1146           virtual_path = "";
1147           disk_path = parts[i];
1148         }
1149       }
1150 
1151       // Don't use make_pair as the old/default standard library on Solaris
1152       // doesn't support it without explicit template parameters, which are
1153       // incompatible with C++0x's make_pair.
1154       proto_path_.push_back(pair<string, string>(virtual_path, disk_path));
1155     }
1156 
1157   } else if (name == "-o" || name == "--descriptor_set_out") {
1158     if (!descriptor_set_name_.empty()) {
1159       std::cerr << name << " may only be passed once." << std::endl;
1160       return PARSE_ARGUMENT_FAIL;
1161     }
1162     if (value.empty()) {
1163       std::cerr << name << " requires a non-empty value." << std::endl;
1164       return PARSE_ARGUMENT_FAIL;
1165     }
1166     if (mode_ != MODE_COMPILE) {
1167       std::cerr
1168           << "Cannot use --encode or --decode and generate descriptors at the "
1169              "same time." << std::endl;
1170       return PARSE_ARGUMENT_FAIL;
1171     }
1172     descriptor_set_name_ = value;
1173 
1174   } else if (name == "--dependency_out") {
1175     if (!dependency_out_name_.empty()) {
1176       std::cerr << name << " may only be passed once." << std::endl;
1177       return PARSE_ARGUMENT_FAIL;
1178     }
1179     if (value.empty()) {
1180       std::cerr << name << " requires a non-empty value." << std::endl;
1181       return PARSE_ARGUMENT_FAIL;
1182     }
1183     dependency_out_name_ = value;
1184 
1185   } else if (name == "--include_imports") {
1186     if (imports_in_descriptor_set_) {
1187       std::cerr << name << " may only be passed once." << std::endl;
1188       return PARSE_ARGUMENT_FAIL;
1189     }
1190     imports_in_descriptor_set_ = true;
1191 
1192   } else if (name == "--include_source_info") {
1193     if (source_info_in_descriptor_set_) {
1194       std::cerr << name << " may only be passed once." << std::endl;
1195       return PARSE_ARGUMENT_FAIL;
1196     }
1197     source_info_in_descriptor_set_ = true;
1198 
1199   } else if (name == "-h" || name == "--help") {
1200     PrintHelpText();
1201     return PARSE_ARGUMENT_DONE_AND_EXIT;  // Exit without running compiler.
1202 
1203   } else if (name == "--version") {
1204     if (!version_info_.empty()) {
1205       std::cout << version_info_ << std::endl;
1206     }
1207     cout << "libprotoc "
1208          << protobuf::internal::VersionString(GOOGLE_PROTOBUF_VERSION)
1209          << endl;
1210     return PARSE_ARGUMENT_DONE_AND_EXIT;  // Exit without running compiler.
1211 
1212   } else if (name == "--disallow_services") {
1213     disallow_services_ = true;
1214 
1215   } else if (name == "--encode" || name == "--decode" ||
1216              name == "--decode_raw") {
1217     if (mode_ != MODE_COMPILE) {
1218       std::cerr << "Only one of --encode and --decode can be specified."
1219                 << std::endl;
1220       return PARSE_ARGUMENT_FAIL;
1221     }
1222     if (!output_directives_.empty() || !descriptor_set_name_.empty()) {
1223       std::cerr << "Cannot use " << name
1224                 << " and generate code or descriptors at the same time."
1225                 << std::endl;
1226       return PARSE_ARGUMENT_FAIL;
1227     }
1228 
1229     mode_ = (name == "--encode") ? MODE_ENCODE : MODE_DECODE;
1230 
1231     if (value.empty() && name != "--decode_raw") {
1232       std::cerr << "Type name for " << name << " cannot be blank." << std::endl;
1233       if (name == "--decode") {
1234         std::cerr << "To decode an unknown message, use --decode_raw."
1235                   << std::endl;
1236       }
1237       return PARSE_ARGUMENT_FAIL;
1238     } else if (!value.empty() && name == "--decode_raw") {
1239       std::cerr << "--decode_raw does not take a parameter." << std::endl;
1240       return PARSE_ARGUMENT_FAIL;
1241     }
1242 
1243     codec_type_ = value;
1244 
1245   } else if (name == "--error_format") {
1246     if (value == "gcc") {
1247       error_format_ = ERROR_FORMAT_GCC;
1248     } else if (value == "msvs") {
1249       error_format_ = ERROR_FORMAT_MSVS;
1250     } else {
1251       std::cerr << "Unknown error format: " << value << std::endl;
1252       return PARSE_ARGUMENT_FAIL;
1253     }
1254 
1255   } else if (name == "--plugin") {
1256     if (plugin_prefix_.empty()) {
1257       std::cerr << "This compiler does not support plugins." << std::endl;
1258       return PARSE_ARGUMENT_FAIL;
1259     }
1260 
1261     string plugin_name;
1262     string path;
1263 
1264     string::size_type equals_pos = value.find_first_of('=');
1265     if (equals_pos == string::npos) {
1266       // Use the basename of the file.
1267       string::size_type slash_pos = value.find_last_of('/');
1268       if (slash_pos == string::npos) {
1269         plugin_name = value;
1270       } else {
1271         plugin_name = value.substr(slash_pos + 1);
1272       }
1273       path = value;
1274     } else {
1275       plugin_name = value.substr(0, equals_pos);
1276       path = value.substr(equals_pos + 1);
1277     }
1278 
1279     plugins_[plugin_name] = path;
1280 
1281   } else if (name == "--print_free_field_numbers") {
1282     if (mode_ != MODE_COMPILE) {
1283       std::cerr << "Cannot use " << name
1284                 << " and use --encode, --decode or print "
1285                 << "other info at the same time." << std::endl;
1286       return PARSE_ARGUMENT_FAIL;
1287     }
1288     if (!output_directives_.empty() || !descriptor_set_name_.empty()) {
1289       std::cerr << "Cannot use " << name
1290                 << " and generate code or descriptors at the same time."
1291                 << std::endl;
1292       return PARSE_ARGUMENT_FAIL;
1293     }
1294     mode_ = MODE_PRINT;
1295     print_mode_ = PRINT_FREE_FIELDS;
1296   } else {
1297     // Some other flag.  Look it up in the generators list.
1298     const GeneratorInfo* generator_info =
1299         FindOrNull(generators_by_flag_name_, name);
1300     if (generator_info == NULL &&
1301         (plugin_prefix_.empty() || !HasSuffixString(name, "_out"))) {
1302       // Check if it's a generator option flag.
1303       generator_info = FindOrNull(generators_by_option_name_, name);
1304       if (generator_info == NULL) {
1305         std::cerr << "Unknown flag: " << name << std::endl;
1306         return PARSE_ARGUMENT_FAIL;
1307       } else {
1308         string* parameters = &generator_parameters_[generator_info->flag_name];
1309         if (!parameters->empty()) {
1310           parameters->append(",");
1311         }
1312         parameters->append(value);
1313       }
1314     } else {
1315       // It's an output flag.  Add it to the output directives.
1316       if (mode_ != MODE_COMPILE) {
1317         std::cerr << "Cannot use --encode, --decode or print .proto info and "
1318                      "generate code at the same time." << std::endl;
1319         return PARSE_ARGUMENT_FAIL;
1320       }
1321 
1322       OutputDirective directive;
1323       directive.name = name;
1324       if (generator_info == NULL) {
1325         directive.generator = NULL;
1326       } else {
1327         directive.generator = generator_info->generator;
1328       }
1329 
1330       // Split value at ':' to separate the generator parameter from the
1331       // filename.  However, avoid doing this if the colon is part of a valid
1332       // Windows-style absolute path.
1333       string::size_type colon_pos = value.find_first_of(':');
1334       if (colon_pos == string::npos || IsWindowsAbsolutePath(value)) {
1335         directive.output_location = value;
1336       } else {
1337         directive.parameter = value.substr(0, colon_pos);
1338         directive.output_location = value.substr(colon_pos + 1);
1339       }
1340 
1341       output_directives_.push_back(directive);
1342     }
1343   }
1344 
1345   return PARSE_ARGUMENT_DONE_AND_CONTINUE;
1346 }
1347 
PrintHelpText()1348 void CommandLineInterface::PrintHelpText() {
1349   // Sorry for indentation here; line wrapping would be uglier.
1350   std::cerr <<
1351 "Usage: " << executable_name_ << " [OPTION] PROTO_FILES\n"
1352 "Parse PROTO_FILES and generate output based on the options given:\n"
1353 "  -IPATH, --proto_path=PATH   Specify the directory in which to search for\n"
1354 "                              imports.  May be specified multiple times;\n"
1355 "                              directories will be searched in order.  If not\n"
1356 "                              given, the current working directory is used.\n"
1357 "  --version                   Show version info and exit.\n"
1358 "  -h, --help                  Show this text and exit.\n"
1359 "  --encode=MESSAGE_TYPE       Read a text-format message of the given type\n"
1360 "                              from standard input and write it in binary\n"
1361 "                              to standard output.  The message type must\n"
1362 "                              be defined in PROTO_FILES or their imports.\n"
1363 "  --decode=MESSAGE_TYPE       Read a binary message of the given type from\n"
1364 "                              standard input and write it in text format\n"
1365 "                              to standard output.  The message type must\n"
1366 "                              be defined in PROTO_FILES or their imports.\n"
1367 "  --decode_raw                Read an arbitrary protocol message from\n"
1368 "                              standard input and write the raw tag/value\n"
1369 "                              pairs in text format to standard output.  No\n"
1370 "                              PROTO_FILES should be given when using this\n"
1371 "                              flag.\n"
1372 "  -oFILE,                     Writes a FileDescriptorSet (a protocol buffer,\n"
1373 "    --descriptor_set_out=FILE defined in descriptor.proto) containing all of\n"
1374 "                              the input files to FILE.\n"
1375 "  --include_imports           When using --descriptor_set_out, also include\n"
1376 "                              all dependencies of the input files in the\n"
1377 "                              set, so that the set is self-contained.\n"
1378 "  --include_source_info       When using --descriptor_set_out, do not strip\n"
1379 "                              SourceCodeInfo from the FileDescriptorProto.\n"
1380 "                              This results in vastly larger descriptors that\n"
1381 "                              include information about the original\n"
1382 "                              location of each decl in the source file as\n"
1383 "                              well as surrounding comments.\n"
1384 "  --dependency_out=FILE       Write a dependency output file in the format\n"
1385 "                              expected by make. This writes the transitive\n"
1386 "                              set of input file paths to FILE\n"
1387 "  --error_format=FORMAT       Set the format in which to print errors.\n"
1388 "                              FORMAT may be 'gcc' (the default) or 'msvs'\n"
1389 "                              (Microsoft Visual Studio format).\n"
1390 "  --print_free_field_numbers  Print the free field numbers of the messages\n"
1391 "                              defined in the given proto files. Groups share\n"
1392 "                              the same field number space with the parent \n"
1393 "                              message. Extension ranges are counted as \n"
1394 "                              occupied fields numbers.\n"
1395       << std::endl;
1396   if (!plugin_prefix_.empty()) {
1397     std::cerr <<
1398 "  --plugin=EXECUTABLE         Specifies a plugin executable to use.\n"
1399 "                              Normally, protoc searches the PATH for\n"
1400 "                              plugins, but you may specify additional\n"
1401 "                              executables not in the path using this flag.\n"
1402 "                              Additionally, EXECUTABLE may be of the form\n"
1403 "                              NAME=PATH, in which case the given plugin name\n"
1404 "                              is mapped to the given executable even if\n"
1405 "                              the executable's own name differs." << std::endl;
1406   }
1407 
1408   for (GeneratorMap::iterator iter = generators_by_flag_name_.begin();
1409        iter != generators_by_flag_name_.end(); ++iter) {
1410     // FIXME(kenton):  If the text is long enough it will wrap, which is ugly,
1411     //   but fixing this nicely (e.g. splitting on spaces) is probably more
1412     //   trouble than it's worth.
1413     std::cerr << "  " << iter->first << "=OUT_DIR "
1414               << string(19 - iter->first.size(), ' ')  // Spaces for alignment.
1415               << iter->second.help_text << std::endl;
1416   }
1417 }
1418 
GenerateOutput(const vector<const FileDescriptor * > & parsed_files,const OutputDirective & output_directive,GeneratorContext * generator_context)1419 bool CommandLineInterface::GenerateOutput(
1420     const vector<const FileDescriptor*>& parsed_files,
1421     const OutputDirective& output_directive,
1422     GeneratorContext* generator_context) {
1423   // Call the generator.
1424   string error;
1425   if (output_directive.generator == NULL) {
1426     // This is a plugin.
1427     GOOGLE_CHECK(HasPrefixString(output_directive.name, "--") &&
1428           HasSuffixString(output_directive.name, "_out"))
1429         << "Bad name for plugin generator: " << output_directive.name;
1430 
1431     // Strip the "--" and "_out" and add the plugin prefix.
1432     string plugin_name = plugin_prefix_ + "gen-" +
1433         output_directive.name.substr(2, output_directive.name.size() - 6);
1434 
1435     if (!GeneratePluginOutput(parsed_files, plugin_name,
1436                               output_directive.parameter,
1437                               generator_context, &error)) {
1438       std::cerr << output_directive.name << ": " << error << std::endl;
1439       return false;
1440     }
1441   } else {
1442     // Regular generator.
1443     string parameters = output_directive.parameter;
1444     if (!generator_parameters_[output_directive.name].empty()) {
1445       if (!parameters.empty()) {
1446         parameters.append(",");
1447       }
1448       parameters.append(generator_parameters_[output_directive.name]);
1449     }
1450     if (output_directive.generator->HasGenerateAll()) {
1451       if (!output_directive.generator->GenerateAll(
1452           parsed_files, parameters, generator_context, &error)) {
1453           // Generator returned an error.
1454           std::cerr << output_directive.name << ": "
1455                     << ": " << error << std::endl;
1456           return false;
1457       }
1458     } else {
1459       for (int i = 0; i < parsed_files.size(); i++) {
1460         if (!output_directive.generator->Generate(parsed_files[i], parameters,
1461                                                   generator_context, &error)) {
1462           // Generator returned an error.
1463           std::cerr << output_directive.name << ": " << parsed_files[i]->name()
1464                     << ": " << error << std::endl;
1465           return false;
1466         }
1467       }
1468     }
1469   }
1470 
1471   return true;
1472 }
1473 
GenerateDependencyManifestFile(const vector<const FileDescriptor * > & parsed_files,const GeneratorContextMap & output_directories,DiskSourceTree * source_tree)1474 bool CommandLineInterface::GenerateDependencyManifestFile(
1475     const vector<const FileDescriptor*>& parsed_files,
1476     const GeneratorContextMap& output_directories,
1477     DiskSourceTree* source_tree) {
1478   FileDescriptorSet file_set;
1479 
1480   set<const FileDescriptor*> already_seen;
1481   for (int i = 0; i < parsed_files.size(); i++) {
1482     GetTransitiveDependencies(parsed_files[i],
1483                               false,
1484                               false,
1485                               &already_seen,
1486                               file_set.mutable_file());
1487   }
1488 
1489   vector<string> output_filenames;
1490   for (GeneratorContextMap::const_iterator iter = output_directories.begin();
1491        iter != output_directories.end(); ++iter) {
1492     const string& location = iter->first;
1493     GeneratorContextImpl* directory = iter->second;
1494     vector<string> relative_output_filenames;
1495     directory->GetOutputFilenames(&relative_output_filenames);
1496     for (int i = 0; i < relative_output_filenames.size(); i++) {
1497       string output_filename = location + relative_output_filenames[i];
1498       if (output_filename.compare(0, 2, "./") == 0) {
1499         output_filename = output_filename.substr(2);
1500       }
1501       output_filenames.push_back(output_filename);
1502     }
1503   }
1504 
1505   int fd;
1506   do {
1507     fd = open(dependency_out_name_.c_str(),
1508               O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
1509   } while (fd < 0 && errno == EINTR);
1510 
1511   if (fd < 0) {
1512     perror(dependency_out_name_.c_str());
1513     return false;
1514   }
1515 
1516   io::FileOutputStream out(fd);
1517   io::Printer printer(&out, '$');
1518 
1519   for (int i = 0; i < output_filenames.size(); i++) {
1520     printer.Print(output_filenames[i].c_str());
1521     if (i == output_filenames.size() - 1) {
1522       printer.Print(":");
1523     } else {
1524       printer.Print(" \\\n");
1525     }
1526   }
1527 
1528   for (int i = 0; i < file_set.file_size(); i++) {
1529     const FileDescriptorProto& file = file_set.file(i);
1530     const string& virtual_file = file.name();
1531     string disk_file;
1532     if (source_tree &&
1533         source_tree->VirtualFileToDiskFile(virtual_file, &disk_file)) {
1534       printer.Print(" $disk_file$", "disk_file", disk_file);
1535       if (i < file_set.file_size() - 1) printer.Print("\\\n");
1536     } else {
1537       std::cerr << "Unable to identify path for file " << virtual_file
1538                 << std::endl;
1539       return false;
1540     }
1541   }
1542 
1543   return true;
1544 }
1545 
GeneratePluginOutput(const vector<const FileDescriptor * > & parsed_files,const string & plugin_name,const string & parameter,GeneratorContext * generator_context,string * error)1546 bool CommandLineInterface::GeneratePluginOutput(
1547     const vector<const FileDescriptor*>& parsed_files,
1548     const string& plugin_name,
1549     const string& parameter,
1550     GeneratorContext* generator_context,
1551     string* error) {
1552   CodeGeneratorRequest request;
1553   CodeGeneratorResponse response;
1554 
1555   // Build the request.
1556   if (!parameter.empty()) {
1557     request.set_parameter(parameter);
1558   }
1559 
1560   set<const FileDescriptor*> already_seen;
1561   for (int i = 0; i < parsed_files.size(); i++) {
1562     request.add_file_to_generate(parsed_files[i]->name());
1563     GetTransitiveDependencies(parsed_files[i],
1564                               true,  // Include json_name for plugins.
1565                               true,  // Include source code info.
1566                               &already_seen, request.mutable_proto_file());
1567   }
1568 
1569   // Invoke the plugin.
1570   Subprocess subprocess;
1571 
1572   if (plugins_.count(plugin_name) > 0) {
1573     subprocess.Start(plugins_[plugin_name], Subprocess::EXACT_NAME);
1574   } else {
1575     subprocess.Start(plugin_name, Subprocess::SEARCH_PATH);
1576   }
1577 
1578   string communicate_error;
1579   if (!subprocess.Communicate(request, &response, &communicate_error)) {
1580     *error = strings::Substitute("$0: $1", plugin_name, communicate_error);
1581     return false;
1582   }
1583 
1584   // Write the files.  We do this even if there was a generator error in order
1585   // to match the behavior of a compiled-in generator.
1586   google::protobuf::scoped_ptr<io::ZeroCopyOutputStream> current_output;
1587   for (int i = 0; i < response.file_size(); i++) {
1588     const CodeGeneratorResponse::File& output_file = response.file(i);
1589 
1590     if (!output_file.insertion_point().empty()) {
1591       // Open a file for insert.
1592       // We reset current_output to NULL first so that the old file is closed
1593       // before the new one is opened.
1594       current_output.reset();
1595       current_output.reset(generator_context->OpenForInsert(
1596           output_file.name(), output_file.insertion_point()));
1597     } else if (!output_file.name().empty()) {
1598       // Starting a new file.  Open it.
1599       // We reset current_output to NULL first so that the old file is closed
1600       // before the new one is opened.
1601       current_output.reset();
1602       current_output.reset(generator_context->Open(output_file.name()));
1603     } else if (current_output == NULL) {
1604       *error = strings::Substitute(
1605         "$0: First file chunk returned by plugin did not specify a file name.",
1606         plugin_name);
1607       return false;
1608     }
1609 
1610     // Use CodedOutputStream for convenience; otherwise we'd need to provide
1611     // our own buffer-copying loop.
1612     io::CodedOutputStream writer(current_output.get());
1613     writer.WriteString(output_file.content());
1614   }
1615 
1616   // Check for errors.
1617   if (!response.error().empty()) {
1618     // Generator returned an error.
1619     *error = response.error();
1620     return false;
1621   }
1622 
1623   return true;
1624 }
1625 
EncodeOrDecode(const DescriptorPool * pool)1626 bool CommandLineInterface::EncodeOrDecode(const DescriptorPool* pool) {
1627   // Look up the type.
1628   const Descriptor* type = pool->FindMessageTypeByName(codec_type_);
1629   if (type == NULL) {
1630     std::cerr << "Type not defined: " << codec_type_ << std::endl;
1631     return false;
1632   }
1633 
1634   DynamicMessageFactory dynamic_factory(pool);
1635   google::protobuf::scoped_ptr<Message> message(dynamic_factory.GetPrototype(type)->New());
1636 
1637   if (mode_ == MODE_ENCODE) {
1638     SetFdToTextMode(STDIN_FILENO);
1639     SetFdToBinaryMode(STDOUT_FILENO);
1640   } else {
1641     SetFdToBinaryMode(STDIN_FILENO);
1642     SetFdToTextMode(STDOUT_FILENO);
1643   }
1644 
1645   io::FileInputStream in(STDIN_FILENO);
1646   io::FileOutputStream out(STDOUT_FILENO);
1647 
1648   if (mode_ == MODE_ENCODE) {
1649     // Input is text.
1650     ErrorPrinter error_collector(error_format_);
1651     TextFormat::Parser parser;
1652     parser.RecordErrorsTo(&error_collector);
1653     parser.AllowPartialMessage(true);
1654 
1655     if (!parser.Parse(&in, message.get())) {
1656       std::cerr << "Failed to parse input." << std::endl;
1657       return false;
1658     }
1659   } else {
1660     // Input is binary.
1661     if (!message->ParsePartialFromZeroCopyStream(&in)) {
1662       std::cerr << "Failed to parse input." << std::endl;
1663       return false;
1664     }
1665   }
1666 
1667   if (!message->IsInitialized()) {
1668     std::cerr << "warning:  Input message is missing required fields:  "
1669               << message->InitializationErrorString() << std::endl;
1670   }
1671 
1672   if (mode_ == MODE_ENCODE) {
1673     // Output is binary.
1674     if (!message->SerializePartialToZeroCopyStream(&out)) {
1675       std::cerr << "output: I/O error." << std::endl;
1676       return false;
1677     }
1678   } else {
1679     // Output is text.
1680     if (!TextFormat::Print(*message, &out)) {
1681       std::cerr << "output: I/O error." << std::endl;
1682       return false;
1683     }
1684   }
1685 
1686   return true;
1687 }
1688 
WriteDescriptorSet(const vector<const FileDescriptor * > parsed_files)1689 bool CommandLineInterface::WriteDescriptorSet(
1690     const vector<const FileDescriptor*> parsed_files) {
1691   FileDescriptorSet file_set;
1692 
1693   if (imports_in_descriptor_set_) {
1694     set<const FileDescriptor*> already_seen;
1695     for (int i = 0; i < parsed_files.size(); i++) {
1696       GetTransitiveDependencies(parsed_files[i],
1697                                 true,  // Include json_name
1698                                 source_info_in_descriptor_set_,
1699                                 &already_seen, file_set.mutable_file());
1700     }
1701   } else {
1702     set<const FileDescriptor*> already_seen;
1703     for (int i = 0; i < parsed_files.size(); i++) {
1704       if (!already_seen.insert(parsed_files[i]).second) {
1705         continue;
1706       }
1707       FileDescriptorProto* file_proto = file_set.add_file();
1708       parsed_files[i]->CopyTo(file_proto);
1709       parsed_files[i]->CopyJsonNameTo(file_proto);
1710       if (source_info_in_descriptor_set_) {
1711         parsed_files[i]->CopySourceCodeInfoTo(file_proto);
1712       }
1713     }
1714   }
1715 
1716   int fd;
1717   do {
1718     fd = open(descriptor_set_name_.c_str(),
1719               O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
1720   } while (fd < 0 && errno == EINTR);
1721 
1722   if (fd < 0) {
1723     perror(descriptor_set_name_.c_str());
1724     return false;
1725   }
1726 
1727   io::FileOutputStream out(fd);
1728   if (!file_set.SerializeToZeroCopyStream(&out)) {
1729     std::cerr << descriptor_set_name_ << ": " << strerror(out.GetErrno())
1730               << std::endl;
1731     out.Close();
1732     return false;
1733   }
1734   if (!out.Close()) {
1735     std::cerr << descriptor_set_name_ << ": " << strerror(out.GetErrno())
1736               << std::endl;
1737     return false;
1738   }
1739 
1740   return true;
1741 }
1742 
GetTransitiveDependencies(const FileDescriptor * file,bool include_json_name,bool include_source_code_info,set<const FileDescriptor * > * already_seen,RepeatedPtrField<FileDescriptorProto> * output)1743 void CommandLineInterface::GetTransitiveDependencies(
1744     const FileDescriptor* file,
1745     bool include_json_name,
1746     bool include_source_code_info,
1747     set<const FileDescriptor*>* already_seen,
1748     RepeatedPtrField<FileDescriptorProto>* output) {
1749   if (!already_seen->insert(file).second) {
1750     // Already saw this file.  Skip.
1751     return;
1752   }
1753 
1754   // Add all dependencies.
1755   for (int i = 0; i < file->dependency_count(); i++) {
1756     GetTransitiveDependencies(file->dependency(i),
1757                               include_json_name,
1758                               include_source_code_info,
1759                               already_seen, output);
1760   }
1761 
1762   // Add this file.
1763   FileDescriptorProto* new_descriptor = output->Add();
1764   file->CopyTo(new_descriptor);
1765   if (include_json_name) {
1766     file->CopyJsonNameTo(new_descriptor);
1767   }
1768   if (include_source_code_info) {
1769     file->CopySourceCodeInfoTo(new_descriptor);
1770   }
1771 }
1772 
1773 namespace {
1774 
1775 // Utility function for PrintFreeFieldNumbers.
1776 // Stores occupied ranges into the ranges parameter, and next level of sub
1777 // message types into the nested_messages parameter.  The FieldRange is left
1778 // inclusive, right exclusive. i.e. [a, b).
1779 //
1780 // Nested Messages:
1781 // Note that it only stores the nested message type, iff the nested type is
1782 // either a direct child of the given descriptor, or the nested type is a
1783 // decendent of the given descriptor and all the nodes between the
1784 // nested type and the given descriptor are group types. e.g.
1785 //
1786 // message Foo {
1787 //   message Bar {
1788 //     message NestedBar {}
1789 //   }
1790 //   group Baz = 1 {
1791 //     group NestedBazGroup = 2 {
1792 //       message Quz {
1793 //         message NestedQuz {}
1794 //       }
1795 //     }
1796 //     message NestedBaz {}
1797 //   }
1798 // }
1799 //
1800 // In this case, Bar, Quz and NestedBaz will be added into the nested types.
1801 // Since free field numbers of group types will not be printed, this makes sure
1802 // the nested message types in groups will not be dropped. The nested_messages
1803 // parameter will contain the direct children (when groups are ignored in the
1804 // tree) of the given descriptor for the caller to traverse. The declaration
1805 // order of the nested messages is also preserved.
1806 typedef pair<int, int> FieldRange;
GatherOccupiedFieldRanges(const Descriptor * descriptor,set<FieldRange> * ranges,vector<const Descriptor * > * nested_messages)1807 void GatherOccupiedFieldRanges(const Descriptor* descriptor,
1808                                set<FieldRange>* ranges,
1809                                vector<const Descriptor*>* nested_messages) {
1810   set<const Descriptor*> groups;
1811   for (int i = 0; i < descriptor->field_count(); ++i) {
1812     const FieldDescriptor* fd = descriptor->field(i);
1813     ranges->insert(FieldRange(fd->number(), fd->number() + 1));
1814     if (fd->type() == FieldDescriptor::TYPE_GROUP) {
1815       groups.insert(fd->message_type());
1816     }
1817   }
1818   for (int i = 0; i < descriptor->extension_range_count(); ++i) {
1819     ranges->insert(FieldRange(descriptor->extension_range(i)->start,
1820                               descriptor->extension_range(i)->end));
1821   }
1822   for (int i = 0; i < descriptor->reserved_range_count(); ++i) {
1823     ranges->insert(FieldRange(descriptor->reserved_range(i)->start,
1824                               descriptor->reserved_range(i)->end));
1825   }
1826   // Handle the nested messages/groups in declaration order to make it
1827   // post-order strict.
1828   for (int i = 0; i < descriptor->nested_type_count(); ++i) {
1829     const Descriptor* nested_desc = descriptor->nested_type(i);
1830     if (groups.find(nested_desc) != groups.end()) {
1831       GatherOccupiedFieldRanges(nested_desc, ranges, nested_messages);
1832     } else {
1833       nested_messages->push_back(nested_desc);
1834     }
1835   }
1836 }
1837 
1838 // Utility function for PrintFreeFieldNumbers.
1839 // Actually prints the formatted free field numbers for given message name and
1840 // occupied ranges.
FormatFreeFieldNumbers(const string & name,const set<FieldRange> & ranges)1841 void FormatFreeFieldNumbers(const string& name,
1842                             const set<FieldRange>& ranges) {
1843   string output;
1844   StringAppendF(&output, "%-35s free:", name.c_str());
1845   int next_free_number = 1;
1846   for (set<FieldRange>::const_iterator i = ranges.begin();
1847        i != ranges.end(); ++i) {
1848     // This happens when groups re-use parent field numbers, in which
1849     // case we skip the FieldRange entirely.
1850     if (next_free_number >= i->second) continue;
1851 
1852     if (next_free_number < i->first) {
1853       if (next_free_number + 1 == i->first) {
1854         // Singleton
1855         StringAppendF(&output, " %d", next_free_number);
1856       } else {
1857         // Range
1858         StringAppendF(&output, " %d-%d", next_free_number, i->first - 1);
1859       }
1860     }
1861     next_free_number = i->second;
1862   }
1863   if (next_free_number <= FieldDescriptor::kMaxNumber) {
1864     StringAppendF(&output, " %d-INF", next_free_number);
1865   }
1866   std::cout << output << std::endl;
1867 }
1868 
1869 }  // namespace
1870 
PrintFreeFieldNumbers(const Descriptor * descriptor)1871 void CommandLineInterface::PrintFreeFieldNumbers(
1872     const Descriptor* descriptor) {
1873   set<FieldRange> ranges;
1874   vector<const Descriptor*> nested_messages;
1875   GatherOccupiedFieldRanges(descriptor, &ranges, &nested_messages);
1876 
1877   for (int i = 0; i < nested_messages.size(); ++i) {
1878     PrintFreeFieldNumbers(nested_messages[i]);
1879   }
1880   FormatFreeFieldNumbers(descriptor->full_name(), ranges);
1881 }
1882 
1883 
1884 
1885 }  // namespace compiler
1886 }  // namespace protobuf
1887 }  // namespace google
1888