1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: kenton@google.com (Kenton Varda)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34
35 #ifdef _MSC_VER
36 #include <io.h>
37 #else
38 #include <unistd.h>
39 #endif
40 #include <sys/types.h>
41 #include <sys/stat.h>
42 #include <fcntl.h>
43 #include <errno.h>
44
45 #include <algorithm>
46 #include <memory>
47
48 #include <google/protobuf/compiler/importer.h>
49
50 #include <google/protobuf/compiler/parser.h>
51 #include <google/protobuf/io/tokenizer.h>
52 #include <google/protobuf/io/zero_copy_stream_impl.h>
53 #include <google/protobuf/stubs/strutil.h>
54
55 namespace google {
56 namespace protobuf {
57 namespace compiler {
58
59 #ifdef _WIN32
60 #ifndef F_OK
61 #define F_OK 00 // not defined by MSVC for whatever reason
62 #endif
63 #include <ctype.h>
64 #endif
65
// Returns true if the text looks like a Windows-style absolute path, starting
// with a drive letter.  Example:  "C:\foo".  When compiled for anything other
// than Windows or Cygwin this always returns false.
//
// The drive letter is tested with explicit ASCII range comparisons instead of
// isalpha(): isalpha() depends on the current locale and has undefined
// behavior when handed a negative char value, and drive letters are always
// plain ASCII letters anyway.
//
// TODO(kenton):  Share this with copy in command_line_interface.cc?
static bool IsWindowsAbsolutePath(const std::string& text) {
#if defined(_WIN32) || defined(__CYGWIN__)
  if (text.size() < 3) {
    return false;
  }
  const char drive = text[0];
  const bool drive_is_letter = (drive >= 'A' && drive <= 'Z') ||
                               (drive >= 'a' && drive <= 'z');
  return drive_is_letter &&
         text[1] == ':' &&
         (text[2] == '/' || text[2] == '\\') &&
         // The drive separator must be the only ':' in the whole text.
         text.find_last_of(':') == 1;
#else
  return false;
#endif
}
79
~MultiFileErrorCollector()80 MultiFileErrorCollector::~MultiFileErrorCollector() {}
81
82 // This class serves two purposes:
83 // - It implements the ErrorCollector interface (used by Tokenizer and Parser)
84 // in terms of MultiFileErrorCollector, using a particular filename.
85 // - It lets us check if any errors have occurred.
86 class SourceTreeDescriptorDatabase::SingleFileErrorCollector
87 : public io::ErrorCollector {
88 public:
SingleFileErrorCollector(const string & filename,MultiFileErrorCollector * multi_file_error_collector)89 SingleFileErrorCollector(const string& filename,
90 MultiFileErrorCollector* multi_file_error_collector)
91 : filename_(filename),
92 multi_file_error_collector_(multi_file_error_collector),
93 had_errors_(false) {}
~SingleFileErrorCollector()94 ~SingleFileErrorCollector() {}
95
had_errors()96 bool had_errors() { return had_errors_; }
97
98 // implements ErrorCollector ---------------------------------------
AddError(int line,int column,const string & message)99 void AddError(int line, int column, const string& message) {
100 if (multi_file_error_collector_ != NULL) {
101 multi_file_error_collector_->AddError(filename_, line, column, message);
102 }
103 had_errors_ = true;
104 }
105
106 private:
107 string filename_;
108 MultiFileErrorCollector* multi_file_error_collector_;
109 bool had_errors_;
110 };
111
112 // ===================================================================
113
SourceTreeDescriptorDatabase(SourceTree * source_tree)114 SourceTreeDescriptorDatabase::SourceTreeDescriptorDatabase(
115 SourceTree* source_tree)
116 : source_tree_(source_tree),
117 error_collector_(NULL),
118 using_validation_error_collector_(false),
119 validation_error_collector_(this) {}
120
~SourceTreeDescriptorDatabase()121 SourceTreeDescriptorDatabase::~SourceTreeDescriptorDatabase() {}
122
FindFileByName(const string & filename,FileDescriptorProto * output)123 bool SourceTreeDescriptorDatabase::FindFileByName(
124 const string& filename, FileDescriptorProto* output) {
125 scoped_ptr<io::ZeroCopyInputStream> input(source_tree_->Open(filename));
126 if (input == NULL) {
127 if (error_collector_ != NULL) {
128 error_collector_->AddError(filename, -1, 0,
129 source_tree_->GetLastErrorMessage());
130 }
131 return false;
132 }
133
134 // Set up the tokenizer and parser.
135 SingleFileErrorCollector file_error_collector(filename, error_collector_);
136 io::Tokenizer tokenizer(input.get(), &file_error_collector);
137
138 Parser parser;
139 if (error_collector_ != NULL) {
140 parser.RecordErrorsTo(&file_error_collector);
141 }
142 if (using_validation_error_collector_) {
143 parser.RecordSourceLocationsTo(&source_locations_);
144 }
145
146 // Parse it.
147 output->set_name(filename);
148 return parser.Parse(&tokenizer, output) &&
149 !file_error_collector.had_errors();
150 }
151
FindFileContainingSymbol(const string & symbol_name,FileDescriptorProto * output)152 bool SourceTreeDescriptorDatabase::FindFileContainingSymbol(
153 const string& symbol_name, FileDescriptorProto* output) {
154 return false;
155 }
156
FindFileContainingExtension(const string & containing_type,int field_number,FileDescriptorProto * output)157 bool SourceTreeDescriptorDatabase::FindFileContainingExtension(
158 const string& containing_type, int field_number,
159 FileDescriptorProto* output) {
160 return false;
161 }
162
163 // -------------------------------------------------------------------
164
165 SourceTreeDescriptorDatabase::ValidationErrorCollector::
ValidationErrorCollector(SourceTreeDescriptorDatabase * owner)166 ValidationErrorCollector(SourceTreeDescriptorDatabase* owner)
167 : owner_(owner) {}
168
169 SourceTreeDescriptorDatabase::ValidationErrorCollector::
~ValidationErrorCollector()170 ~ValidationErrorCollector() {}
171
AddError(const string & filename,const string & element_name,const Message * descriptor,ErrorLocation location,const string & message)172 void SourceTreeDescriptorDatabase::ValidationErrorCollector::AddError(
173 const string& filename,
174 const string& element_name,
175 const Message* descriptor,
176 ErrorLocation location,
177 const string& message) {
178 if (owner_->error_collector_ == NULL) return;
179
180 int line, column;
181 owner_->source_locations_.Find(descriptor, location, &line, &column);
182 owner_->error_collector_->AddError(filename, line, column, message);
183 }
184
185 // ===================================================================
186
Importer(SourceTree * source_tree,MultiFileErrorCollector * error_collector)187 Importer::Importer(SourceTree* source_tree,
188 MultiFileErrorCollector* error_collector)
189 : database_(source_tree),
190 pool_(&database_, database_.GetValidationErrorCollector()) {
191 pool_.EnforceWeakDependencies(true);
192 database_.RecordErrorsTo(error_collector);
193 }
194
~Importer()195 Importer::~Importer() {}
196
Import(const string & filename)197 const FileDescriptor* Importer::Import(const string& filename) {
198 return pool_.FindFileByName(filename);
199 }
200
AddUnusedImportTrackFile(const string & file_name)201 void Importer::AddUnusedImportTrackFile(const string& file_name) {
202 pool_.AddUnusedImportTrackFile(file_name);
203 }
204
ClearUnusedImportTrackFiles()205 void Importer::ClearUnusedImportTrackFiles() {
206 pool_.ClearUnusedImportTrackFiles();
207 }
208
209 // ===================================================================
210
~SourceTree()211 SourceTree::~SourceTree() {}
212
GetLastErrorMessage()213 string SourceTree::GetLastErrorMessage() {
214 return "File not found.";
215 }
216
DiskSourceTree()217 DiskSourceTree::DiskSourceTree() {}
218
~DiskSourceTree()219 DiskSourceTree::~DiskSourceTree() {}
220
// Returns the final character of |str|, or '\0' if |str| is empty.
//
// The empty case is handled explicitly because str.size() - 1 would wrap
// around to the largest size_t value and index far out of bounds.
static inline char LastChar(const std::string& str) {
  return str.empty() ? '\0' : str[str.size() - 1];
}
224
225 // Given a path, returns an equivalent path with these changes:
226 // - On Windows, any backslashes are replaced with forward slashes.
227 // - Any instances of the directory "." are removed.
228 // - Any consecutive '/'s are collapsed into a single slash.
229 // Note that the resulting string may be empty.
230 //
231 // TODO(kenton): It would be nice to handle "..", e.g. so that we can figure
232 // out that "foo/bar.proto" is inside "baz/../foo". However, if baz is a
233 // symlink or doesn't exist, then things get complicated, and we can't
234 // actually determine this without investigating the filesystem, probably
235 // in non-portable ways. So, we punt.
236 //
237 // TODO(kenton): It would be nice to use realpath() here except that it
238 // resolves symbolic links. This could cause problems if people place
239 // symbolic links in their source tree. For example, if you executed:
240 // protoc --proto_path=foo foo/bar/baz.proto
241 // then if foo/bar is a symbolic link, foo/bar/baz.proto will canonicalize
242 // to a path which does not appear to be under foo, and thus the compiler
243 // will complain that baz.proto is not inside the --proto_path.
CanonicalizePath(string path)244 static string CanonicalizePath(string path) {
245 #ifdef _WIN32
246 // The Win32 API accepts forward slashes as a path delimiter even though
247 // backslashes are standard. Let's avoid confusion and use only forward
248 // slashes.
249 if (HasPrefixString(path, "\\\\")) {
250 // Avoid converting two leading backslashes.
251 path = "\\\\" + StringReplace(path.substr(2), "\\", "/", true);
252 } else {
253 path = StringReplace(path, "\\", "/", true);
254 }
255 #endif
256
257 vector<string> canonical_parts;
258 vector<string> parts = Split(
259 path, "/", true); // Note: Removes empty parts.
260 for (int i = 0; i < parts.size(); i++) {
261 if (parts[i] == ".") {
262 // Ignore.
263 } else {
264 canonical_parts.push_back(parts[i]);
265 }
266 }
267 string result = Join(canonical_parts, "/");
268 if (!path.empty() && path[0] == '/') {
269 // Restore leading slash.
270 result = '/' + result;
271 }
272 if (!path.empty() && LastChar(path) == '/' &&
273 !result.empty() && LastChar(result) != '/') {
274 // Restore trailing slash.
275 result += '/';
276 }
277 return result;
278 }
279
// Returns true if any '/'-separated component of |path| is exactly "..":
// the whole path, its first component, its last component, or one in the
// middle.
static inline bool ContainsParentReference(const std::string& path) {
  if (path == "..") {
    return true;
  }
  const std::string::size_type length = path.size();
  if (length >= 3) {
    // Leading "../" or trailing "/..".
    if (path.compare(0, 3, "../") == 0) return true;
    if (path.compare(length - 3, 3, "/..") == 0) return true;
  }
  return path.find("/../") != std::string::npos;
}
286
287 // Maps a file from an old location to a new one. Typically, old_prefix is
288 // a virtual path and new_prefix is its corresponding disk path. Returns
289 // false if the filename did not start with old_prefix, otherwise replaces
290 // old_prefix with new_prefix and stores the result in *result. Examples:
291 // string result;
292 // assert(ApplyMapping("foo/bar", "", "baz", &result));
293 // assert(result == "baz/foo/bar");
294 //
295 // assert(ApplyMapping("foo/bar", "foo", "baz", &result));
296 // assert(result == "baz/bar");
297 //
298 // assert(ApplyMapping("foo", "foo", "bar", &result));
299 // assert(result == "bar");
300 //
301 // assert(!ApplyMapping("foo/bar", "baz", "qux", &result));
302 // assert(!ApplyMapping("foo/bar", "baz", "qux", &result));
303 // assert(!ApplyMapping("foobar", "foo", "baz", &result));
ApplyMapping(const string & filename,const string & old_prefix,const string & new_prefix,string * result)304 static bool ApplyMapping(const string& filename,
305 const string& old_prefix,
306 const string& new_prefix,
307 string* result) {
308 if (old_prefix.empty()) {
309 // old_prefix matches any relative path.
310 if (ContainsParentReference(filename)) {
311 // We do not allow the file name to use "..".
312 return false;
313 }
314 if (HasPrefixString(filename, "/") ||
315 IsWindowsAbsolutePath(filename)) {
316 // This is an absolute path, so it isn't matched by the empty string.
317 return false;
318 }
319 result->assign(new_prefix);
320 if (!result->empty()) result->push_back('/');
321 result->append(filename);
322 return true;
323 } else if (HasPrefixString(filename, old_prefix)) {
324 // old_prefix is a prefix of the filename. Is it the whole filename?
325 if (filename.size() == old_prefix.size()) {
326 // Yep, it's an exact match.
327 *result = new_prefix;
328 return true;
329 } else {
330 // Not an exact match. Is the next character a '/'? Otherwise,
331 // this isn't actually a match at all. E.g. the prefix "foo/bar"
332 // does not match the filename "foo/barbaz".
333 int after_prefix_start = -1;
334 if (filename[old_prefix.size()] == '/') {
335 after_prefix_start = old_prefix.size() + 1;
336 } else if (filename[old_prefix.size() - 1] == '/') {
337 // old_prefix is never empty, and canonicalized paths never have
338 // consecutive '/' characters.
339 after_prefix_start = old_prefix.size();
340 }
341 if (after_prefix_start != -1) {
342 // Yep. So the prefixes are directories and the filename is a file
343 // inside them.
344 string after_prefix = filename.substr(after_prefix_start);
345 if (ContainsParentReference(after_prefix)) {
346 // We do not allow the file name to use "..".
347 return false;
348 }
349 result->assign(new_prefix);
350 if (!result->empty()) result->push_back('/');
351 result->append(after_prefix);
352 return true;
353 }
354 }
355 }
356
357 return false;
358 }
359
MapPath(const string & virtual_path,const string & disk_path)360 void DiskSourceTree::MapPath(const string& virtual_path,
361 const string& disk_path) {
362 mappings_.push_back(Mapping(virtual_path, CanonicalizePath(disk_path)));
363 }
364
365 DiskSourceTree::DiskFileToVirtualFileResult
DiskFileToVirtualFile(const string & disk_file,string * virtual_file,string * shadowing_disk_file)366 DiskSourceTree::DiskFileToVirtualFile(
367 const string& disk_file,
368 string* virtual_file,
369 string* shadowing_disk_file) {
370 int mapping_index = -1;
371 string canonical_disk_file = CanonicalizePath(disk_file);
372
373 for (int i = 0; i < mappings_.size(); i++) {
374 // Apply the mapping in reverse.
375 if (ApplyMapping(canonical_disk_file, mappings_[i].disk_path,
376 mappings_[i].virtual_path, virtual_file)) {
377 // Success.
378 mapping_index = i;
379 break;
380 }
381 }
382
383 if (mapping_index == -1) {
384 return NO_MAPPING;
385 }
386
387 // Iterate through all mappings with higher precedence and verify that none
388 // of them map this file to some other existing file.
389 for (int i = 0; i < mapping_index; i++) {
390 if (ApplyMapping(*virtual_file, mappings_[i].virtual_path,
391 mappings_[i].disk_path, shadowing_disk_file)) {
392 if (access(shadowing_disk_file->c_str(), F_OK) >= 0) {
393 // File exists.
394 return SHADOWED;
395 }
396 }
397 }
398 shadowing_disk_file->clear();
399
400 // Verify that we can open the file. Note that this also has the side-effect
401 // of verifying that we are not canonicalizing away any non-existent
402 // directories.
403 scoped_ptr<io::ZeroCopyInputStream> stream(OpenDiskFile(disk_file));
404 if (stream == NULL) {
405 return CANNOT_OPEN;
406 }
407
408 return SUCCESS;
409 }
410
VirtualFileToDiskFile(const string & virtual_file,string * disk_file)411 bool DiskSourceTree::VirtualFileToDiskFile(const string& virtual_file,
412 string* disk_file) {
413 scoped_ptr<io::ZeroCopyInputStream> stream(
414 OpenVirtualFile(virtual_file, disk_file));
415 return stream != NULL;
416 }
417
Open(const string & filename)418 io::ZeroCopyInputStream* DiskSourceTree::Open(const string& filename) {
419 return OpenVirtualFile(filename, NULL);
420 }
421
GetLastErrorMessage()422 string DiskSourceTree::GetLastErrorMessage() {
423 return last_error_message_;
424 }
425
OpenVirtualFile(const string & virtual_file,string * disk_file)426 io::ZeroCopyInputStream* DiskSourceTree::OpenVirtualFile(
427 const string& virtual_file,
428 string* disk_file) {
429 if (virtual_file != CanonicalizePath(virtual_file) ||
430 ContainsParentReference(virtual_file)) {
431 // We do not allow importing of paths containing things like ".." or
432 // consecutive slashes since the compiler expects files to be uniquely
433 // identified by file name.
434 last_error_message_ = "Backslashes, consecutive slashes, \".\", or \"..\" "
435 "are not allowed in the virtual path";
436 return NULL;
437 }
438
439 for (int i = 0; i < mappings_.size(); i++) {
440 string temp_disk_file;
441 if (ApplyMapping(virtual_file, mappings_[i].virtual_path,
442 mappings_[i].disk_path, &temp_disk_file)) {
443 io::ZeroCopyInputStream* stream = OpenDiskFile(temp_disk_file);
444 if (stream != NULL) {
445 if (disk_file != NULL) {
446 *disk_file = temp_disk_file;
447 }
448 return stream;
449 }
450
451 if (errno == EACCES) {
452 // The file exists but is not readable.
453 last_error_message_ = "Read access is denied for file: " +
454 temp_disk_file;
455 return NULL;
456 }
457 }
458 }
459 last_error_message_ = "File not found.";
460 return NULL;
461 }
462
OpenDiskFile(const string & filename)463 io::ZeroCopyInputStream* DiskSourceTree::OpenDiskFile(
464 const string& filename) {
465 int file_descriptor;
466 do {
467 file_descriptor = open(filename.c_str(), O_RDONLY);
468 } while (file_descriptor < 0 && errno == EINTR);
469 if (file_descriptor >= 0) {
470 io::FileInputStream* result = new io::FileInputStream(file_descriptor);
471 result->SetCloseOnDelete(true);
472 return result;
473 } else {
474 return NULL;
475 }
476 }
477
478 } // namespace compiler
479 } // namespace protobuf
480 } // namespace google
481