1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/platform/path.h"
17 
18 #include <errno.h>
19 #include <fcntl.h>
20 #include <stdlib.h>
21 #include <sys/stat.h>
22 #include <sys/types.h>
23 #if defined(PLATFORM_WINDOWS)
24 #include <windows.h>
25 #else
26 #include <unistd.h>
27 #endif
28 
29 #include <vector>
30 
31 #include "absl/algorithm/container.h"
32 #include "tensorflow/core/platform/logging.h"
33 #include "tensorflow/core/platform/mutex.h"
34 #include "tensorflow/core/platform/scanner.h"
35 #include "tensorflow/core/platform/strcat.h"
36 
37 namespace tensorflow {
38 namespace io {
39 namespace internal {
40 namespace {
41 
// Separator between path components.
const char kPathSep[] = "/";

// Copies the environment-provided `path` into `*out`.
//
// Returns false when `path` is null and true otherwise. `out` may be null, in
// which case nothing is copied and the return value only reports whether
// `path` is non-null.
bool FixBazelEnvPath(const char* path, std::string* out) {
  if (path == nullptr) return false;
  if (out == nullptr) return true;

  *out = path;

#ifdef PLATFORM_WINDOWS
  // On Windows, paths generated by Bazel always use `/` as the path separator.
  // A path that already contains `\` is returned untouched; otherwise every
  // `/` is overwritten with kPathSep[0].
  // NOTE(review): kPathSep is "/", so the rewrite below currently leaves the
  // string unchanged. Confirm whether `\` was the intended replacement before
  // altering it, since other helpers in this file split and join on `/`.
  if (out->find('\\') != std::string::npos) return true;

  for (size_t pos = out->find('/'); pos != std::string::npos;
       pos = out->find('/', pos + 1)) {
    (*out)[pos] = kPathSep[0];
  }
#endif

  return true;
}
64 
65 }  // namespace
66 
JoinPathImpl(std::initializer_list<StringPiece> paths)67 string JoinPathImpl(std::initializer_list<StringPiece> paths) {
68   string result;
69 
70   for (StringPiece path : paths) {
71     if (path.empty()) continue;
72 
73     if (result.empty()) {
74       result = string(path);
75       continue;
76     }
77 
78     if (IsAbsolutePath(path)) path = path.substr(1);
79 
80     if (result[result.size() - 1] == kPathSep[0]) {
81       strings::StrAppend(&result, path);
82     } else {
83       strings::StrAppend(&result, kPathSep, path);
84     }
85   }
86 
87   return result;
88 }
89 
90 // Return the parts of the URI, split on the final "/" in the path. If there is
91 // no "/" in the path, the first part of the output is the scheme and host, and
92 // the second is the path. If the only "/" in the path is the first character,
93 // it is included in the first part of the output.
SplitPath(StringPiece uri)94 std::pair<StringPiece, StringPiece> SplitPath(StringPiece uri) {
95   StringPiece scheme, host, path;
96   ParseURI(uri, &scheme, &host, &path);
97 
98   auto pos = path.rfind('/');
99 #ifdef PLATFORM_WINDOWS
100   if (pos == StringPiece::npos) pos = path.rfind('\\');
101 #endif
102   // Handle the case with no '/' in 'path'.
103   if (pos == StringPiece::npos)
104     return std::make_pair(StringPiece(uri.begin(), host.end() - uri.begin()),
105                           path);
106 
107   // Handle the case with a single leading '/' in 'path'.
108   if (pos == 0)
109     return std::make_pair(
110         StringPiece(uri.begin(), path.begin() + 1 - uri.begin()),
111         StringPiece(path.data() + 1, path.size() - 1));
112 
113   return std::make_pair(
114       StringPiece(uri.begin(), path.begin() + pos - uri.begin()),
115       StringPiece(path.data() + pos + 1, path.size() - (pos + 1)));
116 }
117 
118 // Return the parts of the basename of path, split on the final ".".
119 // If there is no "." in the basename or "." is the final character in the
120 // basename, the second value will be empty.
SplitBasename(StringPiece path)121 std::pair<StringPiece, StringPiece> SplitBasename(StringPiece path) {
122   path = Basename(path);
123 
124   auto pos = path.rfind('.');
125   if (pos == StringPiece::npos)
126     return std::make_pair(path, StringPiece(path.data() + path.size(), 0));
127   return std::make_pair(
128       StringPiece(path.data(), pos),
129       StringPiece(path.data() + pos + 1, path.size() - (pos + 1)));
130 }
131 
132 }  // namespace internal
133 
IsAbsolutePath(StringPiece path)134 bool IsAbsolutePath(StringPiece path) {
135   return !path.empty() && path[0] == '/';
136 }
137 
Dirname(StringPiece path)138 StringPiece Dirname(StringPiece path) {
139   return internal::SplitPath(path).first;
140 }
141 
Basename(StringPiece path)142 StringPiece Basename(StringPiece path) {
143   return internal::SplitPath(path).second;
144 }
145 
Extension(StringPiece path)146 StringPiece Extension(StringPiece path) {
147   return internal::SplitBasename(path).second;
148 }
149 
CleanPath(StringPiece unclean_path)150 string CleanPath(StringPiece unclean_path) {
151   string path(unclean_path);
152   const char* src = path.c_str();
153   string::iterator dst = path.begin();
154 
155   // Check for absolute path and determine initial backtrack limit.
156   const bool is_absolute_path = *src == '/';
157   if (is_absolute_path) {
158     *dst++ = *src++;
159     while (*src == '/') ++src;
160   }
161   string::const_iterator backtrack_limit = dst;
162 
163   // Process all parts
164   while (*src) {
165     bool parsed = false;
166 
167     if (src[0] == '.') {
168       //  1dot ".<whateverisnext>", check for END or SEP.
169       if (src[1] == '/' || !src[1]) {
170         if (*++src) {
171           ++src;
172         }
173         parsed = true;
174       } else if (src[1] == '.' && (src[2] == '/' || !src[2])) {
175         // 2dot END or SEP (".." | "../<whateverisnext>").
176         src += 2;
177         if (dst != backtrack_limit) {
178           // We can backtrack the previous part
179           for (--dst; dst != backtrack_limit && dst[-1] != '/'; --dst) {
180             // Empty.
181           }
182         } else if (!is_absolute_path) {
183           // Failed to backtrack and we can't skip it either. Rewind and copy.
184           src -= 2;
185           *dst++ = *src++;
186           *dst++ = *src++;
187           if (*src) {
188             *dst++ = *src;
189           }
190           // We can never backtrack over a copied "../" part so set new limit.
191           backtrack_limit = dst;
192         }
193         if (*src) {
194           ++src;
195         }
196         parsed = true;
197       }
198     }
199 
200     // If not parsed, copy entire part until the next SEP or EOS.
201     if (!parsed) {
202       while (*src && *src != '/') {
203         *dst++ = *src++;
204       }
205       if (*src) {
206         *dst++ = *src++;
207       }
208     }
209 
210     // Skip consecutive SEP occurrences
211     while (*src == '/') {
212       ++src;
213     }
214   }
215 
216   // Calculate and check the length of the cleaned path.
217   string::difference_type path_length = dst - path.begin();
218   if (path_length != 0) {
219     // Remove trailing '/' except if it is root path ("/" ==> path_length := 1)
220     if (path_length > 1 && path[path_length - 1] == '/') {
221       --path_length;
222     }
223     path.resize(path_length);
224   } else {
225     // The cleaned path is empty; assign "." as per the spec.
226     path.assign(1, '.');
227   }
228   return path;
229 }
230 
ParseURI(StringPiece remaining,StringPiece * scheme,StringPiece * host,StringPiece * path)231 void ParseURI(StringPiece remaining, StringPiece* scheme, StringPiece* host,
232               StringPiece* path) {
233   // 0. Parse scheme
234   // Make sure scheme matches [a-zA-Z][0-9a-zA-Z.]*
235   // TODO(keveman): Allow "+" and "-" in the scheme.
236   // Keep URI pattern in tensorboard/backend/server.py updated accordingly
237   if (!strings::Scanner(remaining)
238            .One(strings::Scanner::LETTER)
239            .Many(strings::Scanner::LETTER_DIGIT_DOT)
240            .StopCapture()
241            .OneLiteral("://")
242            .GetResult(&remaining, scheme)) {
243     // If there's no scheme, assume the entire string is a path.
244     *scheme = StringPiece(remaining.begin(), 0);
245     *host = StringPiece(remaining.begin(), 0);
246     *path = remaining;
247     return;
248   }
249 
250   // 1. Parse host
251   if (!strings::Scanner(remaining).ScanUntil('/').GetResult(&remaining, host)) {
252     // No path, so the rest of the URI is the host.
253     *host = remaining;
254     *path = StringPiece(remaining.end(), 0);
255     return;
256   }
257 
258   // 2. The rest is the path
259   *path = remaining;
260 }
261 
CreateURI(StringPiece scheme,StringPiece host,StringPiece path)262 string CreateURI(StringPiece scheme, StringPiece host, StringPiece path) {
263   if (scheme.empty()) {
264     return string(path);
265   }
266   return strings::StrCat(scheme, "://", host, path);
267 }
268 
269 // Returns a unique number every time it is called.
UniqueId()270 int64 UniqueId() {
271   static mutex mu(LINKER_INITIALIZED);
272   static int64 id = 0;
273   mutex_lock l(mu);
274   return ++id;
275 }
276 
CommonPathPrefix(absl::Span<const string> paths)277 string CommonPathPrefix(absl::Span<const string> paths) {
278   if (paths.empty()) return "";
279   size_t min_filename_size =
280       absl::c_min_element(paths, [](const string& a, const string& b) {
281         return a.size() < b.size();
282       })->size();
283   if (min_filename_size == 0) return "";
284 
285   size_t common_prefix_size = [&] {
286     for (size_t prefix_size = 0; prefix_size < min_filename_size;
287          prefix_size++) {
288       char c = paths[0][prefix_size];
289       for (int f = 1; f < paths.size(); f++) {
290         if (paths[f][prefix_size] != c) {
291           return prefix_size;
292         }
293       }
294     }
295     return min_filename_size;
296   }();
297 
298   size_t rpos = absl::string_view(paths[0])
299                     .substr(0, common_prefix_size)
300                     .rfind(internal::kPathSep);
301   return rpos == std::string::npos
302              ? ""
303              : std::string(absl::string_view(paths[0]).substr(0, rpos + 1));
304 }
305 
GetTempFilename(const string & extension)306 string GetTempFilename(const string& extension) {
307 #if defined(__ANDROID__)
308   LOG(FATAL) << "GetTempFilename is not implemented in this platform.";
309 #elif defined(PLATFORM_WINDOWS)
310   char temp_dir[_MAX_PATH];
311   DWORD retval;
312   retval = GetTempPath(_MAX_PATH, temp_dir);
313   if (retval > _MAX_PATH || retval == 0) {
314     LOG(FATAL) << "Cannot get the directory for temporary files.";
315   }
316 
317   char temp_file_name[_MAX_PATH];
318   retval = GetTempFileName(temp_dir, "", UniqueId(), temp_file_name);
319   if (retval > _MAX_PATH || retval == 0) {
320     LOG(FATAL) << "Cannot get a temporary file in: " << temp_dir;
321   }
322 
323   string full_tmp_file_name(temp_file_name);
324   full_tmp_file_name.append(extension);
325   return full_tmp_file_name;
326 #else
327   for (const char* dir : std::vector<const char*>(
328            {getenv("TEST_TMPDIR"), getenv("TMPDIR"), getenv("TMP"), "/tmp"})) {
329     if (!dir || !dir[0]) {
330       continue;
331     }
332     struct stat statbuf;
333     if (!stat(dir, &statbuf) && S_ISDIR(statbuf.st_mode)) {
334       // UniqueId is added here because mkstemps is not as thread safe as it
335       // looks. https://github.com/tensorflow/tensorflow/issues/5804 shows
336       // the problem.
337       string tmp_filepath;
338       int fd;
339       if (extension.length()) {
340         tmp_filepath = io::JoinPath(
341             dir, strings::StrCat("tmp_file_tensorflow_", UniqueId(), "_XXXXXX.",
342                                  extension));
343         fd = mkstemps(&tmp_filepath[0], extension.length() + 1);
344       } else {
345         tmp_filepath = io::JoinPath(
346             dir,
347             strings::StrCat("tmp_file_tensorflow_", UniqueId(), "_XXXXXX"));
348         fd = mkstemp(&tmp_filepath[0]);
349       }
350       if (fd < 0) {
351         LOG(FATAL) << "Failed to create temp file.";
352       } else {
353         if (close(fd) < 0) {
354           LOG(ERROR) << "close() failed: " << strerror(errno);
355         }
356         return tmp_filepath;
357       }
358     }
359   }
360   LOG(FATAL) << "No temp directory found.";
361   std::abort();
362 #endif
363 }
364 
GetTestUndeclaredOutputsDir(string * dir)365 bool GetTestUndeclaredOutputsDir(string* dir) {
366   return internal::FixBazelEnvPath(getenv("TEST_UNDECLARED_OUTPUTS_DIR"), dir);
367 }
368 
369 }  // namespace io
370 }  // namespace tensorflow
371