1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/lib/io/path.h"
17 
18 #include <errno.h>
19 #include <fcntl.h>
20 #include <stdlib.h>
21 #include <sys/stat.h>
22 #include <sys/types.h>
23 #if !defined(PLATFORM_WINDOWS)
24 #include <unistd.h>
25 #endif
26 
27 #include <vector>
28 
29 #include "tensorflow/core/lib/strings/scanner.h"
30 #include "tensorflow/core/lib/strings/strcat.h"
31 #include "tensorflow/core/platform/logging.h"
32 #include "tensorflow/core/platform/mutex.h"
33 
34 namespace tensorflow {
35 namespace io {
36 namespace internal {
37 
JoinPathImpl(std::initializer_list<StringPiece> paths)38 string JoinPathImpl(std::initializer_list<StringPiece> paths) {
39   string result;
40 
41   for (StringPiece path : paths) {
42     if (path.empty()) continue;
43 
44     if (result.empty()) {
45       result = string(path);
46       continue;
47     }
48 
49     if (result[result.size() - 1] == '/') {
50       if (IsAbsolutePath(path)) {
51         strings::StrAppend(&result, path.substr(1));
52       } else {
53         strings::StrAppend(&result, path);
54       }
55     } else {
56       if (IsAbsolutePath(path)) {
57         strings::StrAppend(&result, path);
58       } else {
59         strings::StrAppend(&result, "/", path);
60       }
61     }
62   }
63 
64   return result;
65 }
66 
67 // Return the parts of the URI, split on the final "/" in the path. If there is
68 // no "/" in the path, the first part of the output is the scheme and host, and
69 // the second is the path. If the only "/" in the path is the first character,
70 // it is included in the first part of the output.
SplitPath(StringPiece uri)71 std::pair<StringPiece, StringPiece> SplitPath(StringPiece uri) {
72   StringPiece scheme, host, path;
73   ParseURI(uri, &scheme, &host, &path);
74 
75   auto pos = path.rfind('/');
76 #ifdef PLATFORM_WINDOWS
77   if (pos == StringPiece::npos) pos = path.rfind('\\');
78 #endif
79   // Handle the case with no '/' in 'path'.
80   if (pos == StringPiece::npos)
81     return std::make_pair(StringPiece(uri.begin(), host.end() - uri.begin()),
82                           path);
83 
84   // Handle the case with a single leading '/' in 'path'.
85   if (pos == 0)
86     return std::make_pair(
87         StringPiece(uri.begin(), path.begin() + 1 - uri.begin()),
88         StringPiece(path.data() + 1, path.size() - 1));
89 
90   return std::make_pair(
91       StringPiece(uri.begin(), path.begin() + pos - uri.begin()),
92       StringPiece(path.data() + pos + 1, path.size() - (pos + 1)));
93 }
94 
95 // Return the parts of the basename of path, split on the final ".".
96 // If there is no "." in the basename or "." is the final character in the
97 // basename, the second value will be empty.
SplitBasename(StringPiece path)98 std::pair<StringPiece, StringPiece> SplitBasename(StringPiece path) {
99   path = Basename(path);
100 
101   auto pos = path.rfind('.');
102   if (pos == StringPiece::npos)
103     return std::make_pair(path, StringPiece(path.data() + path.size(), 0));
104   return std::make_pair(
105       StringPiece(path.data(), pos),
106       StringPiece(path.data() + pos + 1, path.size() - (pos + 1)));
107 }
108 }  // namespace internal
109 
IsAbsolutePath(StringPiece path)110 bool IsAbsolutePath(StringPiece path) {
111   return !path.empty() && path[0] == '/';
112 }
113 
Dirname(StringPiece path)114 StringPiece Dirname(StringPiece path) {
115   return internal::SplitPath(path).first;
116 }
117 
Basename(StringPiece path)118 StringPiece Basename(StringPiece path) {
119   return internal::SplitPath(path).second;
120 }
121 
Extension(StringPiece path)122 StringPiece Extension(StringPiece path) {
123   return internal::SplitBasename(path).second;
124 }
125 
CleanPath(StringPiece unclean_path)126 string CleanPath(StringPiece unclean_path) {
127   string path(unclean_path);
128   const char* src = path.c_str();
129   string::iterator dst = path.begin();
130 
131   // Check for absolute path and determine initial backtrack limit.
132   const bool is_absolute_path = *src == '/';
133   if (is_absolute_path) {
134     *dst++ = *src++;
135     while (*src == '/') ++src;
136   }
137   string::const_iterator backtrack_limit = dst;
138 
139   // Process all parts
140   while (*src) {
141     bool parsed = false;
142 
143     if (src[0] == '.') {
144       //  1dot ".<whateverisnext>", check for END or SEP.
145       if (src[1] == '/' || !src[1]) {
146         if (*++src) {
147           ++src;
148         }
149         parsed = true;
150       } else if (src[1] == '.' && (src[2] == '/' || !src[2])) {
151         // 2dot END or SEP (".." | "../<whateverisnext>").
152         src += 2;
153         if (dst != backtrack_limit) {
154           // We can backtrack the previous part
155           for (--dst; dst != backtrack_limit && dst[-1] != '/'; --dst) {
156             // Empty.
157           }
158         } else if (!is_absolute_path) {
159           // Failed to backtrack and we can't skip it either. Rewind and copy.
160           src -= 2;
161           *dst++ = *src++;
162           *dst++ = *src++;
163           if (*src) {
164             *dst++ = *src;
165           }
166           // We can never backtrack over a copied "../" part so set new limit.
167           backtrack_limit = dst;
168         }
169         if (*src) {
170           ++src;
171         }
172         parsed = true;
173       }
174     }
175 
176     // If not parsed, copy entire part until the next SEP or EOS.
177     if (!parsed) {
178       while (*src && *src != '/') {
179         *dst++ = *src++;
180       }
181       if (*src) {
182         *dst++ = *src++;
183       }
184     }
185 
186     // Skip consecutive SEP occurrences
187     while (*src == '/') {
188       ++src;
189     }
190   }
191 
192   // Calculate and check the length of the cleaned path.
193   string::difference_type path_length = dst - path.begin();
194   if (path_length != 0) {
195     // Remove trailing '/' except if it is root path ("/" ==> path_length := 1)
196     if (path_length > 1 && path[path_length - 1] == '/') {
197       --path_length;
198     }
199     path.resize(path_length);
200   } else {
201     // The cleaned path is empty; assign "." as per the spec.
202     path.assign(1, '.');
203   }
204   return path;
205 }
206 
ParseURI(StringPiece remaining,StringPiece * scheme,StringPiece * host,StringPiece * path)207 void ParseURI(StringPiece remaining, StringPiece* scheme, StringPiece* host,
208               StringPiece* path) {
209   // 0. Parse scheme
210   // Make sure scheme matches [a-zA-Z][0-9a-zA-Z.]*
211   // TODO(keveman): Allow "+" and "-" in the scheme.
212   // Keep URI pattern in tensorboard/backend/server.py updated accordingly
213   if (!strings::Scanner(remaining)
214            .One(strings::Scanner::LETTER)
215            .Many(strings::Scanner::LETTER_DIGIT_DOT)
216            .StopCapture()
217            .OneLiteral("://")
218            .GetResult(&remaining, scheme)) {
219     // If there's no scheme, assume the entire string is a path.
220     *scheme = StringPiece(remaining.begin(), 0);
221     *host = StringPiece(remaining.begin(), 0);
222     *path = remaining;
223     return;
224   }
225 
226   // 1. Parse host
227   if (!strings::Scanner(remaining).ScanUntil('/').GetResult(&remaining, host)) {
228     // No path, so the rest of the URI is the host.
229     *host = remaining;
230     *path = StringPiece(remaining.end(), 0);
231     return;
232   }
233 
234   // 2. The rest is the path
235   *path = remaining;
236 }
237 
CreateURI(StringPiece scheme,StringPiece host,StringPiece path)238 string CreateURI(StringPiece scheme, StringPiece host, StringPiece path) {
239   if (scheme.empty()) {
240     return string(path);
241   }
242   return strings::StrCat(scheme, "://", host, path);
243 }
244 
245 // Returns a unique number every time it is called.
UniqueId()246 int64 UniqueId() {
247   static mutex mu(LINKER_INITIALIZED);
248   static int64 id = 0;
249   mutex_lock l(mu);
250   return ++id;
251 }
252 
// Creates an empty, uniquely named temporary file and returns its path.
//
// The file is placed in the first existing directory among $TEST_TMPDIR,
// $TMPDIR, $TMP and "/tmp". Its name has the form
// "tmp_file_tensorflow_<id>_XXXXXX[.<extension>]", where the X's are filled in
// by mkstemp()/mkstemps(). The ".<extension>" suffix is appended only when
// `extension` is non-empty.
//
// Terminates the process via LOG(FATAL) when the platform is unsupported
// (PLATFORM_WINDOWS or __ANDROID__), when the file cannot be created in the
// chosen directory, or when none of the candidate directories exists.
string GetTempFilename(const string& extension) {
#if defined(PLATFORM_WINDOWS) || defined(__ANDROID__)
  LOG(FATAL) << "GetTempFilename is not implemented in this platform.";
#else
  // Candidates in priority order; unset variables yield null entries.
  const char* const candidate_dirs[] = {getenv("TEST_TMPDIR"), getenv("TMPDIR"),
                                        getenv("TMP"), "/tmp"};
  for (const char* dir : candidate_dirs) {
    if (!dir || !dir[0]) {
      continue;
    }
    struct stat statbuf;
    if (!stat(dir, &statbuf) && S_ISDIR(statbuf.st_mode)) {
      // UniqueId is added here because mkstemps is not as thread safe as it
      // looks. https://github.com/tensorflow/tensorflow/issues/5804 shows
      // the problem.
      string tmp_filepath;
      int fd;
      if (extension.length()) {
        tmp_filepath = io::JoinPath(
            dir, strings::StrCat("tmp_file_tensorflow_", UniqueId(), "_XXXXXX.",
                                 extension));
        // The suffix length covers the '.' plus the extension itself.
        fd = mkstemps(&tmp_filepath[0], extension.length() + 1);
      } else {
        tmp_filepath = io::JoinPath(
            dir,
            strings::StrCat("tmp_file_tensorflow_", UniqueId(), "_XXXXXX"));
        fd = mkstemp(&tmp_filepath[0]);
      }
      if (fd < 0) {
        // Capture errno before logging so the report names the real cause.
        const int create_errno = errno;
        LOG(FATAL) << "Failed to create temp file."
                   << " path: " << tmp_filepath << ", errno: " << create_errno;
      } else {
        // The file already exists on disk, so a failed close() is reported
        // instead of being silently dropped, but it is not treated as fatal.
        if (close(fd) != 0) {
          const int close_errno = errno;
          LOG(WARNING) << "Failed to close temp file " << tmp_filepath
                       << ", errno: " << close_errno;
        }
        return tmp_filepath;
      }
    }
  }
  LOG(FATAL) << "No temp directory found.";
#endif
}
291 
292 }  // namespace io
293 }  // namespace tensorflow
294