1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 // This module exports ParseFlagsFromEnvAndDieIfUnknown(), which allows other
// modules to parse flags from an environment variable, or a file named by the
18 // environment variable.
19 
20 #include "tensorflow/compiler/xla/parse_flags_from_env.h"
21 
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 
26 #include <memory>
27 #include <unordered_map>
28 #include <vector>
29 
30 #include "absl/strings/ascii.h"
31 #include "absl/strings/str_format.h"
32 #include "absl/strings/str_join.h"
33 #include "absl/types/span.h"
34 #include "tensorflow/compiler/xla/types.h"
35 #include "tensorflow/core/platform/logging.h"
36 #include "tensorflow/core/platform/macros.h"
37 #include "tensorflow/core/platform/mutex.h"
38 #include "tensorflow/core/platform/types.h"
39 #include "tensorflow/core/util/command_line_flags.h"
40 
41 namespace xla {
42 
// Characters treated as whitespace when splitting a flag string into flags and
// when skipping leading blanks in an environment variable's value.
static const char kWS[] = " \t\r\n";  // whitespace
44 
45 // The following struct represents an argv[]-style array, parsed
46 // from data gleaned from the environment.
47 //
48 // As usual, an anonymous namespace is advisable to avoid
49 // constructor/destructor collisions with other "private" types
50 // in the same named namespace.
51 namespace {
52 
// Deleter for std::unique_ptr that releases its pointer with `free`.  The
// strings saved by AppendToEnvArgv are created with strdup, so they have to be
// handed back to `free` rather than `delete`.
struct FreeDeleter {
  void operator()(char* c_str) {
    free(c_str);
  }
};
58 
59 struct EnvArgv {
EnvArgvxla::__anon845e2fc90111::EnvArgv60   EnvArgv() : initialized(false), argc(0) {}
61   bool initialized;         // whether the other fields have been set.
62   int argc;                 // elements used in argv[]
63   std::vector<char*> argv;  // flag arguments parsed from environment string.
64   // saved values from argv[] to avoid leaks
65   std::vector<std::unique_ptr<char, FreeDeleter>> argv_save;
66 };
67 }  // anonymous namespace
68 
69 // Append the string s0[0, .., s0len-1] concatenated with s1[0, .., s1len-1] as
70 // a newly allocated nul-terminated string to the array *a.  If s0==nullptr, a
71 // nullptr is appended without increasing a->argc.
AppendToEnvArgv(const char * s0,size_t s0len,const char * s1,size_t s1len,EnvArgv * a)72 static void AppendToEnvArgv(const char* s0, size_t s0len, const char* s1,
73                             size_t s1len, EnvArgv* a) {
74   if (s0 == nullptr) {
75     a->argv.push_back(nullptr);
76     a->argv_save.push_back(nullptr);
77   } else {
78     string s = string(s0, s0len) + string(s1, s1len);
79     char* str = strdup(s.c_str());
80     a->argv.push_back(str);
81     a->argv_save.emplace_back(str);
82     a->argc++;
83   }
84 }
85 
// Returns the index of the first character of s at or after pos that appears
// in x, exactly like s.find_first_of(x, pos), except that s.size() is returned
// where find_first_of() would return string::npos.  Callers can then treat the
// result as an end index without a special case.
static size_t FindFirstOf(const string& s, const char* x, size_t pos) {
  const size_t hit = s.find_first_of(x, pos);
  if (hit == string::npos) {
    return s.size();
  }
  return hit;
}
92 
// Returns the index of the first character of s at or after pos that does not
// appear in x, exactly like s.find_first_not_of(x, pos), except that s.size()
// is returned where find_first_not_of() would return string::npos.  Callers
// can then treat the result as an end index without a special case.
static size_t FindFirstNotOf(const string& s, const char* x, size_t pos) {
  const size_t hit = s.find_first_not_of(x, pos);
  if (hit == string::npos) {
    return s.size();
  }
  return hit;
}
100 
101 // Given a string containing flags, parse them into the XLA command line flags.
102 // The parse is best effort, and gives up on the first syntax error.
ParseArgvFromString(const string & flag_str,EnvArgv * a)103 static void ParseArgvFromString(const string& flag_str, EnvArgv* a) {
104   size_t b = FindFirstNotOf(flag_str, kWS, 0);
105   while (b != flag_str.size() && flag_str[b] == '-') {
106     // b is the index of the start of a flag.
107     // Set e to the index just past the end of the flag.
108     size_t e = b;
109     while (e != flag_str.size() && isascii(flag_str[e]) &&
110            (strchr("-_", flag_str[e]) != nullptr ||
111             absl::ascii_isalnum(flag_str[e]))) {
112       e++;
113     }
114     if (e != flag_str.size() && flag_str[e] == '=' &&
115         e + 1 != flag_str.size() && strchr("'\"", flag_str[e + 1]) != nullptr) {
116       // A flag of the form  --flag="something in double or single quotes"
117       int c;
118       e++;  // point just past '='
119       size_t eflag = e;
120       char quote = flag_str[e];
121       e++;  // point just past quote
122       // Put in value the string with quotes removed.
123       string value;
124       for (; e != flag_str.size() && (c = flag_str[e]) != quote; e++) {
125         if (quote == '"' && c == '\\' && e + 1 != flag_str.size()) {
126           // Handle backslash in double quoted strings.  They are literal in
127           // single-quoted strings.
128           e++;
129           c = flag_str[e];
130         }
131         value += c;
132       }
133       if (e != flag_str.size()) {  // skip final " or '
134         e++;
135       }
136       AppendToEnvArgv(flag_str.data() + b, eflag - b, value.data(),
137                       value.size(), a);
138     } else {  // A flag without a quoted value.
139       e = FindFirstOf(flag_str, kWS, e);
140       AppendToEnvArgv(flag_str.data() + b, e - b, "", 0, a);
141     }
142     b = FindFirstNotOf(flag_str, kWS, e);
143   }
144 }
145 
146 // Call ParseArgvFromString(..., a) on a string derived from the setting of the
147 // environment variable `envvar`, or a file it points to.
SetArgvFromEnv(absl::string_view envvar,EnvArgv * a)148 static void SetArgvFromEnv(absl::string_view envvar, EnvArgv* a) {
149   if (!a->initialized) {
150     static const char kDummyArgv[] = "<argv[0]>";
151     AppendToEnvArgv(kDummyArgv, strlen(kDummyArgv), nullptr, 0,
152                     a);  // dummy argv[0]
153     const char* env = getenv(string(envvar).c_str());
154     if (env == nullptr || env[0] == '\0') {
155       // nothing
156     } else if (env[strspn(env, kWS)] == '-') {  // flags in env var value
157       ParseArgvFromString(env, a);
158     } else {  // assume it's a file name
159       FILE* fp = fopen(env, "r");
160       if (fp != nullptr) {
161         string str;
162         char buf[512];
163         int n;
164         while ((n = fread(buf, 1, sizeof(buf), fp)) > 0) {
165           str.append(buf, n);
166         }
167         fclose(fp);
168         ParseArgvFromString(str, a);
169       } else {
170         LOG(QFATAL)
171             << "Could not open file \"" << env
172             << "\" to read flags for environment variable \"" << envvar
173             << "\".  (We assumed \"" << env
174             << "\" was a file name because it did not start with a \"--\".)";
175       }
176     }
177     AppendToEnvArgv(nullptr, 0, nullptr, 0, a);  // add trailing nullptr to *a.
178     a->initialized = true;
179   }
180 }
181 
182 // The simulated argv[] parsed from the environment, one for each different
183 // environment variable we've seen.
EnvArgvs()184 static std::unordered_map<string, EnvArgv>& EnvArgvs() {
185   static auto* env_argvs = new std::unordered_map<string, EnvArgv>();
186   return *env_argvs;
187 }
188 
// Used to protect accesses to the map returned by EnvArgvs() and to the
// EnvArgv entries stored in it.  ParseFlagsFromEnvAndDieIfUnknown() and
// ResetFlagsFromEnvForTesting() both hold it for their whole body.
static tensorflow::mutex env_argv_mu(tensorflow::LINKER_INITIALIZED);
191 
ParseFlagsFromEnvAndDieIfUnknown(absl::string_view envvar,const std::vector<tensorflow::Flag> & flag_list)192 bool ParseFlagsFromEnvAndDieIfUnknown(
193     absl::string_view envvar, const std::vector<tensorflow::Flag>& flag_list) {
194   tensorflow::mutex_lock lock(env_argv_mu);
195   auto* env_argv = &EnvArgvs()[string(envvar)];
196   SetArgvFromEnv(envvar, env_argv);  // a no-op if already initialized
197 
198   if (VLOG_IS_ON(1)) {
199     VLOG(1) << "For env var " << envvar << " found arguments:";
200     for (int i = 0; i < env_argv->argc; i++) {
201       VLOG(1) << "  argv[" << i << "] = " << env_argv->argv[i];
202     }
203   }
204 
205   bool result =
206       tensorflow::Flags::Parse(&env_argv->argc, &env_argv->argv[0], flag_list);
207 
208   // There's always at least one unparsed argc, namely the fake argv[0].
209   if (result && env_argv->argc != 1) {
210     // Skip the first argv, which is the fake argv[0].
211     auto unknown_flags = absl::MakeSpan(env_argv->argv);
212     unknown_flags.remove_prefix(1);
213 
214     // Some flags are set on XLA_FLAGS, others on TF_XLA_FLAGS.  If we find an
215     // unrecognized flag, suggest the alternative.
216     string alternate_envvar;
217     if (envvar == "TF_XLA_FLAGS") {
218       alternate_envvar = "XLA_FLAGS";
219     } else if (envvar == "XLA_FLAGS") {
220       alternate_envvar = "TF_XLA_FLAGS";
221     }
222     string did_you_mean;
223     if (!alternate_envvar.empty()) {
224       did_you_mean = absl::StrFormat(
225           "\nPerhaps you meant to specify these on the %s envvar?",
226           alternate_envvar);
227     }
228 
229     LOG(QFATAL) << "Unknown flag" << (unknown_flags.size() > 1 ? "s" : "")
230                 << " in " << envvar << ": " << absl::StrJoin(unknown_flags, " ")
231                 << did_you_mean;
232     return false;
233   }
234   return result;
235 }
236 
237 // Testing only.
238 //
239 // Resets the env_argv struct so that subsequent calls to
240 // ParseFlagsFromEnvAndDieIfUnknown() will parse the environment variable (or
241 // the file it points to) anew, and set *pargc, and *pargv to point to the
242 // internal locations of the argc and argv constructed from the environment.
ResetFlagsFromEnvForTesting(absl::string_view envvar,int ** pargc,std::vector<char * > ** pargv)243 void ResetFlagsFromEnvForTesting(absl::string_view envvar, int** pargc,
244                                  std::vector<char*>** pargv) {
245   tensorflow::mutex_lock lock(env_argv_mu);
246   EnvArgvs().erase(string(envvar));
247   auto& env_argv = EnvArgvs()[string(envvar)];
248   *pargc = &env_argv.argc;
249   *pargv = &env_argv.argv;
250 }
251 
252 }  // namespace xla
253