1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_
18 #define ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_
19 
#include <assert.h>

#include <algorithm>
#include <functional>
#include <limits>
#include <memory>
#include <numeric>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
27 
28 #include "cmdline/cmdline_parse_result.h"
29 #include "cmdline/token_range.h"
30 #include "cmdline/unit.h"
31 #include "cmdline/cmdline_types.h"
32 
33 namespace art {
34   // Implementation details for the parser. Do not look inside if you hate templates.
35   namespace detail {
36     // A non-templated base class for argument parsers. Used by the general parser
37     // to parse arguments, without needing to know the argument type at compile time.
38     //
39     // This is an application of the type erasure idiom.
40     struct CmdlineParseArgumentAny {
~CmdlineParseArgumentAnyCmdlineParseArgumentAny41       virtual ~CmdlineParseArgumentAny() {}
42 
43       // Attempt to parse this argument starting at arguments[position].
44       // If the parsing succeeds, the parsed value will be saved as a side-effect.
45       //
46       // In most situations, the parsing will not match by returning kUnknown. In this case,
47       // no tokens were consumed and the position variable will not be updated.
48       //
49       // At other times, parsing may fail due to validation but the initial token was still matched
50       // (for example an out of range value, or passing in a string where an int was expected).
51       // In this case the tokens are still consumed, and the position variable will get incremented
52       // by all the consumed tokens.
53       //
54       // The # of tokens consumed by the parse attempt will be set as an out-parameter into
55       // consumed_tokens. The parser should skip this many tokens before parsing the next
56       // argument.
57       virtual CmdlineResult ParseArgument(const TokenRange& arguments, size_t* consumed_tokens) = 0;
58       // How many tokens should be taken off argv for parsing this argument.
59       // For example "--help" is just 1, "-compiler-option _" would be 2 (since there's a space).
60       //
61       // A [min,max] range is returned to represent argument definitions with multiple
62       // value tokens. (e.g. {"-h", "-h " } would return [1,2]).
63       virtual std::pair<size_t, size_t> GetNumTokens() const = 0;
64       // Get the run-time typename of the argument type.
65       virtual const char* GetTypeName() const = 0;
66       // Try to do a close match, returning how many tokens were matched against this argument
67       // definition. More tokens is better.
68       //
69       // Do a quick match token-by-token, and see if they match.
70       // Any tokens with a wildcard in them are only matched up until the wildcard.
71       // If this is true, then the wildcard matching later on can still fail, so this is not
72       // a guarantee that the argument is correct, it's more of a strong hint that the
73       // user-provided input *probably* was trying to match this argument.
74       //
75       // Returns how many tokens were either matched (or ignored because there was a
76       // wildcard present). 0 means no match. If the Size() tokens are returned.
77       virtual size_t MaybeMatches(const TokenRange& tokens) = 0;
78     };
79 
80     template <typename T>
81     using EnableIfNumeric = std::enable_if<std::is_arithmetic<T>::value>;
82 
83     template <typename T>
84     using DisableIfNumeric = std::enable_if<!std::is_arithmetic<T>::value>;
85 
86     // Argument definition information, created by an ArgumentBuilder and an UntypedArgumentBuilder.
87     template <typename TArg>
88     struct CmdlineParserArgumentInfo {
89       // This version will only be used if TArg is arithmetic and thus has the <= operators.
90       template <typename T = TArg>  // Necessary to get SFINAE to kick in.
91       bool CheckRange(const TArg& value, typename EnableIfNumeric<T>::type* = 0) {
92         if (has_range_) {
93           return min_ <= value && value <= max_;
94         }
95         return true;
96       }
97 
98       // This version will be used at other times when TArg is not arithmetic.
99       template <typename T = TArg>
100       bool CheckRange(const TArg&, typename DisableIfNumeric<T>::type* = 0) {
101         assert(!has_range_);
102         return true;
103       }
104 
105       // Do a quick match token-by-token, and see if they match.
106       // Any tokens with a wildcard in them only match the prefix up until the wildcard.
107       //
108       // If this is true, then the wildcard matching later on can still fail, so this is not
109       // a guarantee that the argument is correct, it's more of a strong hint that the
110       // user-provided input *probably* was trying to match this argument.
MaybeMatchesCmdlineParserArgumentInfo111       size_t MaybeMatches(TokenRange token_list) const {
112         auto best_match = FindClosestMatch(token_list);
113 
114         return best_match.second;
115       }
116 
117       // Attempt to find the closest match (see MaybeMatches).
118       //
119       // Returns the token range that was the closest match and the # of tokens that
120       // this range was matched up until.
FindClosestMatchCmdlineParserArgumentInfo121       std::pair<const TokenRange*, size_t> FindClosestMatch(TokenRange token_list) const {
122         const TokenRange* best_match_ptr = nullptr;
123 
124         size_t best_match = 0;
125         for (auto&& token_range : tokenized_names_) {
126           size_t this_match = token_range.MaybeMatches(token_list, std::string("_"));
127 
128           if (this_match > best_match) {
129             best_match_ptr = &token_range;
130             best_match = this_match;
131           }
132         }
133 
134         return std::make_pair(best_match_ptr, best_match);
135       }
136 
137       // Mark the argument definition as completed, do not mutate the object anymore after this
138       // call is done.
139       //
140       // Performs several sanity checks and token calculations.
CompleteArgumentCmdlineParserArgumentInfo141       void CompleteArgument() {
142         assert(names_.size() >= 1);
143         assert(!is_completed_);
144 
145         is_completed_ = true;
146 
147         size_t blank_count = 0;
148         size_t token_count = 0;
149 
150         size_t global_blank_count = 0;
151         size_t global_token_count = 0;
152         for (auto&& name : names_) {
153           std::string s(name);
154 
155           size_t local_blank_count = std::count(s.begin(), s.end(), '_');
156           size_t local_token_count = std::count(s.begin(), s.end(), ' ');
157 
158           if (global_blank_count != 0) {
159             assert(local_blank_count == global_blank_count
160                    && "Every argument descriptor string must have same amount of blanks (_)");
161           }
162 
163           if (local_blank_count != 0) {
164             global_blank_count = local_blank_count;
165             blank_count++;
166 
167             assert(local_blank_count == 1 && "More than one blank is not supported");
168             assert(s.back() == '_' && "The blank character must only be at the end of the string");
169           }
170 
171           if (global_token_count != 0) {
172             assert(local_token_count == global_token_count
173                    && "Every argument descriptor string must have same amount of tokens (spaces)");
174           }
175 
176           if (local_token_count != 0) {
177             global_token_count = local_token_count;
178             token_count++;
179           }
180 
181           // Tokenize every name, turning it from a string to a token list.
182           tokenized_names_.clear();
183           for (auto&& name1 : names_) {
184             // Split along ' ' only, removing any duplicated spaces.
185             tokenized_names_.push_back(
186                 TokenRange::Split(name1, {' '}).RemoveToken(" "));
187           }
188 
189           // remove the _ character from each of the token ranges
190           // we will often end up with an empty token (i.e. ["-XX", "_"] -> ["-XX", ""]
191           // and this is OK because we still need an empty token to simplify
192           // range comparisons
193           simple_names_.clear();
194 
195           for (auto&& tokenized_name : tokenized_names_) {
196             simple_names_.push_back(tokenized_name.RemoveCharacter('_'));
197           }
198         }
199 
200         if (token_count != 0) {
201           assert(("Every argument descriptor string must have equal amount of tokens (spaces)" &&
202               token_count == names_.size()));
203         }
204 
205         if (blank_count != 0) {
206           assert(("Every argument descriptor string must have an equal amount of blanks (_)" &&
207               blank_count == names_.size()));
208         }
209 
210         using_blanks_ = blank_count > 0;
211         {
212           size_t smallest_name_token_range_size =
213               std::accumulate(tokenized_names_.begin(), tokenized_names_.end(), ~(0u),
214                               [](size_t min, const TokenRange& cur) {
215                                 return std::min(min, cur.Size());
216                               });
217           size_t largest_name_token_range_size =
218               std::accumulate(tokenized_names_.begin(), tokenized_names_.end(), 0u,
219                               [](size_t max, const TokenRange& cur) {
220                                 return std::max(max, cur.Size());
221                               });
222 
223           token_range_size_ = std::make_pair(smallest_name_token_range_size,
224                                              largest_name_token_range_size);
225         }
226 
227         if (has_value_list_) {
228           assert(names_.size() == value_list_.size()
229                  && "Number of arg descriptors must match number of values");
230           assert(!has_value_map_);
231         }
232         if (has_value_map_) {
233           if (!using_blanks_) {
234             assert(names_.size() == value_map_.size() &&
235                    "Since no blanks were specified, each arg is mapped directly into a mapped "
236                    "value without parsing; sizes must match");
237           }
238 
239           assert(!has_value_list_);
240         }
241 
242         if (!using_blanks_ && !CmdlineType<TArg>::kCanParseBlankless) {
243           assert((has_value_map_ || has_value_list_) &&
244                  "Arguments without a blank (_) must provide either a value map or a value list");
245         }
246 
247         TypedCheck();
248       }
249 
250       // List of aliases for a single argument definition, e.g. {"-Xdex2oat", "-Xnodex2oat"}.
251       std::vector<const char*> names_;
252       // Is there at least 1 wildcard '_' in the argument definition?
253       bool using_blanks_ = false;
254       // [min, max] token counts in each arg def
255       std::pair<size_t, size_t> token_range_size_;
256 
257       // contains all the names in a tokenized form, i.e. as a space-delimited list
258       std::vector<TokenRange> tokenized_names_;
259 
260       // contains the tokenized names, but with the _ character stripped
261       std::vector<TokenRange> simple_names_;
262 
263       // For argument definitions created with '.AppendValues()'
264       // Meaning that parsing should mutate the existing value in-place if possible.
265       bool appending_values_ = false;
266 
267       // For argument definitions created with '.WithRange(min, max)'
268       bool has_range_ = false;
269       TArg min_;
270       TArg max_;
271 
272       // For argument definitions created with '.WithValueMap'
273       bool has_value_map_ = false;
274       std::vector<std::pair<const char*, TArg>> value_map_;
275 
276       // For argument definitions created with '.WithValues'
277       bool has_value_list_ = false;
278       std::vector<TArg> value_list_;
279 
280       // Make sure there's a default constructor.
281       CmdlineParserArgumentInfo() = default;
282 
283       // Ensure there's a default move constructor.
284       CmdlineParserArgumentInfo(CmdlineParserArgumentInfo&&) = default;
285 
286      private:
287       // Perform type-specific checks at runtime.
288       template <typename T = TArg>
289       void TypedCheck(typename std::enable_if<std::is_same<Unit, T>::value>::type* = 0) {
290         assert(!using_blanks_ &&
291                "Blanks are not supported in Unit arguments; since a Unit has no parse-able value");
292       }
293 
TypedCheckCmdlineParserArgumentInfo294       void TypedCheck() {}
295 
296       bool is_completed_ = false;
297     };
298 
299     // A virtual-implementation of the necessary argument information in order to
300     // be able to parse arguments.
301     template <typename TArg>
302     struct CmdlineParseArgument : CmdlineParseArgumentAny {
CmdlineParseArgumentCmdlineParseArgument303       explicit CmdlineParseArgument(CmdlineParserArgumentInfo<TArg>&& argument_info,
304                                     std::function<void(TArg&)>&& save_argument,
305                                     std::function<TArg&(void)>&& load_argument)
306           : argument_info_(std::forward<decltype(argument_info)>(argument_info)),
307             save_argument_(std::forward<decltype(save_argument)>(save_argument)),
308             load_argument_(std::forward<decltype(load_argument)>(load_argument)) {
309       }
310 
311       using UserTypeInfo = CmdlineType<TArg>;
312 
ParseArgumentCmdlineParseArgument313       virtual CmdlineResult ParseArgument(const TokenRange& arguments, size_t* consumed_tokens) {
314         assert(arguments.Size() > 0);
315         assert(consumed_tokens != nullptr);
316 
317         auto closest_match_res = argument_info_.FindClosestMatch(arguments);
318         size_t best_match_size = closest_match_res.second;
319         const TokenRange* best_match_arg_def = closest_match_res.first;
320 
321         if (best_match_size > arguments.Size()) {
322           // The best match has more tokens than were provided.
323           // Shouldn't happen in practice since the outer parser does this check.
324           return CmdlineResult(CmdlineResult::kUnknown, "Size mismatch");
325         }
326 
327         assert(best_match_arg_def != nullptr);
328         *consumed_tokens = best_match_arg_def->Size();
329 
330         if (!argument_info_.using_blanks_) {
331           return ParseArgumentSingle(arguments.Join(' '));
332         }
333 
334         // Extract out the blank value from arguments
335         // e.g. for a def of "foo:_" and input "foo:bar", blank_value == "bar"
336         std::string blank_value = "";
337         size_t idx = 0;
338         for (auto&& def_token : *best_match_arg_def) {
339           auto&& arg_token = arguments[idx];
340 
341           // Does this definition-token have a wildcard in it?
342           if (def_token.find('_') == std::string::npos) {
343             // No, regular token. Match 1:1 against the argument token.
344             bool token_match = def_token == arg_token;
345 
346             if (!token_match) {
347               return CmdlineResult(CmdlineResult::kFailure,
348                                    std::string("Failed to parse ") + best_match_arg_def->GetToken(0)
349                                    + " at token " + std::to_string(idx));
350             }
351           } else {
352             // This is a wild-carded token.
353             TokenRange def_split_wildcards = TokenRange::Split(def_token, {'_'});
354 
355             // Extract the wildcard contents out of the user-provided arg_token.
356             std::unique_ptr<TokenRange> arg_matches =
357                 def_split_wildcards.MatchSubstrings(arg_token, "_");
358             if (arg_matches == nullptr) {
359               return CmdlineResult(CmdlineResult::kFailure,
360                                    std::string("Failed to parse ") + best_match_arg_def->GetToken(0)
361                                    + ", with a wildcard pattern " + def_token
362                                    + " at token " + std::to_string(idx));
363             }
364 
365             // Get the corresponding wildcard tokens from arg_matches,
366             // and concatenate it to blank_value.
367             for (size_t sub_idx = 0;
368                 sub_idx < def_split_wildcards.Size() && sub_idx < arg_matches->Size(); ++sub_idx) {
369               if (def_split_wildcards[sub_idx] == "_") {
370                 blank_value += arg_matches->GetToken(sub_idx);
371               }
372             }
373           }
374 
375           ++idx;
376         }
377 
378         return ParseArgumentSingle(blank_value);
379       }
380 
381      private:
ParseArgumentSingleCmdlineParseArgument382       virtual CmdlineResult ParseArgumentSingle(const std::string& argument) {
383         // TODO: refactor to use LookupValue for the value lists/maps
384 
385         // Handle the 'WithValueMap(...)' argument definition
386         if (argument_info_.has_value_map_) {
387           for (auto&& value_pair : argument_info_.value_map_) {
388             const char* name = value_pair.first;
389 
390             if (argument == name) {
391               return SaveArgument(value_pair.second);
392             }
393           }
394 
395           // Error case: Fail, telling the user what the allowed values were.
396           std::vector<std::string> allowed_values;
397           for (auto&& value_pair : argument_info_.value_map_) {
398             const char* name = value_pair.first;
399             allowed_values.push_back(name);
400           }
401 
402           std::string allowed_values_flat = Join(allowed_values, ',');
403           return CmdlineResult(CmdlineResult::kFailure,
404                                "Argument value '" + argument + "' does not match any of known valid"
405                                 "values: {" + allowed_values_flat + "}");
406         }
407 
408         // Handle the 'WithValues(...)' argument definition
409         if (argument_info_.has_value_list_) {
410           size_t arg_def_idx = 0;
411           for (auto&& value : argument_info_.value_list_) {
412             auto&& arg_def_token = argument_info_.names_[arg_def_idx];
413 
414             if (arg_def_token == argument) {
415               return SaveArgument(value);
416             }
417             ++arg_def_idx;
418           }
419 
420           assert(arg_def_idx + 1 == argument_info_.value_list_.size() &&
421                  "Number of named argument definitions must match number of values defined");
422 
423           // Error case: Fail, telling the user what the allowed values were.
424           std::vector<std::string> allowed_values;
425           for (auto&& arg_name : argument_info_.names_) {
426             allowed_values.push_back(arg_name);
427           }
428 
429           std::string allowed_values_flat = Join(allowed_values, ',');
430           return CmdlineResult(CmdlineResult::kFailure,
431                                "Argument value '" + argument + "' does not match any of known valid"
432                                 "values: {" + allowed_values_flat + "}");
433         }
434 
435         // Handle the regular case where we parsed an unknown value from a blank.
436         UserTypeInfo type_parser;
437 
438         if (argument_info_.appending_values_) {
439           TArg& existing = load_argument_();
440           CmdlineParseResult<TArg> result = type_parser.ParseAndAppend(argument, existing);
441 
442           assert(!argument_info_.has_range_);
443 
444           return result;
445         }
446 
447         CmdlineParseResult<TArg> result = type_parser.Parse(argument);
448 
449         if (result.IsSuccess()) {
450           TArg& value = result.GetValue();
451 
452           // Do a range check for 'WithRange(min,max)' argument definition.
453           if (!argument_info_.CheckRange(value)) {
454             return CmdlineParseResult<TArg>::OutOfRange(
455                 value, argument_info_.min_, argument_info_.max_);
456           }
457 
458           return SaveArgument(value);
459         }
460 
461         // Some kind of type-specific parse error. Pass the result as-is.
462         CmdlineResult raw_result = std::move(result);
463         return raw_result;
464       }
465 
466      public:
GetTypeNameCmdlineParseArgument467       virtual const char* GetTypeName() const {
468         // TODO: Obviate the need for each type specialization to hardcode the type name
469         return UserTypeInfo::Name();
470       }
471 
472       // How many tokens should be taken off argv for parsing this argument.
473       // For example "--help" is just 1, "-compiler-option _" would be 2 (since there's a space).
474       //
475       // A [min,max] range is returned to represent argument definitions with multiple
476       // value tokens. (e.g. {"-h", "-h " } would return [1,2]).
GetNumTokensCmdlineParseArgument477       virtual std::pair<size_t, size_t> GetNumTokens() const {
478         return argument_info_.token_range_size_;
479       }
480 
481       // See if this token range might begin the same as the argument definition.
MaybeMatchesCmdlineParseArgument482       virtual size_t MaybeMatches(const TokenRange& tokens) {
483         return argument_info_.MaybeMatches(tokens);
484       }
485 
486      private:
SaveArgumentCmdlineParseArgument487       CmdlineResult SaveArgument(const TArg& value) {
488         assert(!argument_info_.appending_values_
489                && "If the values are being appended, then the updated parse value is "
490                    "updated by-ref as a side effect and shouldn't be stored directly");
491         TArg val = value;
492         save_argument_(val);
493         return CmdlineResult(CmdlineResult::kSuccess);
494       }
495 
496       CmdlineParserArgumentInfo<TArg> argument_info_;
497       std::function<void(TArg&)> save_argument_;
498       std::function<TArg&(void)> load_argument_;
499     };
500   } // namespace detail // NOLINT [readability/namespace] [5] [whitespace/comments] [2]
501 }  // namespace art
502 
503 #endif  // ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_
504