1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_
18 #define ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_
19 
20 #include <type_traits>
21 #include <assert.h>
22 #include <functional>
23 #include <vector>
24 #include <algorithm>
25 #include <numeric>
26 #include <memory>
27 
28 #include "android-base/strings.h"
29 
30 #include "cmdline_parse_result.h"
31 #include "cmdline_types.h"
32 #include "token_range.h"
33 #include "unit.h"
34 
35 namespace art {
36   // Implementation details for the parser. Do not look inside if you hate templates.
37   namespace detail {
38     // A non-templated base class for argument parsers. Used by the general parser
39     // to parse arguments, without needing to know the argument type at compile time.
40     //
41     // This is an application of the type erasure idiom.
42     struct CmdlineParseArgumentAny {
~CmdlineParseArgumentAnyCmdlineParseArgumentAny43       virtual ~CmdlineParseArgumentAny() {}
44 
45       // Attempt to parse this argument starting at arguments[position].
46       // If the parsing succeeds, the parsed value will be saved as a side-effect.
47       //
48       // In most situations, the parsing will not match by returning kUnknown. In this case,
49       // no tokens were consumed and the position variable will not be updated.
50       //
51       // At other times, parsing may fail due to validation but the initial token was still matched
52       // (for example an out of range value, or passing in a string where an int was expected).
53       // In this case the tokens are still consumed, and the position variable will get incremented
54       // by all the consumed tokens.
55       //
56       // The # of tokens consumed by the parse attempt will be set as an out-parameter into
57       // consumed_tokens. The parser should skip this many tokens before parsing the next
58       // argument.
59       virtual CmdlineResult ParseArgument(const TokenRange& arguments, size_t* consumed_tokens) = 0;
60       // How many tokens should be taken off argv for parsing this argument.
61       // For example "--help" is just 1, "-compiler-option _" would be 2 (since there's a space).
62       //
63       // A [min,max] range is returned to represent argument definitions with multiple
64       // value tokens. (e.g. {"-h", "-h " } would return [1,2]).
65       virtual std::pair<size_t, size_t> GetNumTokens() const = 0;
66       // Get the run-time typename of the argument type.
67       virtual const char* GetTypeName() const = 0;
68       // Try to do a close match, returning how many tokens were matched against this argument
69       // definition. More tokens is better.
70       //
71       // Do a quick match token-by-token, and see if they match.
72       // Any tokens with a wildcard in them are only matched up until the wildcard.
73       // If this is true, then the wildcard matching later on can still fail, so this is not
74       // a guarantee that the argument is correct, it's more of a strong hint that the
75       // user-provided input *probably* was trying to match this argument.
76       //
77       // Returns how many tokens were either matched (or ignored because there was a
78       // wildcard present). 0 means no match. If the Size() tokens are returned.
79       virtual size_t MaybeMatches(const TokenRange& tokens) = 0;
80     };
81 
82     template <typename T>
83     using EnableIfNumeric = std::enable_if<std::is_arithmetic<T>::value>;
84 
85     template <typename T>
86     using DisableIfNumeric = std::enable_if<!std::is_arithmetic<T>::value>;
87 
88     // Argument definition information, created by an ArgumentBuilder and an UntypedArgumentBuilder.
89     template <typename TArg>
90     struct CmdlineParserArgumentInfo {
91       // This version will only be used if TArg is arithmetic and thus has the <= operators.
92       template <typename T = TArg>  // Necessary to get SFINAE to kick in.
93       bool CheckRange(const TArg& value, typename EnableIfNumeric<T>::type* = 0) {
94         if (has_range_) {
95           return min_ <= value && value <= max_;
96         }
97         return true;
98       }
99 
100       // This version will be used at other times when TArg is not arithmetic.
101       template <typename T = TArg>
102       bool CheckRange(const TArg&, typename DisableIfNumeric<T>::type* = 0) {
103         assert(!has_range_);
104         return true;
105       }
106 
107       // Do a quick match token-by-token, and see if they match.
108       // Any tokens with a wildcard in them only match the prefix up until the wildcard.
109       //
110       // If this is true, then the wildcard matching later on can still fail, so this is not
111       // a guarantee that the argument is correct, it's more of a strong hint that the
112       // user-provided input *probably* was trying to match this argument.
MaybeMatchesCmdlineParserArgumentInfo113       size_t MaybeMatches(const TokenRange& token_list) const {
114         auto best_match = FindClosestMatch(token_list);
115 
116         return best_match.second;
117       }
118 
119       // Attempt to find the closest match (see MaybeMatches).
120       //
121       // Returns the token range that was the closest match and the # of tokens that
122       // this range was matched up until.
FindClosestMatchCmdlineParserArgumentInfo123       std::pair<const TokenRange*, size_t> FindClosestMatch(const TokenRange& token_list) const {
124         const TokenRange* best_match_ptr = nullptr;
125 
126         size_t best_match = 0;
127         for (auto&& token_range : tokenized_names_) {
128           size_t this_match = token_range.MaybeMatches(token_list, std::string("_"));
129 
130           if (this_match > best_match) {
131             best_match_ptr = &token_range;
132             best_match = this_match;
133           }
134         }
135 
136         return std::make_pair(best_match_ptr, best_match);
137       }
138 
139       // Mark the argument definition as completed, do not mutate the object anymore after this
140       // call is done.
141       //
142       // Performs several sanity checks and token calculations.
CompleteArgumentCmdlineParserArgumentInfo143       void CompleteArgument() {
144         assert(names_.size() >= 1);
145         assert(!is_completed_);
146 
147         is_completed_ = true;
148 
149         size_t blank_count = 0;
150         size_t token_count = 0;
151 
152         size_t global_blank_count = 0;
153         size_t global_token_count = 0;
154         for (auto&& name : names_) {
155           std::string s(name);
156 
157           size_t local_blank_count = std::count(s.begin(), s.end(), '_');
158           size_t local_token_count = std::count(s.begin(), s.end(), ' ');
159 
160           if (global_blank_count != 0) {
161             assert(local_blank_count == global_blank_count
162                    && "Every argument descriptor string must have same amount of blanks (_)");
163           }
164 
165           if (local_blank_count != 0) {
166             global_blank_count = local_blank_count;
167             blank_count++;
168 
169             assert(local_blank_count == 1 && "More than one blank is not supported");
170             assert(s.back() == '_' && "The blank character must only be at the end of the string");
171           }
172 
173           if (global_token_count != 0) {
174             assert(local_token_count == global_token_count
175                    && "Every argument descriptor string must have same amount of tokens (spaces)");
176           }
177 
178           if (local_token_count != 0) {
179             global_token_count = local_token_count;
180             token_count++;
181           }
182 
183           // Tokenize every name, turning it from a string to a token list.
184           tokenized_names_.clear();
185           for (auto&& name1 : names_) {
186             // Split along ' ' only, removing any duplicated spaces.
187             tokenized_names_.push_back(
188                 TokenRange::Split(name1, {' '}).RemoveToken(" "));
189           }
190 
191           // remove the _ character from each of the token ranges
192           // we will often end up with an empty token (i.e. ["-XX", "_"] -> ["-XX", ""]
193           // and this is OK because we still need an empty token to simplify
194           // range comparisons
195           simple_names_.clear();
196 
197           for (auto&& tokenized_name : tokenized_names_) {
198             simple_names_.push_back(tokenized_name.RemoveCharacter('_'));
199           }
200         }
201 
202         if (token_count != 0) {
203           assert(("Every argument descriptor string must have equal amount of tokens (spaces)" &&
204               token_count == names_.size()));
205         }
206 
207         if (blank_count != 0) {
208           assert(("Every argument descriptor string must have an equal amount of blanks (_)" &&
209               blank_count == names_.size()));
210         }
211 
212         using_blanks_ = blank_count > 0;
213         {
214           size_t smallest_name_token_range_size =
215               std::accumulate(tokenized_names_.begin(), tokenized_names_.end(), ~(0u),
216                               [](size_t min, const TokenRange& cur) {
217                                 return std::min(min, cur.Size());
218                               });
219           size_t largest_name_token_range_size =
220               std::accumulate(tokenized_names_.begin(), tokenized_names_.end(), 0u,
221                               [](size_t max, const TokenRange& cur) {
222                                 return std::max(max, cur.Size());
223                               });
224 
225           token_range_size_ = std::make_pair(smallest_name_token_range_size,
226                                              largest_name_token_range_size);
227         }
228 
229         if (has_value_list_) {
230           assert(names_.size() == value_list_.size()
231                  && "Number of arg descriptors must match number of values");
232           assert(!has_value_map_);
233         }
234         if (has_value_map_) {
235           if (!using_blanks_) {
236             assert(names_.size() == value_map_.size() &&
237                    "Since no blanks were specified, each arg is mapped directly into a mapped "
238                    "value without parsing; sizes must match");
239           }
240 
241           assert(!has_value_list_);
242         }
243 
244         if (!using_blanks_ && !CmdlineType<TArg>::kCanParseBlankless) {
245           assert((has_value_map_ || has_value_list_) &&
246                  "Arguments without a blank (_) must provide either a value map or a value list");
247         }
248 
249         TypedCheck();
250       }
251 
252       // List of aliases for a single argument definition, e.g. {"-Xdex2oat", "-Xnodex2oat"}.
253       std::vector<const char*> names_;
254       // Is there at least 1 wildcard '_' in the argument definition?
255       bool using_blanks_ = false;
256       // [min, max] token counts in each arg def
257       std::pair<size_t, size_t> token_range_size_;
258 
259       // contains all the names in a tokenized form, i.e. as a space-delimited list
260       std::vector<TokenRange> tokenized_names_;
261 
262       // contains the tokenized names, but with the _ character stripped
263       std::vector<TokenRange> simple_names_;
264 
265       // For argument definitions created with '.AppendValues()'
266       // Meaning that parsing should mutate the existing value in-place if possible.
267       bool appending_values_ = false;
268 
269       // For argument definitions created with '.WithRange(min, max)'
270       bool has_range_ = false;
271       TArg min_;
272       TArg max_;
273 
274       // For argument definitions created with '.WithValueMap'
275       bool has_value_map_ = false;
276       std::vector<std::pair<const char*, TArg>> value_map_;
277 
278       // For argument definitions created with '.WithValues'
279       bool has_value_list_ = false;
280       std::vector<TArg> value_list_;
281 
282       // Make sure there's a default constructor.
283       CmdlineParserArgumentInfo() = default;
284 
285       // Ensure there's a default move constructor.
286       CmdlineParserArgumentInfo(CmdlineParserArgumentInfo&&) = default;
287 
288      private:
289       // Perform type-specific checks at runtime.
290       template <typename T = TArg>
291       void TypedCheck(typename std::enable_if<std::is_same<Unit, T>::value>::type* = 0) {
292         assert(!using_blanks_ &&
293                "Blanks are not supported in Unit arguments; since a Unit has no parse-able value");
294       }
295 
TypedCheckCmdlineParserArgumentInfo296       void TypedCheck() {}
297 
298       bool is_completed_ = false;
299     };
300 
301     // A virtual-implementation of the necessary argument information in order to
302     // be able to parse arguments.
303     template <typename TArg>
304     struct CmdlineParseArgument : CmdlineParseArgumentAny {
CmdlineParseArgumentCmdlineParseArgument305       CmdlineParseArgument(CmdlineParserArgumentInfo<TArg>&& argument_info,
306                            std::function<void(TArg&)>&& save_argument,
307                            std::function<TArg&(void)>&& load_argument)
308           : argument_info_(std::forward<decltype(argument_info)>(argument_info)),
309             save_argument_(std::forward<decltype(save_argument)>(save_argument)),
310             load_argument_(std::forward<decltype(load_argument)>(load_argument)) {
311       }
312 
313       using UserTypeInfo = CmdlineType<TArg>;
314 
ParseArgumentCmdlineParseArgument315       virtual CmdlineResult ParseArgument(const TokenRange& arguments, size_t* consumed_tokens) {
316         assert(arguments.Size() > 0);
317         assert(consumed_tokens != nullptr);
318 
319         auto closest_match_res = argument_info_.FindClosestMatch(arguments);
320         size_t best_match_size = closest_match_res.second;
321         const TokenRange* best_match_arg_def = closest_match_res.first;
322 
323         if (best_match_size > arguments.Size()) {
324           // The best match has more tokens than were provided.
325           // Shouldn't happen in practice since the outer parser does this check.
326           return CmdlineResult(CmdlineResult::kUnknown, "Size mismatch");
327         }
328 
329         assert(best_match_arg_def != nullptr);
330         *consumed_tokens = best_match_arg_def->Size();
331 
332         if (!argument_info_.using_blanks_) {
333           return ParseArgumentSingle(arguments.Join(' '));
334         }
335 
336         // Extract out the blank value from arguments
337         // e.g. for a def of "foo:_" and input "foo:bar", blank_value == "bar"
338         std::string blank_value = "";
339         size_t idx = 0;
340         for (auto&& def_token : *best_match_arg_def) {
341           auto&& arg_token = arguments[idx];
342 
343           // Does this definition-token have a wildcard in it?
344           if (def_token.find('_') == std::string::npos) {
345             // No, regular token. Match 1:1 against the argument token.
346             bool token_match = def_token == arg_token;
347 
348             if (!token_match) {
349               return CmdlineResult(CmdlineResult::kFailure,
350                                    std::string("Failed to parse ") + best_match_arg_def->GetToken(0)
351                                    + " at token " + std::to_string(idx));
352             }
353           } else {
354             // This is a wild-carded token.
355             TokenRange def_split_wildcards = TokenRange::Split(def_token, {'_'});
356 
357             // Extract the wildcard contents out of the user-provided arg_token.
358             std::unique_ptr<TokenRange> arg_matches =
359                 def_split_wildcards.MatchSubstrings(arg_token, "_");
360             if (arg_matches == nullptr) {
361               return CmdlineResult(CmdlineResult::kFailure,
362                                    std::string("Failed to parse ") + best_match_arg_def->GetToken(0)
363                                    + ", with a wildcard pattern " + def_token
364                                    + " at token " + std::to_string(idx));
365             }
366 
367             // Get the corresponding wildcard tokens from arg_matches,
368             // and concatenate it to blank_value.
369             for (size_t sub_idx = 0;
370                 sub_idx < def_split_wildcards.Size() && sub_idx < arg_matches->Size(); ++sub_idx) {
371               if (def_split_wildcards[sub_idx] == "_") {
372                 blank_value += arg_matches->GetToken(sub_idx);
373               }
374             }
375           }
376 
377           ++idx;
378         }
379 
380         return ParseArgumentSingle(blank_value);
381       }
382 
383      private:
ParseArgumentSingleCmdlineParseArgument384       virtual CmdlineResult ParseArgumentSingle(const std::string& argument) {
385         // TODO: refactor to use LookupValue for the value lists/maps
386 
387         // Handle the 'WithValueMap(...)' argument definition
388         if (argument_info_.has_value_map_) {
389           for (auto&& value_pair : argument_info_.value_map_) {
390             const char* name = value_pair.first;
391 
392             if (argument == name) {
393               return SaveArgument(value_pair.second);
394             }
395           }
396 
397           // Error case: Fail, telling the user what the allowed values were.
398           std::vector<std::string> allowed_values;
399           for (auto&& value_pair : argument_info_.value_map_) {
400             const char* name = value_pair.first;
401             allowed_values.push_back(name);
402           }
403 
404           std::string allowed_values_flat = android::base::Join(allowed_values, ',');
405           return CmdlineResult(CmdlineResult::kFailure,
406                                "Argument value '" + argument + "' does not match any of known valid"
407                                 "values: {" + allowed_values_flat + "}");
408         }
409 
410         // Handle the 'WithValues(...)' argument definition
411         if (argument_info_.has_value_list_) {
412           size_t arg_def_idx = 0;
413           for (auto&& value : argument_info_.value_list_) {
414             auto&& arg_def_token = argument_info_.names_[arg_def_idx];
415 
416             if (arg_def_token == argument) {
417               return SaveArgument(value);
418             }
419             ++arg_def_idx;
420           }
421 
422           assert(arg_def_idx + 1 == argument_info_.value_list_.size() &&
423                  "Number of named argument definitions must match number of values defined");
424 
425           // Error case: Fail, telling the user what the allowed values were.
426           std::vector<std::string> allowed_values;
427           for (auto&& arg_name : argument_info_.names_) {
428             allowed_values.push_back(arg_name);
429           }
430 
431           std::string allowed_values_flat = android::base::Join(allowed_values, ',');
432           return CmdlineResult(CmdlineResult::kFailure,
433                                "Argument value '" + argument + "' does not match any of known valid"
434                                 "values: {" + allowed_values_flat + "}");
435         }
436 
437         // Handle the regular case where we parsed an unknown value from a blank.
438         UserTypeInfo type_parser;
439 
440         if (argument_info_.appending_values_) {
441           TArg& existing = load_argument_();
442           CmdlineParseResult<TArg> result = type_parser.ParseAndAppend(argument, existing);
443 
444           assert(!argument_info_.has_range_);
445 
446           return result;
447         }
448 
449         CmdlineParseResult<TArg> result = type_parser.Parse(argument);
450 
451         if (result.IsSuccess()) {
452           TArg& value = result.GetValue();
453 
454           // Do a range check for 'WithRange(min,max)' argument definition.
455           if (!argument_info_.CheckRange(value)) {
456             return CmdlineParseResult<TArg>::OutOfRange(
457                 value, argument_info_.min_, argument_info_.max_);
458           }
459 
460           return SaveArgument(value);
461         }
462 
463         // Some kind of type-specific parse error. Pass the result as-is.
464         CmdlineResult raw_result = std::move(result);
465         return raw_result;
466       }
467 
468      public:
GetTypeNameCmdlineParseArgument469       virtual const char* GetTypeName() const {
470         // TODO: Obviate the need for each type specialization to hardcode the type name
471         return UserTypeInfo::Name();
472       }
473 
474       // How many tokens should be taken off argv for parsing this argument.
475       // For example "--help" is just 1, "-compiler-option _" would be 2 (since there's a space).
476       //
477       // A [min,max] range is returned to represent argument definitions with multiple
478       // value tokens. (e.g. {"-h", "-h " } would return [1,2]).
GetNumTokensCmdlineParseArgument479       virtual std::pair<size_t, size_t> GetNumTokens() const {
480         return argument_info_.token_range_size_;
481       }
482 
483       // See if this token range might begin the same as the argument definition.
MaybeMatchesCmdlineParseArgument484       virtual size_t MaybeMatches(const TokenRange& tokens) {
485         return argument_info_.MaybeMatches(tokens);
486       }
487 
488      private:
SaveArgumentCmdlineParseArgument489       CmdlineResult SaveArgument(const TArg& value) {
490         assert(!argument_info_.appending_values_
491                && "If the values are being appended, then the updated parse value is "
492                    "updated by-ref as a side effect and shouldn't be stored directly");
493         TArg val = value;
494         save_argument_(val);
495         return CmdlineResult(CmdlineResult::kSuccess);
496       }
497 
498       CmdlineParserArgumentInfo<TArg> argument_info_;
499       std::function<void(TArg&)> save_argument_;
500       std::function<TArg&(void)> load_argument_;
501     };
502   }  // namespace detail  // NOLINT [readability/namespace] [5]
503 }  // namespace art
504 
505 #endif  // ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_
506