1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_
18 #define ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_
19 
#include <assert.h>
#include <algorithm>
#include <functional>
#include <limits>
#include <memory>
#include <numeric>
#include <optional>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <vector>

#include "android-base/strings.h"

#include "base/indenter.h"
#include "cmdline_parse_result.h"
#include "cmdline_types.h"
#include "token_range.h"
#include "unit.h"
36 
37 namespace art {
38 // Implementation details for the parser. Do not look inside if you hate templates.
39 namespace detail {
40 
41 // A non-templated base class for argument parsers. Used by the general parser
42 // to parse arguments, without needing to know the argument type at compile time.
43 //
44 // This is an application of the type erasure idiom.
45 struct CmdlineParseArgumentAny {
~CmdlineParseArgumentAnyCmdlineParseArgumentAny46   virtual ~CmdlineParseArgumentAny() {}
47 
48   // Attempt to parse this argument starting at arguments[position].
49   // If the parsing succeeds, the parsed value will be saved as a side-effect.
50   //
51   // In most situations, the parsing will not match by returning kUnknown. In this case,
52   // no tokens were consumed and the position variable will not be updated.
53   //
54   // At other times, parsing may fail due to validation but the initial token was still matched
55   // (for example an out of range value, or passing in a string where an int was expected).
56   // In this case the tokens are still consumed, and the position variable will get incremented
57   // by all the consumed tokens.
58   //
59   // The # of tokens consumed by the parse attempt will be set as an out-parameter into
60   // consumed_tokens. The parser should skip this many tokens before parsing the next
61   // argument.
62   virtual CmdlineResult ParseArgument(const TokenRange& arguments, size_t* consumed_tokens) = 0;
63   // How many tokens should be taken off argv for parsing this argument.
64   // For example "--help" is just 1, "-compiler-option _" would be 2 (since there's a space).
65   //
66   // A [min,max] range is returned to represent argument definitions with multiple
67   // value tokens. (e.g. {"-h", "-h " } would return [1,2]).
68   virtual std::pair<size_t, size_t> GetNumTokens() const = 0;
69   // Get the run-time typename of the argument type.
70   virtual const char* GetTypeName() const = 0;
71   // Try to do a close match, returning how many tokens were matched against this argument
72   // definition. More tokens is better.
73   //
74   // Do a quick match token-by-token, and see if they match.
75   // Any tokens with a wildcard in them are only matched up until the wildcard.
76   // If this is true, then the wildcard matching later on can still fail, so this is not
77   // a guarantee that the argument is correct, it's more of a strong hint that the
78   // user-provided input *probably* was trying to match this argument.
79   //
80   // Returns how many tokens were either matched (or ignored because there was a
81   // wildcard present). 0 means no match. If the Size() tokens are returned.
82   virtual size_t MaybeMatches(const TokenRange& tokens) = 0;
83 
84   virtual void DumpHelp(VariableIndentationOutputStream& os) = 0;
85 
86   virtual const std::optional<const char*>& GetCategory() = 0;
87 };
88 
// std::enable_if keyed on "T is arithmetic": `::type` exists (and is void)
// only for arithmetic T.
template <typename T>
using EnableIfNumeric = std::enable_if<std::is_arithmetic_v<T>>;

// The complement of EnableIfNumeric: `::type` exists (and is void) only when
// T is not arithmetic.
template <typename T>
using DisableIfNumeric = std::enable_if<!std::is_arithmetic_v<T>>;
94 
95 // Argument definition information, created by an ArgumentBuilder and an UntypedArgumentBuilder.
96 template <typename TArg>
97 struct CmdlineParserArgumentInfo {
98   // This version will only be used if TArg is arithmetic and thus has the <= operators.
99   template <typename T = TArg>  // Necessary to get SFINAE to kick in.
100   bool CheckRange(const TArg& value, typename EnableIfNumeric<T>::type* = nullptr) {
101     if (has_range_) {
102       return min_ <= value && value <= max_;
103     }
104     return true;
105   }
106 
107   // This version will be used at other times when TArg is not arithmetic.
108   template <typename T = TArg>
109   bool CheckRange(const TArg&, typename DisableIfNumeric<T>::type* = nullptr) {
110     assert(!has_range_);
111     return true;
112   }
113 
114   // Do a quick match token-by-token, and see if they match.
115   // Any tokens with a wildcard in them only match the prefix up until the wildcard.
116   //
117   // If this is true, then the wildcard matching later on can still fail, so this is not
118   // a guarantee that the argument is correct, it's more of a strong hint that the
119   // user-provided input *probably* was trying to match this argument.
MaybeMatchesCmdlineParserArgumentInfo120   size_t MaybeMatches(const TokenRange& token_list) const {
121     auto best_match = FindClosestMatch(token_list);
122 
123     return best_match.second;
124   }
125 
126   // Attempt to find the closest match (see MaybeMatches).
127   //
128   // Returns the token range that was the closest match and the # of tokens that
129   // this range was matched up until.
FindClosestMatchCmdlineParserArgumentInfo130   std::pair<const TokenRange*, size_t> FindClosestMatch(const TokenRange& token_list) const {
131     const TokenRange* best_match_ptr = nullptr;
132 
133     size_t best_match = 0;
134     for (auto&& token_range : tokenized_names_) {
135       size_t this_match = token_range.MaybeMatches(token_list, std::string("_"));
136 
137       if (this_match > best_match) {
138         best_match_ptr = &token_range;
139         best_match = this_match;
140       }
141     }
142 
143     return std::make_pair(best_match_ptr, best_match);
144   }
145 
146   template <typename T = TArg>  // Necessary to get SFINAE to kick in.
DumpHelpCmdlineParserArgumentInfo147   void DumpHelp(VariableIndentationOutputStream& vios) {
148     // Separate arguments
149     vios.Stream() << std::endl;
150     for (auto cname : names_) {
151       std::string_view name = cname;
152       auto& os = vios.Stream();
153       std::function<void()> print_once;
154       if (using_blanks_) {
155         std::string_view nblank = name.substr(0, name.find("_"));
156         print_once = [&]() {
157           os << nblank;
158           if (has_value_map_) {
159             bool first = true;
160             for (auto [val, unused] : value_map_) {
161               os << (first ? "{" : "|") << val;
162               first = false;
163             }
164             os << "}";
165           } else if (metavar_) {
166             os << metavar_.value();
167           } else {
168             os << "{" << CmdlineType<T>::DescribeType() << "}";
169           }
170         };
171       } else {
172         print_once = [&]() {
173           os << name;
174         };
175       }
176       print_once();
177       if (appending_values_) {
178         os << " [";
179         print_once();
180         os << "...]";
181       }
182       os << std::endl;
183     }
184     if (help_) {
185       ScopedIndentation si(&vios);
186       vios.Stream() << help_.value() << std::endl;
187     }
188   }
189 
190 
191   // Mark the argument definition as completed, do not mutate the object anymore after this
192   // call is done.
193   //
194   // Performs several checks of the validity and token calculations.
CompleteArgumentCmdlineParserArgumentInfo195   void CompleteArgument() {
196     assert(names_.size() >= 1);
197     assert(!is_completed_);
198 
199     is_completed_ = true;
200 
201     size_t blank_count = 0;
202     size_t token_count = 0;
203 
204     size_t global_blank_count = 0;
205     size_t global_token_count = 0;
206     for (auto&& name : names_) {
207       std::string s(name);
208 
209       size_t local_blank_count = std::count(s.begin(), s.end(), '_');
210       size_t local_token_count = std::count(s.begin(), s.end(), ' ');
211 
212       if (global_blank_count != 0) {
213         assert(local_blank_count == global_blank_count
214                && "Every argument descriptor string must have same amount of blanks (_)");
215       }
216 
217       if (local_blank_count != 0) {
218         global_blank_count = local_blank_count;
219         blank_count++;
220 
221         assert(local_blank_count == 1 && "More than one blank is not supported");
222         assert(s.back() == '_' && "The blank character must only be at the end of the string");
223       }
224 
225       if (global_token_count != 0) {
226         assert(local_token_count == global_token_count
227                && "Every argument descriptor string must have same amount of tokens (spaces)");
228       }
229 
230       if (local_token_count != 0) {
231         global_token_count = local_token_count;
232         token_count++;
233       }
234 
235       // Tokenize every name, turning it from a string to a token list.
236       tokenized_names_.clear();
237       for (auto&& name1 : names_) {
238         // Split along ' ' only, removing any duplicated spaces.
239         tokenized_names_.push_back(
240             TokenRange::Split(name1, {' '}).RemoveToken(" "));
241       }
242 
243       // remove the _ character from each of the token ranges
244       // we will often end up with an empty token (i.e. ["-XX", "_"] -> ["-XX", ""]
245       // and this is OK because we still need an empty token to simplify
246       // range comparisons
247       simple_names_.clear();
248 
249       for (auto&& tokenized_name : tokenized_names_) {
250         simple_names_.push_back(tokenized_name.RemoveCharacter('_'));
251       }
252     }
253 
254     if (token_count != 0) {
255       assert(("Every argument descriptor string must have equal amount of tokens (spaces)" &&
256           token_count == names_.size()));
257     }
258 
259     if (blank_count != 0) {
260       assert(("Every argument descriptor string must have an equal amount of blanks (_)" &&
261           blank_count == names_.size()));
262     }
263 
264     using_blanks_ = blank_count > 0;
265     {
266       size_t smallest_name_token_range_size =
267           std::accumulate(tokenized_names_.begin(), tokenized_names_.end(), ~(0u),
268                           [](size_t min, const TokenRange& cur) {
269                             return std::min(min, cur.Size());
270                           });
271       size_t largest_name_token_range_size =
272           std::accumulate(tokenized_names_.begin(), tokenized_names_.end(), 0u,
273                           [](size_t max, const TokenRange& cur) {
274                             return std::max(max, cur.Size());
275                           });
276 
277       token_range_size_ = std::make_pair(smallest_name_token_range_size,
278                                          largest_name_token_range_size);
279     }
280 
281     if (has_value_list_) {
282       assert(names_.size() == value_list_.size()
283              && "Number of arg descriptors must match number of values");
284       assert(!has_value_map_);
285     }
286     if (has_value_map_) {
287       if (!using_blanks_) {
288         assert(names_.size() == value_map_.size() &&
289                "Since no blanks were specified, each arg is mapped directly into a mapped "
290                "value without parsing; sizes must match");
291       }
292 
293       assert(!has_value_list_);
294     }
295 
296     if (!using_blanks_ && !CmdlineType<TArg>::kCanParseBlankless) {
297       assert((has_value_map_ || has_value_list_) &&
298              "Arguments without a blank (_) must provide either a value map or a value list");
299     }
300 
301     TypedCheck();
302   }
303 
  // List of aliases for a single argument definition, e.g. {"-Xdex2oat", "-Xnodex2oat"}.
  // Must contain at least one entry by the time CompleteArgument() is called.
  std::vector<const char*> names_;
  // Is there at least 1 wildcard '_' in the argument definition?
  // Computed by CompleteArgument().
  bool using_blanks_ = false;
  // [min, max] token counts in each arg def.
  // Computed by CompleteArgument() from tokenized_names_.
  std::pair<size_t, size_t> token_range_size_;

  // Contains all the names in a tokenized form, i.e. as a space-delimited list.
  // Filled in by CompleteArgument(); FindClosestMatch() searches this list.
  std::vector<TokenRange> tokenized_names_;

  // Contains the tokenized names, but with the _ character stripped.
  // Filled in by CompleteArgument().
  std::vector<TokenRange> simple_names_;

  // For argument definitions created with '.AppendValues()'
  // Meaning that parsing should mutate the existing value in-place if possible.
  bool appending_values_ = false;

  // For argument definitions created with '.WithRange(min, max)'
  // min_ and max_ are only read by CheckRange() when has_range_ is true; the
  // range is inclusive on both ends.
  bool has_range_ = false;
  TArg min_;
  TArg max_;

  // For argument definitions created with '.WithValueMap'
  // Each entry pairs the textual form of a value with the value itself.
  bool has_value_map_ = false;
  std::vector<std::pair<const char*, TArg>> value_map_;

  // For argument definitions created with '.WithValues'
  // CompleteArgument() asserts that there is exactly one value per name.
  bool has_value_list_ = false;
  std::vector<TArg> value_list_;

  // Optional help text; DumpHelp() prints it indented below the names.
  std::optional<const char*> help_;
  // Optional category of the argument.
  std::optional<const char*> category_;
  // Optional placeholder that DumpHelp() prints for the value of a blank when
  // there is no value map.
  std::optional<const char*> metavar_;

  // Make sure there's a default constructor.
  CmdlineParserArgumentInfo() = default;

  // Ensure there's a default move constructor.
  // Note: declaring it suppresses the implicitly generated copy operations.
  CmdlineParserArgumentInfo(CmdlineParserArgumentInfo&&) = default;
343 
344  private:
345   // Perform type-specific checks at runtime.
346   template <typename T = TArg>
347   void TypedCheck(typename std::enable_if<std::is_same<Unit, T>::value>::type* = 0) {
348     assert(!using_blanks_ &&
349            "Blanks are not supported in Unit arguments; since a Unit has no parse-able value");
350   }
351 
TypedCheckCmdlineParserArgumentInfo352   void TypedCheck() {}
353 
354   bool is_completed_ = false;
355 };
356 
357 // A virtual-implementation of the necessary argument information in order to
358 // be able to parse arguments.
359 template <typename TArg>
360 struct CmdlineParseArgument : CmdlineParseArgumentAny {
CmdlineParseArgumentCmdlineParseArgument361   CmdlineParseArgument(CmdlineParserArgumentInfo<TArg>&& argument_info,
362                        std::function<void(TArg&)>&& save_argument,
363                        std::function<TArg&(void)>&& load_argument)
364       : argument_info_(std::forward<decltype(argument_info)>(argument_info)),
365         save_argument_(std::forward<decltype(save_argument)>(save_argument)),
366         load_argument_(std::forward<decltype(load_argument)>(load_argument)) {
367   }
368 
369   using UserTypeInfo = CmdlineType<TArg>;
370 
ParseArgumentCmdlineParseArgument371   virtual CmdlineResult ParseArgument(const TokenRange& arguments, size_t* consumed_tokens) {
372     assert(arguments.Size() > 0);
373     assert(consumed_tokens != nullptr);
374 
375     auto closest_match_res = argument_info_.FindClosestMatch(arguments);
376     size_t best_match_size = closest_match_res.second;
377     const TokenRange* best_match_arg_def = closest_match_res.first;
378 
379     if (best_match_size > arguments.Size()) {
380       // The best match has more tokens than were provided.
381       // Shouldn't happen in practice since the outer parser does this check.
382       return CmdlineResult(CmdlineResult::kUnknown, "Size mismatch");
383     }
384 
385     assert(best_match_arg_def != nullptr);
386     *consumed_tokens = best_match_arg_def->Size();
387 
388     if (!argument_info_.using_blanks_) {
389       return ParseArgumentSingle(arguments.Join(' '));
390     }
391 
392     // Extract out the blank value from arguments
393     // e.g. for a def of "foo:_" and input "foo:bar", blank_value == "bar"
394     std::string blank_value = "";
395     size_t idx = 0;
396     for (auto&& def_token : *best_match_arg_def) {
397       auto&& arg_token = arguments[idx];
398 
399       // Does this definition-token have a wildcard in it?
400       if (def_token.find('_') == std::string::npos) {
401         // No, regular token. Match 1:1 against the argument token.
402         bool token_match = def_token == arg_token;
403 
404         if (!token_match) {
405           return CmdlineResult(CmdlineResult::kFailure,
406                                std::string("Failed to parse ") + best_match_arg_def->GetToken(0)
407                                + " at token " + std::to_string(idx));
408         }
409       } else {
410         // This is a wild-carded token.
411         TokenRange def_split_wildcards = TokenRange::Split(def_token, {'_'});
412 
413         // Extract the wildcard contents out of the user-provided arg_token.
414         std::unique_ptr<TokenRange> arg_matches =
415             def_split_wildcards.MatchSubstrings(arg_token, "_");
416         if (arg_matches == nullptr) {
417           return CmdlineResult(CmdlineResult::kFailure,
418                                std::string("Failed to parse ") + best_match_arg_def->GetToken(0)
419                                + ", with a wildcard pattern " + def_token
420                                + " at token " + std::to_string(idx));
421         }
422 
423         // Get the corresponding wildcard tokens from arg_matches,
424         // and concatenate it to blank_value.
425         for (size_t sub_idx = 0;
426             sub_idx < def_split_wildcards.Size() && sub_idx < arg_matches->Size(); ++sub_idx) {
427           if (def_split_wildcards[sub_idx] == "_") {
428             blank_value += arg_matches->GetToken(sub_idx);
429           }
430         }
431       }
432 
433       ++idx;
434     }
435 
436     return ParseArgumentSingle(blank_value);
437   }
438 
DumpHelpCmdlineParseArgument439   virtual void DumpHelp(VariableIndentationOutputStream& os) {
440     argument_info_.DumpHelp(os);
441   }
442 
GetCategoryCmdlineParseArgument443   virtual const std::optional<const char*>& GetCategory() {
444     return argument_info_.category_;
445   }
446 
447  private:
ParseArgumentSingleCmdlineParseArgument448   virtual CmdlineResult ParseArgumentSingle(const std::string& argument) {
449     // TODO: refactor to use LookupValue for the value lists/maps
450 
451     // Handle the 'WithValueMap(...)' argument definition
452     if (argument_info_.has_value_map_) {
453       for (auto&& value_pair : argument_info_.value_map_) {
454         const char* name = value_pair.first;
455 
456         if (argument == name) {
457           return SaveArgument(value_pair.second);
458         }
459       }
460 
461       // Error case: Fail, telling the user what the allowed values were.
462       std::vector<std::string> allowed_values;
463       for (auto&& value_pair : argument_info_.value_map_) {
464         const char* name = value_pair.first;
465         allowed_values.push_back(name);
466       }
467 
468       std::string allowed_values_flat = android::base::Join(allowed_values, ',');
469       return CmdlineResult(CmdlineResult::kFailure,
470                            "Argument value '" + argument + "' does not match any of known valid "
471                             "values: {" + allowed_values_flat + "}");
472     }
473 
474     // Handle the 'WithValues(...)' argument definition
475     if (argument_info_.has_value_list_) {
476       size_t arg_def_idx = 0;
477       for (auto&& value : argument_info_.value_list_) {
478         auto&& arg_def_token = argument_info_.names_[arg_def_idx];
479 
480         if (arg_def_token == argument) {
481           return SaveArgument(value);
482         }
483         ++arg_def_idx;
484       }
485 
486       assert(arg_def_idx + 1 == argument_info_.value_list_.size() &&
487              "Number of named argument definitions must match number of values defined");
488 
489       // Error case: Fail, telling the user what the allowed values were.
490       std::vector<std::string> allowed_values;
491       for (auto&& arg_name : argument_info_.names_) {
492         allowed_values.push_back(arg_name);
493       }
494 
495       std::string allowed_values_flat = android::base::Join(allowed_values, ',');
496       return CmdlineResult(CmdlineResult::kFailure,
497                            "Argument value '" + argument + "' does not match any of known valid"
498                             "values: {" + allowed_values_flat + "}");
499     }
500 
501     // Handle the regular case where we parsed an unknown value from a blank.
502     UserTypeInfo type_parser;
503 
504     if (argument_info_.appending_values_) {
505       TArg& existing = load_argument_();
506       CmdlineParseResult<TArg> result = type_parser.ParseAndAppend(argument, existing);
507 
508       assert(!argument_info_.has_range_);
509 
510       return std::move(result);
511     }
512 
513     CmdlineParseResult<TArg> result = type_parser.Parse(argument);
514 
515     if (result.IsSuccess()) {
516       TArg& value = result.GetValue();
517 
518       // Do a range check for 'WithRange(min,max)' argument definition.
519       if (!argument_info_.CheckRange(value)) {
520         return CmdlineParseResult<TArg>::OutOfRange(
521             value, argument_info_.min_, argument_info_.max_);
522       }
523 
524       return SaveArgument(value);
525     }
526 
527     // Some kind of type-specific parse error. Pass the result as-is.
528     CmdlineResult raw_result = std::move(result);
529     return raw_result;
530   }
531 
532  public:
GetTypeNameCmdlineParseArgument533   virtual const char* GetTypeName() const {
534     // TODO: Obviate the need for each type specialization to hardcode the type name
535     return UserTypeInfo::Name();
536   }
537 
538   // How many tokens should be taken off argv for parsing this argument.
539   // For example "--help" is just 1, "-compiler-option _" would be 2 (since there's a space).
540   //
541   // A [min,max] range is returned to represent argument definitions with multiple
542   // value tokens. (e.g. {"-h", "-h " } would return [1,2]).
GetNumTokensCmdlineParseArgument543   virtual std::pair<size_t, size_t> GetNumTokens() const {
544     return argument_info_.token_range_size_;
545   }
546 
547   // See if this token range might begin the same as the argument definition.
MaybeMatchesCmdlineParseArgument548   virtual size_t MaybeMatches(const TokenRange& tokens) {
549     return argument_info_.MaybeMatches(tokens);
550   }
551 
552  private:
SaveArgumentCmdlineParseArgument553   CmdlineResult SaveArgument(const TArg& value) {
554     assert(!argument_info_.appending_values_
555            && "If the values are being appended, then the updated parse value is "
556                "updated by-ref as a side effect and shouldn't be stored directly");
557     TArg val = value;
558     save_argument_(val);
559     return CmdlineResult(CmdlineResult::kSuccess);
560   }
561 
  // The definition of this argument (names, value map/list, range, help, ...).
  CmdlineParserArgumentInfo<TArg> argument_info_;
  // Callback that receives a successfully parsed value (see SaveArgument).
  std::function<void(TArg&)> save_argument_;
  // Callback that returns a reference to the existing value; used when parsed
  // values are appended to it in place.
  std::function<TArg&(void)> load_argument_;
565 };
566 }  // namespace detail  // NOLINT [readability/namespace] [5]
567 }  // namespace art
568 
569 #endif  // ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_
570