1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_ 18 #define ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_ 19 20 #include <assert.h> 21 #include <algorithm> 22 #include <functional> 23 #include <memory> 24 #include <numeric> 25 #include <string_view> 26 #include <type_traits> 27 #include <vector> 28 29 #include "android-base/strings.h" 30 31 #include "base/indenter.h" 32 #include "cmdline_parse_result.h" 33 #include "cmdline_types.h" 34 #include "token_range.h" 35 #include "unit.h" 36 37 namespace art { 38 // Implementation details for the parser. Do not look inside if you hate templates. 39 namespace detail { 40 41 // A non-templated base class for argument parsers. Used by the general parser 42 // to parse arguments, without needing to know the argument type at compile time. 43 // 44 // This is an application of the type erasure idiom. 45 struct CmdlineParseArgumentAny { ~CmdlineParseArgumentAnyCmdlineParseArgumentAny46 virtual ~CmdlineParseArgumentAny() {} 47 48 // Attempt to parse this argument starting at arguments[position]. 49 // If the parsing succeeds, the parsed value will be saved as a side-effect. 50 // 51 // In most situations, the parsing will not match by returning kUnknown. In this case, 52 // no tokens were consumed and the position variable will not be updated. 53 // 54 // At other times, parsing may fail due to validation but the initial token was still matched 55 // (for example an out of range value, or passing in a string where an int was expected). 56 // In this case the tokens are still consumed, and the position variable will get incremented 57 // by all the consumed tokens. 58 // 59 // The # of tokens consumed by the parse attempt will be set as an out-parameter into 60 // consumed_tokens. The parser should skip this many tokens before parsing the next 61 // argument. 62 virtual CmdlineResult ParseArgument(const TokenRange& arguments, size_t* consumed_tokens) = 0; 63 // How many tokens should be taken off argv for parsing this argument. 64 // For example "--help" is just 1, "-compiler-option _" would be 2 (since there's a space). 65 // 66 // A [min,max] range is returned to represent argument definitions with multiple 67 // value tokens. (e.g. {"-h", "-h " } would return [1,2]). 68 virtual std::pair<size_t, size_t> GetNumTokens() const = 0; 69 // Get the run-time typename of the argument type. 70 virtual const char* GetTypeName() const = 0; 71 // Try to do a close match, returning how many tokens were matched against this argument 72 // definition. More tokens is better. 73 // 74 // Do a quick match token-by-token, and see if they match. 75 // Any tokens with a wildcard in them are only matched up until the wildcard. 76 // If this is true, then the wildcard matching later on can still fail, so this is not 77 // a guarantee that the argument is correct, it's more of a strong hint that the 78 // user-provided input *probably* was trying to match this argument. 79 // 80 // Returns how many tokens were either matched (or ignored because there was a 81 // wildcard present). 0 means no match. If the Size() tokens are returned. 82 virtual size_t MaybeMatches(const TokenRange& tokens) = 0; 83 84 virtual void DumpHelp(VariableIndentationOutputStream& os) = 0; 85 86 virtual const std::optional<const char*>& GetCategory() = 0; 87 }; 88 89 template <typename T> 90 using EnableIfNumeric = std::enable_if<std::is_arithmetic<T>::value>; 91 92 template <typename T> 93 using DisableIfNumeric = std::enable_if<!std::is_arithmetic<T>::value>; 94 95 // Argument definition information, created by an ArgumentBuilder and an UntypedArgumentBuilder. 96 template <typename TArg> 97 struct CmdlineParserArgumentInfo { 98 // This version will only be used if TArg is arithmetic and thus has the <= operators. 99 template <typename T = TArg> // Necessary to get SFINAE to kick in. 100 bool CheckRange(const TArg& value, typename EnableIfNumeric<T>::type* = nullptr) { 101 if (has_range_) { 102 return min_ <= value && value <= max_; 103 } 104 return true; 105 } 106 107 // This version will be used at other times when TArg is not arithmetic. 108 template <typename T = TArg> 109 bool CheckRange(const TArg&, typename DisableIfNumeric<T>::type* = nullptr) { 110 assert(!has_range_); 111 return true; 112 } 113 114 // Do a quick match token-by-token, and see if they match. 115 // Any tokens with a wildcard in them only match the prefix up until the wildcard. 116 // 117 // If this is true, then the wildcard matching later on can still fail, so this is not 118 // a guarantee that the argument is correct, it's more of a strong hint that the 119 // user-provided input *probably* was trying to match this argument. MaybeMatchesCmdlineParserArgumentInfo120 size_t MaybeMatches(const TokenRange& token_list) const { 121 auto best_match = FindClosestMatch(token_list); 122 123 return best_match.second; 124 } 125 126 // Attempt to find the closest match (see MaybeMatches). 127 // 128 // Returns the token range that was the closest match and the # of tokens that 129 // this range was matched up until. FindClosestMatchCmdlineParserArgumentInfo130 std::pair<const TokenRange*, size_t> FindClosestMatch(const TokenRange& token_list) const { 131 const TokenRange* best_match_ptr = nullptr; 132 133 size_t best_match = 0; 134 for (auto&& token_range : tokenized_names_) { 135 size_t this_match = token_range.MaybeMatches(token_list, std::string("_")); 136 137 if (this_match > best_match) { 138 best_match_ptr = &token_range; 139 best_match = this_match; 140 } 141 } 142 143 return std::make_pair(best_match_ptr, best_match); 144 } 145 146 template <typename T = TArg> // Necessary to get SFINAE to kick in. DumpHelpCmdlineParserArgumentInfo147 void DumpHelp(VariableIndentationOutputStream& vios) { 148 // Separate arguments 149 vios.Stream() << std::endl; 150 for (auto cname : names_) { 151 std::string_view name = cname; 152 auto& os = vios.Stream(); 153 std::function<void()> print_once; 154 if (using_blanks_) { 155 std::string_view nblank = name.substr(0, name.find("_")); 156 print_once = [&]() { 157 os << nblank; 158 if (has_value_map_) { 159 bool first = true; 160 for (auto [val, unused] : value_map_) { 161 os << (first ? "{" : "|") << val; 162 first = false; 163 } 164 os << "}"; 165 } else if (metavar_) { 166 os << metavar_.value(); 167 } else { 168 os << "{" << CmdlineType<T>::DescribeType() << "}"; 169 } 170 }; 171 } else { 172 print_once = [&]() { 173 os << name; 174 }; 175 } 176 print_once(); 177 if (appending_values_) { 178 os << " ["; 179 print_once(); 180 os << "...]"; 181 } 182 os << std::endl; 183 } 184 if (help_) { 185 ScopedIndentation si(&vios); 186 vios.Stream() << help_.value() << std::endl; 187 } 188 } 189 190 191 // Mark the argument definition as completed, do not mutate the object anymore after this 192 // call is done. 193 // 194 // Performs several checks of the validity and token calculations. CompleteArgumentCmdlineParserArgumentInfo195 void CompleteArgument() { 196 assert(names_.size() >= 1); 197 assert(!is_completed_); 198 199 is_completed_ = true; 200 201 size_t blank_count = 0; 202 size_t token_count = 0; 203 204 size_t global_blank_count = 0; 205 size_t global_token_count = 0; 206 for (auto&& name : names_) { 207 std::string s(name); 208 209 size_t local_blank_count = std::count(s.begin(), s.end(), '_'); 210 size_t local_token_count = std::count(s.begin(), s.end(), ' '); 211 212 if (global_blank_count != 0) { 213 assert(local_blank_count == global_blank_count 214 && "Every argument descriptor string must have same amount of blanks (_)"); 215 } 216 217 if (local_blank_count != 0) { 218 global_blank_count = local_blank_count; 219 blank_count++; 220 221 assert(local_blank_count == 1 && "More than one blank is not supported"); 222 assert(s.back() == '_' && "The blank character must only be at the end of the string"); 223 } 224 225 if (global_token_count != 0) { 226 assert(local_token_count == global_token_count 227 && "Every argument descriptor string must have same amount of tokens (spaces)"); 228 } 229 230 if (local_token_count != 0) { 231 global_token_count = local_token_count; 232 token_count++; 233 } 234 235 // Tokenize every name, turning it from a string to a token list. 236 tokenized_names_.clear(); 237 for (auto&& name1 : names_) { 238 // Split along ' ' only, removing any duplicated spaces. 239 tokenized_names_.push_back( 240 TokenRange::Split(name1, {' '}).RemoveToken(" ")); 241 } 242 243 // remove the _ character from each of the token ranges 244 // we will often end up with an empty token (i.e. ["-XX", "_"] -> ["-XX", ""] 245 // and this is OK because we still need an empty token to simplify 246 // range comparisons 247 simple_names_.clear(); 248 249 for (auto&& tokenized_name : tokenized_names_) { 250 simple_names_.push_back(tokenized_name.RemoveCharacter('_')); 251 } 252 } 253 254 if (token_count != 0) { 255 assert(("Every argument descriptor string must have equal amount of tokens (spaces)" && 256 token_count == names_.size())); 257 } 258 259 if (blank_count != 0) { 260 assert(("Every argument descriptor string must have an equal amount of blanks (_)" && 261 blank_count == names_.size())); 262 } 263 264 using_blanks_ = blank_count > 0; 265 { 266 size_t smallest_name_token_range_size = 267 std::accumulate(tokenized_names_.begin(), tokenized_names_.end(), ~(0u), 268 [](size_t min, const TokenRange& cur) { 269 return std::min(min, cur.Size()); 270 }); 271 size_t largest_name_token_range_size = 272 std::accumulate(tokenized_names_.begin(), tokenized_names_.end(), 0u, 273 [](size_t max, const TokenRange& cur) { 274 return std::max(max, cur.Size()); 275 }); 276 277 token_range_size_ = std::make_pair(smallest_name_token_range_size, 278 largest_name_token_range_size); 279 } 280 281 if (has_value_list_) { 282 assert(names_.size() == value_list_.size() 283 && "Number of arg descriptors must match number of values"); 284 assert(!has_value_map_); 285 } 286 if (has_value_map_) { 287 if (!using_blanks_) { 288 assert(names_.size() == value_map_.size() && 289 "Since no blanks were specified, each arg is mapped directly into a mapped " 290 "value without parsing; sizes must match"); 291 } 292 293 assert(!has_value_list_); 294 } 295 296 if (!using_blanks_ && !CmdlineType<TArg>::kCanParseBlankless) { 297 assert((has_value_map_ || has_value_list_) && 298 "Arguments without a blank (_) must provide either a value map or a value list"); 299 } 300 301 TypedCheck(); 302 } 303 304 // List of aliases for a single argument definition, e.g. {"-Xdex2oat", "-Xnodex2oat"}. 305 std::vector<const char*> names_; 306 // Is there at least 1 wildcard '_' in the argument definition? 307 bool using_blanks_ = false; 308 // [min, max] token counts in each arg def 309 std::pair<size_t, size_t> token_range_size_; 310 311 // contains all the names in a tokenized form, i.e. as a space-delimited list 312 std::vector<TokenRange> tokenized_names_; 313 314 // contains the tokenized names, but with the _ character stripped 315 std::vector<TokenRange> simple_names_; 316 317 // For argument definitions created with '.AppendValues()' 318 // Meaning that parsing should mutate the existing value in-place if possible. 319 bool appending_values_ = false; 320 321 // For argument definitions created with '.WithRange(min, max)' 322 bool has_range_ = false; 323 TArg min_; 324 TArg max_; 325 326 // For argument definitions created with '.WithValueMap' 327 bool has_value_map_ = false; 328 std::vector<std::pair<const char*, TArg>> value_map_; 329 330 // For argument definitions created with '.WithValues' 331 bool has_value_list_ = false; 332 std::vector<TArg> value_list_; 333 334 std::optional<const char*> help_; 335 std::optional<const char*> category_; 336 std::optional<const char*> metavar_; 337 338 // Make sure there's a default constructor. 339 CmdlineParserArgumentInfo() = default; 340 341 // Ensure there's a default move constructor. 342 CmdlineParserArgumentInfo(CmdlineParserArgumentInfo&&) = default; 343 344 private: 345 // Perform type-specific checks at runtime. 346 template <typename T = TArg> 347 void TypedCheck(typename std::enable_if<std::is_same<Unit, T>::value>::type* = 0) { 348 assert(!using_blanks_ && 349 "Blanks are not supported in Unit arguments; since a Unit has no parse-able value"); 350 } 351 TypedCheckCmdlineParserArgumentInfo352 void TypedCheck() {} 353 354 bool is_completed_ = false; 355 }; 356 357 // A virtual-implementation of the necessary argument information in order to 358 // be able to parse arguments. 359 template <typename TArg> 360 struct CmdlineParseArgument : CmdlineParseArgumentAny { CmdlineParseArgumentCmdlineParseArgument361 CmdlineParseArgument(CmdlineParserArgumentInfo<TArg>&& argument_info, 362 std::function<void(TArg&)>&& save_argument, 363 std::function<TArg&(void)>&& load_argument) 364 : argument_info_(std::forward<decltype(argument_info)>(argument_info)), 365 save_argument_(std::forward<decltype(save_argument)>(save_argument)), 366 load_argument_(std::forward<decltype(load_argument)>(load_argument)) { 367 } 368 369 using UserTypeInfo = CmdlineType<TArg>; 370 ParseArgumentCmdlineParseArgument371 virtual CmdlineResult ParseArgument(const TokenRange& arguments, size_t* consumed_tokens) { 372 assert(arguments.Size() > 0); 373 assert(consumed_tokens != nullptr); 374 375 auto closest_match_res = argument_info_.FindClosestMatch(arguments); 376 size_t best_match_size = closest_match_res.second; 377 const TokenRange* best_match_arg_def = closest_match_res.first; 378 379 if (best_match_size > arguments.Size()) { 380 // The best match has more tokens than were provided. 381 // Shouldn't happen in practice since the outer parser does this check. 382 return CmdlineResult(CmdlineResult::kUnknown, "Size mismatch"); 383 } 384 385 assert(best_match_arg_def != nullptr); 386 *consumed_tokens = best_match_arg_def->Size(); 387 388 if (!argument_info_.using_blanks_) { 389 return ParseArgumentSingle(arguments.Join(' ')); 390 } 391 392 // Extract out the blank value from arguments 393 // e.g. for a def of "foo:_" and input "foo:bar", blank_value == "bar" 394 std::string blank_value = ""; 395 size_t idx = 0; 396 for (auto&& def_token : *best_match_arg_def) { 397 auto&& arg_token = arguments[idx]; 398 399 // Does this definition-token have a wildcard in it? 400 if (def_token.find('_') == std::string::npos) { 401 // No, regular token. Match 1:1 against the argument token. 402 bool token_match = def_token == arg_token; 403 404 if (!token_match) { 405 return CmdlineResult(CmdlineResult::kFailure, 406 std::string("Failed to parse ") + best_match_arg_def->GetToken(0) 407 + " at token " + std::to_string(idx)); 408 } 409 } else { 410 // This is a wild-carded token. 411 TokenRange def_split_wildcards = TokenRange::Split(def_token, {'_'}); 412 413 // Extract the wildcard contents out of the user-provided arg_token. 414 std::unique_ptr<TokenRange> arg_matches = 415 def_split_wildcards.MatchSubstrings(arg_token, "_"); 416 if (arg_matches == nullptr) { 417 return CmdlineResult(CmdlineResult::kFailure, 418 std::string("Failed to parse ") + best_match_arg_def->GetToken(0) 419 + ", with a wildcard pattern " + def_token 420 + " at token " + std::to_string(idx)); 421 } 422 423 // Get the corresponding wildcard tokens from arg_matches, 424 // and concatenate it to blank_value. 425 for (size_t sub_idx = 0; 426 sub_idx < def_split_wildcards.Size() && sub_idx < arg_matches->Size(); ++sub_idx) { 427 if (def_split_wildcards[sub_idx] == "_") { 428 blank_value += arg_matches->GetToken(sub_idx); 429 } 430 } 431 } 432 433 ++idx; 434 } 435 436 return ParseArgumentSingle(blank_value); 437 } 438 DumpHelpCmdlineParseArgument439 virtual void DumpHelp(VariableIndentationOutputStream& os) { 440 argument_info_.DumpHelp(os); 441 } 442 GetCategoryCmdlineParseArgument443 virtual const std::optional<const char*>& GetCategory() { 444 return argument_info_.category_; 445 } 446 447 private: ParseArgumentSingleCmdlineParseArgument448 virtual CmdlineResult ParseArgumentSingle(const std::string& argument) { 449 // TODO: refactor to use LookupValue for the value lists/maps 450 451 // Handle the 'WithValueMap(...)' argument definition 452 if (argument_info_.has_value_map_) { 453 for (auto&& value_pair : argument_info_.value_map_) { 454 const char* name = value_pair.first; 455 456 if (argument == name) { 457 return SaveArgument(value_pair.second); 458 } 459 } 460 461 // Error case: Fail, telling the user what the allowed values were. 462 std::vector<std::string> allowed_values; 463 for (auto&& value_pair : argument_info_.value_map_) { 464 const char* name = value_pair.first; 465 allowed_values.push_back(name); 466 } 467 468 std::string allowed_values_flat = android::base::Join(allowed_values, ','); 469 return CmdlineResult(CmdlineResult::kFailure, 470 "Argument value '" + argument + "' does not match any of known valid " 471 "values: {" + allowed_values_flat + "}"); 472 } 473 474 // Handle the 'WithValues(...)' argument definition 475 if (argument_info_.has_value_list_) { 476 size_t arg_def_idx = 0; 477 for (auto&& value : argument_info_.value_list_) { 478 auto&& arg_def_token = argument_info_.names_[arg_def_idx]; 479 480 if (arg_def_token == argument) { 481 return SaveArgument(value); 482 } 483 ++arg_def_idx; 484 } 485 486 assert(arg_def_idx + 1 == argument_info_.value_list_.size() && 487 "Number of named argument definitions must match number of values defined"); 488 489 // Error case: Fail, telling the user what the allowed values were. 490 std::vector<std::string> allowed_values; 491 for (auto&& arg_name : argument_info_.names_) { 492 allowed_values.push_back(arg_name); 493 } 494 495 std::string allowed_values_flat = android::base::Join(allowed_values, ','); 496 return CmdlineResult(CmdlineResult::kFailure, 497 "Argument value '" + argument + "' does not match any of known valid" 498 "values: {" + allowed_values_flat + "}"); 499 } 500 501 // Handle the regular case where we parsed an unknown value from a blank. 502 UserTypeInfo type_parser; 503 504 if (argument_info_.appending_values_) { 505 TArg& existing = load_argument_(); 506 CmdlineParseResult<TArg> result = type_parser.ParseAndAppend(argument, existing); 507 508 assert(!argument_info_.has_range_); 509 510 return std::move(result); 511 } 512 513 CmdlineParseResult<TArg> result = type_parser.Parse(argument); 514 515 if (result.IsSuccess()) { 516 TArg& value = result.GetValue(); 517 518 // Do a range check for 'WithRange(min,max)' argument definition. 519 if (!argument_info_.CheckRange(value)) { 520 return CmdlineParseResult<TArg>::OutOfRange( 521 value, argument_info_.min_, argument_info_.max_); 522 } 523 524 return SaveArgument(value); 525 } 526 527 // Some kind of type-specific parse error. Pass the result as-is. 528 CmdlineResult raw_result = std::move(result); 529 return raw_result; 530 } 531 532 public: GetTypeNameCmdlineParseArgument533 virtual const char* GetTypeName() const { 534 // TODO: Obviate the need for each type specialization to hardcode the type name 535 return UserTypeInfo::Name(); 536 } 537 538 // How many tokens should be taken off argv for parsing this argument. 539 // For example "--help" is just 1, "-compiler-option _" would be 2 (since there's a space). 540 // 541 // A [min,max] range is returned to represent argument definitions with multiple 542 // value tokens. (e.g. {"-h", "-h " } would return [1,2]). GetNumTokensCmdlineParseArgument543 virtual std::pair<size_t, size_t> GetNumTokens() const { 544 return argument_info_.token_range_size_; 545 } 546 547 // See if this token range might begin the same as the argument definition. MaybeMatchesCmdlineParseArgument548 virtual size_t MaybeMatches(const TokenRange& tokens) { 549 return argument_info_.MaybeMatches(tokens); 550 } 551 552 private: SaveArgumentCmdlineParseArgument553 CmdlineResult SaveArgument(const TArg& value) { 554 assert(!argument_info_.appending_values_ 555 && "If the values are being appended, then the updated parse value is " 556 "updated by-ref as a side effect and shouldn't be stored directly"); 557 TArg val = value; 558 save_argument_(val); 559 return CmdlineResult(CmdlineResult::kSuccess); 560 } 561 562 CmdlineParserArgumentInfo<TArg> argument_info_; 563 std::function<void(TArg&)> save_argument_; 564 std::function<TArg&(void)> load_argument_; 565 }; 566 } // namespace detail // NOLINT [readability/namespace] [5] 567 } // namespace art 568 569 #endif // ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_ 570