1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_ 18 #define ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_ 19 20 #include <type_traits> 21 #include <assert.h> 22 #include <functional> 23 #include <vector> 24 #include <algorithm> 25 #include <numeric> 26 #include <memory> 27 28 #include "cmdline/cmdline_parse_result.h" 29 #include "cmdline/token_range.h" 30 #include "cmdline/unit.h" 31 #include "cmdline/cmdline_types.h" 32 33 namespace art { 34 // Implementation details for the parser. Do not look inside if you hate templates. 35 namespace detail { 36 // A non-templated base class for argument parsers. Used by the general parser 37 // to parse arguments, without needing to know the argument type at compile time. 38 // 39 // This is an application of the type erasure idiom. 40 struct CmdlineParseArgumentAny { ~CmdlineParseArgumentAnyCmdlineParseArgumentAny41 virtual ~CmdlineParseArgumentAny() {} 42 43 // Attempt to parse this argument starting at arguments[position]. 44 // If the parsing succeeds, the parsed value will be saved as a side-effect. 45 // 46 // In most situations, the parsing will not match by returning kUnknown. In this case, 47 // no tokens were consumed and the position variable will not be updated. 48 // 49 // At other times, parsing may fail due to validation but the initial token was still matched 50 // (for example an out of range value, or passing in a string where an int was expected). 51 // In this case the tokens are still consumed, and the position variable will get incremented 52 // by all the consumed tokens. 53 // 54 // The # of tokens consumed by the parse attempt will be set as an out-parameter into 55 // consumed_tokens. The parser should skip this many tokens before parsing the next 56 // argument. 57 virtual CmdlineResult ParseArgument(const TokenRange& arguments, size_t* consumed_tokens) = 0; 58 // How many tokens should be taken off argv for parsing this argument. 59 // For example "--help" is just 1, "-compiler-option _" would be 2 (since there's a space). 60 // 61 // A [min,max] range is returned to represent argument definitions with multiple 62 // value tokens. (e.g. {"-h", "-h " } would return [1,2]). 63 virtual std::pair<size_t, size_t> GetNumTokens() const = 0; 64 // Get the run-time typename of the argument type. 65 virtual const char* GetTypeName() const = 0; 66 // Try to do a close match, returning how many tokens were matched against this argument 67 // definition. More tokens is better. 68 // 69 // Do a quick match token-by-token, and see if they match. 70 // Any tokens with a wildcard in them are only matched up until the wildcard. 71 // If this is true, then the wildcard matching later on can still fail, so this is not 72 // a guarantee that the argument is correct, it's more of a strong hint that the 73 // user-provided input *probably* was trying to match this argument. 74 // 75 // Returns how many tokens were either matched (or ignored because there was a 76 // wildcard present). 0 means no match. If the Size() tokens are returned. 77 virtual size_t MaybeMatches(const TokenRange& tokens) = 0; 78 }; 79 80 template <typename T> 81 using EnableIfNumeric = std::enable_if<std::is_arithmetic<T>::value>; 82 83 template <typename T> 84 using DisableIfNumeric = std::enable_if<!std::is_arithmetic<T>::value>; 85 86 // Argument definition information, created by an ArgumentBuilder and an UntypedArgumentBuilder. 87 template <typename TArg> 88 struct CmdlineParserArgumentInfo { 89 // This version will only be used if TArg is arithmetic and thus has the <= operators. 90 template <typename T = TArg> // Necessary to get SFINAE to kick in. 91 bool CheckRange(const TArg& value, typename EnableIfNumeric<T>::type* = 0) { 92 if (has_range_) { 93 return min_ <= value && value <= max_; 94 } 95 return true; 96 } 97 98 // This version will be used at other times when TArg is not arithmetic. 99 template <typename T = TArg> 100 bool CheckRange(const TArg&, typename DisableIfNumeric<T>::type* = 0) { 101 assert(!has_range_); 102 return true; 103 } 104 105 // Do a quick match token-by-token, and see if they match. 106 // Any tokens with a wildcard in them only match the prefix up until the wildcard. 107 // 108 // If this is true, then the wildcard matching later on can still fail, so this is not 109 // a guarantee that the argument is correct, it's more of a strong hint that the 110 // user-provided input *probably* was trying to match this argument. MaybeMatchesCmdlineParserArgumentInfo111 size_t MaybeMatches(TokenRange token_list) const { 112 auto best_match = FindClosestMatch(token_list); 113 114 return best_match.second; 115 } 116 117 // Attempt to find the closest match (see MaybeMatches). 118 // 119 // Returns the token range that was the closest match and the # of tokens that 120 // this range was matched up until. FindClosestMatchCmdlineParserArgumentInfo121 std::pair<const TokenRange*, size_t> FindClosestMatch(TokenRange token_list) const { 122 const TokenRange* best_match_ptr = nullptr; 123 124 size_t best_match = 0; 125 for (auto&& token_range : tokenized_names_) { 126 size_t this_match = token_range.MaybeMatches(token_list, std::string("_")); 127 128 if (this_match > best_match) { 129 best_match_ptr = &token_range; 130 best_match = this_match; 131 } 132 } 133 134 return std::make_pair(best_match_ptr, best_match); 135 } 136 137 // Mark the argument definition as completed, do not mutate the object anymore after this 138 // call is done. 139 // 140 // Performs several sanity checks and token calculations. CompleteArgumentCmdlineParserArgumentInfo141 void CompleteArgument() { 142 assert(names_.size() >= 1); 143 assert(!is_completed_); 144 145 is_completed_ = true; 146 147 size_t blank_count = 0; 148 size_t token_count = 0; 149 150 size_t global_blank_count = 0; 151 size_t global_token_count = 0; 152 for (auto&& name : names_) { 153 std::string s(name); 154 155 size_t local_blank_count = std::count(s.begin(), s.end(), '_'); 156 size_t local_token_count = std::count(s.begin(), s.end(), ' '); 157 158 if (global_blank_count != 0) { 159 assert(local_blank_count == global_blank_count 160 && "Every argument descriptor string must have same amount of blanks (_)"); 161 } 162 163 if (local_blank_count != 0) { 164 global_blank_count = local_blank_count; 165 blank_count++; 166 167 assert(local_blank_count == 1 && "More than one blank is not supported"); 168 assert(s.back() == '_' && "The blank character must only be at the end of the string"); 169 } 170 171 if (global_token_count != 0) { 172 assert(local_token_count == global_token_count 173 && "Every argument descriptor string must have same amount of tokens (spaces)"); 174 } 175 176 if (local_token_count != 0) { 177 global_token_count = local_token_count; 178 token_count++; 179 } 180 181 // Tokenize every name, turning it from a string to a token list. 182 tokenized_names_.clear(); 183 for (auto&& name1 : names_) { 184 // Split along ' ' only, removing any duplicated spaces. 185 tokenized_names_.push_back( 186 TokenRange::Split(name1, {' '}).RemoveToken(" ")); 187 } 188 189 // remove the _ character from each of the token ranges 190 // we will often end up with an empty token (i.e. ["-XX", "_"] -> ["-XX", ""] 191 // and this is OK because we still need an empty token to simplify 192 // range comparisons 193 simple_names_.clear(); 194 195 for (auto&& tokenized_name : tokenized_names_) { 196 simple_names_.push_back(tokenized_name.RemoveCharacter('_')); 197 } 198 } 199 200 if (token_count != 0) { 201 assert(("Every argument descriptor string must have equal amount of tokens (spaces)" && 202 token_count == names_.size())); 203 } 204 205 if (blank_count != 0) { 206 assert(("Every argument descriptor string must have an equal amount of blanks (_)" && 207 blank_count == names_.size())); 208 } 209 210 using_blanks_ = blank_count > 0; 211 { 212 size_t smallest_name_token_range_size = 213 std::accumulate(tokenized_names_.begin(), tokenized_names_.end(), ~(0u), 214 [](size_t min, const TokenRange& cur) { 215 return std::min(min, cur.Size()); 216 }); 217 size_t largest_name_token_range_size = 218 std::accumulate(tokenized_names_.begin(), tokenized_names_.end(), 0u, 219 [](size_t max, const TokenRange& cur) { 220 return std::max(max, cur.Size()); 221 }); 222 223 token_range_size_ = std::make_pair(smallest_name_token_range_size, 224 largest_name_token_range_size); 225 } 226 227 if (has_value_list_) { 228 assert(names_.size() == value_list_.size() 229 && "Number of arg descriptors must match number of values"); 230 assert(!has_value_map_); 231 } 232 if (has_value_map_) { 233 if (!using_blanks_) { 234 assert(names_.size() == value_map_.size() && 235 "Since no blanks were specified, each arg is mapped directly into a mapped " 236 "value without parsing; sizes must match"); 237 } 238 239 assert(!has_value_list_); 240 } 241 242 if (!using_blanks_ && !CmdlineType<TArg>::kCanParseBlankless) { 243 assert((has_value_map_ || has_value_list_) && 244 "Arguments without a blank (_) must provide either a value map or a value list"); 245 } 246 247 TypedCheck(); 248 } 249 250 // List of aliases for a single argument definition, e.g. {"-Xdex2oat", "-Xnodex2oat"}. 251 std::vector<const char*> names_; 252 // Is there at least 1 wildcard '_' in the argument definition? 253 bool using_blanks_ = false; 254 // [min, max] token counts in each arg def 255 std::pair<size_t, size_t> token_range_size_; 256 257 // contains all the names in a tokenized form, i.e. as a space-delimited list 258 std::vector<TokenRange> tokenized_names_; 259 260 // contains the tokenized names, but with the _ character stripped 261 std::vector<TokenRange> simple_names_; 262 263 // For argument definitions created with '.AppendValues()' 264 // Meaning that parsing should mutate the existing value in-place if possible. 265 bool appending_values_ = false; 266 267 // For argument definitions created with '.WithRange(min, max)' 268 bool has_range_ = false; 269 TArg min_; 270 TArg max_; 271 272 // For argument definitions created with '.WithValueMap' 273 bool has_value_map_ = false; 274 std::vector<std::pair<const char*, TArg>> value_map_; 275 276 // For argument definitions created with '.WithValues' 277 bool has_value_list_ = false; 278 std::vector<TArg> value_list_; 279 280 // Make sure there's a default constructor. 281 CmdlineParserArgumentInfo() = default; 282 283 // Ensure there's a default move constructor. 284 CmdlineParserArgumentInfo(CmdlineParserArgumentInfo&&) = default; 285 286 private: 287 // Perform type-specific checks at runtime. 288 template <typename T = TArg> 289 void TypedCheck(typename std::enable_if<std::is_same<Unit, T>::value>::type* = 0) { 290 assert(!using_blanks_ && 291 "Blanks are not supported in Unit arguments; since a Unit has no parse-able value"); 292 } 293 TypedCheckCmdlineParserArgumentInfo294 void TypedCheck() {} 295 296 bool is_completed_ = false; 297 }; 298 299 // A virtual-implementation of the necessary argument information in order to 300 // be able to parse arguments. 301 template <typename TArg> 302 struct CmdlineParseArgument : CmdlineParseArgumentAny { CmdlineParseArgumentCmdlineParseArgument303 explicit CmdlineParseArgument(CmdlineParserArgumentInfo<TArg>&& argument_info, 304 std::function<void(TArg&)>&& save_argument, 305 std::function<TArg&(void)>&& load_argument) 306 : argument_info_(std::forward<decltype(argument_info)>(argument_info)), 307 save_argument_(std::forward<decltype(save_argument)>(save_argument)), 308 load_argument_(std::forward<decltype(load_argument)>(load_argument)) { 309 } 310 311 using UserTypeInfo = CmdlineType<TArg>; 312 ParseArgumentCmdlineParseArgument313 virtual CmdlineResult ParseArgument(const TokenRange& arguments, size_t* consumed_tokens) { 314 assert(arguments.Size() > 0); 315 assert(consumed_tokens != nullptr); 316 317 auto closest_match_res = argument_info_.FindClosestMatch(arguments); 318 size_t best_match_size = closest_match_res.second; 319 const TokenRange* best_match_arg_def = closest_match_res.first; 320 321 if (best_match_size > arguments.Size()) { 322 // The best match has more tokens than were provided. 323 // Shouldn't happen in practice since the outer parser does this check. 324 return CmdlineResult(CmdlineResult::kUnknown, "Size mismatch"); 325 } 326 327 assert(best_match_arg_def != nullptr); 328 *consumed_tokens = best_match_arg_def->Size(); 329 330 if (!argument_info_.using_blanks_) { 331 return ParseArgumentSingle(arguments.Join(' ')); 332 } 333 334 // Extract out the blank value from arguments 335 // e.g. for a def of "foo:_" and input "foo:bar", blank_value == "bar" 336 std::string blank_value = ""; 337 size_t idx = 0; 338 for (auto&& def_token : *best_match_arg_def) { 339 auto&& arg_token = arguments[idx]; 340 341 // Does this definition-token have a wildcard in it? 342 if (def_token.find('_') == std::string::npos) { 343 // No, regular token. Match 1:1 against the argument token. 344 bool token_match = def_token == arg_token; 345 346 if (!token_match) { 347 return CmdlineResult(CmdlineResult::kFailure, 348 std::string("Failed to parse ") + best_match_arg_def->GetToken(0) 349 + " at token " + std::to_string(idx)); 350 } 351 } else { 352 // This is a wild-carded token. 353 TokenRange def_split_wildcards = TokenRange::Split(def_token, {'_'}); 354 355 // Extract the wildcard contents out of the user-provided arg_token. 356 std::unique_ptr<TokenRange> arg_matches = 357 def_split_wildcards.MatchSubstrings(arg_token, "_"); 358 if (arg_matches == nullptr) { 359 return CmdlineResult(CmdlineResult::kFailure, 360 std::string("Failed to parse ") + best_match_arg_def->GetToken(0) 361 + ", with a wildcard pattern " + def_token 362 + " at token " + std::to_string(idx)); 363 } 364 365 // Get the corresponding wildcard tokens from arg_matches, 366 // and concatenate it to blank_value. 367 for (size_t sub_idx = 0; 368 sub_idx < def_split_wildcards.Size() && sub_idx < arg_matches->Size(); ++sub_idx) { 369 if (def_split_wildcards[sub_idx] == "_") { 370 blank_value += arg_matches->GetToken(sub_idx); 371 } 372 } 373 } 374 375 ++idx; 376 } 377 378 return ParseArgumentSingle(blank_value); 379 } 380 381 private: ParseArgumentSingleCmdlineParseArgument382 virtual CmdlineResult ParseArgumentSingle(const std::string& argument) { 383 // TODO: refactor to use LookupValue for the value lists/maps 384 385 // Handle the 'WithValueMap(...)' argument definition 386 if (argument_info_.has_value_map_) { 387 for (auto&& value_pair : argument_info_.value_map_) { 388 const char* name = value_pair.first; 389 390 if (argument == name) { 391 return SaveArgument(value_pair.second); 392 } 393 } 394 395 // Error case: Fail, telling the user what the allowed values were. 396 std::vector<std::string> allowed_values; 397 for (auto&& value_pair : argument_info_.value_map_) { 398 const char* name = value_pair.first; 399 allowed_values.push_back(name); 400 } 401 402 std::string allowed_values_flat = Join(allowed_values, ','); 403 return CmdlineResult(CmdlineResult::kFailure, 404 "Argument value '" + argument + "' does not match any of known valid" 405 "values: {" + allowed_values_flat + "}"); 406 } 407 408 // Handle the 'WithValues(...)' argument definition 409 if (argument_info_.has_value_list_) { 410 size_t arg_def_idx = 0; 411 for (auto&& value : argument_info_.value_list_) { 412 auto&& arg_def_token = argument_info_.names_[arg_def_idx]; 413 414 if (arg_def_token == argument) { 415 return SaveArgument(value); 416 } 417 ++arg_def_idx; 418 } 419 420 assert(arg_def_idx + 1 == argument_info_.value_list_.size() && 421 "Number of named argument definitions must match number of values defined"); 422 423 // Error case: Fail, telling the user what the allowed values were. 424 std::vector<std::string> allowed_values; 425 for (auto&& arg_name : argument_info_.names_) { 426 allowed_values.push_back(arg_name); 427 } 428 429 std::string allowed_values_flat = Join(allowed_values, ','); 430 return CmdlineResult(CmdlineResult::kFailure, 431 "Argument value '" + argument + "' does not match any of known valid" 432 "values: {" + allowed_values_flat + "}"); 433 } 434 435 // Handle the regular case where we parsed an unknown value from a blank. 436 UserTypeInfo type_parser; 437 438 if (argument_info_.appending_values_) { 439 TArg& existing = load_argument_(); 440 CmdlineParseResult<TArg> result = type_parser.ParseAndAppend(argument, existing); 441 442 assert(!argument_info_.has_range_); 443 444 return result; 445 } 446 447 CmdlineParseResult<TArg> result = type_parser.Parse(argument); 448 449 if (result.IsSuccess()) { 450 TArg& value = result.GetValue(); 451 452 // Do a range check for 'WithRange(min,max)' argument definition. 453 if (!argument_info_.CheckRange(value)) { 454 return CmdlineParseResult<TArg>::OutOfRange( 455 value, argument_info_.min_, argument_info_.max_); 456 } 457 458 return SaveArgument(value); 459 } 460 461 // Some kind of type-specific parse error. Pass the result as-is. 462 CmdlineResult raw_result = std::move(result); 463 return raw_result; 464 } 465 466 public: GetTypeNameCmdlineParseArgument467 virtual const char* GetTypeName() const { 468 // TODO: Obviate the need for each type specialization to hardcode the type name 469 return UserTypeInfo::Name(); 470 } 471 472 // How many tokens should be taken off argv for parsing this argument. 473 // For example "--help" is just 1, "-compiler-option _" would be 2 (since there's a space). 474 // 475 // A [min,max] range is returned to represent argument definitions with multiple 476 // value tokens. (e.g. {"-h", "-h " } would return [1,2]). GetNumTokensCmdlineParseArgument477 virtual std::pair<size_t, size_t> GetNumTokens() const { 478 return argument_info_.token_range_size_; 479 } 480 481 // See if this token range might begin the same as the argument definition. MaybeMatchesCmdlineParseArgument482 virtual size_t MaybeMatches(const TokenRange& tokens) { 483 return argument_info_.MaybeMatches(tokens); 484 } 485 486 private: SaveArgumentCmdlineParseArgument487 CmdlineResult SaveArgument(const TArg& value) { 488 assert(!argument_info_.appending_values_ 489 && "If the values are being appended, then the updated parse value is " 490 "updated by-ref as a side effect and shouldn't be stored directly"); 491 TArg val = value; 492 save_argument_(val); 493 return CmdlineResult(CmdlineResult::kSuccess); 494 } 495 496 CmdlineParserArgumentInfo<TArg> argument_info_; 497 std::function<void(TArg&)> save_argument_; 498 std::function<TArg&(void)> load_argument_; 499 }; 500 } // namespace detail // NOLINT [readability/namespace] [5] [whitespace/comments] [2] 501 } // namespace art 502 503 #endif // ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_ 504