1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_ 18 #define ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_ 19 20 #include <type_traits> 21 #include <assert.h> 22 #include <functional> 23 #include <vector> 24 #include <algorithm> 25 #include <numeric> 26 #include <memory> 27 28 #include "android-base/strings.h" 29 30 #include "cmdline_parse_result.h" 31 #include "cmdline_types.h" 32 #include "token_range.h" 33 #include "unit.h" 34 35 namespace art { 36 // Implementation details for the parser. Do not look inside if you hate templates. 37 namespace detail { 38 // A non-templated base class for argument parsers. Used by the general parser 39 // to parse arguments, without needing to know the argument type at compile time. 40 // 41 // This is an application of the type erasure idiom. 42 struct CmdlineParseArgumentAny { ~CmdlineParseArgumentAnyCmdlineParseArgumentAny43 virtual ~CmdlineParseArgumentAny() {} 44 45 // Attempt to parse this argument starting at arguments[position]. 46 // If the parsing succeeds, the parsed value will be saved as a side-effect. 47 // 48 // In most situations, the parsing will not match by returning kUnknown. In this case, 49 // no tokens were consumed and the position variable will not be updated. 50 // 51 // At other times, parsing may fail due to validation but the initial token was still matched 52 // (for example an out of range value, or passing in a string where an int was expected). 53 // In this case the tokens are still consumed, and the position variable will get incremented 54 // by all the consumed tokens. 55 // 56 // The # of tokens consumed by the parse attempt will be set as an out-parameter into 57 // consumed_tokens. The parser should skip this many tokens before parsing the next 58 // argument. 59 virtual CmdlineResult ParseArgument(const TokenRange& arguments, size_t* consumed_tokens) = 0; 60 // How many tokens should be taken off argv for parsing this argument. 61 // For example "--help" is just 1, "-compiler-option _" would be 2 (since there's a space). 62 // 63 // A [min,max] range is returned to represent argument definitions with multiple 64 // value tokens. (e.g. {"-h", "-h " } would return [1,2]). 65 virtual std::pair<size_t, size_t> GetNumTokens() const = 0; 66 // Get the run-time typename of the argument type. 67 virtual const char* GetTypeName() const = 0; 68 // Try to do a close match, returning how many tokens were matched against this argument 69 // definition. More tokens is better. 70 // 71 // Do a quick match token-by-token, and see if they match. 72 // Any tokens with a wildcard in them are only matched up until the wildcard. 73 // If this is true, then the wildcard matching later on can still fail, so this is not 74 // a guarantee that the argument is correct, it's more of a strong hint that the 75 // user-provided input *probably* was trying to match this argument. 76 // 77 // Returns how many tokens were either matched (or ignored because there was a 78 // wildcard present). 0 means no match. If the Size() tokens are returned. 79 virtual size_t MaybeMatches(const TokenRange& tokens) = 0; 80 }; 81 82 template <typename T> 83 using EnableIfNumeric = std::enable_if<std::is_arithmetic<T>::value>; 84 85 template <typename T> 86 using DisableIfNumeric = std::enable_if<!std::is_arithmetic<T>::value>; 87 88 // Argument definition information, created by an ArgumentBuilder and an UntypedArgumentBuilder. 89 template <typename TArg> 90 struct CmdlineParserArgumentInfo { 91 // This version will only be used if TArg is arithmetic and thus has the <= operators. 92 template <typename T = TArg> // Necessary to get SFINAE to kick in. 93 bool CheckRange(const TArg& value, typename EnableIfNumeric<T>::type* = 0) { 94 if (has_range_) { 95 return min_ <= value && value <= max_; 96 } 97 return true; 98 } 99 100 // This version will be used at other times when TArg is not arithmetic. 101 template <typename T = TArg> 102 bool CheckRange(const TArg&, typename DisableIfNumeric<T>::type* = 0) { 103 assert(!has_range_); 104 return true; 105 } 106 107 // Do a quick match token-by-token, and see if they match. 108 // Any tokens with a wildcard in them only match the prefix up until the wildcard. 109 // 110 // If this is true, then the wildcard matching later on can still fail, so this is not 111 // a guarantee that the argument is correct, it's more of a strong hint that the 112 // user-provided input *probably* was trying to match this argument. MaybeMatchesCmdlineParserArgumentInfo113 size_t MaybeMatches(const TokenRange& token_list) const { 114 auto best_match = FindClosestMatch(token_list); 115 116 return best_match.second; 117 } 118 119 // Attempt to find the closest match (see MaybeMatches). 120 // 121 // Returns the token range that was the closest match and the # of tokens that 122 // this range was matched up until. FindClosestMatchCmdlineParserArgumentInfo123 std::pair<const TokenRange*, size_t> FindClosestMatch(const TokenRange& token_list) const { 124 const TokenRange* best_match_ptr = nullptr; 125 126 size_t best_match = 0; 127 for (auto&& token_range : tokenized_names_) { 128 size_t this_match = token_range.MaybeMatches(token_list, std::string("_")); 129 130 if (this_match > best_match) { 131 best_match_ptr = &token_range; 132 best_match = this_match; 133 } 134 } 135 136 return std::make_pair(best_match_ptr, best_match); 137 } 138 139 // Mark the argument definition as completed, do not mutate the object anymore after this 140 // call is done. 141 // 142 // Performs several sanity checks and token calculations. CompleteArgumentCmdlineParserArgumentInfo143 void CompleteArgument() { 144 assert(names_.size() >= 1); 145 assert(!is_completed_); 146 147 is_completed_ = true; 148 149 size_t blank_count = 0; 150 size_t token_count = 0; 151 152 size_t global_blank_count = 0; 153 size_t global_token_count = 0; 154 for (auto&& name : names_) { 155 std::string s(name); 156 157 size_t local_blank_count = std::count(s.begin(), s.end(), '_'); 158 size_t local_token_count = std::count(s.begin(), s.end(), ' '); 159 160 if (global_blank_count != 0) { 161 assert(local_blank_count == global_blank_count 162 && "Every argument descriptor string must have same amount of blanks (_)"); 163 } 164 165 if (local_blank_count != 0) { 166 global_blank_count = local_blank_count; 167 blank_count++; 168 169 assert(local_blank_count == 1 && "More than one blank is not supported"); 170 assert(s.back() == '_' && "The blank character must only be at the end of the string"); 171 } 172 173 if (global_token_count != 0) { 174 assert(local_token_count == global_token_count 175 && "Every argument descriptor string must have same amount of tokens (spaces)"); 176 } 177 178 if (local_token_count != 0) { 179 global_token_count = local_token_count; 180 token_count++; 181 } 182 183 // Tokenize every name, turning it from a string to a token list. 184 tokenized_names_.clear(); 185 for (auto&& name1 : names_) { 186 // Split along ' ' only, removing any duplicated spaces. 187 tokenized_names_.push_back( 188 TokenRange::Split(name1, {' '}).RemoveToken(" ")); 189 } 190 191 // remove the _ character from each of the token ranges 192 // we will often end up with an empty token (i.e. ["-XX", "_"] -> ["-XX", ""] 193 // and this is OK because we still need an empty token to simplify 194 // range comparisons 195 simple_names_.clear(); 196 197 for (auto&& tokenized_name : tokenized_names_) { 198 simple_names_.push_back(tokenized_name.RemoveCharacter('_')); 199 } 200 } 201 202 if (token_count != 0) { 203 assert(("Every argument descriptor string must have equal amount of tokens (spaces)" && 204 token_count == names_.size())); 205 } 206 207 if (blank_count != 0) { 208 assert(("Every argument descriptor string must have an equal amount of blanks (_)" && 209 blank_count == names_.size())); 210 } 211 212 using_blanks_ = blank_count > 0; 213 { 214 size_t smallest_name_token_range_size = 215 std::accumulate(tokenized_names_.begin(), tokenized_names_.end(), ~(0u), 216 [](size_t min, const TokenRange& cur) { 217 return std::min(min, cur.Size()); 218 }); 219 size_t largest_name_token_range_size = 220 std::accumulate(tokenized_names_.begin(), tokenized_names_.end(), 0u, 221 [](size_t max, const TokenRange& cur) { 222 return std::max(max, cur.Size()); 223 }); 224 225 token_range_size_ = std::make_pair(smallest_name_token_range_size, 226 largest_name_token_range_size); 227 } 228 229 if (has_value_list_) { 230 assert(names_.size() == value_list_.size() 231 && "Number of arg descriptors must match number of values"); 232 assert(!has_value_map_); 233 } 234 if (has_value_map_) { 235 if (!using_blanks_) { 236 assert(names_.size() == value_map_.size() && 237 "Since no blanks were specified, each arg is mapped directly into a mapped " 238 "value without parsing; sizes must match"); 239 } 240 241 assert(!has_value_list_); 242 } 243 244 if (!using_blanks_ && !CmdlineType<TArg>::kCanParseBlankless) { 245 assert((has_value_map_ || has_value_list_) && 246 "Arguments without a blank (_) must provide either a value map or a value list"); 247 } 248 249 TypedCheck(); 250 } 251 252 // List of aliases for a single argument definition, e.g. {"-Xdex2oat", "-Xnodex2oat"}. 253 std::vector<const char*> names_; 254 // Is there at least 1 wildcard '_' in the argument definition? 255 bool using_blanks_ = false; 256 // [min, max] token counts in each arg def 257 std::pair<size_t, size_t> token_range_size_; 258 259 // contains all the names in a tokenized form, i.e. as a space-delimited list 260 std::vector<TokenRange> tokenized_names_; 261 262 // contains the tokenized names, but with the _ character stripped 263 std::vector<TokenRange> simple_names_; 264 265 // For argument definitions created with '.AppendValues()' 266 // Meaning that parsing should mutate the existing value in-place if possible. 267 bool appending_values_ = false; 268 269 // For argument definitions created with '.WithRange(min, max)' 270 bool has_range_ = false; 271 TArg min_; 272 TArg max_; 273 274 // For argument definitions created with '.WithValueMap' 275 bool has_value_map_ = false; 276 std::vector<std::pair<const char*, TArg>> value_map_; 277 278 // For argument definitions created with '.WithValues' 279 bool has_value_list_ = false; 280 std::vector<TArg> value_list_; 281 282 // Make sure there's a default constructor. 283 CmdlineParserArgumentInfo() = default; 284 285 // Ensure there's a default move constructor. 286 CmdlineParserArgumentInfo(CmdlineParserArgumentInfo&&) = default; 287 288 private: 289 // Perform type-specific checks at runtime. 290 template <typename T = TArg> 291 void TypedCheck(typename std::enable_if<std::is_same<Unit, T>::value>::type* = 0) { 292 assert(!using_blanks_ && 293 "Blanks are not supported in Unit arguments; since a Unit has no parse-able value"); 294 } 295 TypedCheckCmdlineParserArgumentInfo296 void TypedCheck() {} 297 298 bool is_completed_ = false; 299 }; 300 301 // A virtual-implementation of the necessary argument information in order to 302 // be able to parse arguments. 303 template <typename TArg> 304 struct CmdlineParseArgument : CmdlineParseArgumentAny { CmdlineParseArgumentCmdlineParseArgument305 CmdlineParseArgument(CmdlineParserArgumentInfo<TArg>&& argument_info, 306 std::function<void(TArg&)>&& save_argument, 307 std::function<TArg&(void)>&& load_argument) 308 : argument_info_(std::forward<decltype(argument_info)>(argument_info)), 309 save_argument_(std::forward<decltype(save_argument)>(save_argument)), 310 load_argument_(std::forward<decltype(load_argument)>(load_argument)) { 311 } 312 313 using UserTypeInfo = CmdlineType<TArg>; 314 ParseArgumentCmdlineParseArgument315 virtual CmdlineResult ParseArgument(const TokenRange& arguments, size_t* consumed_tokens) { 316 assert(arguments.Size() > 0); 317 assert(consumed_tokens != nullptr); 318 319 auto closest_match_res = argument_info_.FindClosestMatch(arguments); 320 size_t best_match_size = closest_match_res.second; 321 const TokenRange* best_match_arg_def = closest_match_res.first; 322 323 if (best_match_size > arguments.Size()) { 324 // The best match has more tokens than were provided. 325 // Shouldn't happen in practice since the outer parser does this check. 326 return CmdlineResult(CmdlineResult::kUnknown, "Size mismatch"); 327 } 328 329 assert(best_match_arg_def != nullptr); 330 *consumed_tokens = best_match_arg_def->Size(); 331 332 if (!argument_info_.using_blanks_) { 333 return ParseArgumentSingle(arguments.Join(' ')); 334 } 335 336 // Extract out the blank value from arguments 337 // e.g. for a def of "foo:_" and input "foo:bar", blank_value == "bar" 338 std::string blank_value = ""; 339 size_t idx = 0; 340 for (auto&& def_token : *best_match_arg_def) { 341 auto&& arg_token = arguments[idx]; 342 343 // Does this definition-token have a wildcard in it? 344 if (def_token.find('_') == std::string::npos) { 345 // No, regular token. Match 1:1 against the argument token. 346 bool token_match = def_token == arg_token; 347 348 if (!token_match) { 349 return CmdlineResult(CmdlineResult::kFailure, 350 std::string("Failed to parse ") + best_match_arg_def->GetToken(0) 351 + " at token " + std::to_string(idx)); 352 } 353 } else { 354 // This is a wild-carded token. 355 TokenRange def_split_wildcards = TokenRange::Split(def_token, {'_'}); 356 357 // Extract the wildcard contents out of the user-provided arg_token. 358 std::unique_ptr<TokenRange> arg_matches = 359 def_split_wildcards.MatchSubstrings(arg_token, "_"); 360 if (arg_matches == nullptr) { 361 return CmdlineResult(CmdlineResult::kFailure, 362 std::string("Failed to parse ") + best_match_arg_def->GetToken(0) 363 + ", with a wildcard pattern " + def_token 364 + " at token " + std::to_string(idx)); 365 } 366 367 // Get the corresponding wildcard tokens from arg_matches, 368 // and concatenate it to blank_value. 369 for (size_t sub_idx = 0; 370 sub_idx < def_split_wildcards.Size() && sub_idx < arg_matches->Size(); ++sub_idx) { 371 if (def_split_wildcards[sub_idx] == "_") { 372 blank_value += arg_matches->GetToken(sub_idx); 373 } 374 } 375 } 376 377 ++idx; 378 } 379 380 return ParseArgumentSingle(blank_value); 381 } 382 383 private: ParseArgumentSingleCmdlineParseArgument384 virtual CmdlineResult ParseArgumentSingle(const std::string& argument) { 385 // TODO: refactor to use LookupValue for the value lists/maps 386 387 // Handle the 'WithValueMap(...)' argument definition 388 if (argument_info_.has_value_map_) { 389 for (auto&& value_pair : argument_info_.value_map_) { 390 const char* name = value_pair.first; 391 392 if (argument == name) { 393 return SaveArgument(value_pair.second); 394 } 395 } 396 397 // Error case: Fail, telling the user what the allowed values were. 398 std::vector<std::string> allowed_values; 399 for (auto&& value_pair : argument_info_.value_map_) { 400 const char* name = value_pair.first; 401 allowed_values.push_back(name); 402 } 403 404 std::string allowed_values_flat = android::base::Join(allowed_values, ','); 405 return CmdlineResult(CmdlineResult::kFailure, 406 "Argument value '" + argument + "' does not match any of known valid" 407 "values: {" + allowed_values_flat + "}"); 408 } 409 410 // Handle the 'WithValues(...)' argument definition 411 if (argument_info_.has_value_list_) { 412 size_t arg_def_idx = 0; 413 for (auto&& value : argument_info_.value_list_) { 414 auto&& arg_def_token = argument_info_.names_[arg_def_idx]; 415 416 if (arg_def_token == argument) { 417 return SaveArgument(value); 418 } 419 ++arg_def_idx; 420 } 421 422 assert(arg_def_idx + 1 == argument_info_.value_list_.size() && 423 "Number of named argument definitions must match number of values defined"); 424 425 // Error case: Fail, telling the user what the allowed values were. 426 std::vector<std::string> allowed_values; 427 for (auto&& arg_name : argument_info_.names_) { 428 allowed_values.push_back(arg_name); 429 } 430 431 std::string allowed_values_flat = android::base::Join(allowed_values, ','); 432 return CmdlineResult(CmdlineResult::kFailure, 433 "Argument value '" + argument + "' does not match any of known valid" 434 "values: {" + allowed_values_flat + "}"); 435 } 436 437 // Handle the regular case where we parsed an unknown value from a blank. 438 UserTypeInfo type_parser; 439 440 if (argument_info_.appending_values_) { 441 TArg& existing = load_argument_(); 442 CmdlineParseResult<TArg> result = type_parser.ParseAndAppend(argument, existing); 443 444 assert(!argument_info_.has_range_); 445 446 return result; 447 } 448 449 CmdlineParseResult<TArg> result = type_parser.Parse(argument); 450 451 if (result.IsSuccess()) { 452 TArg& value = result.GetValue(); 453 454 // Do a range check for 'WithRange(min,max)' argument definition. 455 if (!argument_info_.CheckRange(value)) { 456 return CmdlineParseResult<TArg>::OutOfRange( 457 value, argument_info_.min_, argument_info_.max_); 458 } 459 460 return SaveArgument(value); 461 } 462 463 // Some kind of type-specific parse error. Pass the result as-is. 464 CmdlineResult raw_result = std::move(result); 465 return raw_result; 466 } 467 468 public: GetTypeNameCmdlineParseArgument469 virtual const char* GetTypeName() const { 470 // TODO: Obviate the need for each type specialization to hardcode the type name 471 return UserTypeInfo::Name(); 472 } 473 474 // How many tokens should be taken off argv for parsing this argument. 475 // For example "--help" is just 1, "-compiler-option _" would be 2 (since there's a space). 476 // 477 // A [min,max] range is returned to represent argument definitions with multiple 478 // value tokens. (e.g. {"-h", "-h " } would return [1,2]). GetNumTokensCmdlineParseArgument479 virtual std::pair<size_t, size_t> GetNumTokens() const { 480 return argument_info_.token_range_size_; 481 } 482 483 // See if this token range might begin the same as the argument definition. MaybeMatchesCmdlineParseArgument484 virtual size_t MaybeMatches(const TokenRange& tokens) { 485 return argument_info_.MaybeMatches(tokens); 486 } 487 488 private: SaveArgumentCmdlineParseArgument489 CmdlineResult SaveArgument(const TArg& value) { 490 assert(!argument_info_.appending_values_ 491 && "If the values are being appended, then the updated parse value is " 492 "updated by-ref as a side effect and shouldn't be stored directly"); 493 TArg val = value; 494 save_argument_(val); 495 return CmdlineResult(CmdlineResult::kSuccess); 496 } 497 498 CmdlineParserArgumentInfo<TArg> argument_info_; 499 std::function<void(TArg&)> save_argument_; 500 std::function<TArg&(void)> load_argument_; 501 }; 502 } // namespace detail // NOLINT [readability/namespace] [5] 503 } // namespace art 504 505 #endif // ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_ 506