/* * Copyright (C) 2015 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef ART_CMDLINE_TOKEN_RANGE_H_ #define ART_CMDLINE_TOKEN_RANGE_H_ #include #include #include #include #include namespace art { // A range of tokens to make token matching algorithms easier. // // We try really hard to avoid copying and store only a pointer and iterators to the // interiors of the vector, so a typical copy constructor never ends up doing a deep copy. // It is up to the user to play nice and not to mutate the strings in-place. // // Tokens are only copied if a mutating operation is performed (and even then only // if it *actually* mutates the token). struct TokenRange { // Short-hand for a vector of strings. A single string and a token is synonymous. using TokenList = std::vector; // Copying-from-vector constructor. explicit TokenRange(const TokenList& token_list) : token_list_(new TokenList(token_list)), begin_(token_list_->begin()), end_(token_list_->end()) {} // Copying-from-iterator constructor template TokenRange(ForwardIterator it_begin, ForwardIterator it_end) : token_list_(new TokenList(it_begin, it_end)), begin_(token_list_->begin()), end_(token_list_->end()) {} #if 0 // Copying-from-vector constructor. TokenRange(const TokenList& token_list ATTRIBUTE_UNUSED, TokenList::const_iterator it_begin, TokenList::const_iterator it_end) : token_list_(new TokenList(it_begin, it_end)), begin_(token_list_->begin()), end_(token_list_->end()) { assert(it_begin >= token_list.begin()); assert(it_end <= token_list.end()); } #endif // Copying from char array constructor, convertings into tokens (strings) along the way. TokenRange(const char* token_list[], size_t length) : token_list_(new TokenList(&token_list[0], &token_list[length])), begin_(token_list_->begin()), end_(token_list_->end()) {} // Non-copying move-from-vector constructor. Takes over the token vector. explicit TokenRange(TokenList&& token_list) : token_list_(new TokenList(std::forward(token_list))), begin_(token_list_->begin()), end_(token_list_->end()) {} // Non-copying constructor. Retain reference to existing list of tokens. TokenRange(std::shared_ptr token_list, TokenList::const_iterator it_begin, TokenList::const_iterator it_end) : token_list_(token_list), begin_(it_begin), end_(it_end) { assert(it_begin >= token_list->begin()); assert(it_end <= token_list->end()); } // Non-copying copy constructor. TokenRange(const TokenRange&) = default; // Non-copying move constructor. TokenRange(TokenRange&&) = default; // Non-copying constructor. Retains reference to an existing list of tokens, with offset. explicit TokenRange(std::shared_ptr token_list) : token_list_(token_list), begin_(token_list_->begin()), end_(token_list_->end()) {} // Iterator type for begin() and end(). Guaranteed to be a RandomAccessIterator. using iterator = TokenList::const_iterator; // Iterator type for const begin() and const end(). Guaranteed to be a RandomAccessIterator. using const_iterator = iterator; // Create a token range by splitting a string. Each separator gets their own token. // Since the separator are retained as tokens, it might be useful to call // RemoveToken afterwards. static TokenRange Split(const std::string& string, std::initializer_list separators) { TokenList new_token_list; std::string tok; for (auto&& c : string) { for (char sep : separators) { if (c == sep) { // We spotted a separator character. // Push back everything before the last separator as a new token. // Push back the separator as a token. if (!tok.empty()) { new_token_list.push_back(tok); tok = ""; } new_token_list.push_back(std::string() + sep); } else { // Build up the token with another character. tok += c; } } } if (!tok.empty()) { new_token_list.push_back(tok); } return TokenRange(std::move(new_token_list)); } // A RandomAccessIterator to the first element in this range. iterator begin() const { return begin_; } // A RandomAccessIterator to one past the last element in this range. iterator end() const { return end_; } // The size of the range, i.e. how many tokens are in it. size_t Size() const { return std::distance(begin_, end_); } // Are there 0 tokens in this range? bool IsEmpty() const { return Size() > 0; } // Look up a token by it's offset. const std::string& GetToken(size_t offset) const { assert(offset < Size()); return *(begin_ + offset); } // Does this token range equal the other range? // Equality is defined as having both the same size, and // each corresponding token being equal. bool operator==(const TokenRange& other) const { if (this == &other) { return true; } if (Size() != other.Size()) { return false; } return std::equal(begin(), end(), other.begin()); } // Look up the token at the requested index. const std::string& operator[](int index) const { assert(index >= 0 && static_cast(index) < Size()); return *(begin() + index); } // Does this current range start with the other range? bool StartsWith(const TokenRange& other) const { if (this == &other) { return true; } if (Size() < other.Size()) { return false; } auto& smaller = Size() < other.Size() ? *this : other; auto& greater = Size() < other.Size() ? other : *this; return std::equal(smaller.begin(), smaller.end(), greater.begin()); } // Remove all characters 'c' from each token, potentially copying the underlying tokens. TokenRange RemoveCharacter(char c) const { TokenList new_token_list(begin(), end()); bool changed = false; for (auto&& token : new_token_list) { auto it = std::remove_if(token.begin(), token.end(), [&](char ch) { if (ch == c) { changed = true; return true; } return false; }); token.erase(it, token.end()); } if (!changed) { return *this; } return TokenRange(std::move(new_token_list)); } // Remove all tokens matching this one, potentially copying the underlying tokens. TokenRange RemoveToken(const std::string& token) { return RemoveIf([&](const std::string& tok) { return tok == token; }); } // Discard all empty tokens, potentially copying the underlying tokens. TokenRange DiscardEmpty() const { return RemoveIf([](const std::string& token) { return token.empty(); }); } // Create a non-copying subset of this range. // Length is trimmed so that the Slice does not go out of range. TokenRange Slice(size_t offset, size_t length = std::string::npos) const { assert(offset < Size()); if (length != std::string::npos && offset + length > Size()) { length = Size() - offset; } iterator it_end; if (length == std::string::npos) { it_end = end(); } else { it_end = begin() + offset + length; } return TokenRange(token_list_, begin() + offset, it_end); } // Try to match the string with tokens from this range. // Each token is used to match exactly once (after which the next token is used, and so on). // The matching happens from left-to-right in a non-greedy fashion. // If the currently-matched token is the wildcard, then the new outputted token will // contain as much as possible until the next token is matched. // // For example, if this == ["a:", "_", "b:] and "_" is the match string, then // MatchSubstrings on "a:foob:" will yield: ["a:", "foo", "b:"] // // Since the string matching can fail (e.g. ["foo"] against "bar"), then this // function can fail, in which cause it will return null. std::unique_ptr MatchSubstrings(const std::string& string, const std::string& wildcard) const { TokenList new_token_list; size_t wildcard_idx = std::string::npos; size_t string_idx = 0; // Function to push all the characters matched as a wildcard so far // as a brand new token. It resets the wildcard matching. // Empty wildcards are possible and ok, but only if wildcard matching was on. auto maybe_push_wildcard_token = [&]() { if (wildcard_idx != std::string::npos) { size_t wildcard_length = string_idx - wildcard_idx; std::string wildcard_substr = string.substr(wildcard_idx, wildcard_length); new_token_list.push_back(std::move(wildcard_substr)); wildcard_idx = std::string::npos; } }; for (iterator it = begin(); it != end(); ++it) { const std::string& tok = *it; if (tok == wildcard) { maybe_push_wildcard_token(); wildcard_idx = string_idx; continue; } size_t next_token_idx = string.find(tok); if (next_token_idx == std::string::npos) { // Could not find token at all return nullptr; } else if (next_token_idx != string_idx && wildcard_idx == std::string::npos) { // Found the token at a non-starting location, and we weren't // trying to parse the wildcard. return nullptr; } new_token_list.push_back(string.substr(next_token_idx, tok.size())); maybe_push_wildcard_token(); string_idx += tok.size(); } size_t remaining = string.size() - string_idx; if (remaining > 0) { if (wildcard_idx == std::string::npos) { // Some characters were still remaining in the string, // but it wasn't trying to match a wildcard. return nullptr; } } // If some characters are remaining, the rest must be a wildcard. string_idx += remaining; maybe_push_wildcard_token(); return std::unique_ptr(new TokenRange(std::move(new_token_list))); } // Do a quick match token-by-token, and see if they match. // Any tokens with a wildcard in them are only matched up until the wildcard. // If this is true, then the wildcard matching later on can still fail, so this is not // a guarantee that the argument is correct, it's more of a strong hint that the // user-provided input *probably* was trying to match this argument. // // Returns how many tokens were either matched (or ignored because there was a // wildcard present). 0 means no match. If the size() tokens are returned. size_t MaybeMatches(const TokenRange& token_list, const std::string& wildcard) const { auto token_it = token_list.begin(); auto token_end = token_list.end(); auto name_it = begin(); auto name_end = end(); size_t matched_tokens = 0; while (token_it != token_end && name_it != name_end) { // Skip token matching when the corresponding name has a wildcard in it. const std::string& name = *name_it; size_t wildcard_idx = name.find(wildcard); if (wildcard_idx == std::string::npos) { // No wildcard present // Did the definition token match the user token? if (name != *token_it) { return matched_tokens; } } else { std::string name_prefix = name.substr(0, wildcard_idx); // Did the user token start with the up-to-the-wildcard prefix? if (!StartsWith(*token_it, name_prefix)) { return matched_tokens; } } ++token_it; ++name_it; ++matched_tokens; } // If we got this far, it's either a full match or the token list was too short. return matched_tokens; } // Flatten the token range by joining every adjacent token with the separator character. // e.g. ["hello", "world"].join('$') == "hello$world" std::string Join(char separator) const { TokenList tmp(begin(), end()); return art::Join(tmp, separator); // TODO: Join should probably take an offset or iterators } private: static bool StartsWith(const std::string& larger, const std::string& smaller) { if (larger.size() >= smaller.size()) { return std::equal(smaller.begin(), smaller.end(), larger.begin()); } return false; } template TokenRange RemoveIf(const TPredicate& predicate) const { // If any of the tokens in the token lists are empty, then // we need to remove them and compress the token list into a smaller one. bool remove = false; for (auto it = begin_; it != end_; ++it) { auto&& token = *it; if (predicate(token)) { remove = true; break; } } // Actually copy the token list and remove the tokens that don't match our predicate. if (remove) { auto token_list = std::make_shared(begin(), end()); TokenList::iterator new_end = std::remove_if(token_list->begin(), token_list->end(), predicate); token_list->erase(new_end, token_list->end()); assert(token_list_->size() > token_list->size() && "Nothing was actually removed!"); return TokenRange(token_list); } return *this; } const std::shared_ptr> token_list_; const iterator begin_; const iterator end_; }; } // namespace art #endif // ART_CMDLINE_TOKEN_RANGE_H_