// Copyright (C) 2015 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef _INIT_PARSER_TOKENIZER_H
#define _INIT_PARSER_TOKENIZER_H

#include <stddef.h>

#include <string>

namespace init {

// Used to tokenize a std::string.
// Call Next() to advance through each token until it returns false,
// indicating there are no more tokens left in the string.
// The current token can be accessed with current(), which returns
// a Token.
// Supported tokens are:
//   TOK_START - Next() has yet to be called
//   TOK_END - At the end of string
//   TOK_NEWLINE - The end of a line denoted by \n.
//   TOK_TEXT - A word.
// Comments are denoted with '#' and the tokenizer will ignore
// the rest of the line.
// Double quotes can be used to insert whitespace into words.
// A backslash at the end of a line denotes continuation and
// a TOK_NEWLINE will not be generated for that line.
class Tokenizer {
 public:
  // Creates a tokenizer over |data|.
  // NOTE(review): the class keeps a std::string reference member (data_)
  // rather than a copy. The constructor body is not defined in this header;
  // if it binds data_ to |data|, the caller must keep that string alive and
  // unmodified for the lifetime of the Tokenizer, and must not pass a
  // temporary. Confirm against the implementation.
  Tokenizer(const std::string& data);
  ~Tokenizer();

  // Kinds of token the tokenizer can report; see the class comment for the
  // meaning of each value.
  enum TokenType { TOK_START, TOK_END, TOK_NEWLINE, TOK_TEXT };

  // A single token: its kind plus associated text.
  struct Token {
    TokenType type;
    // Presumably the word for TOK_TEXT tokens; its contents for the other
    // token kinds are not specified in this header.
    std::string text;
  };

  // Returns the current token.
  const Token& current();

  // Move to the next token, returns false at the end of input.
  bool Next();

 private:
  // Internal scanning helpers. They are only declared here; the notes below
  // are inferred from the names and should be checked against the
  // implementation file.
  void GetData();           // presumably refreshes cur_char_/eof_ from data_
  void AdvChar();           // presumably advances by one character
  void AdvText();           // presumably advances over the body of a word
  void AdvUntil(char x);    // presumably advances until character |x|
  void AdvWhiteSpace();     // presumably skips whitespace
  void StartText();         // presumably marks the start of a text token
  void EndText();           // presumably finishes the pending text token

  // Input being tokenized. Held by reference, not owned or copied.
  const std::string& data_;
  // Token handed out by current().
  Token current_;

  // Scanner state; exact semantics live in the implementation file.
  bool eof_;          // presumably true once the end of data_ is reached
  size_t pos_;        // presumably the current offset into data_
  char cur_char_;     // presumably the character at pos_
  size_t tok_start_;  // presumably the offset where the current word began
};

}  // namespace init

#endif  // _INIT_PARSER_TOKENIZER_H