1 /*
2  * Copyright (c) 2015, Intel Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without modification,
6  * are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice, this
9  * list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation and/or
13  * other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its contributors
16  * may be used to endorse or promote products derived from this software without
17  * specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
23  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 #pragma once
31 
32 #include "NonCopyable.hpp"
33 
34 #include <string>
35 #include <vector>
36 
37 /** Tokenizer class
38  *
39  * Must be initialized with a string to be tokenized and, optionally, a string
40  * of delimiters (@see Tokenizer::defaultDelimiters).
41  */
42 class Tokenizer : private utility::NonCopyable
43 {
44 public:
45     /** Constructs a Tokenizer
46      *
47      * @param[in] input The string to be tokenized
48      * @param[in] delimiters A string containing all the token delimiters
49      *            (hence, each delimiter can only be a single character)
50      * @param[in] mergeDelimiters If true, consecutive delimiters are considered
51      *            as one; leading and trailing delimiters are also ignored.
52      *            If false, consecutive delimiters produce empty tokens
53      */
54     Tokenizer(const std::string &input, const std::string &delimiters = defaultDelimiters,
55               bool mergeDelimiters = true);
~Tokenizer()56     ~Tokenizer(){};
57 
58     /** Return a vector of all tokens
59      */
60     std::vector<std::string> split();
61 
62     /** Default list of delimiters (" \n\r\t\v\f") */
63     static const std::string defaultDelimiters;
64 
65 private:
66     const std::string _input;      //< string to be tokenized
67     const std::string _delimiters; //< token delimiters
68     const bool _mergeDelimiters;   //< whether subsequent delimiters should be merged
69 };
70