1 /*
2  * Copyright (c) 2015, Intel Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without modification,
6  * are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice, this
9  * list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation and/or
13  * other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its contributors
16  * may be used to endorse or promote products derived from this software without
17  * specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
23  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 #include "Tokenizer.h"
31 
32 using std::string;
33 using std::vector;
34 
35 const string Tokenizer::defaultDelimiters = " \n\r\t\v\f";
36 
Tokenizer(const string & input,const string & delimiters,bool mergeDelimiters)37 Tokenizer::Tokenizer(const string &input, const string &delimiters, bool mergeDelimiters)
38     : _input(input), _delimiters(delimiters), _mergeDelimiters(mergeDelimiters)
39 {
40 }
41 
split()42 vector<string> Tokenizer::split()
43 {
44     vector<string> result;
45     string token;
46     bool leftover = false;
47 
48     for (const auto character : _input) {
49         if (_delimiters.find(character) != string::npos) {
50             if (_mergeDelimiters) {
51                 leftover = false;
52                 if (token.empty()) {
53                     // skip consecutive delimiters
54                     continue;
55                 }
56             } else {
57                 // We've encountered a delimiter, which means that there is a
58                 // left-hand token and a right-side token. We are going to add
59                 // the left-hand one but must not forget that there is a
60                 // right-hand one (possibly empty)
61                 leftover = true;
62             }
63 
64             result.push_back(token);
65             token.clear();
66             continue;
67         }
68         token += character;
69         leftover = true;
70     }
71 
72     // push any leftover token:
73     if (leftover) {
74         result.push_back(token);
75     }
76 
77     return result;
78 }
79