1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/core/lib/strings/scanner.h"
17
18 #include "tensorflow/core/platform/test.h"
19
20 namespace tensorflow {
21 namespace strings {
22
23 class ScannerTest : public ::testing::Test {
24 protected:
25 // Returns a string with all chars that are in <clz>, in byte value order.
ClassStr(Scanner::CharClass clz)26 string ClassStr(Scanner::CharClass clz) {
27 string s;
28 for (int i = 0; i < 256; ++i) {
29 char ch = i;
30 if (Scanner::Matches(clz, ch)) {
31 s += ch;
32 }
33 }
34 return s;
35 }
36 };
37
TEST_F(ScannerTest, Any) {
  // The same SPACE -> DIGIT -> LETTER chain is run over three inputs. The
  // scan is expected to succeed each time, even when nothing is consumed.
  StringPiece match, remaining;

  // Leading space and the letters are consumed; the digits are left over.
  Scanner mixed(" horse0123");
  mixed.Any(Scanner::SPACE).Any(Scanner::DIGIT).Any(Scanner::LETTER);
  EXPECT_TRUE(mixed.GetResult(&remaining, &match));
  EXPECT_EQ("0123", remaining);
  EXPECT_EQ(" horse", match);

  // Empty input: nothing to match, nothing remaining.
  Scanner empty("");
  empty.Any(Scanner::SPACE).Any(Scanner::DIGIT).Any(Scanner::LETTER);
  EXPECT_TRUE(empty.GetResult(&remaining, &match));
  EXPECT_EQ("", match);
  EXPECT_EQ("", remaining);

  // No character belongs to any of the classes: the input is untouched.
  Scanner dashes("----");
  dashes.Any(Scanner::SPACE).Any(Scanner::DIGIT).Any(Scanner::LETTER);
  EXPECT_TRUE(dashes.GetResult(&remaining, &match));
  EXPECT_EQ("", match);
  EXPECT_EQ("----", remaining);
}
64
TEST_F(ScannerTest, AnySpace) {
  // AnySpace() on both sides of a single letter: the capture is expected to
  // include the surrounding whitespace and stop before the next letter.
  StringPiece match, remaining;

  Scanner scanner(" a b ");
  scanner.AnySpace().One(Scanner::LETTER).AnySpace();

  EXPECT_TRUE(scanner.GetResult(&remaining, &match));
  EXPECT_EQ("b ", remaining);
  EXPECT_EQ(" a ", match);
}
75
TEST_F(ScannerTest, AnyEscapedNewline) {
  // A backslash followed by a newline contains no letter, digit or
  // underscore, so Any() is expected to consume nothing and still succeed.
  StringPiece match, remaining;

  Scanner scanner("\\\n");
  scanner.Any(Scanner::LETTER_DIGIT_UNDERSCORE);

  EXPECT_TRUE(scanner.GetResult(&remaining, &match));
  EXPECT_EQ("", match);
  EXPECT_EQ("\\\n", remaining);
}
84
TEST_F(ScannerTest, AnyEmptyString) {
  // Any() over an empty input is expected to succeed with an empty capture
  // and an empty remainder.
  StringPiece match, remaining;

  Scanner scanner("");
  scanner.Any(Scanner::LETTER_DIGIT_UNDERSCORE);

  EXPECT_TRUE(scanner.GetResult(&remaining, &match));
  EXPECT_EQ("", match);
  EXPECT_EQ("", remaining);
}
93
TEST_F(ScannerTest, Eos) {
  // Eos() is expected to fail while unread input remains...
  Scanner unread("a");
  EXPECT_FALSE(unread.Eos().GetResult());

  // ...and to succeed on input that is empty from the start.
  Scanner empty("");
  EXPECT_TRUE(empty.Eos().GetResult());

  // After a literal that covers only a prefix, one char is still unread.
  Scanner prefix_consumed("abc");
  prefix_consumed.OneLiteral("ab");
  EXPECT_FALSE(prefix_consumed.Eos().GetResult());

  // After a literal that covers the whole input, nothing is left.
  Scanner fully_consumed("abc");
  fully_consumed.OneLiteral("abc");
  EXPECT_TRUE(fully_consumed.Eos().GetResult());
}
100
TEST_F(ScannerTest, Many) {
  // Many() is expected to succeed only if at least one char of the class is
  // present at the current position.
  Scanner letters("abc");
  EXPECT_TRUE(letters.Many(Scanner::LETTER).GetResult());

  Scanner digit("0");
  EXPECT_FALSE(digit.Many(Scanner::LETTER).GetResult());

  Scanner empty("");
  EXPECT_FALSE(empty.Many(Scanner::LETTER).GetResult());

  StringPiece match, remaining;

  // The run of letters is captured; the trailing space is left over.
  Scanner with_trailing_space("abc ");
  with_trailing_space.Many(Scanner::LETTER);
  EXPECT_TRUE(with_trailing_space.GetResult(&remaining, &match));
  EXPECT_EQ("abc", match);
  EXPECT_EQ(" ", remaining);

  // The run of letters covers the entire input.
  Scanner whole_input("abc");
  whole_input.Many(Scanner::LETTER);
  EXPECT_TRUE(whole_input.GetResult(&remaining, &match));
  EXPECT_EQ("abc", match);
  EXPECT_EQ("", remaining);
}
116
TEST_F(ScannerTest, One) {
  // One() is expected to succeed only if the next char is in the class.
  Scanner letters("abc");
  EXPECT_TRUE(letters.One(Scanner::LETTER).GetResult());

  Scanner digit("0");
  EXPECT_FALSE(digit.One(Scanner::LETTER).GetResult());

  Scanner empty("");
  EXPECT_FALSE(empty.One(Scanner::LETTER).GetResult());

  StringPiece match, remaining;

  // Two One() calls consume exactly two chars.
  Scanner two_of_three("abc");
  two_of_three.One(Scanner::LETTER).One(Scanner::LETTER);
  EXPECT_TRUE(two_of_three.GetResult(&remaining, &match));
  EXPECT_EQ("ab", match);
  EXPECT_EQ("c", remaining);

  // A single One() call can consume the whole of a one-char input.
  Scanner single("a");
  single.One(Scanner::LETTER);
  EXPECT_TRUE(single.GetResult(&remaining, &match));
  EXPECT_EQ("a", match);
  EXPECT_EQ("", remaining);
}
133
TEST_F(ScannerTest, OneLiteral) {
  // A literal differing only in letter case is expected not to match.
  Scanner wrong_case("abc");
  EXPECT_FALSE(wrong_case.OneLiteral("abC").GetResult());

  // Consecutive literals that together spell the input are expected to match.
  Scanner split_literal("abc");
  split_literal.OneLiteral("ab").OneLiteral("c");
  EXPECT_TRUE(split_literal.GetResult());
}
138
TEST_F(ScannerTest, ScanUntil) {
  StringPiece match, remaining;

  // ScanUntil() is expected to stop at the first quote it sees, even though
  // that quote is preceded by a backslash in the input.
  Scanner quoted(R"(' \1 \2 \3 \' \\'rest)");
  quoted.OneLiteral("'").ScanUntil('\'').OneLiteral("'");
  EXPECT_TRUE(quoted.GetResult(&remaining, &match));
  EXPECT_EQ(R"(' \1 \2 \3 \')", match);
  EXPECT_EQ(R"( \\'rest)", remaining);

  // The "scan until" character is not present: the scan fails and the output
  // arguments keep their previous values.
  match = "unset";
  remaining = "unset";
  Scanner unterminated(R"(' \1 \2 \3 \\rest)");
  unterminated.OneLiteral("'").ScanUntil('\'');
  EXPECT_FALSE(unterminated.GetResult(&remaining, &match));
  EXPECT_EQ("unset", match);
  EXPECT_EQ("unset", remaining);

  // Scan until an escape character.
  match = "";
  remaining = "";
  Scanner up_to_backslash(R"(123\456)");
  up_to_backslash.ScanUntil('\\');
  EXPECT_TRUE(up_to_backslash.GetResult(&remaining, &match));
  EXPECT_EQ("123", match);
  EXPECT_EQ(R"(\456)", remaining);
}
165
TEST_F(ScannerTest, ScanEscapedUntil) {
  StringPiece match, remaining;

  // The backslash-preceded quote is expected to be skipped, so the scan runs
  // up to the final quote and only "rest" is left.
  Scanner quoted(R"(' \1 \2 \3 \' \\'rest)");
  quoted.OneLiteral("'").ScanEscapedUntil('\'').OneLiteral("'");
  EXPECT_TRUE(quoted.GetResult(&remaining, &match));
  EXPECT_EQ(R"(' \1 \2 \3 \' \\')", match);
  EXPECT_EQ("rest", remaining);

  // The "scan until" character is not present: the scan fails and the output
  // arguments keep their previous values.
  match = "unset";
  remaining = "unset";
  Scanner unterminated(R"(' \1 \2 \3 \' \\rest)");
  unterminated.OneLiteral("'").ScanEscapedUntil('\'');
  EXPECT_FALSE(unterminated.GetResult(&remaining, &match));
  EXPECT_EQ("unset", match);
  EXPECT_EQ("unset", remaining);
}
185
TEST_F(ScannerTest, ZeroOrOneLiteral) {
  StringPiece match, remaining;

  // A literal that does not match is expected to be skipped without failing
  // and without consuming input.
  Scanner mismatch("abc");
  mismatch.ZeroOrOneLiteral("abC");
  EXPECT_TRUE(mismatch.GetResult(&remaining, &match));
  EXPECT_EQ("", match);
  EXPECT_EQ("abc", remaining);

  // Two matching literals in a row are both consumed.
  Scanner two_matches("abcd");
  two_matches.ZeroOrOneLiteral("ab").ZeroOrOneLiteral("c");
  EXPECT_TRUE(two_matches.GetResult(&remaining, &match));
  EXPECT_EQ("abc", match);
  EXPECT_EQ("d", remaining);

  // Empty input: the optional literal is absent and the scan still succeeds.
  Scanner empty("");
  empty.ZeroOrOneLiteral("abc");
  EXPECT_TRUE(empty.GetResult(&remaining, &match));
  EXPECT_EQ("", match);
  EXPECT_EQ("", remaining);
}
204
205 // Test output of GetResult (including the forms with optional params),
206 // and that it can be called multiple times.
TEST_F(ScannerTest,CaptureAndGetResult)207 TEST_F(ScannerTest, CaptureAndGetResult) {
208 StringPiece remaining, match;
209
210 Scanner scan(" first second");
211 EXPECT_TRUE(scan.Any(Scanner::SPACE)
212 .RestartCapture()
213 .One(Scanner::LETTER)
214 .Any(Scanner::LETTER_DIGIT)
215 .StopCapture()
216 .Any(Scanner::SPACE)
217 .GetResult(&remaining, &match));
218 EXPECT_EQ("second", remaining);
219 EXPECT_EQ("first", match);
220 EXPECT_TRUE(scan.GetResult());
221 remaining = "";
222 EXPECT_TRUE(scan.GetResult(&remaining));
223 EXPECT_EQ("second", remaining);
224 remaining = "";
225 match = "";
226 EXPECT_TRUE(scan.GetResult(&remaining, &match));
227 EXPECT_EQ("second", remaining);
228 EXPECT_EQ("first", match);
229
230 scan.RestartCapture().One(Scanner::LETTER).One(Scanner::LETTER);
231 remaining = "";
232 match = "";
233 EXPECT_TRUE(scan.GetResult(&remaining, &match));
234 EXPECT_EQ("cond", remaining);
235 EXPECT_EQ("se", match);
236 }
237
238 // Tests that if StopCapture is not called, then calling GetResult, then
239 // scanning more, then GetResult again will update the capture.
TEST_F(ScannerTest,MultipleGetResultExtendsCapture)240 TEST_F(ScannerTest, MultipleGetResultExtendsCapture) {
241 StringPiece remaining, match;
242
243 Scanner scan("one2three");
244 EXPECT_TRUE(scan.Many(Scanner::LETTER).GetResult(&remaining, &match));
245 EXPECT_EQ("2three", remaining);
246 EXPECT_EQ("one", match);
247 EXPECT_TRUE(scan.Many(Scanner::DIGIT).GetResult(&remaining, &match));
248 EXPECT_EQ("three", remaining);
249 EXPECT_EQ("one2", match);
250 EXPECT_TRUE(scan.Many(Scanner::LETTER).GetResult(&remaining, &match));
251 EXPECT_EQ("", remaining);
252 EXPECT_EQ("one2three", match);
253 }
254
TEST_F(ScannerTest, FailedMatchDoesntChangeResult) {
  // Sentinel values that a failed GetResult() call must leave untouched.
  StringPiece match = "match";
  StringPiece remaining = "rem";

  // "name" does not start with a space, so One(SPACE) makes the scan fail.
  Scanner scan("name");
  scan.One(Scanner::SPACE);

  EXPECT_FALSE(scan.GetResult(&remaining, &match));
  EXPECT_EQ("match", match);
  EXPECT_EQ("rem", remaining);
}
264
TEST_F(ScannerTest, DefaultCapturesAll) {
  // Sentinel values that a successful GetResult() call must overwrite.
  StringPiece match = "match";
  StringPiece remaining = "rem";

  // RestartCapture() is never called, so the capture is expected to span
  // everything scanned from the start of the input.
  Scanner scan("a b");
  scan.Any(Scanner::LETTER).AnySpace().Any(Scanner::LETTER);

  EXPECT_TRUE(scan.GetResult(&remaining, &match));
  EXPECT_EQ("a b", match);
  EXPECT_EQ("", remaining);
}
277
TEST_F(ScannerTest, AllCharClasses) {
  // Each check compares the exact set of bytes accepted by one CharClass,
  // as reported by ClassStr(), with the expected members of that class.

  // ALL accepts every one of the 256 byte values.
  const string all_chars = ClassStr(Scanner::ALL);
  EXPECT_EQ(256, all_chars.size());

  const string digit = ClassStr(Scanner::DIGIT);
  EXPECT_EQ("0123456789", digit);

  const string letter = ClassStr(Scanner::LETTER);
  EXPECT_EQ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", letter);

  const string letter_digit = ClassStr(Scanner::LETTER_DIGIT);
  EXPECT_EQ("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
            letter_digit);

  const string letter_digit_dash_underscore =
      ClassStr(Scanner::LETTER_DIGIT_DASH_UNDERSCORE);
  EXPECT_EQ(
      "-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_"
      "abcdefghijklmnopqrstuvwxyz",
      letter_digit_dash_underscore);

  const string letter_digit_dash_dot_slash =
      ClassStr(Scanner::LETTER_DIGIT_DASH_DOT_SLASH);
  EXPECT_EQ(
      "-./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz",
      letter_digit_dash_dot_slash);

  const string letter_digit_dash_dot_slash_underscore =
      ClassStr(Scanner::LETTER_DIGIT_DASH_DOT_SLASH_UNDERSCORE);
  EXPECT_EQ(
      "-./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_"
      "abcdefghijklmnopqrstuvwxyz",
      letter_digit_dash_dot_slash_underscore);

  const string letter_digit_dot = ClassStr(Scanner::LETTER_DIGIT_DOT);
  EXPECT_EQ(".0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
            letter_digit_dot);

  const string letter_digit_dot_plus_minus =
      ClassStr(Scanner::LETTER_DIGIT_DOT_PLUS_MINUS);
  EXPECT_EQ("+-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
            letter_digit_dot_plus_minus);

  const string letter_digit_dot_underscore =
      ClassStr(Scanner::LETTER_DIGIT_DOT_UNDERSCORE);
  EXPECT_EQ(".0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz",
            letter_digit_dot_underscore);

  const string letter_digit_underscore =
      ClassStr(Scanner::LETTER_DIGIT_UNDERSCORE);
  EXPECT_EQ("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz",
            letter_digit_underscore);

  const string lowerletter = ClassStr(Scanner::LOWERLETTER);
  EXPECT_EQ("abcdefghijklmnopqrstuvwxyz", lowerletter);

  const string lowerletter_digit = ClassStr(Scanner::LOWERLETTER_DIGIT);
  EXPECT_EQ("0123456789abcdefghijklmnopqrstuvwxyz", lowerletter_digit);

  const string lowerletter_digit_underscore =
      ClassStr(Scanner::LOWERLETTER_DIGIT_UNDERSCORE);
  EXPECT_EQ("0123456789_abcdefghijklmnopqrstuvwxyz",
            lowerletter_digit_underscore);

  const string non_zero_digit = ClassStr(Scanner::NON_ZERO_DIGIT);
  EXPECT_EQ("123456789", non_zero_digit);

  const string space = ClassStr(Scanner::SPACE);
  EXPECT_EQ("\t\n\v\f\r ", space);

  const string upperletter = ClassStr(Scanner::UPPERLETTER);
  EXPECT_EQ("ABCDEFGHIJKLMNOPQRSTUVWXYZ", upperletter);
}
314
TEST_F(ScannerTest, Peek) {
  // With input available, Peek() is expected to return the next char,
  // whether or not a default value is supplied.
  Scanner fresh("abc");
  EXPECT_EQ('a', fresh.Peek());

  Scanner fresh_with_default("abc");
  EXPECT_EQ('a', fresh_with_default.Peek('b'));

  // With no input available, Peek() is expected to return '\0' unless the
  // caller supplies a default value.
  Scanner empty("");
  EXPECT_EQ('\0', empty.Peek());

  Scanner empty_with_default("");
  EXPECT_EQ('z', empty_with_default.Peek('z'));

  // Peek() looks at the char following what has already been scanned.
  Scanner digits_scanned("0123A");
  digits_scanned.Any(Scanner::DIGIT);
  EXPECT_EQ('A', digits_scanned.Peek());

  // Once the whole input has been scanned, the default '\0' is returned.
  Scanner everything_scanned("0123A");
  everything_scanned.Any(Scanner::LETTER_DIGIT);
  EXPECT_EQ('\0', everything_scanned.Peek());
}
323
324 } // namespace strings
325 } // namespace tensorflow
326