1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/lib/strings/scanner.h"
17 
18 #include "tensorflow/core/platform/test.h"
19 
20 namespace tensorflow {
21 namespace strings {
22 
23 class ScannerTest : public ::testing::Test {
24  protected:
25   // Returns a string with all chars that are in <clz>, in byte value order.
ClassStr(Scanner::CharClass clz)26   string ClassStr(Scanner::CharClass clz) {
27     string s;
28     for (int i = 0; i < 256; ++i) {
29       char ch = i;
30       if (Scanner::Matches(clz, ch)) {
31         s += ch;
32       }
33     }
34     return s;
35   }
36 };
37 
// Exercises Any() on an input with matching prefixes, on an empty input, and
// on an input with no matching characters. The scan is expected to succeed in
// all three cases.
TEST_F(ScannerTest, Any) {
  StringPiece remaining, match;

  // Spaces and letters are consumed; the DIGIT step between them consumes
  // nothing, so the trailing digits are left over.
  Scanner mixed("   horse0123");
  mixed.Any(Scanner::SPACE).Any(Scanner::DIGIT).Any(Scanner::LETTER);
  EXPECT_TRUE(mixed.GetResult(&remaining, &match));
  EXPECT_EQ("   horse", match);
  EXPECT_EQ("0123", remaining);

  // Empty input: nothing to consume, nothing captured.
  Scanner blank("");
  blank.Any(Scanner::SPACE).Any(Scanner::DIGIT).Any(Scanner::LETTER);
  EXPECT_TRUE(blank.GetResult(&remaining, &match));
  EXPECT_EQ("", remaining);
  EXPECT_EQ("", match);

  // No character belongs to any of the requested classes.
  Scanner dashes("----");
  dashes.Any(Scanner::SPACE).Any(Scanner::DIGIT).Any(Scanner::LETTER);
  EXPECT_TRUE(dashes.GetResult(&remaining, &match));
  EXPECT_EQ("----", remaining);
  EXPECT_EQ("", match);
}
64 
// AnySpace() is applied before and after a single letter; the capture is
// expected to include the whitespace on both sides of that letter.
TEST_F(ScannerTest, AnySpace) {
  Scanner scan("  a b ");
  scan.AnySpace();
  scan.One(Scanner::LETTER);
  scan.AnySpace();

  StringPiece remaining, match;
  EXPECT_TRUE(scan.GetResult(&remaining, &match));
  EXPECT_EQ("  a ", match);
  EXPECT_EQ("b ", remaining);
}
75 
// Input is a backslash followed by a newline. Neither is expected to match
// LETTER_DIGIT_UNDERSCORE, so the scan succeeds with an empty capture.
TEST_F(ScannerTest, AnyEscapedNewline) {
  Scanner scan("\\\n");
  scan.Any(Scanner::LETTER_DIGIT_UNDERSCORE);

  StringPiece remaining, match;
  EXPECT_TRUE(scan.GetResult(&remaining, &match));
  EXPECT_EQ("\\\n", remaining);
  EXPECT_EQ("", match);
}
84 
// Any() on an empty input is expected to succeed and yield empty results.
TEST_F(ScannerTest, AnyEmptyString) {
  Scanner scan("");
  scan.Any(Scanner::LETTER_DIGIT_UNDERSCORE);

  StringPiece remaining, match;
  EXPECT_TRUE(scan.GetResult(&remaining, &match));
  EXPECT_EQ("", remaining);
  EXPECT_EQ("", match);
}
93 
// Eos() is expected to pass only when no input is left at the point it is
// called.
TEST_F(ScannerTest, Eos) {
  // Unconsumed input remains.
  Scanner pending("a");
  EXPECT_FALSE(pending.Eos().GetResult());

  // Nothing to consume at all.
  Scanner nothing("");
  EXPECT_TRUE(nothing.Eos().GetResult());

  // The literal covers only part of the input.
  Scanner partial("abc");
  partial.OneLiteral("ab");
  EXPECT_FALSE(partial.Eos().GetResult());

  // The literal covers the whole input.
  Scanner whole("abc");
  whole.OneLiteral("abc");
  EXPECT_TRUE(whole.Eos().GetResult());
}
100 
// Many() is expected to fail when the first character does not match or the
// input is empty, and otherwise to consume the whole matching run.
TEST_F(ScannerTest, Many) {
  // Success/failure only, without inspecting the outputs.
  Scanner letters("abc");
  EXPECT_TRUE(letters.Many(Scanner::LETTER).GetResult());
  Scanner digit("0");
  EXPECT_FALSE(digit.Many(Scanner::LETTER).GetResult());
  Scanner blank("");
  EXPECT_FALSE(blank.Many(Scanner::LETTER).GetResult());

  StringPiece remaining, match;

  // The run of letters stops at the trailing space.
  Scanner with_tail("abc ");
  with_tail.Many(Scanner::LETTER);
  EXPECT_TRUE(with_tail.GetResult(&remaining, &match));
  EXPECT_EQ(" ", remaining);
  EXPECT_EQ("abc", match);

  // The run of letters reaches the end of the input.
  Scanner no_tail("abc");
  no_tail.Many(Scanner::LETTER);
  EXPECT_TRUE(no_tail.GetResult(&remaining, &match));
  EXPECT_EQ("", remaining);
  EXPECT_EQ("abc", match);
}
116 
// One() is expected to consume exactly one matching character and to fail
// when the next character does not match or the input is empty.
TEST_F(ScannerTest, One) {
  // Success/failure only, without inspecting the outputs.
  Scanner letters("abc");
  EXPECT_TRUE(letters.One(Scanner::LETTER).GetResult());
  Scanner digit("0");
  EXPECT_FALSE(digit.One(Scanner::LETTER).GetResult());
  Scanner blank("");
  EXPECT_FALSE(blank.One(Scanner::LETTER).GetResult());

  StringPiece remaining, match;

  // Two One() calls consume two characters and leave the third.
  Scanner twice("abc");
  twice.One(Scanner::LETTER);
  twice.One(Scanner::LETTER);
  EXPECT_TRUE(twice.GetResult(&remaining, &match));
  EXPECT_EQ("c", remaining);
  EXPECT_EQ("ab", match);

  // A single-character input is consumed entirely.
  Scanner single("a");
  single.One(Scanner::LETTER);
  EXPECT_TRUE(single.GetResult(&remaining, &match));
  EXPECT_EQ("", remaining);
  EXPECT_EQ("a", match);
}
133 
// OneLiteral() is expected to reject a literal that differs only by case and
// to accept consecutive literals that together spell the input.
TEST_F(ScannerTest, OneLiteral) {
  Scanner wrong_case("abc");
  wrong_case.OneLiteral("abC");
  EXPECT_FALSE(wrong_case.GetResult());

  Scanner two_parts("abc");
  two_parts.OneLiteral("ab");
  two_parts.OneLiteral("c");
  EXPECT_TRUE(two_parts.GetResult());
}
138 
// ScanUntil() is expected to stop at the first occurrence of the end
// character, with no special handling of backslashes, and to fail the scan
// when that character never appears.
TEST_F(ScannerTest, ScanUntil) {
  StringPiece remaining, match;

  // The scan ends at the first quote, even though a backslash precedes it.
  Scanner quoted(R"(' \1 \2 \3 \' \\'rest)");
  quoted.OneLiteral("'").ScanUntil('\'').OneLiteral("'");
  EXPECT_TRUE(quoted.GetResult(&remaining, &match));
  EXPECT_EQ(R"( \\'rest)", remaining);
  EXPECT_EQ(R"(' \1 \2 \3 \')", match);

  // No closing quote in the input: the scan fails and the outputs keep the
  // sentinel values assigned beforehand.
  remaining = "unset";
  match = "unset";
  Scanner unterminated(R"(' \1 \2 \3 \\rest)");
  unterminated.OneLiteral("'").ScanUntil('\'');
  EXPECT_FALSE(unterminated.GetResult(&remaining, &match));
  EXPECT_EQ("unset", remaining);
  EXPECT_EQ("unset", match);

  // A backslash may itself be the end character.
  remaining = "";
  match = "";
  Scanner to_backslash(R"(123\456)");
  to_backslash.ScanUntil('\\');
  EXPECT_TRUE(to_backslash.GetResult(&remaining, &match));
  EXPECT_EQ(R"(\456)", remaining);
  EXPECT_EQ("123", match);
}
165 
// ScanEscapedUntil() is expected to skip over a backslash-escaped end
// character and stop only at an unescaped one.
TEST_F(ScannerTest, ScanEscapedUntil) {
  StringPiece remaining, match;

  // The escaped quote is passed over; the quote after the escaped backslash
  // ends the scan.
  Scanner quoted(R"(' \1 \2 \3 \' \\'rest)");
  quoted.OneLiteral("'").ScanEscapedUntil('\'').OneLiteral("'");
  EXPECT_TRUE(quoted.GetResult(&remaining, &match));
  EXPECT_EQ("rest", remaining);
  EXPECT_EQ(R"(' \1 \2 \3 \' \\')", match);

  // The only quote after the opening one is escaped: the scan fails and the
  // outputs keep the sentinel values assigned beforehand.
  remaining = "unset";
  match = "unset";
  Scanner unterminated(R"(' \1 \2 \3 \' \\rest)");
  unterminated.OneLiteral("'").ScanEscapedUntil('\'');
  EXPECT_FALSE(unterminated.GetResult(&remaining, &match));
  EXPECT_EQ("unset", remaining);
  EXPECT_EQ("unset", match);
}
185 
// ZeroOrOneLiteral() is expected to consume the literal when present and to
// succeed without consuming anything when it is absent.
TEST_F(ScannerTest, ZeroOrOneLiteral) {
  StringPiece remaining, match;

  // The literal differs by case, so nothing is consumed.
  Scanner wrong_case("abc");
  wrong_case.ZeroOrOneLiteral("abC");
  EXPECT_TRUE(wrong_case.GetResult(&remaining, &match));
  EXPECT_EQ("abc", remaining);
  EXPECT_EQ("", match);

  // Both optional literals are present and consumed in sequence.
  Scanner two_parts("abcd");
  two_parts.ZeroOrOneLiteral("ab");
  two_parts.ZeroOrOneLiteral("c");
  EXPECT_TRUE(two_parts.GetResult(&remaining, &match));
  EXPECT_EQ("d", remaining);
  EXPECT_EQ("abc", match);

  // Empty input: the optional literal is simply absent.
  Scanner blank("");
  blank.ZeroOrOneLiteral("abc");
  EXPECT_TRUE(blank.GetResult(&remaining, &match));
  EXPECT_EQ("", remaining);
  EXPECT_EQ("", match);
}
204 
205 // Test output of GetResult (including the forms with optional params),
206 // and that it can be called multiple times.
TEST_F(ScannerTest,CaptureAndGetResult)207 TEST_F(ScannerTest, CaptureAndGetResult) {
208   StringPiece remaining, match;
209 
210   Scanner scan("  first    second");
211   EXPECT_TRUE(scan.Any(Scanner::SPACE)
212                   .RestartCapture()
213                   .One(Scanner::LETTER)
214                   .Any(Scanner::LETTER_DIGIT)
215                   .StopCapture()
216                   .Any(Scanner::SPACE)
217                   .GetResult(&remaining, &match));
218   EXPECT_EQ("second", remaining);
219   EXPECT_EQ("first", match);
220   EXPECT_TRUE(scan.GetResult());
221   remaining = "";
222   EXPECT_TRUE(scan.GetResult(&remaining));
223   EXPECT_EQ("second", remaining);
224   remaining = "";
225   match = "";
226   EXPECT_TRUE(scan.GetResult(&remaining, &match));
227   EXPECT_EQ("second", remaining);
228   EXPECT_EQ("first", match);
229 
230   scan.RestartCapture().One(Scanner::LETTER).One(Scanner::LETTER);
231   remaining = "";
232   match = "";
233   EXPECT_TRUE(scan.GetResult(&remaining, &match));
234   EXPECT_EQ("cond", remaining);
235   EXPECT_EQ("se", match);
236 }
237 
238 // Tests that if StopCapture is not called, then calling GetResult, then
239 // scanning more, then GetResult again will update the capture.
TEST_F(ScannerTest,MultipleGetResultExtendsCapture)240 TEST_F(ScannerTest, MultipleGetResultExtendsCapture) {
241   StringPiece remaining, match;
242 
243   Scanner scan("one2three");
244   EXPECT_TRUE(scan.Many(Scanner::LETTER).GetResult(&remaining, &match));
245   EXPECT_EQ("2three", remaining);
246   EXPECT_EQ("one", match);
247   EXPECT_TRUE(scan.Many(Scanner::DIGIT).GetResult(&remaining, &match));
248   EXPECT_EQ("three", remaining);
249   EXPECT_EQ("one2", match);
250   EXPECT_TRUE(scan.Many(Scanner::LETTER).GetResult(&remaining, &match));
251   EXPECT_EQ("", remaining);
252   EXPECT_EQ("one2three", match);
253 }
254 
// When the scan fails, GetResult is expected to leave the caller's output
// arguments untouched.
TEST_F(ScannerTest, FailedMatchDoesntChangeResult) {
  StringPiece match = "match";
  StringPiece remaining = "rem";

  // "name" does not start with a space, so One(SPACE) fails.
  Scanner scan("name");
  scan.One(Scanner::SPACE);
  EXPECT_FALSE(scan.GetResult(&remaining, &match));
  EXPECT_EQ("rem", remaining);
  EXPECT_EQ("match", match);
}
264 
// With no RestartCapture() call, the capture is expected to cover everything
// scanned from the start of the input.
TEST_F(ScannerTest, DefaultCapturesAll) {
  StringPiece match = "match";
  StringPiece remaining = "rem";

  Scanner scan("a b");
  scan.Any(Scanner::LETTER);
  scan.AnySpace();
  scan.Any(Scanner::LETTER);

  EXPECT_TRUE(scan.GetResult(&remaining, &match));
  EXPECT_EQ("", remaining);
  EXPECT_EQ("a b", match);
}
277 
// Pins the exact membership of each character class by comparing the string
// produced by ClassStr() against the expected characters, grouped by family.
TEST_F(ScannerTest, AllCharClasses) {
  // ALL is expected to accept every one of the 256 byte values.
  EXPECT_EQ(256, ClassStr(Scanner::ALL).size());

  // Whitespace.
  EXPECT_EQ("\t\n\v\f\r ", ClassStr(Scanner::SPACE));

  // Digits only.
  EXPECT_EQ("0123456789", ClassStr(Scanner::DIGIT));
  EXPECT_EQ("123456789", ClassStr(Scanner::NON_ZERO_DIGIT));

  // Letters only.
  EXPECT_EQ("ABCDEFGHIJKLMNOPQRSTUVWXYZ", ClassStr(Scanner::UPPERLETTER));
  EXPECT_EQ("abcdefghijklmnopqrstuvwxyz", ClassStr(Scanner::LOWERLETTER));
  EXPECT_EQ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
            ClassStr(Scanner::LETTER));

  // Lowercase letters combined with digits and underscore.
  EXPECT_EQ("0123456789abcdefghijklmnopqrstuvwxyz",
            ClassStr(Scanner::LOWERLETTER_DIGIT));
  EXPECT_EQ("0123456789_abcdefghijklmnopqrstuvwxyz",
            ClassStr(Scanner::LOWERLETTER_DIGIT_UNDERSCORE));

  // Letters and digits, optionally with underscore.
  EXPECT_EQ("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
            ClassStr(Scanner::LETTER_DIGIT));
  EXPECT_EQ("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz",
            ClassStr(Scanner::LETTER_DIGIT_UNDERSCORE));

  // Letters and digits plus dot-based punctuation.
  EXPECT_EQ(".0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
            ClassStr(Scanner::LETTER_DIGIT_DOT));
  EXPECT_EQ(".0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz",
            ClassStr(Scanner::LETTER_DIGIT_DOT_UNDERSCORE));
  EXPECT_EQ("+-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
            ClassStr(Scanner::LETTER_DIGIT_DOT_PLUS_MINUS));

  // Letters and digits plus dash-based punctuation.
  EXPECT_EQ(
      "-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_"
      "abcdefghijklmnopqrstuvwxyz",
      ClassStr(Scanner::LETTER_DIGIT_DASH_UNDERSCORE));
  EXPECT_EQ(
      "-./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz",
      ClassStr(Scanner::LETTER_DIGIT_DASH_DOT_SLASH));
  EXPECT_EQ(
      "-./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_"
      "abcdefghijklmnopqrstuvwxyz",
      ClassStr(Scanner::LETTER_DIGIT_DASH_DOT_SLASH_UNDERSCORE));
}
314 
// Peek() is expected to return the next unread character, or the supplied
// default ('\0' when none is given) if no input is left.
TEST_F(ScannerTest, Peek) {
  // Input available: the default argument is ignored.
  Scanner plain("abc");
  EXPECT_EQ('a', plain.Peek());
  Scanner plain_with_default("abc");
  EXPECT_EQ('a', plain_with_default.Peek('b'));

  // Empty input: the default is returned.
  Scanner blank("");
  EXPECT_EQ('\0', blank.Peek());
  Scanner blank_with_default("");
  EXPECT_EQ('z', blank_with_default.Peek('z'));

  // Peek after consuming part of the input.
  Scanner digits_then_letter("0123A");
  digits_then_letter.Any(Scanner::DIGIT);
  EXPECT_EQ('A', digits_then_letter.Peek());

  // Peek after consuming all of the input.
  Scanner fully_consumed("0123A");
  fully_consumed.Any(Scanner::LETTER_DIGIT);
  EXPECT_EQ('\0', fully_consumed.Peek());
}
323 
324 }  // namespace strings
325 }  // namespace tensorflow
326