1 // Copyright 2008 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "util/test.h"
6 #include "util/logging.h"
7 #include "re2/prog.h"
8 #include "re2/regexp.h"
9
10 namespace re2 {
11
// One table entry for the MimicsPCRE test: a pattern together with the
// value Regexp::MimicsPCRE() is expected to return for it.
struct PCRETest {
  const char* regexp;  // Pattern text handed to Regexp::Parse().
  bool should_match;   // Expected result of MimicsPCRE() on the parsed pattern.
};
16
17 static PCRETest tests[] = {
18 // Most things should behave exactly.
19 { "abc", true },
20 { "(a|b)c", true },
21 { "(a*|b)c", true },
22 { "(a|b*)c", true },
23 { "a(b|c)d", true },
24 { "a(()|())c", true },
25 { "ab*c", true },
26 { "ab+c", true },
27 { "a(b*|c*)d", true },
28 { "\\W", true },
29 { "\\W{1,2}", true },
30 { "\\d", true },
31
32 // Check that repeated empty strings do not.
33 { "(a*)*", false },
34 { "x(a*)*y", false },
35 { "(a*)+", false },
36 { "(a+)*", true },
37 { "(a+)+", true },
38 { "(a+)+", true },
39
40 // \v is the only character class that shouldn't.
41 { "\\b", true },
42 { "\\v", false },
43 { "\\d", true },
44
45 // The handling of ^ in multi-line mode is different, as is
46 // the handling of $ in single-line mode. (Both involve
47 // boundary cases if the string ends with \n.)
48 { "\\A", true },
49 { "\\z", true },
50 { "(?m)^", false },
51 { "(?m)$", true },
52 { "(?-m)^", true },
53 { "(?-m)$", false }, // In PCRE, == \Z
54 { "(?m)\\A", true },
55 { "(?m)\\z", true },
56 { "(?-m)\\A", true },
57 { "(?-m)\\z", true },
58 };
59
// Checks, for every entry in tests[], that Regexp::MimicsPCRE() returns the
// expected value. Each pattern is parsed twice with LikePerl flags: once with
// Latin1 added (reported as "latin1" on failure) and once without (reported
// as "utf").
TEST(MimicsPCRE, SimpleTests) {
  for (size_t i = 0; i < arraysize(tests); i++) {
    const PCRETest& t = tests[i];
    for (size_t j = 0; j < 2; j++) {
      const bool latin1 = (j == 0);
      Regexp::ParseFlags flags = Regexp::LikePerl;
      if (latin1)
        flags = flags | Regexp::Latin1;
      Regexp* re = Regexp::Parse(t.regexp, flags, NULL);
      ASSERT_TRUE(re != NULL) << " " << t.regexp;
      // Record the result and drop our reference before asserting, so the
      // Regexp is not left un-Decref'd if a failing assertion leaves the
      // test body early.
      const bool mimics = re->MimicsPCRE();
      re->Decref();
      ASSERT_EQ(t.should_match, mimics)
          << " " << t.regexp << " "
          << (latin1 ? "latin1" : "utf");
    }
  }
}
76
77 } // namespace re2
78