1 // Copyright 2008 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Exhaustive testing of regular expression matching.
6
7 #include "util/test.h"
8 #include "re2/testing/exhaustive_tester.h"
9
10 namespace re2 {
11
12 // Test simple character classes by themselves.
TEST(CharacterClasses,Exhaustive)13 TEST(CharacterClasses, Exhaustive) {
14 vector<string> atoms = Split(" ",
15 "[a] [b] [ab] [^bc] [b-d] [^b-d] []a] [-a] [a-] [^-a] [a-b-c] a b .");
16 ExhaustiveTest(2, 1, atoms, RegexpGenerator::EgrepOps(),
17 5, Explode("ab"), "", "");
18 }
19
20 // Test simple character classes inside a___b (for example, a[a]b).
TEST(CharacterClasses,ExhaustiveAB)21 TEST(CharacterClasses, ExhaustiveAB) {
22 vector<string> atoms = Split(" ",
23 "[a] [b] [ab] [^bc] [b-d] [^b-d] []a] [-a] [a-] [^-a] [a-b-c] a b .");
24 ExhaustiveTest(2, 1, atoms, RegexpGenerator::EgrepOps(),
25 5, Explode("ab"), "a%sb", "");
26 }
27
28 // Returns UTF8 for Rune r
UTF8(Rune r)29 static string UTF8(Rune r) {
30 char buf[UTFmax+1];
31 buf[runetochar(buf, &r)] = 0;
32 return string(buf);
33 }
34
35 // Returns a vector of "interesting" UTF8 characters.
36 // Unicode is now too big to just return all of them,
37 // so UTF8Characters return a set likely to be good test cases.
InterestingUTF8()38 static const vector<string>& InterestingUTF8() {
39 static bool init;
40 static vector<string> v;
41
42 if (init)
43 return v;
44
45 init = true;
46 // All the Latin1 equivalents are interesting.
47 for (int i = 1; i < 256; i++)
48 v.push_back(UTF8(i));
49
50 // After that, the codes near bit boundaries are
51 // interesting, because they span byte sequence lengths.
52 for (int j = 0; j < 8; j++)
53 v.push_back(UTF8(256 + j));
54 for (int i = 512; i < Runemax; i <<= 1)
55 for (int j = -8; j < 8; j++)
56 v.push_back(UTF8(i + j));
57
58 // The codes near Runemax, including Runemax itself, are interesting.
59 for (int j = -8; j <= 0; j++)
60 v.push_back(UTF8(Runemax + j));
61
62 return v;
63 }
64
65 // Test interesting UTF-8 characters against character classes.
TEST(InterestingUTF8,SingleOps)66 TEST(InterestingUTF8, SingleOps) {
67 vector<string> atoms = Split(" ",
68 ". ^ $ \\a \\f \\n \\r \\t \\v \\d \\D \\s \\S \\w \\W \\b \\B "
69 "[[:alnum:]] [[:alpha:]] [[:blank:]] [[:cntrl:]] [[:digit:]] "
70 "[[:graph:]] [[:lower:]] [[:print:]] [[:punct:]] [[:space:]] "
71 "[[:upper:]] [[:xdigit:]] [\\s\\S] [\\d\\D] [^\\w\\W] [^\\d\\D]");
72 vector<string> ops; // no ops
73 ExhaustiveTest(1, 0, atoms, ops,
74 1, InterestingUTF8(), "", "");
75 }
76
77 // Test interesting UTF-8 characters against character classes,
78 // but wrap everything inside AB.
TEST(InterestingUTF8,AB)79 TEST(InterestingUTF8, AB) {
80 vector<string> atoms = Split(" ",
81 ". ^ $ \\a \\f \\n \\r \\t \\v \\d \\D \\s \\S \\w \\W \\b \\B "
82 "[[:alnum:]] [[:alpha:]] [[:blank:]] [[:cntrl:]] [[:digit:]] "
83 "[[:graph:]] [[:lower:]] [[:print:]] [[:punct:]] [[:space:]] "
84 "[[:upper:]] [[:xdigit:]] [\\s\\S] [\\d\\D] [^\\w\\W] [^\\d\\D]");
85 vector<string> ops; // no ops
86 vector<string> alpha = InterestingUTF8();
87 for (int i = 0; i < alpha.size(); i++)
88 alpha[i] = "a" + alpha[i] + "b";
89 ExhaustiveTest(1, 0, atoms, ops,
90 1, alpha, "a%sb", "");
91 }
92
93 } // namespace re2
94
95