1 // Copyright 2008 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Exhaustive testing of regular expression matching.
6 
7 // Each test picks an alphabet (e.g., "abc"), a maximum string length,
8 // a maximum regular expression length, and a maximum number of letters
9 // that can appear in the regular expression.  Given these parameters,
10 // it tries every possible regular expression and string, verifying that
11 // the NFA, DFA, and a trivial backtracking implementation agree about
12 // the location of the match.
13 
14 #include <stdio.h>
15 
16 #include "util/test.h"
17 #include "util/flags.h"
18 #include "util/logging.h"
19 #include "util/strutil.h"
20 #include "re2/testing/exhaustive_tester.h"
21 #include "re2/testing/tester.h"
22 
23 // For target `log' in the Makefile.
24 #ifndef LOGGING
25 #define LOGGING 0
26 #endif
27 
28 DEFINE_FLAG(bool, show_regexps, false, "show regexps during testing");
29 
30 DEFINE_FLAG(int, max_bad_regexp_inputs, 1,
31             "Stop testing a regular expression after finding this many "
32             "strings that break it.");
33 
34 namespace re2 {
35 
escape(const StringPiece & sp)36 static char* escape(const StringPiece& sp) {
37   static char buf[512];
38   char* p = buf;
39   *p++ = '\"';
40   for (size_t i = 0; i < sp.size(); i++) {
41     if(p+5 >= buf+sizeof buf)
42       LOG(FATAL) << "ExhaustiveTester escape: too long";
43     if(sp[i] == '\\' || sp[i] == '\"') {
44       *p++ = '\\';
45       *p++ = sp[i];
46     } else if(sp[i] == '\n') {
47       *p++ = '\\';
48       *p++ = 'n';
49     } else {
50       *p++ = sp[i];
51     }
52   }
53   *p++ = '\"';
54   *p = '\0';
55   return buf;
56 }
57 
PrintResult(const RE2 & re,const StringPiece & input,RE2::Anchor anchor,StringPiece * m,int n)58 static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anchor, StringPiece *m, int n) {
59   if (!re.Match(input, 0, input.size(), anchor, m, n)) {
60     printf("-");
61     return;
62   }
63   for (int i = 0; i < n; i++) {
64     if (i > 0)
65       printf(" ");
66     if (m[i].data() == NULL)
67       printf("-");
68     else
69       printf("%td-%td",
70              m[i].begin() - input.begin(),
71              m[i].end() - input.begin());
72   }
73 }
74 
75 // Processes a single generated regexp.
76 // Compiles it using Regexp interface and PCRE, and then
77 // checks that NFA, DFA, and PCRE all return the same results.
HandleRegexp(const std::string & const_regexp)78 void ExhaustiveTester::HandleRegexp(const std::string& const_regexp) {
79   regexps_++;
80   std::string regexp = const_regexp;
81   if (!topwrapper_.empty()) {
82     regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str());
83   }
84 
85   if (GetFlag(FLAGS_show_regexps)) {
86     printf("\r%s", regexp.c_str());
87     fflush(stdout);
88   }
89 
90   if (LOGGING) {
91     // Write out test cases and answers for use in testing
92     // other implementations, such as Go's regexp package.
93     if (randomstrings_)
94       LOG(ERROR) << "Cannot log with random strings.";
95     if (regexps_ == 1) {  // first
96       printf("strings\n");
97       strgen_.Reset();
98       while (strgen_.HasNext())
99         printf("%s\n", escape(strgen_.Next()));
100       printf("regexps\n");
101     }
102     printf("%s\n", escape(regexp));
103 
104     RE2 re(regexp);
105     RE2::Options longest;
106     longest.set_longest_match(true);
107     RE2 relongest(regexp, longest);
108     int ngroup = re.NumberOfCapturingGroups()+1;
109     StringPiece* group = new StringPiece[ngroup];
110 
111     strgen_.Reset();
112     while (strgen_.HasNext()) {
113       StringPiece input = strgen_.Next();
114       PrintResult(re, input, RE2::ANCHOR_BOTH, group, ngroup);
115       printf(";");
116       PrintResult(re, input, RE2::UNANCHORED, group, ngroup);
117       printf(";");
118       PrintResult(relongest, input, RE2::ANCHOR_BOTH, group, ngroup);
119       printf(";");
120       PrintResult(relongest, input, RE2::UNANCHORED, group, ngroup);
121       printf("\n");
122     }
123     delete[] group;
124     return;
125   }
126 
127   Tester tester(regexp);
128   if (tester.error())
129     return;
130 
131   strgen_.Reset();
132   strgen_.GenerateNULL();
133   if (randomstrings_)
134     strgen_.Random(stringseed_, stringcount_);
135   int bad_inputs = 0;
136   while (strgen_.HasNext()) {
137     tests_++;
138     if (!tester.TestInput(strgen_.Next())) {
139       failures_++;
140       if (++bad_inputs >= GetFlag(FLAGS_max_bad_regexp_inputs))
141         break;
142     }
143   }
144 }
145 
146 // Runs an exhaustive test on the given parameters.
ExhaustiveTest(int maxatoms,int maxops,const std::vector<std::string> & alphabet,const std::vector<std::string> & ops,int maxstrlen,const std::vector<std::string> & stralphabet,const std::string & wrapper,const std::string & topwrapper)147 void ExhaustiveTest(int maxatoms, int maxops,
148                     const std::vector<std::string>& alphabet,
149                     const std::vector<std::string>& ops,
150                     int maxstrlen,
151                     const std::vector<std::string>& stralphabet,
152                     const std::string& wrapper,
153                     const std::string& topwrapper) {
154   if (RE2_DEBUG_MODE) {
155     if (maxatoms > 1)
156       maxatoms--;
157     if (maxops > 1)
158       maxops--;
159     if (maxstrlen > 1)
160       maxstrlen--;
161   }
162   ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
163                      maxstrlen, stralphabet, wrapper,
164                      topwrapper);
165   t.Generate();
166   if (!LOGGING) {
167     printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
168            t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
169   }
170   EXPECT_EQ(0, t.failures());
171 }
172 
173 // Runs an exhaustive test using the given parameters and
174 // the basic egrep operators.
EgrepTest(int maxatoms,int maxops,const std::string & alphabet,int maxstrlen,const std::string & stralphabet,const std::string & wrapper)175 void EgrepTest(int maxatoms, int maxops, const std::string& alphabet,
176                int maxstrlen, const std::string& stralphabet,
177                const std::string& wrapper) {
178   const char* tops[] = { "", "^(?:%s)", "(?:%s)$", "^(?:%s)$" };
179 
180   for (size_t i = 0; i < arraysize(tops); i++) {
181     ExhaustiveTest(maxatoms, maxops,
182                    Split("", alphabet),
183                    RegexpGenerator::EgrepOps(),
184                    maxstrlen,
185                    Split("", stralphabet),
186                    wrapper,
187                    tops[i]);
188   }
189 }
190 
191 }  // namespace re2
192