1 // Copyright 2008 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Comparative tester for regular expression matching.
6 // Checks all implementations against each other.
7 
8 #ifndef RE2_TESTING_TESTER_H__
9 #define RE2_TESTING_TESTER_H__
10 
#include <vector>

#include "re2/stringpiece.h"
#include "re2/prog.h"
#include "re2/regexp.h"
#include "re2/re2.h"
#include "util/pcre.h"
16 
17 namespace re2 {
18 
19 class Regexp;
20 
// The regexp engines supported by the tester.
// Values are consecutive, starting at 0.
enum Engine {
  // Prog::BadSearchBacktrack
  kEngineBacktrack = 0,
  // Prog::SearchNFA
  kEngineNFA,
  // Prog::SearchDFA, only ask whether it matched
  kEngineDFA,
  // Prog::SearchDFA, ask for match[0]
  kEngineDFA1,
  // Prog::SearchOnePass, if applicable
  kEngineOnePass,
  // Prog::SearchBitState
  kEngineBitState,
  // RE2, all submatches
  kEngineRE2,
  // RE2, only ask for match[0]
  kEngineRE2a,
  // RE2, only ask whether it matched
  kEngineRE2b,
  // PCRE (util/pcre.h)
  kEnginePCRE,

  // One past the last engine; equals the number of engines listed above.
  kEngineMax
};
36 
37 // Make normal math on the enum preserve the type.
38 // By default, C++ doesn't define ++ on enum, and e+1 has type int.
39 static inline void operator++(Engine& e, int unused) {
40   e = static_cast<Engine>(e+1);
41 }
42 
43 static inline Engine operator+(Engine e, int i) {
44   return static_cast<Engine>(static_cast<int>(e)+i);
45 }
46 
47 // A TestInstance caches per-regexp state for a given
48 // regular expression in a given configuration
49 // (UTF-8 vs Latin1, longest vs first match, etc.).
50 class TestInstance {
51  public:
52   struct Result;
53 
54   TestInstance(const StringPiece& regexp, Prog::MatchKind kind,
55                Regexp::ParseFlags flags);
56   ~TestInstance();
flags()57   Regexp::ParseFlags flags() { return flags_; }
error()58   bool error() { return error_; }
59 
60   // Runs a single test case: search in text, which is in context,
61   // using the given anchoring.
62   bool RunCase(const StringPiece& text, const StringPiece& context,
63                Prog::Anchor anchor);
64 
65  private:
66   // Runs a single search using the named engine type.
67   void RunSearch(Engine type,
68                  const StringPiece& text, const StringPiece& context,
69                  Prog::Anchor anchor,
70                  Result *result);
71 
72   void LogMatch(const char* prefix, Engine e, const StringPiece& text,
73                 const StringPiece& context, Prog::Anchor anchor);
74 
75   const StringPiece& regexp_str_;   // regexp being tested
76   Prog::MatchKind kind_;            // kind of match
77   Regexp::ParseFlags flags_;        // flags for parsing regexp_str_
78   bool error_;                      // error during constructor?
79 
80   Regexp* regexp_;                  // parsed regexp
81   int num_captures_;                // regexp_->NumCaptures() cached
82   Prog* prog_;                      // compiled program
83   Prog* rprog_;                     // compiled reverse program
84   PCRE* re_;                        // PCRE implementation
85   RE2* re2_;                        // RE2 implementation
86 
87   DISALLOW_EVIL_CONSTRUCTORS(TestInstance);
88 };
89 
90 // A group of TestInstances for all possible configurations.
91 class Tester {
92  public:
93   explicit Tester(const StringPiece& regexp);
94   ~Tester();
95 
error()96   bool error() { return error_; }
97 
98   // Runs a single test case: search in text, which is in context,
99   // using the given anchoring.
100   bool TestCase(const StringPiece& text, const StringPiece& context,
101                 Prog::Anchor anchor);
102 
103   // Run TestCase(text, text, anchor) for all anchoring modes.
104   bool TestInput(const StringPiece& text);
105 
106   // Run TestCase(text, context, anchor) for all anchoring modes.
107   bool TestInputInContext(const StringPiece& text, const StringPiece& context);
108 
109  private:
110   bool error_;
111   vector<TestInstance*> v_;
112 
113   DISALLOW_EVIL_CONSTRUCTORS(Tester);
114 };
115 
116 // Run all possible tests using regexp and text.
117 bool TestRegexpOnText(const StringPiece& regexp, const StringPiece& text);
118 
119 }  // namespace re2
120 
121 #endif  // RE2_TESTING_TESTER_H__
122