1 // Copyright 2008 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Regular expression generator: generates all possible
6 // regular expressions within given parameters (see below for details).
7 
8 #ifndef RE2_TESTING_REGEXP_GENERATOR_H__
9 #define RE2_TESTING_REGEXP_GENERATOR_H__
10 
11 #include <string>
12 #include <vector>
13 #include "util/random.h"
14 #include "util/util.h"
15 #include "re2/stringpiece.h"
16 
17 namespace re2 {
18 
19 // Regular expression generator.
20 //
21 // Given a set of atom expressions like "a", "b", or "."
22 // and operators like "%s*", generates all possible regular expressions
23 // using at most maxbases base expressions and maxops operators.
24 // For each such expression re, calls HandleRegexp(re).
25 //
26 // Callers are expected to subclass RegexpGenerator and provide HandleRegexp.
27 //
28 class RegexpGenerator {
29  public:
30   RegexpGenerator(int maxatoms, int maxops, const vector<string>& atoms,
31                   const vector<string>& ops);
~RegexpGenerator()32   virtual ~RegexpGenerator() {}
33 
34   // Generates all the regular expressions, calling HandleRegexp(re) for each.
35   void Generate();
36 
37   // Generates n random regular expressions, calling HandleRegexp(re) for each.
38   void GenerateRandom(int32 seed, int n);
39 
40   // Handles a regular expression.  Must be provided by subclass.
41   virtual void HandleRegexp(const string& regexp) = 0;
42 
43   // The egrep regexp operators: * + ? | and concatenation.
44   static const vector<string>& EgrepOps();
45 
46  private:
47   void RunPostfix(const vector<string>& post);
48   void GeneratePostfix(vector<string>* post, int nstk, int ops, int lits);
49   bool GenerateRandomPostfix(vector<string>* post, int nstk, int ops, int lits);
50 
51   int maxatoms_;           // Maximum number of atoms allowed in expr.
52   int maxops_;             // Maximum number of ops allowed in expr.
53   vector<string> atoms_;   // Possible atoms.
54   vector<string> ops_;     // Possible ops.
55   ACMRandom* acm_;         // Random generator.
56   DISALLOW_EVIL_CONSTRUCTORS(RegexpGenerator);
57 };
58 
// Helpers for preparing the atoms and ops arguments of the RegexpGenerator constructor.
60 
61 // Returns one string for each character in s.
62 vector<string> Explode(const StringPiece& s);
63 
64 // Splits string everywhere sep is found, returning
65 // vector of pieces.
66 vector<string> Split(const StringPiece& sep, const StringPiece& s);
67 
68 }  // namespace re2
69 
70 #endif  // RE2_TESTING_REGEXP_GENERATOR_H__
71