1 // Copyright 2008 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 #ifndef RE2_TESTING_REGEXP_GENERATOR_H_
6 #define RE2_TESTING_REGEXP_GENERATOR_H_
7 
// Regular expression generator: generates all possible
// regular expressions within given parameters (see below for details).
10 
11 #include <stdint.h>
12 #include <random>
13 #include <string>
14 #include <vector>
15 
16 #include "util/util.h"
17 #include "re2/stringpiece.h"
18 
19 namespace re2 {
20 
// Regular expression generator.
//
// Given a set of atom expressions like "a", "b", or "."
// and operators like "%s*", generates all possible regular expressions
// using at most maxatoms atoms and maxops operators.
// For each such expression re, calls HandleRegexp(re).
//
// Callers are expected to subclass RegexpGenerator and provide HandleRegexp.
// The class is not copyable.
//
class RegexpGenerator {
 public:
  // maxatoms and maxops bound the number of atoms and operators allowed in
  // a generated expression; atoms and ops are the candidates to draw from.
  RegexpGenerator(int maxatoms, int maxops,
                  const std::vector<std::string>& atoms,
                  const std::vector<std::string>& ops);

  // Virtual because the class is designed to be subclassed.
  virtual ~RegexpGenerator() {}

  // Generates all the regular expressions, calling HandleRegexp(re) for each.
  void Generate();

  // Generates n random regular expressions, calling HandleRegexp(re) for each.
  void GenerateRandom(int32_t seed, int n);

  // Handles a regular expression.  Must be provided by subclass.
  virtual void HandleRegexp(const std::string& regexp) = 0;

  // The egrep regexp operators: * + ? | and concatenation.
  static const std::vector<std::string>& EgrepOps();

 private:
  // Internal helpers, defined out of line.
  // NOTE(review): judging by the names, post is presumably a postfix
  // sequence of atoms and ops, and nstk/ops/lits track the stack depth and
  // the operators/atoms used so far -- confirm against the implementation.
  void RunPostfix(const std::vector<std::string>& post);
  void GeneratePostfix(std::vector<std::string>* post,
                       int nstk, int ops, int lits);
  bool GenerateRandomPostfix(std::vector<std::string>* post,
                             int nstk, int ops, int lits);

  int maxatoms_;                    // Maximum number of atoms allowed in expr.
  int maxops_;                      // Maximum number of ops allowed in expr.
  std::vector<std::string> atoms_;  // Possible atoms.
  std::vector<std::string> ops_;    // Possible ops.
  std::minstd_rand0 rng_;           // Random number generator.

  RegexpGenerator(const RegexpGenerator&) = delete;
  RegexpGenerator& operator=(const RegexpGenerator&) = delete;
};
65 
// Helpers for preparing arguments to RegexpGenerator constructor.
67 
68 // Returns one string for each character in s.
69 std::vector<std::string> Explode(const StringPiece& s);
70 
71 // Splits string everywhere sep is found, returning
72 // vector of pieces.
73 std::vector<std::string> Split(const StringPiece& sep, const StringPiece& s);
74 
75 }  // namespace re2
76 
77 #endif  // RE2_TESTING_REGEXP_GENERATOR_H_
78