1 // Copyright 2008 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 #ifndef RE2_TESTING_STRING_GENERATOR_H_
6 #define RE2_TESTING_STRING_GENERATOR_H_
7 
8 // String generator: generates all possible strings of up to
9 // maxlen letters using the set of letters in alpha.
10 // Fetch strings using a Java-like Next()/HasNext() interface.
11 
12 #include <stdint.h>
13 #include <random>
14 #include <string>
15 #include <vector>
16 
17 #include "util/util.h"
18 #include "re2/stringpiece.h"
19 
20 namespace re2 {
21 
22 class StringGenerator {
23  public:
24   StringGenerator(int maxlen, const std::vector<std::string>& alphabet);
~StringGenerator()25   ~StringGenerator() {}
26 
27   const StringPiece& Next();
HasNext()28   bool HasNext() { return hasnext_; }
29 
30   // Resets generator to start sequence over.
31   void Reset();
32 
33   // Causes generator to emit random strings for next n calls to Next().
34   void Random(int32_t seed, int n);
35 
36   // Causes generator to emit a NULL as the next call.
37   void GenerateNULL();
38 
39  private:
40   bool IncrementDigits();
41   bool RandomDigits();
42 
43   // Global state.
44   int maxlen_;                         // Maximum length string to generate.
45   std::vector<std::string> alphabet_;  // Alphabet, one string per letter.
46 
47   // Iteration state.
48   StringPiece sp_;           // Last StringPiece returned by Next().
49   std::string s_;            // String data in last StringPiece returned by Next().
50   bool hasnext_;             // Whether Next() can be called again.
51   std::vector<int> digits_;  // Alphabet indices for next string.
52   bool generate_null_;       // Whether to generate a NULL StringPiece next.
53   bool random_;              // Whether generated strings are random.
54   int nrandom_;              // Number of random strings left to generate.
55   std::minstd_rand0 rng_;    // Random number generator.
56 
57   StringGenerator(const StringGenerator&) = delete;
58   StringGenerator& operator=(const StringGenerator&) = delete;
59 };
60 
61 // Generates and returns a string over binary alphabet {0,1} that contains
62 // all possible binary sequences of length n as subsequences.  The obvious
63 // brute force method would generate a string of length n * 2^n, but this
64 // generates a string of length n-1 + 2^n called a De Bruijn cycle.
65 // See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
66 //
67 // Such a string is useful for testing a DFA.  If you have a DFA
68 // where distinct last n bytes implies distinct states, then running on a
69 // DeBruijn string causes the DFA to need to create a new state at every
70 // position in the input, never reusing any states until it gets to the
71 // end of the string.  This is the worst possible case for DFA execution.
72 std::string DeBruijnString(int n);
73 
74 }  // namespace re2
75 
76 #endif  // RE2_TESTING_STRING_GENERATOR_H_
77