1 // Copyright 2006 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Dump the regexp into a string showing structure.
6 // Tested by parse_unittest.cc
7 
8 // This function traverses the regexp recursively,
9 // meaning that on inputs like Regexp::Simplify of
10 // a{100}{100}{100}{100}{100}{100}{100}{100}{100}{100},
11 // it takes time and space exponential in the size of the
12 // original regular expression.  It can also use stack space
13 // linear in the size of the regular expression for inputs
14 // like ((((((((((((((((a*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*)*.
15 // IT IS NOT SAFE TO CALL FROM PRODUCTION CODE.
16 // As a result, Dump is provided only in the testing
17 // library (see BUILD).
18 
19 #include <string>
20 
21 #include "util/test.h"
22 #include "util/logging.h"
23 #include "util/strutil.h"
24 #include "util/utf.h"
25 #include "re2/stringpiece.h"
26 #include "re2/regexp.h"
27 
28 namespace re2 {
29 
30 static const char* kOpcodeNames[] = {
31   "bad",
32   "no",
33   "emp",
34   "lit",
35   "str",
36   "cat",
37   "alt",
38   "star",
39   "plus",
40   "que",
41   "rep",
42   "cap",
43   "dot",
44   "byte",
45   "bol",
46   "eol",
47   "wb",   // kRegexpWordBoundary
48   "nwb",  // kRegexpNoWordBoundary
49   "bot",
50   "eot",
51   "cc",
52   "match",
53 };
54 
55 // Create string representation of regexp with explicit structure.
56 // Nothing pretty, just for testing.
DumpRegexpAppending(Regexp * re,std::string * s)57 static void DumpRegexpAppending(Regexp* re, std::string* s) {
58   if (re->op() < 0 || re->op() >= arraysize(kOpcodeNames)) {
59     *s += StringPrintf("op%d", re->op());
60   } else {
61     switch (re->op()) {
62       default:
63         break;
64       case kRegexpStar:
65       case kRegexpPlus:
66       case kRegexpQuest:
67       case kRegexpRepeat:
68         if (re->parse_flags() & Regexp::NonGreedy)
69           s->append("n");
70         break;
71     }
72     s->append(kOpcodeNames[re->op()]);
73     if (re->op() == kRegexpLiteral && (re->parse_flags() & Regexp::FoldCase)) {
74       Rune r = re->rune();
75       if ('a' <= r && r <= 'z')
76         s->append("fold");
77     }
78     if (re->op() == kRegexpLiteralString && (re->parse_flags() & Regexp::FoldCase)) {
79       for (int i = 0; i < re->nrunes(); i++) {
80         Rune r = re->runes()[i];
81         if ('a' <= r && r <= 'z') {
82           s->append("fold");
83           break;
84         }
85       }
86     }
87   }
88   s->append("{");
89   switch (re->op()) {
90     default:
91       break;
92     case kRegexpEndText:
93       if (!(re->parse_flags() & Regexp::WasDollar)) {
94         s->append("\\z");
95       }
96       break;
97     case kRegexpLiteral: {
98       Rune r = re->rune();
99       char buf[UTFmax+1];
100       buf[runetochar(buf, &r)] = 0;
101       s->append(buf);
102       break;
103     }
104     case kRegexpLiteralString:
105       for (int i = 0; i < re->nrunes(); i++) {
106         Rune r = re->runes()[i];
107         char buf[UTFmax+1];
108         buf[runetochar(buf, &r)] = 0;
109         s->append(buf);
110       }
111       break;
112     case kRegexpConcat:
113     case kRegexpAlternate:
114       for (int i = 0; i < re->nsub(); i++)
115         DumpRegexpAppending(re->sub()[i], s);
116       break;
117     case kRegexpStar:
118     case kRegexpPlus:
119     case kRegexpQuest:
120       DumpRegexpAppending(re->sub()[0], s);
121       break;
122     case kRegexpCapture:
123       if (re->cap() == 0)
124         LOG(DFATAL) << "kRegexpCapture cap() == 0";
125       if (re->name()) {
126         s->append(*re->name());
127         s->append(":");
128       }
129       DumpRegexpAppending(re->sub()[0], s);
130       break;
131     case kRegexpRepeat:
132       s->append(StringPrintf("%d,%d ", re->min(), re->max()));
133       DumpRegexpAppending(re->sub()[0], s);
134       break;
135     case kRegexpCharClass: {
136       std::string sep;
137       for (CharClass::iterator it = re->cc()->begin();
138            it != re->cc()->end(); ++it) {
139         RuneRange rr = *it;
140         s->append(sep);
141         if (rr.lo == rr.hi)
142           s->append(StringPrintf("%#x", rr.lo));
143         else
144           s->append(StringPrintf("%#x-%#x", rr.lo, rr.hi));
145         sep = " ";
146       }
147       break;
148     }
149   }
150   s->append("}");
151 }
152 
Dump()153 std::string Regexp::Dump() {
154   // Make sure that we are being called from a unit test.
155   // Should cause a link error if used outside of testing.
156   CHECK(!::testing::TempDir().empty());
157 
158   std::string s;
159   DumpRegexpAppending(this, &s);
160   return s;
161 }
162 
163 }  // namespace re2
164