1 // Copyright 2006 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Rewrite POSIX and other features in re
6 // to use simple extended regular expression features.
7 // Also sort and simplify character classes.
8 
9 #include <string>
10 
11 #include "util/util.h"
12 #include "util/logging.h"
13 #include "util/utf.h"
14 #include "re2/pod_array.h"
15 #include "re2/regexp.h"
16 #include "re2/walker-inl.h"
17 
18 namespace re2 {
19 
20 // Parses the regexp src and then simplifies it and sets *dst to the
21 // string representation of the simplified form.  Returns true on success.
22 // Returns false and sets *error (if error != NULL) on error.
SimplifyRegexp(const StringPiece & src,ParseFlags flags,std::string * dst,RegexpStatus * status)23 bool Regexp::SimplifyRegexp(const StringPiece& src, ParseFlags flags,
24                             std::string* dst, RegexpStatus* status) {
25   Regexp* re = Parse(src, flags, status);
26   if (re == NULL)
27     return false;
28   Regexp* sre = re->Simplify();
29   re->Decref();
30   if (sre == NULL) {
31     // Should not happen, since Simplify never fails.
32     LOG(ERROR) << "Simplify failed on " << src;
33     if (status) {
34       status->set_code(kRegexpInternalError);
35       status->set_error_arg(src);
36     }
37     return false;
38   }
39   *dst = sre->ToString();
40   sre->Decref();
41   return true;
42 }
43 
44 // Assuming the simple_ flags on the children are accurate,
45 // is this Regexp* simple?
ComputeSimple()46 bool Regexp::ComputeSimple() {
47   Regexp** subs;
48   switch (op_) {
49     case kRegexpNoMatch:
50     case kRegexpEmptyMatch:
51     case kRegexpLiteral:
52     case kRegexpLiteralString:
53     case kRegexpBeginLine:
54     case kRegexpEndLine:
55     case kRegexpBeginText:
56     case kRegexpWordBoundary:
57     case kRegexpNoWordBoundary:
58     case kRegexpEndText:
59     case kRegexpAnyChar:
60     case kRegexpAnyByte:
61     case kRegexpHaveMatch:
62       return true;
63     case kRegexpConcat:
64     case kRegexpAlternate:
65       // These are simple as long as the subpieces are simple.
66       subs = sub();
67       for (int i = 0; i < nsub_; i++)
68         if (!subs[i]->simple())
69           return false;
70       return true;
71     case kRegexpCharClass:
72       // Simple as long as the char class is not empty, not full.
73       if (ccb_ != NULL)
74         return !ccb_->empty() && !ccb_->full();
75       return !cc_->empty() && !cc_->full();
76     case kRegexpCapture:
77       subs = sub();
78       return subs[0]->simple();
79     case kRegexpStar:
80     case kRegexpPlus:
81     case kRegexpQuest:
82       subs = sub();
83       if (!subs[0]->simple())
84         return false;
85       switch (subs[0]->op_) {
86         case kRegexpStar:
87         case kRegexpPlus:
88         case kRegexpQuest:
89         case kRegexpEmptyMatch:
90         case kRegexpNoMatch:
91           return false;
92         default:
93           break;
94       }
95       return true;
96     case kRegexpRepeat:
97       return false;
98   }
99   LOG(DFATAL) << "Case not handled in ComputeSimple: " << op_;
100   return false;
101 }
102 
103 // Walker subclass used by Simplify.
104 // Coalesces runs of star/plus/quest/repeat of the same literal along with any
105 // occurrences of that literal into repeats of that literal. It also works for
106 // char classes, any char and any byte.
107 // PostVisit creates the coalesced result, which should then be simplified.
108 class CoalesceWalker : public Regexp::Walker<Regexp*> {
109  public:
CoalesceWalker()110   CoalesceWalker() {}
111   virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg,
112                             Regexp** child_args, int nchild_args);
113   virtual Regexp* Copy(Regexp* re);
114   virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg);
115 
116  private:
117   // These functions are declared inside CoalesceWalker so that
118   // they can edit the private fields of the Regexps they construct.
119 
120   // Returns true if r1 and r2 can be coalesced. In particular, ensures that
121   // the parse flags are consistent. (They will not be checked again later.)
122   static bool CanCoalesce(Regexp* r1, Regexp* r2);
123 
124   // Coalesces *r1ptr and *r2ptr. In most cases, the array elements afterwards
125   // will be empty match and the coalesced op. In other cases, where part of a
126   // literal string was removed to be coalesced, the array elements afterwards
127   // will be the coalesced op and the remainder of the literal string.
128   static void DoCoalesce(Regexp** r1ptr, Regexp** r2ptr);
129 
130   CoalesceWalker(const CoalesceWalker&) = delete;
131   CoalesceWalker& operator=(const CoalesceWalker&) = delete;
132 };
133 
134 // Walker subclass used by Simplify.
135 // The simplify walk is purely post-recursive: given the simplified children,
136 // PostVisit creates the simplified result.
137 // The child_args are simplified Regexp*s.
138 class SimplifyWalker : public Regexp::Walker<Regexp*> {
139  public:
SimplifyWalker()140   SimplifyWalker() {}
141   virtual Regexp* PreVisit(Regexp* re, Regexp* parent_arg, bool* stop);
142   virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg,
143                             Regexp** child_args, int nchild_args);
144   virtual Regexp* Copy(Regexp* re);
145   virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg);
146 
147  private:
148   // These functions are declared inside SimplifyWalker so that
149   // they can edit the private fields of the Regexps they construct.
150 
151   // Creates a concatenation of two Regexp, consuming refs to re1 and re2.
152   // Caller must Decref return value when done with it.
153   static Regexp* Concat2(Regexp* re1, Regexp* re2, Regexp::ParseFlags flags);
154 
155   // Simplifies the expression re{min,max} in terms of *, +, and ?.
156   // Returns a new regexp.  Does not edit re.  Does not consume reference to re.
157   // Caller must Decref return value when done with it.
158   static Regexp* SimplifyRepeat(Regexp* re, int min, int max,
159                                 Regexp::ParseFlags parse_flags);
160 
161   // Simplifies a character class by expanding any named classes
162   // into rune ranges.  Does not edit re.  Does not consume ref to re.
163   // Caller must Decref return value when done with it.
164   static Regexp* SimplifyCharClass(Regexp* re);
165 
166   SimplifyWalker(const SimplifyWalker&) = delete;
167   SimplifyWalker& operator=(const SimplifyWalker&) = delete;
168 };
169 
170 // Simplifies a regular expression, returning a new regexp.
171 // The new regexp uses traditional Unix egrep features only,
172 // plus the Perl (?:) non-capturing parentheses.
173 // Otherwise, no POSIX or Perl additions.  The new regexp
174 // captures exactly the same subexpressions (with the same indices)
175 // as the original.
176 // Does not edit current object.
177 // Caller must Decref() return value when done with it.
178 
Simplify()179 Regexp* Regexp::Simplify() {
180   CoalesceWalker cw;
181   Regexp* cre = cw.Walk(this, NULL);
182   if (cre == NULL)
183     return cre;
184   SimplifyWalker sw;
185   Regexp* sre = sw.Walk(cre, NULL);
186   cre->Decref();
187   return sre;
188 }
189 
190 #define Simplify DontCallSimplify  // Avoid accidental recursion
191 
192 // Utility function for PostVisit implementations that compares re->sub() with
193 // child_args to determine whether any child_args changed. In the common case,
194 // where nothing changed, calls Decref() for all child_args and returns false,
195 // so PostVisit must return re->Incref(). Otherwise, returns true.
ChildArgsChanged(Regexp * re,Regexp ** child_args)196 static bool ChildArgsChanged(Regexp* re, Regexp** child_args) {
197   for (int i = 0; i < re->nsub(); i++) {
198     Regexp* sub = re->sub()[i];
199     Regexp* newsub = child_args[i];
200     if (newsub != sub)
201       return true;
202   }
203   for (int i = 0; i < re->nsub(); i++) {
204     Regexp* newsub = child_args[i];
205     newsub->Decref();
206   }
207   return false;
208 }
209 
Copy(Regexp * re)210 Regexp* CoalesceWalker::Copy(Regexp* re) {
211   return re->Incref();
212 }
213 
ShortVisit(Regexp * re,Regexp * parent_arg)214 Regexp* CoalesceWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
215   // Should never be called: we use Walk(), not WalkExponential().
216 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
217   LOG(DFATAL) << "CoalesceWalker::ShortVisit called";
218 #endif
219   return re->Incref();
220 }
221 
PostVisit(Regexp * re,Regexp * parent_arg,Regexp * pre_arg,Regexp ** child_args,int nchild_args)222 Regexp* CoalesceWalker::PostVisit(Regexp* re,
223                                   Regexp* parent_arg,
224                                   Regexp* pre_arg,
225                                   Regexp** child_args,
226                                   int nchild_args) {
227   if (re->nsub() == 0)
228     return re->Incref();
229 
230   if (re->op() != kRegexpConcat) {
231     if (!ChildArgsChanged(re, child_args))
232       return re->Incref();
233 
234     // Something changed. Build a new op.
235     Regexp* nre = new Regexp(re->op(), re->parse_flags());
236     nre->AllocSub(re->nsub());
237     Regexp** nre_subs = nre->sub();
238     for (int i = 0; i < re->nsub(); i++)
239       nre_subs[i] = child_args[i];
240     // Repeats and Captures have additional data that must be copied.
241     if (re->op() == kRegexpRepeat) {
242       nre->min_ = re->min();
243       nre->max_ = re->max();
244     } else if (re->op() == kRegexpCapture) {
245       nre->cap_ = re->cap();
246     }
247     return nre;
248   }
249 
250   bool can_coalesce = false;
251   for (int i = 0; i < re->nsub(); i++) {
252     if (i+1 < re->nsub() &&
253         CanCoalesce(child_args[i], child_args[i+1])) {
254       can_coalesce = true;
255       break;
256     }
257   }
258   if (!can_coalesce) {
259     if (!ChildArgsChanged(re, child_args))
260       return re->Incref();
261 
262     // Something changed. Build a new op.
263     Regexp* nre = new Regexp(re->op(), re->parse_flags());
264     nre->AllocSub(re->nsub());
265     Regexp** nre_subs = nre->sub();
266     for (int i = 0; i < re->nsub(); i++)
267       nre_subs[i] = child_args[i];
268     return nre;
269   }
270 
271   for (int i = 0; i < re->nsub(); i++) {
272     if (i+1 < re->nsub() &&
273         CanCoalesce(child_args[i], child_args[i+1]))
274       DoCoalesce(&child_args[i], &child_args[i+1]);
275   }
276   // Determine how many empty matches were left by DoCoalesce.
277   int n = 0;
278   for (int i = n; i < re->nsub(); i++) {
279     if (child_args[i]->op() == kRegexpEmptyMatch)
280       n++;
281   }
282   // Build a new op.
283   Regexp* nre = new Regexp(re->op(), re->parse_flags());
284   nre->AllocSub(re->nsub() - n);
285   Regexp** nre_subs = nre->sub();
286   for (int i = 0, j = 0; i < re->nsub(); i++) {
287     if (child_args[i]->op() == kRegexpEmptyMatch) {
288       child_args[i]->Decref();
289       continue;
290     }
291     nre_subs[j] = child_args[i];
292     j++;
293   }
294   return nre;
295 }
296 
CanCoalesce(Regexp * r1,Regexp * r2)297 bool CoalesceWalker::CanCoalesce(Regexp* r1, Regexp* r2) {
298   // r1 must be a star/plus/quest/repeat of a literal, char class, any char or
299   // any byte.
300   if ((r1->op() == kRegexpStar ||
301        r1->op() == kRegexpPlus ||
302        r1->op() == kRegexpQuest ||
303        r1->op() == kRegexpRepeat) &&
304       (r1->sub()[0]->op() == kRegexpLiteral ||
305        r1->sub()[0]->op() == kRegexpCharClass ||
306        r1->sub()[0]->op() == kRegexpAnyChar ||
307        r1->sub()[0]->op() == kRegexpAnyByte)) {
308     // r2 must be a star/plus/quest/repeat of the same literal, char class,
309     // any char or any byte.
310     if ((r2->op() == kRegexpStar ||
311          r2->op() == kRegexpPlus ||
312          r2->op() == kRegexpQuest ||
313          r2->op() == kRegexpRepeat) &&
314         Regexp::Equal(r1->sub()[0], r2->sub()[0]) &&
315         // The parse flags must be consistent.
316         ((r1->parse_flags() & Regexp::NonGreedy) ==
317          (r2->parse_flags() & Regexp::NonGreedy))) {
318       return true;
319     }
320     // ... OR an occurrence of that literal, char class, any char or any byte
321     if (Regexp::Equal(r1->sub()[0], r2)) {
322       return true;
323     }
324     // ... OR a literal string that begins with that literal.
325     if (r1->sub()[0]->op() == kRegexpLiteral &&
326         r2->op() == kRegexpLiteralString &&
327         r2->runes()[0] == r1->sub()[0]->rune() &&
328         // The parse flags must be consistent.
329         ((r1->sub()[0]->parse_flags() & Regexp::FoldCase) ==
330          (r2->parse_flags() & Regexp::FoldCase))) {
331       return true;
332     }
333   }
334   return false;
335 }
336 
DoCoalesce(Regexp ** r1ptr,Regexp ** r2ptr)337 void CoalesceWalker::DoCoalesce(Regexp** r1ptr, Regexp** r2ptr) {
338   Regexp* r1 = *r1ptr;
339   Regexp* r2 = *r2ptr;
340 
341   Regexp* nre = Regexp::Repeat(
342       r1->sub()[0]->Incref(), r1->parse_flags(), 0, 0);
343 
344   switch (r1->op()) {
345     case kRegexpStar:
346       nre->min_ = 0;
347       nre->max_ = -1;
348       break;
349 
350     case kRegexpPlus:
351       nre->min_ = 1;
352       nre->max_ = -1;
353       break;
354 
355     case kRegexpQuest:
356       nre->min_ = 0;
357       nre->max_ = 1;
358       break;
359 
360     case kRegexpRepeat:
361       nre->min_ = r1->min();
362       nre->max_ = r1->max();
363       break;
364 
365     default:
366       LOG(DFATAL) << "DoCoalesce failed: r1->op() is " << r1->op();
367       nre->Decref();
368       return;
369   }
370 
371   switch (r2->op()) {
372     case kRegexpStar:
373       nre->max_ = -1;
374       goto LeaveEmpty;
375 
376     case kRegexpPlus:
377       nre->min_++;
378       nre->max_ = -1;
379       goto LeaveEmpty;
380 
381     case kRegexpQuest:
382       if (nre->max() != -1)
383         nre->max_++;
384       goto LeaveEmpty;
385 
386     case kRegexpRepeat:
387       nre->min_ += r2->min();
388       if (r2->max() == -1)
389         nre->max_ = -1;
390       else if (nre->max() != -1)
391         nre->max_ += r2->max();
392       goto LeaveEmpty;
393 
394     case kRegexpLiteral:
395     case kRegexpCharClass:
396     case kRegexpAnyChar:
397     case kRegexpAnyByte:
398       nre->min_++;
399       if (nre->max() != -1)
400         nre->max_++;
401       goto LeaveEmpty;
402 
403     LeaveEmpty:
404       *r1ptr = new Regexp(kRegexpEmptyMatch, Regexp::NoParseFlags);
405       *r2ptr = nre;
406       break;
407 
408     case kRegexpLiteralString: {
409       Rune r = r1->sub()[0]->rune();
410       // Determine how much of the literal string is removed.
411       // We know that we have at least one rune. :)
412       int n = 1;
413       while (n < r2->nrunes() && r2->runes()[n] == r)
414         n++;
415       nre->min_ += n;
416       if (nre->max() != -1)
417         nre->max_ += n;
418       if (n == r2->nrunes())
419         goto LeaveEmpty;
420       *r1ptr = nre;
421       *r2ptr = Regexp::LiteralString(
422           &r2->runes()[n], r2->nrunes() - n, r2->parse_flags());
423       break;
424     }
425 
426     default:
427       LOG(DFATAL) << "DoCoalesce failed: r2->op() is " << r2->op();
428       nre->Decref();
429       return;
430   }
431 
432   r1->Decref();
433   r2->Decref();
434 }
435 
Copy(Regexp * re)436 Regexp* SimplifyWalker::Copy(Regexp* re) {
437   return re->Incref();
438 }
439 
ShortVisit(Regexp * re,Regexp * parent_arg)440 Regexp* SimplifyWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
441   // Should never be called: we use Walk(), not WalkExponential().
442 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
443   LOG(DFATAL) << "SimplifyWalker::ShortVisit called";
444 #endif
445   return re->Incref();
446 }
447 
PreVisit(Regexp * re,Regexp * parent_arg,bool * stop)448 Regexp* SimplifyWalker::PreVisit(Regexp* re, Regexp* parent_arg, bool* stop) {
449   if (re->simple()) {
450     *stop = true;
451     return re->Incref();
452   }
453   return NULL;
454 }
455 
PostVisit(Regexp * re,Regexp * parent_arg,Regexp * pre_arg,Regexp ** child_args,int nchild_args)456 Regexp* SimplifyWalker::PostVisit(Regexp* re,
457                                   Regexp* parent_arg,
458                                   Regexp* pre_arg,
459                                   Regexp** child_args,
460                                   int nchild_args) {
461   switch (re->op()) {
462     case kRegexpNoMatch:
463     case kRegexpEmptyMatch:
464     case kRegexpLiteral:
465     case kRegexpLiteralString:
466     case kRegexpBeginLine:
467     case kRegexpEndLine:
468     case kRegexpBeginText:
469     case kRegexpWordBoundary:
470     case kRegexpNoWordBoundary:
471     case kRegexpEndText:
472     case kRegexpAnyChar:
473     case kRegexpAnyByte:
474     case kRegexpHaveMatch:
475       // All these are always simple.
476       re->simple_ = true;
477       return re->Incref();
478 
479     case kRegexpConcat:
480     case kRegexpAlternate: {
481       // These are simple as long as the subpieces are simple.
482       if (!ChildArgsChanged(re, child_args)) {
483         re->simple_ = true;
484         return re->Incref();
485       }
486       Regexp* nre = new Regexp(re->op(), re->parse_flags());
487       nre->AllocSub(re->nsub());
488       Regexp** nre_subs = nre->sub();
489       for (int i = 0; i < re->nsub(); i++)
490         nre_subs[i] = child_args[i];
491       nre->simple_ = true;
492       return nre;
493     }
494 
495     case kRegexpCapture: {
496       Regexp* newsub = child_args[0];
497       if (newsub == re->sub()[0]) {
498         newsub->Decref();
499         re->simple_ = true;
500         return re->Incref();
501       }
502       Regexp* nre = new Regexp(kRegexpCapture, re->parse_flags());
503       nre->AllocSub(1);
504       nre->sub()[0] = newsub;
505       nre->cap_ = re->cap();
506       nre->simple_ = true;
507       return nre;
508     }
509 
510     case kRegexpStar:
511     case kRegexpPlus:
512     case kRegexpQuest: {
513       Regexp* newsub = child_args[0];
514       // Special case: repeat the empty string as much as
515       // you want, but it's still the empty string.
516       if (newsub->op() == kRegexpEmptyMatch)
517         return newsub;
518 
519       // These are simple as long as the subpiece is simple.
520       if (newsub == re->sub()[0]) {
521         newsub->Decref();
522         re->simple_ = true;
523         return re->Incref();
524       }
525 
526       // These are also idempotent if flags are constant.
527       if (re->op() == newsub->op() &&
528           re->parse_flags() == newsub->parse_flags())
529         return newsub;
530 
531       Regexp* nre = new Regexp(re->op(), re->parse_flags());
532       nre->AllocSub(1);
533       nre->sub()[0] = newsub;
534       nre->simple_ = true;
535       return nre;
536     }
537 
538     case kRegexpRepeat: {
539       Regexp* newsub = child_args[0];
540       // Special case: repeat the empty string as much as
541       // you want, but it's still the empty string.
542       if (newsub->op() == kRegexpEmptyMatch)
543         return newsub;
544 
545       Regexp* nre = SimplifyRepeat(newsub, re->min_, re->max_,
546                                    re->parse_flags());
547       newsub->Decref();
548       nre->simple_ = true;
549       return nre;
550     }
551 
552     case kRegexpCharClass: {
553       Regexp* nre = SimplifyCharClass(re);
554       nre->simple_ = true;
555       return nre;
556     }
557   }
558 
559   LOG(ERROR) << "Simplify case not handled: " << re->op();
560   return re->Incref();
561 }
562 
563 // Creates a concatenation of two Regexp, consuming refs to re1 and re2.
564 // Returns a new Regexp, handing the ref to the caller.
Concat2(Regexp * re1,Regexp * re2,Regexp::ParseFlags parse_flags)565 Regexp* SimplifyWalker::Concat2(Regexp* re1, Regexp* re2,
566                                 Regexp::ParseFlags parse_flags) {
567   Regexp* re = new Regexp(kRegexpConcat, parse_flags);
568   re->AllocSub(2);
569   Regexp** subs = re->sub();
570   subs[0] = re1;
571   subs[1] = re2;
572   return re;
573 }
574 
575 // Simplifies the expression re{min,max} in terms of *, +, and ?.
576 // Returns a new regexp.  Does not edit re.  Does not consume reference to re.
577 // Caller must Decref return value when done with it.
578 // The result will *not* necessarily have the right capturing parens
579 // if you call ToString() and re-parse it: (x){2} becomes (x)(x),
580 // but in the Regexp* representation, both (x) are marked as $1.
SimplifyRepeat(Regexp * re,int min,int max,Regexp::ParseFlags f)581 Regexp* SimplifyWalker::SimplifyRepeat(Regexp* re, int min, int max,
582                                        Regexp::ParseFlags f) {
583   // x{n,} means at least n matches of x.
584   if (max == -1) {
585     // Special case: x{0,} is x*
586     if (min == 0)
587       return Regexp::Star(re->Incref(), f);
588 
589     // Special case: x{1,} is x+
590     if (min == 1)
591       return Regexp::Plus(re->Incref(), f);
592 
593     // General case: x{4,} is xxxx+
594     PODArray<Regexp*> nre_subs(min);
595     for (int i = 0; i < min-1; i++)
596       nre_subs[i] = re->Incref();
597     nre_subs[min-1] = Regexp::Plus(re->Incref(), f);
598     return Regexp::Concat(nre_subs.data(), min, f);
599   }
600 
601   // Special case: (x){0} matches only empty string.
602   if (min == 0 && max == 0)
603     return new Regexp(kRegexpEmptyMatch, f);
604 
605   // Special case: x{1} is just x.
606   if (min == 1 && max == 1)
607     return re->Incref();
608 
609   // General case: x{n,m} means n copies of x and m copies of x?.
610   // The machine will do less work if we nest the final m copies,
611   // so that x{2,5} = xx(x(x(x)?)?)?
612 
613   // Build leading prefix: xx.  Capturing only on the last one.
614   Regexp* nre = NULL;
615   if (min > 0) {
616     PODArray<Regexp*> nre_subs(min);
617     for (int i = 0; i < min; i++)
618       nre_subs[i] = re->Incref();
619     nre = Regexp::Concat(nre_subs.data(), min, f);
620   }
621 
622   // Build and attach suffix: (x(x(x)?)?)?
623   if (max > min) {
624     Regexp* suf = Regexp::Quest(re->Incref(), f);
625     for (int i = min+1; i < max; i++)
626       suf = Regexp::Quest(Concat2(re->Incref(), suf, f), f);
627     if (nre == NULL)
628       nre = suf;
629     else
630       nre = Concat2(nre, suf, f);
631   }
632 
633   if (nre == NULL) {
634     // Some degenerate case, like min > max, or min < max < 0.
635     // This shouldn't happen, because the parser rejects such regexps.
636     LOG(DFATAL) << "Malformed repeat " << re->ToString() << " " << min << " " << max;
637     return new Regexp(kRegexpNoMatch, f);
638   }
639 
640   return nre;
641 }
642 
643 // Simplifies a character class.
644 // Caller must Decref return value when done with it.
SimplifyCharClass(Regexp * re)645 Regexp* SimplifyWalker::SimplifyCharClass(Regexp* re) {
646   CharClass* cc = re->cc();
647 
648   // Special cases
649   if (cc->empty())
650     return new Regexp(kRegexpNoMatch, re->parse_flags());
651   if (cc->full())
652     return new Regexp(kRegexpAnyChar, re->parse_flags());
653 
654   return re->Incref();
655 }
656 
657 }  // namespace re2
658