1 // Copyright 2010 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 #include "re2/set.h"
6 
7 #include "util/util.h"
8 #include "re2/stringpiece.h"
9 #include "re2/prog.h"
10 #include "re2/re2.h"
11 #include "re2/regexp.h"
12 
13 using namespace re2;
14 
// Constructs an empty, not-yet-compiled set.
//   options: copied into options_ through Options::Copy.
//   anchor:  stored as-is and later handed to Prog::CompileSet by Compile().
RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor) {
  // Nothing has been compiled yet, so there is no program to own.
  compiled_ = false;
  prog_ = NULL;

  // Remember how the patterns are to be parsed and anchored.
  anchor_ = anchor;
  options_.Copy(options);
}
21 
~Set()22 RE2::Set::~Set() {
23   for (int i = 0; i < re_.size(); i++)
24     re_[i]->Decref();
25   delete prog_;
26 }
27 
Add(const StringPiece & pattern,string * error)28 int RE2::Set::Add(const StringPiece& pattern, string* error) {
29   if (compiled_) {
30     LOG(DFATAL) << "RE2::Set::Add after Compile";
31     return -1;
32   }
33 
34   Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
35     options_.ParseFlags());
36 
37   RegexpStatus status;
38   re2::Regexp* re = Regexp::Parse(pattern, pf, &status);
39   if (re == NULL) {
40     if (error != NULL)
41       *error = status.Text();
42     if (options_.log_errors())
43       LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text();
44     return -1;
45   }
46 
47   // Concatenate with match index and push on vector.
48   int n = re_.size();
49   re2::Regexp* m = re2::Regexp::HaveMatch(n, pf);
50   if (re->op() == kRegexpConcat) {
51     int nsub = re->nsub();
52     re2::Regexp** sub = new re2::Regexp*[nsub + 1];
53     for (int i = 0; i < nsub; i++)
54       sub[i] = re->sub()[i]->Incref();
55     sub[nsub] = m;
56     re->Decref();
57     re = re2::Regexp::Concat(sub, nsub + 1, pf);
58     delete[] sub;
59   } else {
60     re2::Regexp* sub[2];
61     sub[0] = re;
62     sub[1] = m;
63     re = re2::Regexp::Concat(sub, 2, pf);
64   }
65   re_.push_back(re);
66   return n;
67 }
68 
Compile()69 bool RE2::Set::Compile() {
70   if (compiled_) {
71     LOG(DFATAL) << "RE2::Set::Compile multiple times";
72     return false;
73   }
74   compiled_ = true;
75 
76   Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
77     options_.ParseFlags());
78   re2::Regexp* re = re2::Regexp::Alternate(const_cast<re2::Regexp**>(&re_[0]),
79                                            re_.size(), pf);
80   re_.clear();
81   re2::Regexp* sre = re->Simplify();
82   re->Decref();
83   re = sre;
84   if (re == NULL) {
85     if (options_.log_errors())
86       LOG(ERROR) << "Error simplifying during Compile.";
87     return false;
88   }
89 
90   prog_ = Prog::CompileSet(options_, anchor_, re);
91   return prog_ != NULL;
92 }
93 
Match(const StringPiece & text,vector<int> * v) const94 bool RE2::Set::Match(const StringPiece& text, vector<int>* v) const {
95   if (!compiled_) {
96     LOG(DFATAL) << "RE2::Set::Match without Compile";
97     return false;
98   }
99   v->clear();
100   bool failed;
101   bool ret = prog_->SearchDFA(text, text, Prog::kAnchored,
102                               Prog::kManyMatch, NULL, &failed, v);
103   if (failed)
104     LOG(DFATAL) << "RE2::Set::Match: DFA ran out of cache space";
105 
106   if (ret == false)
107     return false;
108   if (v->size() == 0) {
109     LOG(DFATAL) << "RE2::Set::Match: match but unknown regexp set";
110     return false;
111   }
112   return true;
113 }
114