1 // Copyright 2010 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "re2/set.h"
6
7 #include "util/util.h"
8 #include "re2/stringpiece.h"
9 #include "re2/prog.h"
10 #include "re2/re2.h"
11 #include "re2/regexp.h"
12
13 using namespace re2;
14
// Creates an empty set that has not been compiled yet.  The caller's options
// are copied into options_ and the anchor is remembered in anchor_; prog_
// stays NULL until Compile() assigns it.
RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor)
    : anchor_(anchor),
      prog_(NULL),
      compiled_(false) {
  // options_ is populated through Copy() rather than direct initialization.
  options_.Copy(options);
}
21
~Set()22 RE2::Set::~Set() {
23 for (int i = 0; i < re_.size(); i++)
24 re_[i]->Decref();
25 delete prog_;
26 }
27
Add(const StringPiece & pattern,string * error)28 int RE2::Set::Add(const StringPiece& pattern, string* error) {
29 if (compiled_) {
30 LOG(DFATAL) << "RE2::Set::Add after Compile";
31 return -1;
32 }
33
34 Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
35 options_.ParseFlags());
36
37 RegexpStatus status;
38 re2::Regexp* re = Regexp::Parse(pattern, pf, &status);
39 if (re == NULL) {
40 if (error != NULL)
41 *error = status.Text();
42 if (options_.log_errors())
43 LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text();
44 return -1;
45 }
46
47 // Concatenate with match index and push on vector.
48 int n = re_.size();
49 re2::Regexp* m = re2::Regexp::HaveMatch(n, pf);
50 if (re->op() == kRegexpConcat) {
51 int nsub = re->nsub();
52 re2::Regexp** sub = new re2::Regexp*[nsub + 1];
53 for (int i = 0; i < nsub; i++)
54 sub[i] = re->sub()[i]->Incref();
55 sub[nsub] = m;
56 re->Decref();
57 re = re2::Regexp::Concat(sub, nsub + 1, pf);
58 delete[] sub;
59 } else {
60 re2::Regexp* sub[2];
61 sub[0] = re;
62 sub[1] = m;
63 re = re2::Regexp::Concat(sub, 2, pf);
64 }
65 re_.push_back(re);
66 return n;
67 }
68
Compile()69 bool RE2::Set::Compile() {
70 if (compiled_) {
71 LOG(DFATAL) << "RE2::Set::Compile multiple times";
72 return false;
73 }
74 compiled_ = true;
75
76 Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
77 options_.ParseFlags());
78 re2::Regexp* re = re2::Regexp::Alternate(const_cast<re2::Regexp**>(&re_[0]),
79 re_.size(), pf);
80 re_.clear();
81 re2::Regexp* sre = re->Simplify();
82 re->Decref();
83 re = sre;
84 if (re == NULL) {
85 if (options_.log_errors())
86 LOG(ERROR) << "Error simplifying during Compile.";
87 return false;
88 }
89
90 prog_ = Prog::CompileSet(options_, anchor_, re);
91 return prog_ != NULL;
92 }
93
Match(const StringPiece & text,vector<int> * v) const94 bool RE2::Set::Match(const StringPiece& text, vector<int>* v) const {
95 if (!compiled_) {
96 LOG(DFATAL) << "RE2::Set::Match without Compile";
97 return false;
98 }
99 v->clear();
100 bool failed;
101 bool ret = prog_->SearchDFA(text, text, Prog::kAnchored,
102 Prog::kManyMatch, NULL, &failed, v);
103 if (failed)
104 LOG(DFATAL) << "RE2::Set::Match: DFA ran out of cache space";
105
106 if (ret == false)
107 return false;
108 if (v->size() == 0) {
109 LOG(DFATAL) << "RE2::Set::Match: match but unknown regexp set";
110 return false;
111 }
112 return true;
113 }
114