• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  // Copyright 2010 The RE2 Authors.  All Rights Reserved.
2  // Use of this source code is governed by a BSD-style
3  // license that can be found in the LICENSE file.
4  
5  #include "re2/set.h"
6  
7  #include "util/util.h"
8  #include "re2/stringpiece.h"
9  #include "re2/prog.h"
10  #include "re2/re2.h"
11  #include "re2/regexp.h"
12  
13  using namespace re2;
14  
// Creates an empty set that has not been compiled yet.  The caller's
// options are copied into the set and the anchor mode is remembered for
// the later Compile() call.
RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor)
    : anchor_(anchor),
      prog_(NULL),
      compiled_(false) {
  options_.Copy(options);
}
21  
~Set()22  RE2::Set::~Set() {
23    for (int i = 0; i < re_.size(); i++)
24      re_[i]->Decref();
25    delete prog_;
26  }
27  
Add(const StringPiece & pattern,string * error)28  int RE2::Set::Add(const StringPiece& pattern, string* error) {
29    if (compiled_) {
30      LOG(DFATAL) << "RE2::Set::Add after Compile";
31      return -1;
32    }
33  
34    Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
35      options_.ParseFlags());
36  
37    RegexpStatus status;
38    re2::Regexp* re = Regexp::Parse(pattern, pf, &status);
39    if (re == NULL) {
40      if (error != NULL)
41        *error = status.Text();
42      if (options_.log_errors())
43        LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text();
44      return -1;
45    }
46  
47    // Concatenate with match index and push on vector.
48    int n = re_.size();
49    re2::Regexp* m = re2::Regexp::HaveMatch(n, pf);
50    if (re->op() == kRegexpConcat) {
51      int nsub = re->nsub();
52      re2::Regexp** sub = new re2::Regexp*[nsub + 1];
53      for (int i = 0; i < nsub; i++)
54        sub[i] = re->sub()[i]->Incref();
55      sub[nsub] = m;
56      re->Decref();
57      re = re2::Regexp::Concat(sub, nsub + 1, pf);
58      delete[] sub;
59    } else {
60      re2::Regexp* sub[2];
61      sub[0] = re;
62      sub[1] = m;
63      re = re2::Regexp::Concat(sub, 2, pf);
64    }
65    re_.push_back(re);
66    return n;
67  }
68  
Compile()69  bool RE2::Set::Compile() {
70    if (compiled_) {
71      LOG(DFATAL) << "RE2::Set::Compile multiple times";
72      return false;
73    }
74    compiled_ = true;
75  
76    Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
77      options_.ParseFlags());
78    re2::Regexp* re = re2::Regexp::Alternate(const_cast<re2::Regexp**>(&re_[0]),
79                                             re_.size(), pf);
80    re_.clear();
81    re2::Regexp* sre = re->Simplify();
82    re->Decref();
83    re = sre;
84    if (re == NULL) {
85      if (options_.log_errors())
86        LOG(ERROR) << "Error simplifying during Compile.";
87      return false;
88    }
89  
90    prog_ = Prog::CompileSet(options_, anchor_, re);
91    return prog_ != NULL;
92  }
93  
Match(const StringPiece & text,vector<int> * v) const94  bool RE2::Set::Match(const StringPiece& text, vector<int>* v) const {
95    if (!compiled_) {
96      LOG(DFATAL) << "RE2::Set::Match without Compile";
97      return false;
98    }
99    v->clear();
100    bool failed;
101    bool ret = prog_->SearchDFA(text, text, Prog::kAnchored,
102                                Prog::kManyMatch, NULL, &failed, v);
103    if (failed)
104      LOG(DFATAL) << "RE2::Set::Match: DFA ran out of cache space";
105  
106    if (ret == false)
107      return false;
108    if (v->size() == 0) {
109      LOG(DFATAL) << "RE2::Set::Match: match but unknown regexp set";
110      return false;
111    }
112    return true;
113  }
114