• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  // Copyright 2016 the V8 project authors. All rights reserved.
2  // Use of this source code is governed by a BSD-style license that can be
3  // found in the LICENSE file.
4  
5  #include "src/ostreams.h"
6  #include "src/regexp/regexp-ast.h"
7  
8  namespace v8 {
9  namespace internal {
10  
11  #define MAKE_ACCEPT(Name)                                          \
12    void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \
13      return visitor->Visit##Name(this, data);                       \
14    }
15  FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT)
16  #undef MAKE_ACCEPT
17  
18  #define MAKE_TYPE_CASE(Name)                            \
19    RegExp##Name* RegExpTree::As##Name() { return NULL; } \
20    bool RegExpTree::Is##Name() { return false; }
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)21  FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
22  #undef MAKE_TYPE_CASE
23  
24  #define MAKE_TYPE_CASE(Name)                              \
25    RegExp##Name* RegExp##Name::As##Name() { return this; } \
26    bool RegExp##Name::Is##Name() { return true; }
27  FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
28  #undef MAKE_TYPE_CASE
29  
30  
31  static Interval ListCaptureRegisters(ZoneList<RegExpTree*>* children) {
32    Interval result = Interval::Empty();
33    for (int i = 0; i < children->length(); i++)
34      result = result.Union(children->at(i)->CaptureRegisters());
35    return result;
36  }
37  
38  
CaptureRegisters()39  Interval RegExpAlternative::CaptureRegisters() {
40    return ListCaptureRegisters(nodes());
41  }
42  
43  
CaptureRegisters()44  Interval RegExpDisjunction::CaptureRegisters() {
45    return ListCaptureRegisters(alternatives());
46  }
47  
48  
CaptureRegisters()49  Interval RegExpLookaround::CaptureRegisters() {
50    return body()->CaptureRegisters();
51  }
52  
53  
CaptureRegisters()54  Interval RegExpCapture::CaptureRegisters() {
55    Interval self(StartRegister(index()), EndRegister(index()));
56    return self.Union(body()->CaptureRegisters());
57  }
58  
59  
CaptureRegisters()60  Interval RegExpQuantifier::CaptureRegisters() {
61    return body()->CaptureRegisters();
62  }
63  
64  
IsAnchoredAtStart()65  bool RegExpAssertion::IsAnchoredAtStart() {
66    return assertion_type() == RegExpAssertion::START_OF_INPUT;
67  }
68  
69  
IsAnchoredAtEnd()70  bool RegExpAssertion::IsAnchoredAtEnd() {
71    return assertion_type() == RegExpAssertion::END_OF_INPUT;
72  }
73  
74  
IsAnchoredAtStart()75  bool RegExpAlternative::IsAnchoredAtStart() {
76    ZoneList<RegExpTree*>* nodes = this->nodes();
77    for (int i = 0; i < nodes->length(); i++) {
78      RegExpTree* node = nodes->at(i);
79      if (node->IsAnchoredAtStart()) {
80        return true;
81      }
82      if (node->max_match() > 0) {
83        return false;
84      }
85    }
86    return false;
87  }
88  
89  
IsAnchoredAtEnd()90  bool RegExpAlternative::IsAnchoredAtEnd() {
91    ZoneList<RegExpTree*>* nodes = this->nodes();
92    for (int i = nodes->length() - 1; i >= 0; i--) {
93      RegExpTree* node = nodes->at(i);
94      if (node->IsAnchoredAtEnd()) {
95        return true;
96      }
97      if (node->max_match() > 0) {
98        return false;
99      }
100    }
101    return false;
102  }
103  
104  
IsAnchoredAtStart()105  bool RegExpDisjunction::IsAnchoredAtStart() {
106    ZoneList<RegExpTree*>* alternatives = this->alternatives();
107    for (int i = 0; i < alternatives->length(); i++) {
108      if (!alternatives->at(i)->IsAnchoredAtStart()) return false;
109    }
110    return true;
111  }
112  
113  
IsAnchoredAtEnd()114  bool RegExpDisjunction::IsAnchoredAtEnd() {
115    ZoneList<RegExpTree*>* alternatives = this->alternatives();
116    for (int i = 0; i < alternatives->length(); i++) {
117      if (!alternatives->at(i)->IsAnchoredAtEnd()) return false;
118    }
119    return true;
120  }
121  
122  
IsAnchoredAtStart()123  bool RegExpLookaround::IsAnchoredAtStart() {
124    return is_positive() && type() == LOOKAHEAD && body()->IsAnchoredAtStart();
125  }
126  
127  
IsAnchoredAtStart()128  bool RegExpCapture::IsAnchoredAtStart() { return body()->IsAnchoredAtStart(); }
129  
130  
IsAnchoredAtEnd()131  bool RegExpCapture::IsAnchoredAtEnd() { return body()->IsAnchoredAtEnd(); }
132  
133  
134  // Convert regular expression trees to a simple sexp representation.
135  // This representation should be different from the input grammar
136  // in as many cases as possible, to make it more difficult for incorrect
137  // parses to look as correct ones which is likely if the input and
138  // output formats are alike.
139  class RegExpUnparser final : public RegExpVisitor {
140   public:
RegExpUnparser(std::ostream & os,Zone * zone)141    RegExpUnparser(std::ostream& os, Zone* zone) : os_(os), zone_(zone) {}
142    void VisitCharacterRange(CharacterRange that);
143  #define MAKE_CASE(Name) void* Visit##Name(RegExp##Name*, void* data) override;
144    FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
145  #undef MAKE_CASE
146   private:
147    std::ostream& os_;
148    Zone* zone_;
149  };
150  
151  
VisitDisjunction(RegExpDisjunction * that,void * data)152  void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) {
153    os_ << "(|";
154    for (int i = 0; i < that->alternatives()->length(); i++) {
155      os_ << " ";
156      that->alternatives()->at(i)->Accept(this, data);
157    }
158    os_ << ")";
159    return NULL;
160  }
161  
162  
VisitAlternative(RegExpAlternative * that,void * data)163  void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) {
164    os_ << "(:";
165    for (int i = 0; i < that->nodes()->length(); i++) {
166      os_ << " ";
167      that->nodes()->at(i)->Accept(this, data);
168    }
169    os_ << ")";
170    return NULL;
171  }
172  
173  
VisitCharacterRange(CharacterRange that)174  void RegExpUnparser::VisitCharacterRange(CharacterRange that) {
175    os_ << AsUC16(that.from());
176    if (!that.IsSingleton()) {
177      os_ << "-" << AsUC16(that.to());
178    }
179  }
180  
181  
VisitCharacterClass(RegExpCharacterClass * that,void * data)182  void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that,
183                                            void* data) {
184    if (that->is_negated()) os_ << "^";
185    os_ << "[";
186    for (int i = 0; i < that->ranges(zone_)->length(); i++) {
187      if (i > 0) os_ << " ";
188      VisitCharacterRange(that->ranges(zone_)->at(i));
189    }
190    os_ << "]";
191    return NULL;
192  }
193  
194  
VisitAssertion(RegExpAssertion * that,void * data)195  void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) {
196    switch (that->assertion_type()) {
197      case RegExpAssertion::START_OF_INPUT:
198        os_ << "@^i";
199        break;
200      case RegExpAssertion::END_OF_INPUT:
201        os_ << "@$i";
202        break;
203      case RegExpAssertion::START_OF_LINE:
204        os_ << "@^l";
205        break;
206      case RegExpAssertion::END_OF_LINE:
207        os_ << "@$l";
208        break;
209      case RegExpAssertion::BOUNDARY:
210        os_ << "@b";
211        break;
212      case RegExpAssertion::NON_BOUNDARY:
213        os_ << "@B";
214        break;
215    }
216    return NULL;
217  }
218  
219  
VisitAtom(RegExpAtom * that,void * data)220  void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) {
221    os_ << "'";
222    Vector<const uc16> chardata = that->data();
223    for (int i = 0; i < chardata.length(); i++) {
224      os_ << AsUC16(chardata[i]);
225    }
226    os_ << "'";
227    return NULL;
228  }
229  
230  
VisitText(RegExpText * that,void * data)231  void* RegExpUnparser::VisitText(RegExpText* that, void* data) {
232    if (that->elements()->length() == 1) {
233      that->elements()->at(0).tree()->Accept(this, data);
234    } else {
235      os_ << "(!";
236      for (int i = 0; i < that->elements()->length(); i++) {
237        os_ << " ";
238        that->elements()->at(i).tree()->Accept(this, data);
239      }
240      os_ << ")";
241    }
242    return NULL;
243  }
244  
245  
VisitQuantifier(RegExpQuantifier * that,void * data)246  void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) {
247    os_ << "(# " << that->min() << " ";
248    if (that->max() == RegExpTree::kInfinity) {
249      os_ << "- ";
250    } else {
251      os_ << that->max() << " ";
252    }
253    os_ << (that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n ");
254    that->body()->Accept(this, data);
255    os_ << ")";
256    return NULL;
257  }
258  
259  
VisitCapture(RegExpCapture * that,void * data)260  void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) {
261    os_ << "(^ ";
262    that->body()->Accept(this, data);
263    os_ << ")";
264    return NULL;
265  }
266  
267  
VisitLookaround(RegExpLookaround * that,void * data)268  void* RegExpUnparser::VisitLookaround(RegExpLookaround* that, void* data) {
269    os_ << "(";
270    os_ << (that->type() == RegExpLookaround::LOOKAHEAD ? "->" : "<-");
271    os_ << (that->is_positive() ? " + " : " - ");
272    that->body()->Accept(this, data);
273    os_ << ")";
274    return NULL;
275  }
276  
277  
VisitBackReference(RegExpBackReference * that,void * data)278  void* RegExpUnparser::VisitBackReference(RegExpBackReference* that,
279                                           void* data) {
280    os_ << "(<- " << that->index() << ")";
281    return NULL;
282  }
283  
284  
VisitEmpty(RegExpEmpty * that,void * data)285  void* RegExpUnparser::VisitEmpty(RegExpEmpty* that, void* data) {
286    os_ << '%';
287    return NULL;
288  }
289  
290  
Print(std::ostream & os,Zone * zone)291  std::ostream& RegExpTree::Print(std::ostream& os, Zone* zone) {  // NOLINT
292    RegExpUnparser unparser(os, zone);
293    Accept(&unparser, NULL);
294    return os;
295  }
296  
297  
RegExpDisjunction(ZoneList<RegExpTree * > * alternatives)298  RegExpDisjunction::RegExpDisjunction(ZoneList<RegExpTree*>* alternatives)
299      : alternatives_(alternatives) {
300    DCHECK(alternatives->length() > 1);
301    RegExpTree* first_alternative = alternatives->at(0);
302    min_match_ = first_alternative->min_match();
303    max_match_ = first_alternative->max_match();
304    for (int i = 1; i < alternatives->length(); i++) {
305      RegExpTree* alternative = alternatives->at(i);
306      min_match_ = Min(min_match_, alternative->min_match());
307      max_match_ = Max(max_match_, alternative->max_match());
308    }
309  }
310  
311  
IncreaseBy(int previous,int increase)312  static int IncreaseBy(int previous, int increase) {
313    if (RegExpTree::kInfinity - previous < increase) {
314      return RegExpTree::kInfinity;
315    } else {
316      return previous + increase;
317    }
318  }
319  
320  
RegExpAlternative(ZoneList<RegExpTree * > * nodes)321  RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes)
322      : nodes_(nodes) {
323    DCHECK(nodes->length() > 1);
324    min_match_ = 0;
325    max_match_ = 0;
326    for (int i = 0; i < nodes->length(); i++) {
327      RegExpTree* node = nodes->at(i);
328      int node_min_match = node->min_match();
329      min_match_ = IncreaseBy(min_match_, node_min_match);
330      int node_max_match = node->max_match();
331      max_match_ = IncreaseBy(max_match_, node_max_match);
332    }
333  }
334  
335  
336  }  // namespace internal
337  }  // namespace v8
338