1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/ostreams.h"
6 #include "src/regexp/regexp-ast.h"
7 
8 namespace v8 {
9 namespace internal {
10 
11 #define MAKE_ACCEPT(Name)                                          \
12   void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \
13     return visitor->Visit##Name(this, data);                       \
14   }
15 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT)
16 #undef MAKE_ACCEPT
17 
18 #define MAKE_TYPE_CASE(Name)                            \
19   RegExp##Name* RegExpTree::As##Name() { return NULL; } \
20   bool RegExpTree::Is##Name() { return false; }
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)21 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
22 #undef MAKE_TYPE_CASE
23 
24 #define MAKE_TYPE_CASE(Name)                              \
25   RegExp##Name* RegExp##Name::As##Name() { return this; } \
26   bool RegExp##Name::Is##Name() { return true; }
27 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
28 #undef MAKE_TYPE_CASE
29 
30 
31 static Interval ListCaptureRegisters(ZoneList<RegExpTree*>* children) {
32   Interval result = Interval::Empty();
33   for (int i = 0; i < children->length(); i++)
34     result = result.Union(children->at(i)->CaptureRegisters());
35   return result;
36 }
37 
38 
CaptureRegisters()39 Interval RegExpAlternative::CaptureRegisters() {
40   return ListCaptureRegisters(nodes());
41 }
42 
43 
CaptureRegisters()44 Interval RegExpDisjunction::CaptureRegisters() {
45   return ListCaptureRegisters(alternatives());
46 }
47 
48 
CaptureRegisters()49 Interval RegExpLookaround::CaptureRegisters() {
50   return body()->CaptureRegisters();
51 }
52 
53 
CaptureRegisters()54 Interval RegExpCapture::CaptureRegisters() {
55   Interval self(StartRegister(index()), EndRegister(index()));
56   return self.Union(body()->CaptureRegisters());
57 }
58 
59 
CaptureRegisters()60 Interval RegExpQuantifier::CaptureRegisters() {
61   return body()->CaptureRegisters();
62 }
63 
64 
IsAnchoredAtStart()65 bool RegExpAssertion::IsAnchoredAtStart() {
66   return assertion_type() == RegExpAssertion::START_OF_INPUT;
67 }
68 
69 
IsAnchoredAtEnd()70 bool RegExpAssertion::IsAnchoredAtEnd() {
71   return assertion_type() == RegExpAssertion::END_OF_INPUT;
72 }
73 
74 
IsAnchoredAtStart()75 bool RegExpAlternative::IsAnchoredAtStart() {
76   ZoneList<RegExpTree*>* nodes = this->nodes();
77   for (int i = 0; i < nodes->length(); i++) {
78     RegExpTree* node = nodes->at(i);
79     if (node->IsAnchoredAtStart()) {
80       return true;
81     }
82     if (node->max_match() > 0) {
83       return false;
84     }
85   }
86   return false;
87 }
88 
89 
IsAnchoredAtEnd()90 bool RegExpAlternative::IsAnchoredAtEnd() {
91   ZoneList<RegExpTree*>* nodes = this->nodes();
92   for (int i = nodes->length() - 1; i >= 0; i--) {
93     RegExpTree* node = nodes->at(i);
94     if (node->IsAnchoredAtEnd()) {
95       return true;
96     }
97     if (node->max_match() > 0) {
98       return false;
99     }
100   }
101   return false;
102 }
103 
104 
IsAnchoredAtStart()105 bool RegExpDisjunction::IsAnchoredAtStart() {
106   ZoneList<RegExpTree*>* alternatives = this->alternatives();
107   for (int i = 0; i < alternatives->length(); i++) {
108     if (!alternatives->at(i)->IsAnchoredAtStart()) return false;
109   }
110   return true;
111 }
112 
113 
IsAnchoredAtEnd()114 bool RegExpDisjunction::IsAnchoredAtEnd() {
115   ZoneList<RegExpTree*>* alternatives = this->alternatives();
116   for (int i = 0; i < alternatives->length(); i++) {
117     if (!alternatives->at(i)->IsAnchoredAtEnd()) return false;
118   }
119   return true;
120 }
121 
122 
IsAnchoredAtStart()123 bool RegExpLookaround::IsAnchoredAtStart() {
124   return is_positive() && type() == LOOKAHEAD && body()->IsAnchoredAtStart();
125 }
126 
127 
IsAnchoredAtStart()128 bool RegExpCapture::IsAnchoredAtStart() { return body()->IsAnchoredAtStart(); }
129 
130 
IsAnchoredAtEnd()131 bool RegExpCapture::IsAnchoredAtEnd() { return body()->IsAnchoredAtEnd(); }
132 
133 
134 // Convert regular expression trees to a simple sexp representation.
135 // This representation should be different from the input grammar
136 // in as many cases as possible, to make it more difficult for incorrect
137 // parses to look as correct ones which is likely if the input and
138 // output formats are alike.
139 class RegExpUnparser final : public RegExpVisitor {
140  public:
RegExpUnparser(std::ostream & os,Zone * zone)141   RegExpUnparser(std::ostream& os, Zone* zone) : os_(os), zone_(zone) {}
142   void VisitCharacterRange(CharacterRange that);
143 #define MAKE_CASE(Name) void* Visit##Name(RegExp##Name*, void* data) override;
144   FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
145 #undef MAKE_CASE
146  private:
147   std::ostream& os_;
148   Zone* zone_;
149 };
150 
151 
VisitDisjunction(RegExpDisjunction * that,void * data)152 void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) {
153   os_ << "(|";
154   for (int i = 0; i < that->alternatives()->length(); i++) {
155     os_ << " ";
156     that->alternatives()->at(i)->Accept(this, data);
157   }
158   os_ << ")";
159   return NULL;
160 }
161 
162 
VisitAlternative(RegExpAlternative * that,void * data)163 void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) {
164   os_ << "(:";
165   for (int i = 0; i < that->nodes()->length(); i++) {
166     os_ << " ";
167     that->nodes()->at(i)->Accept(this, data);
168   }
169   os_ << ")";
170   return NULL;
171 }
172 
173 
VisitCharacterRange(CharacterRange that)174 void RegExpUnparser::VisitCharacterRange(CharacterRange that) {
175   os_ << AsUC32(that.from());
176   if (!that.IsSingleton()) {
177     os_ << "-" << AsUC32(that.to());
178   }
179 }
180 
181 
VisitCharacterClass(RegExpCharacterClass * that,void * data)182 void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that,
183                                           void* data) {
184   if (that->is_negated()) os_ << "^";
185   os_ << "[";
186   for (int i = 0; i < that->ranges(zone_)->length(); i++) {
187     if (i > 0) os_ << " ";
188     VisitCharacterRange(that->ranges(zone_)->at(i));
189   }
190   os_ << "]";
191   return NULL;
192 }
193 
194 
VisitAssertion(RegExpAssertion * that,void * data)195 void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) {
196   switch (that->assertion_type()) {
197     case RegExpAssertion::START_OF_INPUT:
198       os_ << "@^i";
199       break;
200     case RegExpAssertion::END_OF_INPUT:
201       os_ << "@$i";
202       break;
203     case RegExpAssertion::START_OF_LINE:
204       os_ << "@^l";
205       break;
206     case RegExpAssertion::END_OF_LINE:
207       os_ << "@$l";
208       break;
209     case RegExpAssertion::BOUNDARY:
210       os_ << "@b";
211       break;
212     case RegExpAssertion::NON_BOUNDARY:
213       os_ << "@B";
214       break;
215   }
216   return NULL;
217 }
218 
219 
VisitAtom(RegExpAtom * that,void * data)220 void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) {
221   os_ << "'";
222   Vector<const uc16> chardata = that->data();
223   for (int i = 0; i < chardata.length(); i++) {
224     os_ << AsUC16(chardata[i]);
225   }
226   os_ << "'";
227   return NULL;
228 }
229 
230 
VisitText(RegExpText * that,void * data)231 void* RegExpUnparser::VisitText(RegExpText* that, void* data) {
232   if (that->elements()->length() == 1) {
233     that->elements()->at(0).tree()->Accept(this, data);
234   } else {
235     os_ << "(!";
236     for (int i = 0; i < that->elements()->length(); i++) {
237       os_ << " ";
238       that->elements()->at(i).tree()->Accept(this, data);
239     }
240     os_ << ")";
241   }
242   return NULL;
243 }
244 
245 
VisitQuantifier(RegExpQuantifier * that,void * data)246 void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) {
247   os_ << "(# " << that->min() << " ";
248   if (that->max() == RegExpTree::kInfinity) {
249     os_ << "- ";
250   } else {
251     os_ << that->max() << " ";
252   }
253   os_ << (that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n ");
254   that->body()->Accept(this, data);
255   os_ << ")";
256   return NULL;
257 }
258 
259 
VisitCapture(RegExpCapture * that,void * data)260 void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) {
261   os_ << "(^ ";
262   that->body()->Accept(this, data);
263   os_ << ")";
264   return NULL;
265 }
266 
267 
VisitLookaround(RegExpLookaround * that,void * data)268 void* RegExpUnparser::VisitLookaround(RegExpLookaround* that, void* data) {
269   os_ << "(";
270   os_ << (that->type() == RegExpLookaround::LOOKAHEAD ? "->" : "<-");
271   os_ << (that->is_positive() ? " + " : " - ");
272   that->body()->Accept(this, data);
273   os_ << ")";
274   return NULL;
275 }
276 
277 
VisitBackReference(RegExpBackReference * that,void * data)278 void* RegExpUnparser::VisitBackReference(RegExpBackReference* that,
279                                          void* data) {
280   os_ << "(<- " << that->index() << ")";
281   return NULL;
282 }
283 
284 
VisitEmpty(RegExpEmpty * that,void * data)285 void* RegExpUnparser::VisitEmpty(RegExpEmpty* that, void* data) {
286   os_ << '%';
287   return NULL;
288 }
289 
290 
Print(std::ostream & os,Zone * zone)291 std::ostream& RegExpTree::Print(std::ostream& os, Zone* zone) {  // NOLINT
292   RegExpUnparser unparser(os, zone);
293   Accept(&unparser, NULL);
294   return os;
295 }
296 
297 
RegExpDisjunction(ZoneList<RegExpTree * > * alternatives)298 RegExpDisjunction::RegExpDisjunction(ZoneList<RegExpTree*>* alternatives)
299     : alternatives_(alternatives) {
300   DCHECK(alternatives->length() > 1);
301   RegExpTree* first_alternative = alternatives->at(0);
302   min_match_ = first_alternative->min_match();
303   max_match_ = first_alternative->max_match();
304   for (int i = 1; i < alternatives->length(); i++) {
305     RegExpTree* alternative = alternatives->at(i);
306     min_match_ = Min(min_match_, alternative->min_match());
307     max_match_ = Max(max_match_, alternative->max_match());
308   }
309 }
310 
311 
IncreaseBy(int previous,int increase)312 static int IncreaseBy(int previous, int increase) {
313   if (RegExpTree::kInfinity - previous < increase) {
314     return RegExpTree::kInfinity;
315   } else {
316     return previous + increase;
317   }
318 }
319 
320 
RegExpAlternative(ZoneList<RegExpTree * > * nodes)321 RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes)
322     : nodes_(nodes) {
323   DCHECK(nodes->length() > 1);
324   min_match_ = 0;
325   max_match_ = 0;
326   for (int i = 0; i < nodes->length(); i++) {
327     RegExpTree* node = nodes->at(i);
328     int node_min_match = node->min_match();
329     min_match_ = IncreaseBy(min_match_, node_min_match);
330     int node_max_match = node->max_match();
331     max_match_ = IncreaseBy(max_match_, node_max_match);
332   }
333 }
334 
335 
336 }  // namespace internal
337 }  // namespace v8
338