1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <functional>
6 
7 #include "src/arguments-inl.h"
8 #include "src/conversions-inl.h"
9 #include "src/isolate-inl.h"
10 #include "src/messages.h"
11 #include "src/objects/js-array-inl.h"
12 #include "src/regexp/jsregexp-inl.h"
13 #include "src/regexp/jsregexp.h"
14 #include "src/regexp/regexp-utils.h"
15 #include "src/runtime/runtime-utils.h"
16 #include "src/string-builder-inl.h"
17 #include "src/string-search.h"
18 #include "src/zone/zone-chunk-list.h"
19 
20 namespace v8 {
21 namespace internal {
22 
23 namespace {
24 
25 // Returns -1 for failure.
GetArgcForReplaceCallable(uint32_t num_captures,bool has_named_captures)26 uint32_t GetArgcForReplaceCallable(uint32_t num_captures,
27                                    bool has_named_captures) {
28   const uint32_t kAdditionalArgsWithoutNamedCaptures = 2;
29   const uint32_t kAdditionalArgsWithNamedCaptures = 3;
30   if (num_captures > Code::kMaxArguments) return -1;
31   uint32_t argc = has_named_captures
32                       ? num_captures + kAdditionalArgsWithNamedCaptures
33                       : num_captures + kAdditionalArgsWithoutNamedCaptures;
34   STATIC_ASSERT(Code::kMaxArguments < std::numeric_limits<uint32_t>::max() -
35                                           kAdditionalArgsWithNamedCaptures);
36   return (argc > Code::kMaxArguments) ? -1 : argc;
37 }
38 
39 // Looks up the capture of the given name. Returns the (1-based) numbered
40 // capture index or -1 on failure.
LookupNamedCapture(std::function<bool (String *)> name_matches,FixedArray * capture_name_map)41 int LookupNamedCapture(std::function<bool(String*)> name_matches,
42                        FixedArray* capture_name_map) {
43   // TODO(jgruber): Sort capture_name_map and do binary search via
44   // internalized strings.
45 
46   int maybe_capture_index = -1;
47   const int named_capture_count = capture_name_map->length() >> 1;
48   for (int j = 0; j < named_capture_count; j++) {
49     // The format of {capture_name_map} is documented at
50     // JSRegExp::kIrregexpCaptureNameMapIndex.
51     const int name_ix = j * 2;
52     const int index_ix = j * 2 + 1;
53 
54     String* capture_name = String::cast(capture_name_map->get(name_ix));
55     if (!name_matches(capture_name)) continue;
56 
57     maybe_capture_index = Smi::ToInt(capture_name_map->get(index_ix));
58     break;
59   }
60 
61   return maybe_capture_index;
62 }
63 
64 }  // namespace
65 
// Pre-parsed form of a replacement string. The replacement is converted once
// into a list of parts (literal slices, subject prefix/suffix, captures) so
// that it does not have to be re-parsed for every match it is applied to.
class CompiledReplacement {
 public:
  explicit CompiledReplacement(Zone* zone)
      : parts_(zone), replacement_substrings_(zone) {}

  // Parses {replacement} into parts. Returns true if the replacement is
  // simple, i.e. parsing produced no parts and the replacement string is to be
  // inserted verbatim; in that case Apply must not be used.
  bool Compile(Isolate* isolate, Handle<JSRegExp> regexp,
               Handle<String> replacement, int capture_count,
               int subject_length);

  // Appends the expansion of the replacement for one match to {builder}.
  // Use Apply only if Compile returned false.
  void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
             int32_t* match);

  // Number of distinct parts of the replacement pattern.
  int parts() { return static_cast<int>(parts_.size()); }

 private:
  // Kinds of parts. Values start at 1 because non-positive tags are reserved
  // for the temporary substring representation (see ReplacementPart::tag).
  enum PartType {
    SUBJECT_PREFIX = 1,
    SUBJECT_SUFFIX,
    SUBJECT_CAPTURE,
    REPLACEMENT_SUBSTRING,
    REPLACEMENT_STRING,
    EMPTY_REPLACEMENT,
    NUMBER_OF_PART_TYPES
  };

  struct ReplacementPart {
    // The whole match is represented as capture number 0.
    static inline ReplacementPart SubjectMatch() {
      return ReplacementPart(SUBJECT_CAPTURE, 0);
    }
    static inline ReplacementPart SubjectCapture(int capture_index) {
      return ReplacementPart(SUBJECT_CAPTURE, capture_index);
    }
    static inline ReplacementPart SubjectPrefix() {
      return ReplacementPart(SUBJECT_PREFIX, 0);
    }
    // The subject length is stored in the part so that Apply can compute the
    // suffix range without access to the subject string.
    static inline ReplacementPart SubjectSuffix(int subject_length) {
      return ReplacementPart(SUBJECT_SUFFIX, subject_length);
    }
    static inline ReplacementPart ReplacementString() {
      return ReplacementPart(REPLACEMENT_STRING, 0);
    }
    static inline ReplacementPart EmptyReplacement() {
      return ReplacementPart(EMPTY_REPLACEMENT, 0);
    }
    // Temporary representation of the replacement-string slice [from, to):
    // the start index is stored negated in the tag, the end index in data.
    static inline ReplacementPart ReplacementSubString(int from, int to) {
      DCHECK_LE(0, from);
      DCHECK_GT(to, from);
      return ReplacementPart(-from, to);
    }

    // If tag <= 0 then it is the negation of a start index of a substring of
    // the replacement pattern, otherwise it's a value from PartType.
    ReplacementPart(int tag, int data) : tag(tag), data(data) {
      // Must be non-positive or a PartType value.
      DCHECK(tag < NUMBER_OF_PART_TYPES);
    }
    // Either a value of PartType or a non-positive number that is
    // the negation of an index into the replacement string.
    int tag;
    // The data value's interpretation depends on the value of tag:
    // tag == SUBJECT_PREFIX:  data is unused.
    // tag == SUBJECT_SUFFIX:  data is the length of the subject string.
    // tag == SUBJECT_CAPTURE: data is the number of the capture.
    // tag == REPLACEMENT_SUBSTRING ||
    // tag == REPLACEMENT_STRING:    data is index into array of substrings
    //                               of the replacement string.
    // tag == EMPTY_REPLACEMENT: data is unused.
    // tag <= 0: Temporary representation of the substring of the replacement
    //           string ranging over -tag .. data.
    //           Is replaced by REPLACEMENT_{SUB,}STRING when we create the
    //           substring objects.
    int data;
  };

  // Parses {characters} (the replacement string) and appends the resulting
  // parts to {parts}.
  //   capture_name_map: name map used to resolve "$<name>"; may be nullptr, in
  //                     which case "$<" is treated as literal text.
  //   capture_count:    highest capture number that "$n"/"$nn" may refer to.
  //   subject_length:   stored in the part generated for "$'".
  // Returns true if the replacement is simple (no parts were generated and the
  // non-empty replacement is to be used verbatim), false otherwise.
  template <typename Char>
  bool ParseReplacementPattern(ZoneChunkList<ReplacementPart>* parts,
                               Vector<Char> characters,
                               FixedArray* capture_name_map, int capture_count,
                               int subject_length) {
    // Equivalent to String::GetSubstitution, except that this method converts
    // the replacement string into an internal representation that avoids
    // repeated parsing when used repeatedly.
    int length = characters.length();
    // Start of the literal text that has not yet been emitted as a part.
    int last = 0;
    for (int i = 0; i < length; i++) {
      Char c = characters[i];
      if (c == '$') {
        int next_index = i + 1;
        if (next_index == length) {  // No next character!
          break;
        }
        Char c2 = characters[next_index];
        switch (c2) {
          case '$':
            if (i > last) {
              // There is a substring before. Include the first "$".
              parts->push_back(
                  ReplacementPart::ReplacementSubString(last, next_index));
              last = next_index + 1;  // Continue after the second "$".
            } else {
              // Let the next substring start with the second "$".
              last = next_index;
            }
            i = next_index;
            break;
          case '`':
            // "$`": flush pending literal text, then the subject prefix.
            if (i > last) {
              parts->push_back(ReplacementPart::ReplacementSubString(last, i));
            }
            parts->push_back(ReplacementPart::SubjectPrefix());
            i = next_index;
            last = i + 1;
            break;
          case '\'':
            // "$'": flush pending literal text, then the subject suffix.
            if (i > last) {
              parts->push_back(ReplacementPart::ReplacementSubString(last, i));
            }
            parts->push_back(ReplacementPart::SubjectSuffix(subject_length));
            i = next_index;
            last = i + 1;
            break;
          case '&':
            // "$&": flush pending literal text, then the whole match.
            if (i > last) {
              parts->push_back(ReplacementPart::ReplacementSubString(last, i));
            }
            parts->push_back(ReplacementPart::SubjectMatch());
            i = next_index;
            last = i + 1;
            break;
          case '0':
          case '1':
          case '2':
          case '3':
          case '4':
          case '5':
          case '6':
          case '7':
          case '8':
          case '9': {
            int capture_ref = c2 - '0';
            if (capture_ref > capture_count) {
              // Not a valid capture number; "$n" stays part of the literal
              // text since {last} is not advanced.
              i = next_index;
              continue;
            }
            int second_digit_index = next_index + 1;
            if (second_digit_index < length) {
              // Peek ahead to see if we have two digits.
              Char c3 = characters[second_digit_index];
              if ('0' <= c3 && c3 <= '9') {  // Double digits.
                int double_digit_ref = capture_ref * 10 + c3 - '0';
                // Only consume the second digit if the two-digit number is a
                // valid capture number.
                if (double_digit_ref <= capture_count) {
                  next_index = second_digit_index;
                  capture_ref = double_digit_ref;
                }
              }
            }
            // A reference to capture 0 generates no part and is left in the
            // literal text.
            if (capture_ref > 0) {
              if (i > last) {
                parts->push_back(
                    ReplacementPart::ReplacementSubString(last, i));
              }
              DCHECK(capture_ref <= capture_count);
              parts->push_back(ReplacementPart::SubjectCapture(capture_ref));
              last = next_index + 1;
            }
            i = next_index;
            break;
          }
          case '<': {
            // Without a capture-name map, "$<" is literal text.
            if (capture_name_map == nullptr) {
              i = next_index;
              break;
            }

            // Scan until the next '>', and let the enclosed substring be the
            // groupName.

            const int name_start_index = next_index + 1;
            int closing_bracket_index = -1;
            for (int j = name_start_index; j < length; j++) {
              if (characters[j] == '>') {
                closing_bracket_index = j;
                break;
              }
            }

            // If no closing bracket is found, '$<' is treated as a string
            // literal.
            if (closing_bracket_index == -1) {
              i = next_index;
              break;
            }

            Vector<Char> requested_name =
                characters.SubVector(name_start_index, closing_bracket_index);

            // Let capture be ? Get(namedCaptures, groupName).

            const int capture_index = LookupNamedCapture(
                [=](String* capture_name) {
                  return capture_name->IsEqualTo(requested_name);
                },
                capture_name_map);

            // If capture is undefined or does not exist, replace the text
            // through the following '>' with the empty string.
            // Otherwise, replace the text through the following '>' with
            // ? ToString(capture).

            DCHECK(capture_index == -1 ||
                   (1 <= capture_index && capture_index <= capture_count));

            if (i > last) {
              parts->push_back(ReplacementPart::ReplacementSubString(last, i));
            }
            parts->push_back(
                (capture_index == -1)
                    ? ReplacementPart::EmptyReplacement()
                    : ReplacementPart::SubjectCapture(capture_index));
            last = closing_bracket_index + 1;
            i = closing_bracket_index;
            break;
          }
          default:
            // Unrecognized "$x": skip both characters, leaving them in the
            // literal text.
            i = next_index;
            break;
        }
      }
    }
    if (length > last) {
      if (last == 0) {
        // Replacement is simple.  Do not use Apply to do the replacement.
        return true;
      } else {
        // Emit the trailing literal text.
        parts->push_back(ReplacementPart::ReplacementSubString(last, length));
      }
    }
    return false;
  }

  // Parts of the replacement, in the order in which they are applied.
  ZoneChunkList<ReplacementPart> parts_;
  // String objects referenced (by index) from REPLACEMENT_{SUB,}STRING parts.
  ZoneVector<Handle<String>> replacement_substrings_;
};
312 
Compile(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> replacement,int capture_count,int subject_length)313 bool CompiledReplacement::Compile(Isolate* isolate, Handle<JSRegExp> regexp,
314                                   Handle<String> replacement, int capture_count,
315                                   int subject_length) {
316   {
317     DisallowHeapAllocation no_gc;
318     String::FlatContent content = replacement->GetFlatContent();
319     DCHECK(content.IsFlat());
320 
321     FixedArray* capture_name_map = nullptr;
322     if (capture_count > 0) {
323       DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
324       Object* maybe_capture_name_map = regexp->CaptureNameMap();
325       if (maybe_capture_name_map->IsFixedArray()) {
326         capture_name_map = FixedArray::cast(maybe_capture_name_map);
327       }
328     }
329 
330     bool simple;
331     if (content.IsOneByte()) {
332       simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
333                                        capture_name_map, capture_count,
334                                        subject_length);
335     } else {
336       DCHECK(content.IsTwoByte());
337       simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
338                                        capture_name_map, capture_count,
339                                        subject_length);
340     }
341     if (simple) return true;
342   }
343 
344   // Find substrings of replacement string and create them as String objects.
345   int substring_index = 0;
346   for (ReplacementPart& part : parts_) {
347     int tag = part.tag;
348     if (tag <= 0) {  // A replacement string slice.
349       int from = -tag;
350       int to = part.data;
351       replacement_substrings_.push_back(
352           isolate->factory()->NewSubString(replacement, from, to));
353       part.tag = REPLACEMENT_SUBSTRING;
354       part.data = substring_index;
355       substring_index++;
356     } else if (tag == REPLACEMENT_STRING) {
357       replacement_substrings_.push_back(replacement);
358       part.data = substring_index;
359       substring_index++;
360     }
361   }
362   return false;
363 }
364 
365 
Apply(ReplacementStringBuilder * builder,int match_from,int match_to,int32_t * match)366 void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
367                                 int match_from, int match_to, int32_t* match) {
368   DCHECK_LT(0, parts_.size());
369   for (ReplacementPart& part : parts_) {
370     switch (part.tag) {
371       case SUBJECT_PREFIX:
372         if (match_from > 0) builder->AddSubjectSlice(0, match_from);
373         break;
374       case SUBJECT_SUFFIX: {
375         int subject_length = part.data;
376         if (match_to < subject_length) {
377           builder->AddSubjectSlice(match_to, subject_length);
378         }
379         break;
380       }
381       case SUBJECT_CAPTURE: {
382         int capture = part.data;
383         int from = match[capture * 2];
384         int to = match[capture * 2 + 1];
385         if (from >= 0 && to > from) {
386           builder->AddSubjectSlice(from, to);
387         }
388         break;
389       }
390       case REPLACEMENT_SUBSTRING:
391       case REPLACEMENT_STRING:
392         builder->AddString(replacement_substrings_[part.data]);
393         break;
394       case EMPTY_REPLACEMENT:
395         break;
396       default:
397         UNREACHABLE();
398     }
399   }
400 }
401 
FindOneByteStringIndices(Vector<const uint8_t> subject,uint8_t pattern,std::vector<int> * indices,unsigned int limit)402 void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern,
403                               std::vector<int>* indices, unsigned int limit) {
404   DCHECK_LT(0, limit);
405   // Collect indices of pattern in subject using memchr.
406   // Stop after finding at most limit values.
407   const uint8_t* subject_start = subject.start();
408   const uint8_t* subject_end = subject_start + subject.length();
409   const uint8_t* pos = subject_start;
410   while (limit > 0) {
411     pos = reinterpret_cast<const uint8_t*>(
412         memchr(pos, pattern, subject_end - pos));
413     if (pos == nullptr) return;
414     indices->push_back(static_cast<int>(pos - subject_start));
415     pos++;
416     limit--;
417   }
418 }
419 
FindTwoByteStringIndices(const Vector<const uc16> subject,uc16 pattern,std::vector<int> * indices,unsigned int limit)420 void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
421                               std::vector<int>* indices, unsigned int limit) {
422   DCHECK_LT(0, limit);
423   const uc16* subject_start = subject.start();
424   const uc16* subject_end = subject_start + subject.length();
425   for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
426     if (*pos == pattern) {
427       indices->push_back(static_cast<int>(pos - subject_start));
428       limit--;
429     }
430   }
431 }
432 
433 template <typename SubjectChar, typename PatternChar>
FindStringIndices(Isolate * isolate,Vector<const SubjectChar> subject,Vector<const PatternChar> pattern,std::vector<int> * indices,unsigned int limit)434 void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
435                        Vector<const PatternChar> pattern,
436                        std::vector<int>* indices, unsigned int limit) {
437   DCHECK_LT(0, limit);
438   // Collect indices of pattern in subject.
439   // Stop after finding at most limit values.
440   int pattern_length = pattern.length();
441   int index = 0;
442   StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
443   while (limit > 0) {
444     index = search.Search(subject, index);
445     if (index < 0) return;
446     indices->push_back(index);
447     index += pattern_length;
448     limit--;
449   }
450 }
451 
FindStringIndicesDispatch(Isolate * isolate,String * subject,String * pattern,std::vector<int> * indices,unsigned int limit)452 void FindStringIndicesDispatch(Isolate* isolate, String* subject,
453                                String* pattern, std::vector<int>* indices,
454                                unsigned int limit) {
455   {
456     DisallowHeapAllocation no_gc;
457     String::FlatContent subject_content = subject->GetFlatContent();
458     String::FlatContent pattern_content = pattern->GetFlatContent();
459     DCHECK(subject_content.IsFlat());
460     DCHECK(pattern_content.IsFlat());
461     if (subject_content.IsOneByte()) {
462       Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
463       if (pattern_content.IsOneByte()) {
464         Vector<const uint8_t> pattern_vector =
465             pattern_content.ToOneByteVector();
466         if (pattern_vector.length() == 1) {
467           FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
468                                    limit);
469         } else {
470           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
471                             limit);
472         }
473       } else {
474         FindStringIndices(isolate, subject_vector,
475                           pattern_content.ToUC16Vector(), indices, limit);
476       }
477     } else {
478       Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
479       if (pattern_content.IsOneByte()) {
480         Vector<const uint8_t> pattern_vector =
481             pattern_content.ToOneByteVector();
482         if (pattern_vector.length() == 1) {
483           FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
484                                    limit);
485         } else {
486           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
487                             limit);
488         }
489       } else {
490         Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
491         if (pattern_vector.length() == 1) {
492           FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
493                                    limit);
494         } else {
495           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
496                             limit);
497         }
498       }
499     }
500   }
501 }
502 
503 namespace {
GetRewoundRegexpIndicesList(Isolate * isolate)504 std::vector<int>* GetRewoundRegexpIndicesList(Isolate* isolate) {
505   std::vector<int>* list = isolate->regexp_indices();
506   list->clear();
507   return list;
508 }
509 
TruncateRegexpIndicesList(Isolate * isolate)510 void TruncateRegexpIndicesList(Isolate* isolate) {
511   // Same size as smallest zone segment, preserving behavior from the
512   // runtime zone.
513   static const int kMaxRegexpIndicesListCapacity = 8 * KB;
514   std::vector<int>* indicies = isolate->regexp_indices();
515   if (indicies->capacity() > kMaxRegexpIndicesListCapacity) {
516     // Throw away backing storage.
517     indicies->clear();
518     indicies->shrink_to_fit();
519   }
520 }
521 }  // namespace
522 
523 template <typename ResultSeqString>
StringReplaceGlobalAtomRegExpWithString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> pattern_regexp,Handle<String> replacement,Handle<RegExpMatchInfo> last_match_info)524 V8_WARN_UNUSED_RESULT static Object* StringReplaceGlobalAtomRegExpWithString(
525     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
526     Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
527   DCHECK(subject->IsFlat());
528   DCHECK(replacement->IsFlat());
529 
530   std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
531 
532   DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
533   String* pattern =
534       String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
535   int subject_len = subject->length();
536   int pattern_len = pattern->length();
537   int replacement_len = replacement->length();
538 
539   FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xFFFFFFFF);
540 
541   if (indices->empty()) return *subject;
542 
543   // Detect integer overflow.
544   int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
545                            static_cast<int64_t>(pattern_len)) *
546                               static_cast<int64_t>(indices->size()) +
547                           static_cast<int64_t>(subject_len);
548   int result_len;
549   if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
550     STATIC_ASSERT(String::kMaxLength < kMaxInt);
551     result_len = kMaxInt;  // Provoke exception.
552   } else {
553     result_len = static_cast<int>(result_len_64);
554   }
555   if (result_len == 0) {
556     return ReadOnlyRoots(isolate).empty_string();
557   }
558 
559   int subject_pos = 0;
560   int result_pos = 0;
561 
562   MaybeHandle<SeqString> maybe_res;
563   if (ResultSeqString::kHasOneByteEncoding) {
564     maybe_res = isolate->factory()->NewRawOneByteString(result_len);
565   } else {
566     maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
567   }
568   Handle<SeqString> untyped_res;
569   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
570   Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
571 
572   for (int index : *indices) {
573     // Copy non-matched subject content.
574     if (subject_pos < index) {
575       String::WriteToFlat(*subject, result->GetChars() + result_pos,
576                           subject_pos, index);
577       result_pos += index - subject_pos;
578     }
579 
580     // Replace match.
581     if (replacement_len > 0) {
582       String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0,
583                           replacement_len);
584       result_pos += replacement_len;
585     }
586 
587     subject_pos = index + pattern_len;
588   }
589   // Add remaining subject content at the end.
590   if (subject_pos < subject_len) {
591     String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos,
592                         subject_len);
593   }
594 
595   int32_t match_indices[] = {indices->back(), indices->back() + pattern_len};
596   RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, 0,
597                                match_indices);
598 
599   TruncateRegexpIndicesList(isolate);
600 
601   return *result;
602 }
603 
// Replaces all matches of {regexp} in {subject} with the expansion of
// {replacement} and records the last successful match in {last_match_info}.
// Both strings must be flat. Returns {subject} itself if there is no match,
// and the exception sentinel if preparing or executing the regexp fails.
// ATOM regexps with a simple replacement are handled by
// StringReplaceGlobalAtomRegExpWithString.
V8_WARN_UNUSED_RESULT static Object* StringReplaceGlobalRegExpWithString(
    Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
    Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
  DCHECK(subject->IsFlat());
  DCHECK(replacement->IsFlat());

  int capture_count = regexp->CaptureCount();
  int subject_length = subject->length();

  JSRegExp::Type typeTag = regexp->TypeTag();
  if (typeTag == JSRegExp::IRREGEXP) {
    // Ensure the RegExp is compiled so we can access the capture-name map.
    if (RegExpImpl::IrregexpPrepare(isolate, regexp, subject) == -1) {
      DCHECK(isolate->has_pending_exception());
      return ReadOnlyRoots(isolate).exception();
    }
  }

  // CompiledReplacement uses zone allocation.
  Zone zone(isolate->allocator(), ZONE_NAME);
  CompiledReplacement compiled_replacement(&zone);
  // True if the replacement string is to be inserted verbatim.
  const bool simple_replace = compiled_replacement.Compile(
      isolate, regexp, replacement, capture_count, subject_length);

  // Shortcut for simple non-regexp global replacements
  if (typeTag == JSRegExp::ATOM && simple_replace) {
    // The result can only be one-byte if both inputs are.
    if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
      return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
          isolate, subject, regexp, replacement, last_match_info);
    } else {
      return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
          isolate, subject, regexp, replacement, last_match_info);
    }
  }

  RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();

  // A null result means either "no (more) matches" or "exception"; the two
  // are told apart via HasException().
  int32_t* current_match = global_cache.FetchNext();
  if (current_match == nullptr) {
    if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
    return *subject;
  }

  // Guessing the number of parts that the final result string is built
  // from. Global regexps can match any number of times, so we guess
  // conservatively.
  int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
  ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);

  // Number of parts added by compiled replacement plus preceding
  // string and possibly suffix after last match.  It is possible for
  // all components to use two elements when encoded as two smis.
  const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);

  // End of the previous match, i.e. start of the subject text not yet copied.
  int prev = 0;

  do {
    builder.EnsureCapacity(parts_added_per_loop);

    int start = current_match[0];
    int end = current_match[1];

    // Copy the unmatched subject text between the previous match and this one.
    if (prev < start) {
      builder.AddSubjectSlice(prev, start);
    }

    if (simple_replace) {
      builder.AddString(replacement);
    } else {
      compiled_replacement.Apply(&builder, start, end, current_match);
    }
    prev = end;

    current_match = global_cache.FetchNext();
  } while (current_match != nullptr);

  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();

  // Copy the subject text following the last match.
  if (prev < subject_length) {
    builder.EnsureCapacity(2);
    builder.AddSubjectSlice(prev, subject_length);
  }

  RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
                               global_cache.LastSuccessfulMatch());

  RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
}
693 
// Removes all matches of {regexp} from the flat string {subject} and records
// the last successful match in {last_match_info}. Returns {subject} itself if
// there is no match, and the exception sentinel if executing the regexp
// fails. ResultSeqString selects whether the result is allocated as a
// one-byte or a two-byte sequential string.
template <typename ResultSeqString>
V8_WARN_UNUSED_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString(
    Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
    Handle<RegExpMatchInfo> last_match_info) {
  DCHECK(subject->IsFlat());

  // Shortcut for simple non-regexp global replacements
  if (regexp->TypeTag() == JSRegExp::ATOM) {
    Handle<String> empty_string = isolate->factory()->empty_string();
    if (subject->IsOneByteRepresentation()) {
      return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
          isolate, subject, regexp, empty_string, last_match_info);
    } else {
      return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
          isolate, subject, regexp, empty_string, last_match_info);
    }
  }

  RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();

  // A null result means either "no (more) matches" or "exception"; the two
  // are told apart via HasException().
  int32_t* current_match = global_cache.FetchNext();
  if (current_match == nullptr) {
    if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
    return *subject;
  }

  int start = current_match[0];
  int end = current_match[1];
  int capture_count = regexp->CaptureCount();
  int subject_length = subject->length();

  // Only the first match is subtracted here, so {new_length} is an upper
  // bound on the length of the result; the string is shortened below if
  // further matches remove more characters.
  int new_length = subject_length - (end - start);
  if (new_length == 0) return ReadOnlyRoots(isolate).empty_string();

  Handle<ResultSeqString> answer;
  if (ResultSeqString::kHasOneByteEncoding) {
    answer = Handle<ResultSeqString>::cast(
        isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
  } else {
    answer = Handle<ResultSeqString>::cast(
        isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
  }

  // {prev} is the end of the previous match in the subject; {position} is the
  // number of characters written to {answer} so far.
  int prev = 0;
  int position = 0;

  do {
    start = current_match[0];
    end = current_match[1];
    if (prev < start) {
      // Add substring subject[prev;start] to answer string.
      String::WriteToFlat(*subject, answer->GetChars() + position, prev, start);
      position += start - prev;
    }
    prev = end;

    current_match = global_cache.FetchNext();
  } while (current_match != nullptr);

  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();

  RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
                               global_cache.LastSuccessfulMatch());

  if (prev < subject_length) {
    // Add substring subject[prev;length] to answer string.
    String::WriteToFlat(*subject, answer->GetChars() + position, prev,
                        subject_length);
    position += subject_length - prev;
  }

  if (position == 0) return ReadOnlyRoots(isolate).empty_string();

  // Shorten string and fill
  int string_size = ResultSeqString::SizeFor(position);
  int allocated_string_size = ResultSeqString::SizeFor(new_length);
  // Size difference between the allocated object and the object that is
  // actually needed for {position} characters.
  int delta = allocated_string_size - string_size;

  answer->set_length(position);
  if (delta == 0) return *answer;

  Address end_of_string = answer->address() + string_size;
  Heap* heap = isolate->heap();

  // The trimming is performed on a newly allocated object, which is on a
  // freshly allocated page or on an already swept page. Hence, the sweeper
  // thread can not get confused with the filler creation. No synchronization
  // needed.
  // TODO(hpayer): We should shrink the large object page if the size
  // of the object changed significantly.
  if (!heap->lo_space()->Contains(*answer)) {
    // Cover the unused tail of the original allocation with a filler object.
    heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
  }
  return *answer;
}
790 
791 namespace {
792 
StringReplaceGlobalRegExpWithStringHelper(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> subject,Handle<String> replacement,Handle<RegExpMatchInfo> last_match_info)793 Object* StringReplaceGlobalRegExpWithStringHelper(
794     Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
795     Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
796   CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
797 
798   subject = String::Flatten(isolate, subject);
799 
800   if (replacement->length() == 0) {
801     if (subject->HasOnlyOneByteChars()) {
802       return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
803           isolate, subject, regexp, last_match_info);
804     } else {
805       return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
806           isolate, subject, regexp, last_match_info);
807     }
808   }
809 
810   replacement = String::Flatten(isolate, replacement);
811 
812   return StringReplaceGlobalRegExpWithString(isolate, subject, regexp,
813                                              replacement, last_match_info);
814 }
815 
816 }  // namespace
817 
RUNTIME_FUNCTION(Runtime_StringSplit)818 RUNTIME_FUNCTION(Runtime_StringSplit) {
819   HandleScope handle_scope(isolate);
820   DCHECK_EQ(3, args.length());
821   CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
822   CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
823   CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
824   CHECK_LT(0, limit);
825 
826   int subject_length = subject->length();
827   int pattern_length = pattern->length();
828   CHECK_LT(0, pattern_length);
829 
830   if (limit == 0xFFFFFFFFu) {
831     FixedArray* last_match_cache_unused;
832     Handle<Object> cached_answer(
833         RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
834                                    &last_match_cache_unused,
835                                    RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
836         isolate);
837     if (*cached_answer != Smi::kZero) {
838       // The cache FixedArray is a COW-array and can therefore be reused.
839       Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
840           Handle<FixedArray>::cast(cached_answer));
841       return *result;
842     }
843   }
844 
845   // The limit can be very large (0xFFFFFFFFu), but since the pattern
846   // isn't empty, we can never create more parts than ~half the length
847   // of the subject.
848 
849   subject = String::Flatten(isolate, subject);
850   pattern = String::Flatten(isolate, pattern);
851 
852   std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
853 
854   FindStringIndicesDispatch(isolate, *subject, *pattern, indices, limit);
855 
856   if (static_cast<uint32_t>(indices->size()) < limit) {
857     indices->push_back(subject_length);
858   }
859 
860   // The list indices now contains the end of each part to create.
861 
862   // Create JSArray of substrings separated by separator.
863   int part_count = static_cast<int>(indices->size());
864 
865   Handle<JSArray> result =
866       isolate->factory()->NewJSArray(PACKED_ELEMENTS, part_count, part_count,
867                                      INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE);
868 
869   DCHECK(result->HasObjectElements());
870 
871   Handle<FixedArray> elements(FixedArray::cast(result->elements()), isolate);
872 
873   if (part_count == 1 && indices->at(0) == subject_length) {
874     elements->set(0, *subject);
875   } else {
876     int part_start = 0;
877     FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
878       int part_end = indices->at(i);
879       Handle<String> substring =
880           isolate->factory()->NewProperSubString(subject, part_start, part_end);
881       elements->set(i, *substring);
882       part_start = part_end + pattern_length;
883     });
884   }
885 
886   if (limit == 0xFFFFFFFFu) {
887     if (result->HasObjectElements()) {
888       RegExpResultsCache::Enter(isolate, subject, pattern, elements,
889                                 isolate->factory()->empty_fixed_array(),
890                                 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
891     }
892   }
893 
894   TruncateRegexpIndicesList(isolate);
895 
896   return *result;
897 }
898 
// Runtime entry: executes |regexp| (arg 0) on |subject| (arg 1) starting at
// |index| (arg 2), using |last_match_info| (arg 3) as the match info passed to
// RegExpImpl::Exec. Returns the result of RegExpImpl::Exec or fails with the
// pending exception.
RUNTIME_FUNCTION(Runtime_RegExpExec) {
  HandleScope handle_scope(isolate);
  DCHECK_EQ(4, args.length());
  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
  CONVERT_INT32_ARG_CHECKED(index, 2);
  CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);

  // Due to the way the JS calls are constructed this must be less than the
  // length of a string, i.e. it is always a Smi.  We check anyway for security.
  CHECK_LE(0, index);
  CHECK_GE(subject->length(), index);

  isolate->counters()->regexp_entry_runtime()->Increment();

  MaybeHandle<Object> exec_result =
      RegExpImpl::Exec(isolate, regexp, subject, index, last_match_info);
  RETURN_RESULT_OR_FAILURE(isolate, exec_result);
}
914 
// Runtime entry: global replace of |regexp| (arg 0) matches in |subject|
// (arg 1) by |replacement| (arg 2). Uses the isolate's internal match info
// (regexp_internal_match_info) rather than a caller-supplied one.
RUNTIME_FUNCTION(Runtime_RegExpInternalReplace) {
  HandleScope handle_scope(isolate);
  DCHECK_EQ(3, args.length());
  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
  CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);

  return StringReplaceGlobalRegExpWithStringHelper(
      isolate, regexp, subject, replacement,
      isolate->regexp_internal_match_info());
}
928 
929 namespace {
930 
931 class MatchInfoBackedMatch : public String::Match {
932  public:
MatchInfoBackedMatch(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> subject,Handle<RegExpMatchInfo> match_info)933   MatchInfoBackedMatch(Isolate* isolate, Handle<JSRegExp> regexp,
934                        Handle<String> subject,
935                        Handle<RegExpMatchInfo> match_info)
936       : isolate_(isolate), match_info_(match_info) {
937     subject_ = String::Flatten(isolate, subject);
938 
939     if (regexp->TypeTag() == JSRegExp::IRREGEXP) {
940       Object* o = regexp->CaptureNameMap();
941       has_named_captures_ = o->IsFixedArray();
942       if (has_named_captures_) {
943         capture_name_map_ = handle(FixedArray::cast(o), isolate);
944       }
945     } else {
946       has_named_captures_ = false;
947     }
948   }
949 
GetMatch()950   Handle<String> GetMatch() override {
951     return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr);
952   }
953 
GetPrefix()954   Handle<String> GetPrefix() override {
955     const int match_start = match_info_->Capture(0);
956     return isolate_->factory()->NewSubString(subject_, 0, match_start);
957   }
958 
GetSuffix()959   Handle<String> GetSuffix() override {
960     const int match_end = match_info_->Capture(1);
961     return isolate_->factory()->NewSubString(subject_, match_end,
962                                              subject_->length());
963   }
964 
HasNamedCaptures()965   bool HasNamedCaptures() override { return has_named_captures_; }
966 
CaptureCount()967   int CaptureCount() override {
968     return match_info_->NumberOfCaptureRegisters() / 2;
969   }
970 
GetCapture(int i,bool * capture_exists)971   MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
972     Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
973         isolate_, match_info_, i, capture_exists);
974     return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
975                              : isolate_->factory()->empty_string();
976   }
977 
GetNamedCapture(Handle<String> name,CaptureState * state)978   MaybeHandle<String> GetNamedCapture(Handle<String> name,
979                                       CaptureState* state) override {
980     DCHECK(has_named_captures_);
981     const int capture_index = LookupNamedCapture(
982         [=](String* capture_name) { return capture_name->Equals(*name); },
983         *capture_name_map_);
984 
985     if (capture_index == -1) {
986       *state = INVALID;
987       return name;  // Arbitrary string handle.
988     }
989 
990     DCHECK(1 <= capture_index && capture_index <= CaptureCount());
991 
992     bool capture_exists;
993     Handle<String> capture_value;
994     ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_value,
995                                GetCapture(capture_index, &capture_exists),
996                                String);
997 
998     if (!capture_exists) {
999       *state = UNMATCHED;
1000       return isolate_->factory()->empty_string();
1001     } else {
1002       *state = MATCHED;
1003       return capture_value;
1004     }
1005   }
1006 
1007  private:
1008   Isolate* isolate_;
1009   Handle<String> subject_;
1010   Handle<RegExpMatchInfo> match_info_;
1011 
1012   bool has_named_captures_;
1013   Handle<FixedArray> capture_name_map_;
1014 };
1015 
1016 class VectorBackedMatch : public String::Match {
1017  public:
VectorBackedMatch(Isolate * isolate,Handle<String> subject,Handle<String> match,int match_position,ZoneVector<Handle<Object>> * captures,Handle<Object> groups_obj)1018   VectorBackedMatch(Isolate* isolate, Handle<String> subject,
1019                     Handle<String> match, int match_position,
1020                     ZoneVector<Handle<Object>>* captures,
1021                     Handle<Object> groups_obj)
1022       : isolate_(isolate),
1023         match_(match),
1024         match_position_(match_position),
1025         captures_(captures) {
1026     subject_ = String::Flatten(isolate, subject);
1027 
1028     DCHECK(groups_obj->IsUndefined(isolate) || groups_obj->IsJSReceiver());
1029     has_named_captures_ = !groups_obj->IsUndefined(isolate);
1030     if (has_named_captures_) groups_obj_ = Handle<JSReceiver>::cast(groups_obj);
1031   }
1032 
GetMatch()1033   Handle<String> GetMatch() override { return match_; }
1034 
GetPrefix()1035   Handle<String> GetPrefix() override {
1036     return isolate_->factory()->NewSubString(subject_, 0, match_position_);
1037   }
1038 
GetSuffix()1039   Handle<String> GetSuffix() override {
1040     const int match_end_position = match_position_ + match_->length();
1041     return isolate_->factory()->NewSubString(subject_, match_end_position,
1042                                              subject_->length());
1043   }
1044 
HasNamedCaptures()1045   bool HasNamedCaptures() override { return has_named_captures_; }
1046 
CaptureCount()1047   int CaptureCount() override { return static_cast<int>(captures_->size()); }
1048 
GetCapture(int i,bool * capture_exists)1049   MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
1050     Handle<Object> capture_obj = captures_->at(i);
1051     if (capture_obj->IsUndefined(isolate_)) {
1052       *capture_exists = false;
1053       return isolate_->factory()->empty_string();
1054     }
1055     *capture_exists = true;
1056     return Object::ToString(isolate_, capture_obj);
1057   }
1058 
GetNamedCapture(Handle<String> name,CaptureState * state)1059   MaybeHandle<String> GetNamedCapture(Handle<String> name,
1060                                       CaptureState* state) override {
1061     DCHECK(has_named_captures_);
1062 
1063     Maybe<bool> maybe_capture_exists =
1064         JSReceiver::HasProperty(groups_obj_, name);
1065     if (maybe_capture_exists.IsNothing()) return MaybeHandle<String>();
1066 
1067     if (!maybe_capture_exists.FromJust()) {
1068       *state = INVALID;
1069       return name;  // Arbitrary string handle.
1070     }
1071 
1072     Handle<Object> capture_obj;
1073     ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_obj,
1074                                Object::GetProperty(isolate_, groups_obj_, name),
1075                                String);
1076     if (capture_obj->IsUndefined(isolate_)) {
1077       *state = UNMATCHED;
1078       return isolate_->factory()->empty_string();
1079     } else {
1080       *state = MATCHED;
1081       return Object::ToString(isolate_, capture_obj);
1082     }
1083   }
1084 
1085  private:
1086   Isolate* isolate_;
1087   Handle<String> subject_;
1088   Handle<String> match_;
1089   const int match_position_;
1090   ZoneVector<Handle<Object>>* captures_;
1091 
1092   bool has_named_captures_;
1093   Handle<JSReceiver> groups_obj_;
1094 };
1095 
1096 // Create the groups object (see also the RegExp result creation in
1097 // RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo).
ConstructNamedCaptureGroupsObject(Isolate * isolate,Handle<FixedArray> capture_map,std::function<Object * (int)> f_get_capture)1098 Handle<JSObject> ConstructNamedCaptureGroupsObject(
1099     Isolate* isolate, Handle<FixedArray> capture_map,
1100     std::function<Object*(int)> f_get_capture) {
1101   Handle<JSObject> groups = isolate->factory()->NewJSObjectWithNullProto();
1102 
1103   const int capture_count = capture_map->length() >> 1;
1104   for (int i = 0; i < capture_count; i++) {
1105     const int name_ix = i * 2;
1106     const int index_ix = i * 2 + 1;
1107 
1108     Handle<String> capture_name(String::cast(capture_map->get(name_ix)),
1109                                 isolate);
1110     const int capture_ix = Smi::ToInt(capture_map->get(index_ix));
1111     DCHECK(1 <= capture_ix && capture_ix <= capture_count);
1112 
1113     Handle<Object> capture_value(f_get_capture(capture_ix), isolate);
1114     DCHECK(capture_value->IsUndefined(isolate) || capture_value->IsString());
1115 
1116     JSObject::AddProperty(isolate, groups, capture_name, capture_value, NONE);
1117   }
1118 
1119   return groups;
1120 }
1121 
// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
// separate last match info.  See comment on that function.
//
// Runs the global |regexp| repeatedly over the flat |subject| and collects the
// results in a FixedArrayBuilder: a subject slice for every gap between
// matches and one entry per match. Without captures a match entry is the
// matched string; with captures it is a JSArray holding the match, the
// captures, the match index, the subject and (if the regexp has named
// captures) the groups object.
//
// |has_capture| must be true iff the regexp has at least one capture group.
// Returns the array built from |result_array| on success, null if there was no
// match at all, and the exception sentinel if matching failed. For subjects
// longer than kMinLengthToCache the result is served from / stored into the
// RegExpResultsCache.
template <bool has_capture>
static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
                                    Handle<JSRegExp> regexp,
                                    Handle<RegExpMatchInfo> last_match_array,
                                    Handle<JSArray> result_array) {
  DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
  DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
  DCHECK(subject->IsFlat());

  int capture_count = regexp->CaptureCount();
  int subject_length = subject->length();

  static const int kMinLengthToCache = 0x1000;

  if (subject_length > kMinLengthToCache) {
    FixedArray* last_match_cache;
    Object* cached_answer = RegExpResultsCache::Lookup(
        isolate->heap(), *subject, regexp->data(), &last_match_cache,
        RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
    if (cached_answer->IsFixedArray()) {
      // Cache hit. Copy the cached last-match registers (stored as Smis) into
      // a temporary native array so the last match info can be restored.
      int capture_registers = (capture_count + 1) * 2;
      int32_t* last_match = NewArray<int32_t>(capture_registers);
      for (int i = 0; i < capture_registers; i++) {
        last_match[i] = Smi::ToInt(last_match_cache->get(i));
      }
      Handle<FixedArray> cached_fixed_array =
          Handle<FixedArray>(FixedArray::cast(cached_answer), isolate);
      // The cache FixedArray is a COW-array and we need to return a copy.
      Handle<FixedArray> copied_fixed_array =
          isolate->factory()->CopyFixedArrayWithMap(
              cached_fixed_array, isolate->factory()->fixed_array_map());
      JSArray::SetContent(result_array, copied_fixed_array);
      RegExpImpl::SetLastMatchInfo(isolate, last_match_array, subject,
                                   capture_count, last_match);
      DeleteArray(last_match);
      return *result_array;
    }
  }

  RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();

  // Ensured in Runtime_RegExpExecMultiple.
  DCHECK(result_array->HasObjectElements());
  Handle<FixedArray> result_elements(FixedArray::cast(result_array->elements()),
                                     isolate);
  // Reuse the backing store of |result_array| unless it is shorter than 16
  // elements, in which case a fresh 16-element array is used instead.
  if (result_elements->length() < 16) {
    result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
  }

  FixedArrayBuilder builder(result_elements);

  // Position to search from.
  int match_start = -1;
  int match_end = 0;
  bool first = true;

  // Two smis before and after the match, for very long strings.
  static const int kMaxBuilderEntriesPerRegExpMatch = 5;

  while (true) {
    int32_t* current_match = global_cache.FetchNext();
    if (current_match == nullptr) break;
    match_start = current_match[0];
    builder.EnsureCapacity(isolate, kMaxBuilderEntriesPerRegExpMatch);
    // Record the unmatched text between the previous match and this one.
    if (match_end < match_start) {
      ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
                                                match_start);
    }
    match_end = current_match[1];
    {
      // Avoid accumulating new handles inside loop.
      HandleScope temp_scope(isolate);
      Handle<String> match;
      // Only the first match goes through NewSubString; all later ones use
      // NewProperSubString.
      if (!first) {
        match = isolate->factory()->NewProperSubString(subject, match_start,
                                                       match_end);
      } else {
        match =
            isolate->factory()->NewSubString(subject, match_start, match_end);
        first = false;
      }

      if (has_capture) {
        // Arguments array to replace function is match, captures, index and
        // subject, i.e., 3 + capture count in total. If the RegExp contains
        // named captures, they are also passed as the last argument.

        Handle<Object> maybe_capture_map(regexp->CaptureNameMap(), isolate);
        const bool has_named_captures = maybe_capture_map->IsFixedArray();

        const int argc =
            has_named_captures ? 4 + capture_count : 3 + capture_count;

        Handle<FixedArray> elements = isolate->factory()->NewFixedArray(argc);
        int cursor = 0;

        elements->set(cursor++, *match);
        for (int i = 1; i <= capture_count; i++) {
          // A negative start register marks a capture that did not take part
          // in the match; it is passed on as undefined.
          int start = current_match[i * 2];
          if (start >= 0) {
            int end = current_match[i * 2 + 1];
            DCHECK(start <= end);
            Handle<String> substring =
                isolate->factory()->NewSubString(subject, start, end);
            elements->set(cursor++, *substring);
          } else {
            DCHECK_GT(0, current_match[i * 2 + 1]);
            elements->set(cursor++, ReadOnlyRoots(isolate).undefined_value());
          }
        }

        elements->set(cursor++, Smi::FromInt(match_start));
        elements->set(cursor++, *subject);

        if (has_named_captures) {
          // Capture i was stored at elements[i] above (slot 0 is the match),
          // so the capture index doubles as the element index.
          Handle<FixedArray> capture_map =
              Handle<FixedArray>::cast(maybe_capture_map);
          Handle<JSObject> groups = ConstructNamedCaptureGroupsObject(
              isolate, capture_map, [=](int ix) { return elements->get(ix); });
          elements->set(cursor++, *groups);
        }

        DCHECK_EQ(cursor, argc);
        builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
      } else {
        builder.Add(*match);
      }
    }
  }

  // FetchNext() returning nullptr means either "no more matches" or an error.
  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();

  if (match_start >= 0) {
    // Finished matching, with at least one match.
    if (match_end < subject_length) {
      ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
                                                subject_length);
    }

    RegExpImpl::SetLastMatchInfo(isolate, last_match_array, subject,
                                 capture_count,
                                 global_cache.LastSuccessfulMatch());

    if (subject_length > kMinLengthToCache) {
      // Store the last successful match into the array for caching.
      // TODO(yangguo): do not expose last match to JS and simplify caching.
      int capture_registers = (capture_count + 1) * 2;
      Handle<FixedArray> last_match_cache =
          isolate->factory()->NewFixedArray(capture_registers);
      int32_t* last_match = global_cache.LastSuccessfulMatch();
      for (int i = 0; i < capture_registers; i++) {
        last_match_cache->set(i, Smi::FromInt(last_match[i]));
      }
      Handle<FixedArray> result_fixed_array =
          FixedArray::ShrinkOrEmpty(isolate, builder.array(), builder.length());
      // Cache the result and copy the FixedArray into a COW array.
      Handle<FixedArray> copied_fixed_array =
          isolate->factory()->CopyFixedArrayWithMap(
              result_fixed_array, isolate->factory()->fixed_array_map());
      RegExpResultsCache::Enter(
          isolate, subject, handle(regexp->data(), isolate), copied_fixed_array,
          last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
    }
    return *builder.ToJSArray(result_array);
  } else {
    return ReadOnlyRoots(isolate).null_value();  // No matches at all.
  }
}
1293 
1294 // Legacy implementation of RegExp.prototype[Symbol.replace] which
1295 // doesn't properly call the underlying exec method.
RegExpReplace(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> string,Handle<Object> replace_obj)1296 V8_WARN_UNUSED_RESULT MaybeHandle<String> RegExpReplace(
1297     Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> string,
1298     Handle<Object> replace_obj) {
1299   // Functional fast-paths are dispatched directly by replace builtin.
1300   DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1301   DCHECK(!replace_obj->IsCallable());
1302 
1303   Factory* factory = isolate->factory();
1304 
1305   const int flags = regexp->GetFlags();
1306   const bool global = (flags & JSRegExp::kGlobal) != 0;
1307   const bool sticky = (flags & JSRegExp::kSticky) != 0;
1308 
1309   Handle<String> replace;
1310   ASSIGN_RETURN_ON_EXCEPTION(isolate, replace,
1311                              Object::ToString(isolate, replace_obj), String);
1312   replace = String::Flatten(isolate, replace);
1313 
1314   Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1315 
1316   if (!global) {
1317     // Non-global regexp search, string replace.
1318 
1319     uint32_t last_index = 0;
1320     if (sticky) {
1321       Handle<Object> last_index_obj(regexp->last_index(), isolate);
1322       ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
1323                                  Object::ToLength(isolate, last_index_obj),
1324                                  String);
1325       last_index = PositiveNumberToUint32(*last_index_obj);
1326     }
1327 
1328     Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
1329                                      isolate);
1330 
1331     // A lastIndex exceeding the string length always always returns null
1332     // (signalling failure) in RegExpBuiltinExec, thus we can skip the call.
1333     if (last_index <= static_cast<uint32_t>(string->length())) {
1334       ASSIGN_RETURN_ON_EXCEPTION(isolate, match_indices_obj,
1335                                  RegExpImpl::Exec(isolate, regexp, string,
1336                                                   last_index, last_match_info),
1337                                  String);
1338     }
1339 
1340     if (match_indices_obj->IsNull(isolate)) {
1341       if (sticky) regexp->set_last_index(Smi::kZero, SKIP_WRITE_BARRIER);
1342       return string;
1343     }
1344 
1345     auto match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj);
1346 
1347     const int start_index = match_indices->Capture(0);
1348     const int end_index = match_indices->Capture(1);
1349 
1350     if (sticky)
1351       regexp->set_last_index(Smi::FromInt(end_index), SKIP_WRITE_BARRIER);
1352 
1353     IncrementalStringBuilder builder(isolate);
1354     builder.AppendString(factory->NewSubString(string, 0, start_index));
1355 
1356     if (replace->length() > 0) {
1357       MatchInfoBackedMatch m(isolate, regexp, string, match_indices);
1358       Handle<String> replacement;
1359       ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
1360                                  String::GetSubstitution(isolate, &m, replace),
1361                                  String);
1362       builder.AppendString(replacement);
1363     }
1364 
1365     builder.AppendString(
1366         factory->NewSubString(string, end_index, string->length()));
1367     return builder.Finish();
1368   } else {
1369     // Global regexp search, string replace.
1370     DCHECK(global);
1371     RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0),
1372                         String);
1373 
1374     if (replace->length() == 0) {
1375       if (string->HasOnlyOneByteChars()) {
1376         Object* result =
1377             StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
1378                 isolate, string, regexp, last_match_info);
1379         return handle(String::cast(result), isolate);
1380       } else {
1381         Object* result =
1382             StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
1383                 isolate, string, regexp, last_match_info);
1384         return handle(String::cast(result), isolate);
1385       }
1386     }
1387 
1388     Object* result = StringReplaceGlobalRegExpWithString(
1389         isolate, string, regexp, replace, last_match_info);
1390     if (result->IsString()) {
1391       return handle(String::cast(result), isolate);
1392     } else {
1393       return MaybeHandle<String>();
1394     }
1395   }
1396 
1397   UNREACHABLE();
1398 }
1399 
1400 }  // namespace
1401 
1402 // This is only called for StringReplaceGlobalRegExpWithFunction.
RUNTIME_FUNCTION(Runtime_RegExpExecMultiple)1403 RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
1404   HandleScope handles(isolate);
1405   DCHECK_EQ(4, args.length());
1406 
1407   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1408   CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
1409   CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 2);
1410   CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
1411   CHECK(result_array->HasObjectElements());
1412 
1413   subject = String::Flatten(isolate, subject);
1414   CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
1415 
1416   if (regexp->CaptureCount() == 0) {
1417     return SearchRegExpMultiple<false>(isolate, subject, regexp,
1418                                        last_match_info, result_array);
1419   } else {
1420     return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info,
1421                                       result_array);
1422   }
1423 }
1424 
RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction)1425 RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
1426   HandleScope scope(isolate);
1427   DCHECK_EQ(3, args.length());
1428   CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
1429   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
1430   CONVERT_ARG_HANDLE_CHECKED(JSReceiver, replace_obj, 2);
1431 
1432   DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1433   DCHECK(replace_obj->map()->is_callable());
1434 
1435   Factory* factory = isolate->factory();
1436   Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1437 
1438   const int flags = regexp->GetFlags();
1439   DCHECK_EQ(flags & JSRegExp::kGlobal, 0);
1440 
1441   // TODO(jgruber): This should be an easy port to CSA with massive payback.
1442 
1443   const bool sticky = (flags & JSRegExp::kSticky) != 0;
1444   uint32_t last_index = 0;
1445   if (sticky) {
1446     Handle<Object> last_index_obj(regexp->last_index(), isolate);
1447     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1448         isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1449     last_index = PositiveNumberToUint32(*last_index_obj);
1450 
1451     if (last_index > static_cast<uint32_t>(subject->length())) last_index = 0;
1452   }
1453 
1454   Handle<Object> match_indices_obj;
1455   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1456       isolate, match_indices_obj,
1457       RegExpImpl::Exec(isolate, regexp, subject, last_index, last_match_info));
1458 
1459   if (match_indices_obj->IsNull(isolate)) {
1460     if (sticky) regexp->set_last_index(Smi::kZero, SKIP_WRITE_BARRIER);
1461     return *subject;
1462   }
1463 
1464   Handle<RegExpMatchInfo> match_indices =
1465       Handle<RegExpMatchInfo>::cast(match_indices_obj);
1466 
1467   const int index = match_indices->Capture(0);
1468   const int end_of_match = match_indices->Capture(1);
1469 
1470   if (sticky)
1471     regexp->set_last_index(Smi::FromInt(end_of_match), SKIP_WRITE_BARRIER);
1472 
1473   IncrementalStringBuilder builder(isolate);
1474   builder.AppendString(factory->NewSubString(subject, 0, index));
1475 
1476   // Compute the parameter list consisting of the match, captures, index,
1477   // and subject for the replace function invocation. If the RegExp contains
1478   // named captures, they are also passed as the last argument.
1479 
1480   // The number of captures plus one for the match.
1481   const int m = match_indices->NumberOfCaptureRegisters() / 2;
1482 
1483   bool has_named_captures = false;
1484   Handle<FixedArray> capture_map;
1485   if (m > 1) {
1486     // The existence of capture groups implies IRREGEXP kind.
1487     DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
1488 
1489     Object* maybe_capture_map = regexp->CaptureNameMap();
1490     if (maybe_capture_map->IsFixedArray()) {
1491       has_named_captures = true;
1492       capture_map = handle(FixedArray::cast(maybe_capture_map), isolate);
1493     }
1494   }
1495 
1496   const uint32_t argc = GetArgcForReplaceCallable(m, has_named_captures);
1497   if (argc == static_cast<uint32_t>(-1)) {
1498     THROW_NEW_ERROR_RETURN_FAILURE(
1499         isolate, NewRangeError(MessageTemplate::kTooManyArguments));
1500   }
1501   ScopedVector<Handle<Object>> argv(argc);
1502 
1503   int cursor = 0;
1504   for (int j = 0; j < m; j++) {
1505     bool ok;
1506     Handle<String> capture =
1507         RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok);
1508     if (ok) {
1509       argv[cursor++] = capture;
1510     } else {
1511       argv[cursor++] = factory->undefined_value();
1512     }
1513   }
1514 
1515   argv[cursor++] = handle(Smi::FromInt(index), isolate);
1516   argv[cursor++] = subject;
1517 
1518   if (has_named_captures) {
1519     argv[cursor++] = ConstructNamedCaptureGroupsObject(
1520         isolate, capture_map, [&argv](int ix) { return *argv[ix]; });
1521   }
1522 
1523   DCHECK_EQ(cursor, argc);
1524 
1525   Handle<Object> replacement_obj;
1526   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1527       isolate, replacement_obj,
1528       Execution::Call(isolate, replace_obj, factory->undefined_value(), argc,
1529                       argv.start()));
1530 
1531   Handle<String> replacement;
1532   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1533       isolate, replacement, Object::ToString(isolate, replacement_obj));
1534 
1535   builder.AppendString(replacement);
1536   builder.AppendString(
1537       factory->NewSubString(subject, end_of_match, subject->length()));
1538 
1539   RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1540 }
1541 
1542 namespace {
1543 
ToUint32(Isolate * isolate,Handle<Object> object,uint32_t * out)1544 V8_WARN_UNUSED_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate,
1545                                                    Handle<Object> object,
1546                                                    uint32_t* out) {
1547   if (object->IsUndefined(isolate)) {
1548     *out = kMaxUInt32;
1549     return object;
1550   }
1551 
1552   Handle<Object> number;
1553   ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(isolate, object),
1554                              Object);
1555   *out = NumberToUint32(*number);
1556   return object;
1557 }
1558 
NewJSArrayWithElements(Isolate * isolate,Handle<FixedArray> elems,int num_elems)1559 Handle<JSArray> NewJSArrayWithElements(Isolate* isolate,
1560                                        Handle<FixedArray> elems,
1561                                        int num_elems) {
1562   return isolate->factory()->NewJSArrayWithElements(
1563       FixedArray::ShrinkOrEmpty(isolate, elems, num_elems));
1564 }
1565 
1566 }  // namespace
1567 
// Slow path for:
// ES#sec-regexp.prototype-@@split
// RegExp.prototype [ @@split ] ( string, limit )
//
// Arguments: args[0] is the receiver (a JSReceiver), args[1] is the subject
// string and args[2] is the limit object. Returns a JSArray holding the
// pieces of the subject (and any captures of the separator matches). Every
// fallible step bails out through the *_FAILURE_ON_EXCEPTION macros.
RUNTIME_FUNCTION(Runtime_RegExpSplit) {
  HandleScope scope(isolate);
  DCHECK_EQ(3, args.length());

  DCHECK(args[1]->IsString());

  CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
  CONVERT_ARG_HANDLE_CHECKED(Object, limit_obj, 2);

  Factory* factory = isolate->factory();

  // Determine the constructor used to create the splitter; the built-in
  // RegExp function is passed as the default.
  Handle<JSFunction> regexp_fun = isolate->regexp_function();
  Handle<Object> ctor;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, ctor, Object::SpeciesConstructor(isolate, recv, regexp_fun));

  // Read the receiver's "flags" property and coerce it to a string.
  Handle<Object> flags_obj;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, flags_obj,
      JSObject::GetProperty(isolate, recv, factory->flags_string()));

  Handle<String> flags;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags,
                                     Object::ToString(isolate, flags_obj));

  // The unicode and sticky settings are derived by searching the flags string
  // for 'u' and 'y' respectively.
  Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u');
  const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0);

  Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y');
  const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0);

  // The splitter is always constructed with the 'y' flag; append it unless
  // the flags string already contains it.
  Handle<String> new_flags = flags;
  if (!sticky) {
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags,
                                       factory->NewConsString(flags, y_str));
  }

  // Construct the splitter: new ctor(recv, new_flags).
  Handle<JSReceiver> splitter;
  {
    const int argc = 2;

    ScopedVector<Handle<Object>> argv(argc);
    argv[0] = recv;
    argv[1] = new_flags;

    Handle<Object> splitter_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, splitter_obj,
        Execution::New(isolate, ctor, argc, argv.start()));

    splitter = Handle<JSReceiver>::cast(splitter_obj);
  }

  // Convert the limit; the local ToUint32 helper maps undefined to kMaxUInt32.
  uint32_t limit;
  RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));

  const uint32_t length = string->length();

  // A zero limit always produces an empty array.
  if (limit == 0) return *factory->NewJSArray(0);

  // Empty subject: the result is [] if the splitter matches, and [string]
  // otherwise.
  if (length == 0) {
    Handle<Object> result;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
                                                 factory->undefined_value()));

    if (!result->IsNull(isolate)) return *factory->NewJSArray(0);

    Handle<FixedArray> elems = factory->NewUninitializedFixedArray(1);
    elems->set(0, *string);
    return *factory->NewJSArrayWithElements(elems);
  }

  // Output elements are accumulated in a FixedArray that is grown on demand
  // by FixedArray::SetAndGrow.
  static const int kInitialArraySize = 8;
  Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
  uint32_t num_elems = 0;

  // string_index is the position at which the next match is attempted;
  // prev_string_index is the start of the piece that has not been emitted yet.
  uint32_t string_index = 0;
  uint32_t prev_string_index = 0;
  while (string_index < length) {
    RETURN_FAILURE_ON_EXCEPTION(
        isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index));

    Handle<Object> result;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
                                                 factory->undefined_value()));

    // No match at string_index: advance and try again.
    if (result->IsNull(isolate)) {
      string_index = static_cast<uint32_t>(
          RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
      continue;
    }

    // The end of the match is taken from the splitter's lastIndex, converted
    // with ToLength and clamped to the subject length.
    Handle<Object> last_index_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter));

    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));

    const uint32_t end =
        std::min(PositiveNumberToUint32(*last_index_obj), length);
    // The match ends exactly where the pending piece begins; treat it like a
    // failed match and advance.
    if (end == prev_string_index) {
      string_index = static_cast<uint32_t>(
          RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
      continue;
    }

    // Emit the piece [prev_string_index, string_index) preceding the match.
    {
      Handle<String> substr =
          factory->NewSubString(string, prev_string_index, string_index);
      elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
      if (num_elems == limit) {
        return *NewJSArrayWithElements(isolate, elems, num_elems);
      }
    }

    prev_string_index = end;

    // Append elements 1 .. length-1 of the match result (element 0 is not
    // copied), stopping as soon as the limit is reached.
    Handle<Object> num_captures_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, num_captures_obj,
        Object::GetProperty(isolate, result,
                            isolate->factory()->length_string()));

    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj));
    const uint32_t num_captures = PositiveNumberToUint32(*num_captures_obj);

    for (uint32_t i = 1; i < num_captures; i++) {
      Handle<Object> capture;
      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
          isolate, capture, Object::GetElement(isolate, result, i));
      elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, capture);
      if (num_elems == limit) {
        return *NewJSArrayWithElements(isolate, elems, num_elems);
      }
    }

    // Continue searching right after the match.
    string_index = prev_string_index;
  }

  // Emit the trailing piece [prev_string_index, length).
  {
    Handle<String> substr =
        factory->NewSubString(string, prev_string_index, length);
    elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
  }

  return *NewJSArrayWithElements(isolate, elems, num_elems);
}
1723 
// Slow path for:
// ES#sec-regexp.prototype-@@replace
// RegExp.prototype [ @@replace ] ( string, replaceValue )
//
// Arguments: args[0] is the receiver (a JSReceiver), args[1] is the subject
// string and args[2] is the replace value (a callable or something coerced to
// a string). Returns the resulting string. Every fallible step bails out
// through the *_FAILURE_ON_EXCEPTION macros.
RUNTIME_FUNCTION(Runtime_RegExpReplace) {
  HandleScope scope(isolate);
  DCHECK_EQ(3, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
  Handle<Object> replace_obj = args.at(2);

  Factory* factory = isolate->factory();

  string = String::Flatten(isolate, string);

  // Fast-path for unmodified JSRegExps.
  if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) {
    RETURN_RESULT_OR_FAILURE(
        isolate, RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string,
                               replace_obj));
  }

  const uint32_t length = string->length();
  const bool functional_replace = replace_obj->IsCallable();

  // A non-callable replace value is coerced to a string once, up front.
  Handle<String> replace;
  if (!functional_replace) {
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace,
                                       Object::ToString(isolate, replace_obj));
  }

  Handle<Object> global_obj;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, global_obj,
      JSReceiver::GetProperty(isolate, recv, factory->global_string()));
  const bool global = global_obj->BooleanValue(isolate);

  // The "unicode" property is only read, and lastIndex only reset to 0, for
  // global replaces.
  bool unicode = false;
  if (global) {
    Handle<Object> unicode_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, unicode_obj,
        JSReceiver::GetProperty(isolate, recv, factory->unicode_string()));
    unicode = unicode_obj->BooleanValue(isolate);

    RETURN_FAILURE_ON_EXCEPTION(isolate,
                                RegExpUtils::SetLastIndex(isolate, recv, 0));
  }

  Zone zone(isolate->allocator(), ZONE_NAME);
  ZoneVector<Handle<Object>> results(&zone);

  // Phase 1: collect all match results before performing any replacement.
  // A non-global replace stops after the first match.
  while (true) {
    Handle<Object> result;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, result, RegExpUtils::RegExpExec(isolate, recv, string,
                                                 factory->undefined_value()));

    if (result->IsNull(isolate)) break;

    results.push_back(result);
    if (!global) break;

    Handle<Object> match_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
                                       Object::GetElement(isolate, result, 0));

    Handle<String> match;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
                                       Object::ToString(isolate, match_obj));

    // After an empty match, lastIndex is explicitly advanced (presumably so
    // that the same empty match is not found again).
    if (match->length() == 0) {
      RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetAdvancedStringIndex(
                                               isolate, recv, string, unicode));
    }
  }

  // Phase 2: compute the replacement for each collected result and assemble
  // the output string.
  // TODO(jgruber): Look into ReplacementStringBuilder instead.
  IncrementalStringBuilder builder(isolate);
  // Index of the first character of |string| not yet copied into |builder|.
  uint32_t next_source_position = 0;

  for (const auto& result : results) {
    HandleScope handle_scope(isolate);
    Handle<Object> captures_length_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, captures_length_obj,
        Object::GetProperty(isolate, result, factory->length_string()));

    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, captures_length_obj,
        Object::ToLength(isolate, captures_length_obj));
    const uint32_t captures_length =
        PositiveNumberToUint32(*captures_length_obj);

    Handle<Object> match_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
                                       Object::GetElement(isolate, result, 0));

    Handle<String> match;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
                                       Object::ToString(isolate, match_obj));

    const int match_length = match->length();

    // The match position comes from the result's "index" property, converted
    // with ToInteger and clamped to the subject length.
    Handle<Object> position_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, position_obj,
        Object::GetProperty(isolate, result, factory->index_string()));

    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, position_obj, Object::ToInteger(isolate, position_obj));
    const uint32_t position =
        std::min(PositiveNumberToUint32(*position_obj), length);

    // Do not reserve capacity since captures_length is user-controlled.
    ZoneVector<Handle<Object>> captures(&zone);

    // Copy elements 0 .. captures_length-1 of the result; every element that
    // is not undefined is coerced to a string.
    for (uint32_t n = 0; n < captures_length; n++) {
      Handle<Object> capture;
      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
          isolate, capture, Object::GetElement(isolate, result, n));

      if (!capture->IsUndefined(isolate)) {
        ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture,
                                           Object::ToString(isolate, capture));
      }
      captures.push_back(capture);
    }

    // Named captures are present iff the result's "groups" property is not
    // undefined.
    Handle<Object> groups_obj = isolate->factory()->undefined_value();
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, groups_obj,
        Object::GetProperty(isolate, result, factory->groups_string()));

    const bool has_named_captures = !groups_obj->IsUndefined(isolate);

    Handle<String> replacement;
    if (functional_replace) {
      // Call the replace function with: all collected captures, the position,
      // the subject string and, if present, the groups object.
      const uint32_t argc =
          GetArgcForReplaceCallable(captures_length, has_named_captures);
      if (argc == static_cast<uint32_t>(-1)) {
        THROW_NEW_ERROR_RETURN_FAILURE(
            isolate, NewRangeError(MessageTemplate::kTooManyArguments));
      }

      ScopedVector<Handle<Object>> argv(argc);

      int cursor = 0;
      for (uint32_t j = 0; j < captures_length; j++) {
        argv[cursor++] = captures[j];
      }

      argv[cursor++] = handle(Smi::FromInt(position), isolate);
      argv[cursor++] = string;
      if (has_named_captures) argv[cursor++] = groups_obj;

      DCHECK_EQ(cursor, argc);

      Handle<Object> replacement_obj;
      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
          isolate, replacement_obj,
          Execution::Call(isolate, replace_obj, factory->undefined_value(),
                          argc, argv.start()));

      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
          isolate, replacement, Object::ToString(isolate, replacement_obj));
    } else {
      // String replace value: expand it via String::GetSubstitution. A groups
      // value that is not undefined is first converted to an object.
      DCHECK(!functional_replace);
      if (!groups_obj->IsUndefined(isolate)) {
        ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
            isolate, groups_obj, JSReceiver::ToObject(isolate, groups_obj));
      }
      VectorBackedMatch m(isolate, string, match, position, &captures,
                          groups_obj);
      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
          isolate, replacement, String::GetSubstitution(isolate, &m, replace));
    }

    // Results positioned before the already-consumed part of the subject are
    // skipped; otherwise copy the gap, then the replacement.
    if (position >= next_source_position) {
      builder.AppendString(
          factory->NewSubString(string, next_source_position, position));
      builder.AppendString(replacement);

      next_source_position = position + match_length;
    }
  }

  // Append whatever remains of the subject after the last replaced match.
  if (next_source_position < length) {
    builder.AppendString(
        factory->NewSubString(string, next_source_position, length));
  }

  RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
}
1918 
// Initializes the JSRegExp in args[0] from the source string in args[1] and
// the flags string in args[2] via JSRegExp::Initialize, and returns that same
// JSRegExp. A failed initialization is propagated as a failure.
RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
  HandleScope handle_scope(isolate);
  DCHECK_EQ(3, args.length());
  // TODO(pwong): To follow the spec more closely and simplify calling code,
  // this could handle the canonicalization of pattern and flags. See
  // https://tc39.github.io/ecma262/#sec-regexpinitialize
  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, target_regexp, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, pattern_source, 1);
  CONVERT_ARG_HANDLE_CHECKED(String, flag_string, 2);

  RETURN_FAILURE_ON_EXCEPTION(
      isolate,
      JSRegExp::Initialize(target_regexp, pattern_source, flag_string));

  return *target_regexp;
}
1934 
// Returns the boolean heap value telling whether args[0] is a JSRegExp.
RUNTIME_FUNCTION(Runtime_IsRegExp) {
  SealHandleScope seal_scope(isolate);
  DCHECK_EQ(1, args.length());
  CONVERT_ARG_CHECKED(Object, candidate, 0);
  const bool is_js_regexp = candidate->IsJSRegExp();
  return isolate->heap()->ToBoolean(is_js_regexp);
}
1941 
1942 }  // namespace internal
1943 }  // namespace v8
1944