1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/runtime/runtime-utils.h"
6 
7 #include "src/arguments.h"
8 #include "src/conversions-inl.h"
9 #include "src/isolate-inl.h"
10 #include "src/messages.h"
11 #include "src/regexp/jsregexp-inl.h"
12 #include "src/regexp/jsregexp.h"
13 #include "src/string-builder.h"
14 #include "src/string-search.h"
15 
16 namespace v8 {
17 namespace internal {
18 
19 class CompiledReplacement {
20  public:
CompiledReplacement(Zone * zone)21   explicit CompiledReplacement(Zone* zone)
22       : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {}
23 
24   // Return whether the replacement is simple.
25   bool Compile(Handle<String> replacement, int capture_count,
26                int subject_length);
27 
28   // Use Apply only if Compile returned false.
29   void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
30              int32_t* match);
31 
32   // Number of distinct parts of the replacement pattern.
parts()33   int parts() { return parts_.length(); }
34 
zone() const35   Zone* zone() const { return zone_; }
36 
37  private:
38   enum PartType {
39     SUBJECT_PREFIX = 1,
40     SUBJECT_SUFFIX,
41     SUBJECT_CAPTURE,
42     REPLACEMENT_SUBSTRING,
43     REPLACEMENT_STRING,
44     NUMBER_OF_PART_TYPES
45   };
46 
47   struct ReplacementPart {
SubjectMatchv8::internal::CompiledReplacement::ReplacementPart48     static inline ReplacementPart SubjectMatch() {
49       return ReplacementPart(SUBJECT_CAPTURE, 0);
50     }
SubjectCapturev8::internal::CompiledReplacement::ReplacementPart51     static inline ReplacementPart SubjectCapture(int capture_index) {
52       return ReplacementPart(SUBJECT_CAPTURE, capture_index);
53     }
SubjectPrefixv8::internal::CompiledReplacement::ReplacementPart54     static inline ReplacementPart SubjectPrefix() {
55       return ReplacementPart(SUBJECT_PREFIX, 0);
56     }
SubjectSuffixv8::internal::CompiledReplacement::ReplacementPart57     static inline ReplacementPart SubjectSuffix(int subject_length) {
58       return ReplacementPart(SUBJECT_SUFFIX, subject_length);
59     }
ReplacementStringv8::internal::CompiledReplacement::ReplacementPart60     static inline ReplacementPart ReplacementString() {
61       return ReplacementPart(REPLACEMENT_STRING, 0);
62     }
ReplacementSubStringv8::internal::CompiledReplacement::ReplacementPart63     static inline ReplacementPart ReplacementSubString(int from, int to) {
64       DCHECK(from >= 0);
65       DCHECK(to > from);
66       return ReplacementPart(-from, to);
67     }
68 
69     // If tag <= 0 then it is the negation of a start index of a substring of
70     // the replacement pattern, otherwise it's a value from PartType.
ReplacementPartv8::internal::CompiledReplacement::ReplacementPart71     ReplacementPart(int tag, int data) : tag(tag), data(data) {
72       // Must be non-positive or a PartType value.
73       DCHECK(tag < NUMBER_OF_PART_TYPES);
74     }
75     // Either a value of PartType or a non-positive number that is
76     // the negation of an index into the replacement string.
77     int tag;
78     // The data value's interpretation depends on the value of tag:
79     // tag == SUBJECT_PREFIX ||
80     // tag == SUBJECT_SUFFIX:  data is unused.
81     // tag == SUBJECT_CAPTURE: data is the number of the capture.
82     // tag == REPLACEMENT_SUBSTRING ||
83     // tag == REPLACEMENT_STRING:    data is index into array of substrings
84     //                               of the replacement string.
85     // tag <= 0: Temporary representation of the substring of the replacement
86     //           string ranging over -tag .. data.
87     //           Is replaced by REPLACEMENT_{SUB,}STRING when we create the
88     //           substring objects.
89     int data;
90   };
91 
92   template <typename Char>
ParseReplacementPattern(ZoneList<ReplacementPart> * parts,Vector<Char> characters,int capture_count,int subject_length,Zone * zone)93   bool ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
94                                Vector<Char> characters, int capture_count,
95                                int subject_length, Zone* zone) {
96     int length = characters.length();
97     int last = 0;
98     for (int i = 0; i < length; i++) {
99       Char c = characters[i];
100       if (c == '$') {
101         int next_index = i + 1;
102         if (next_index == length) {  // No next character!
103           break;
104         }
105         Char c2 = characters[next_index];
106         switch (c2) {
107           case '$':
108             if (i > last) {
109               // There is a substring before. Include the first "$".
110               parts->Add(
111                   ReplacementPart::ReplacementSubString(last, next_index),
112                   zone);
113               last = next_index + 1;  // Continue after the second "$".
114             } else {
115               // Let the next substring start with the second "$".
116               last = next_index;
117             }
118             i = next_index;
119             break;
120           case '`':
121             if (i > last) {
122               parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
123             }
124             parts->Add(ReplacementPart::SubjectPrefix(), zone);
125             i = next_index;
126             last = i + 1;
127             break;
128           case '\'':
129             if (i > last) {
130               parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
131             }
132             parts->Add(ReplacementPart::SubjectSuffix(subject_length), zone);
133             i = next_index;
134             last = i + 1;
135             break;
136           case '&':
137             if (i > last) {
138               parts->Add(ReplacementPart::ReplacementSubString(last, i), zone);
139             }
140             parts->Add(ReplacementPart::SubjectMatch(), zone);
141             i = next_index;
142             last = i + 1;
143             break;
144           case '0':
145           case '1':
146           case '2':
147           case '3':
148           case '4':
149           case '5':
150           case '6':
151           case '7':
152           case '8':
153           case '9': {
154             int capture_ref = c2 - '0';
155             if (capture_ref > capture_count) {
156               i = next_index;
157               continue;
158             }
159             int second_digit_index = next_index + 1;
160             if (second_digit_index < length) {
161               // Peek ahead to see if we have two digits.
162               Char c3 = characters[second_digit_index];
163               if ('0' <= c3 && c3 <= '9') {  // Double digits.
164                 int double_digit_ref = capture_ref * 10 + c3 - '0';
165                 if (double_digit_ref <= capture_count) {
166                   next_index = second_digit_index;
167                   capture_ref = double_digit_ref;
168                 }
169               }
170             }
171             if (capture_ref > 0) {
172               if (i > last) {
173                 parts->Add(ReplacementPart::ReplacementSubString(last, i),
174                            zone);
175               }
176               DCHECK(capture_ref <= capture_count);
177               parts->Add(ReplacementPart::SubjectCapture(capture_ref), zone);
178               last = next_index + 1;
179             }
180             i = next_index;
181             break;
182           }
183           default:
184             i = next_index;
185             break;
186         }
187       }
188     }
189     if (length > last) {
190       if (last == 0) {
191         // Replacement is simple.  Do not use Apply to do the replacement.
192         return true;
193       } else {
194         parts->Add(ReplacementPart::ReplacementSubString(last, length), zone);
195       }
196     }
197     return false;
198   }
199 
200   ZoneList<ReplacementPart> parts_;
201   ZoneList<Handle<String> > replacement_substrings_;
202   Zone* zone_;
203 };
204 
205 
Compile(Handle<String> replacement,int capture_count,int subject_length)206 bool CompiledReplacement::Compile(Handle<String> replacement, int capture_count,
207                                   int subject_length) {
208   {
209     DisallowHeapAllocation no_gc;
210     String::FlatContent content = replacement->GetFlatContent();
211     DCHECK(content.IsFlat());
212     bool simple = false;
213     if (content.IsOneByte()) {
214       simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
215                                        capture_count, subject_length, zone());
216     } else {
217       DCHECK(content.IsTwoByte());
218       simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
219                                        capture_count, subject_length, zone());
220     }
221     if (simple) return true;
222   }
223 
224   Isolate* isolate = replacement->GetIsolate();
225   // Find substrings of replacement string and create them as String objects.
226   int substring_index = 0;
227   for (int i = 0, n = parts_.length(); i < n; i++) {
228     int tag = parts_[i].tag;
229     if (tag <= 0) {  // A replacement string slice.
230       int from = -tag;
231       int to = parts_[i].data;
232       replacement_substrings_.Add(
233           isolate->factory()->NewSubString(replacement, from, to), zone());
234       parts_[i].tag = REPLACEMENT_SUBSTRING;
235       parts_[i].data = substring_index;
236       substring_index++;
237     } else if (tag == REPLACEMENT_STRING) {
238       replacement_substrings_.Add(replacement, zone());
239       parts_[i].data = substring_index;
240       substring_index++;
241     }
242   }
243   return false;
244 }
245 
246 
Apply(ReplacementStringBuilder * builder,int match_from,int match_to,int32_t * match)247 void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
248                                 int match_from, int match_to, int32_t* match) {
249   DCHECK_LT(0, parts_.length());
250   for (int i = 0, n = parts_.length(); i < n; i++) {
251     ReplacementPart part = parts_[i];
252     switch (part.tag) {
253       case SUBJECT_PREFIX:
254         if (match_from > 0) builder->AddSubjectSlice(0, match_from);
255         break;
256       case SUBJECT_SUFFIX: {
257         int subject_length = part.data;
258         if (match_to < subject_length) {
259           builder->AddSubjectSlice(match_to, subject_length);
260         }
261         break;
262       }
263       case SUBJECT_CAPTURE: {
264         int capture = part.data;
265         int from = match[capture * 2];
266         int to = match[capture * 2 + 1];
267         if (from >= 0 && to > from) {
268           builder->AddSubjectSlice(from, to);
269         }
270         break;
271       }
272       case REPLACEMENT_SUBSTRING:
273       case REPLACEMENT_STRING:
274         builder->AddString(replacement_substrings_[part.data]);
275         break;
276       default:
277         UNREACHABLE();
278     }
279   }
280 }
281 
282 
FindOneByteStringIndices(Vector<const uint8_t> subject,uint8_t pattern,ZoneList<int> * indices,unsigned int limit,Zone * zone)283 void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern,
284                               ZoneList<int>* indices, unsigned int limit,
285                               Zone* zone) {
286   DCHECK(limit > 0);
287   // Collect indices of pattern in subject using memchr.
288   // Stop after finding at most limit values.
289   const uint8_t* subject_start = subject.start();
290   const uint8_t* subject_end = subject_start + subject.length();
291   const uint8_t* pos = subject_start;
292   while (limit > 0) {
293     pos = reinterpret_cast<const uint8_t*>(
294         memchr(pos, pattern, subject_end - pos));
295     if (pos == NULL) return;
296     indices->Add(static_cast<int>(pos - subject_start), zone);
297     pos++;
298     limit--;
299   }
300 }
301 
302 
FindTwoByteStringIndices(const Vector<const uc16> subject,uc16 pattern,ZoneList<int> * indices,unsigned int limit,Zone * zone)303 void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
304                               ZoneList<int>* indices, unsigned int limit,
305                               Zone* zone) {
306   DCHECK(limit > 0);
307   const uc16* subject_start = subject.start();
308   const uc16* subject_end = subject_start + subject.length();
309   for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
310     if (*pos == pattern) {
311       indices->Add(static_cast<int>(pos - subject_start), zone);
312       limit--;
313     }
314   }
315 }
316 
317 
318 template <typename SubjectChar, typename PatternChar>
FindStringIndices(Isolate * isolate,Vector<const SubjectChar> subject,Vector<const PatternChar> pattern,ZoneList<int> * indices,unsigned int limit,Zone * zone)319 void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
320                        Vector<const PatternChar> pattern,
321                        ZoneList<int>* indices, unsigned int limit, Zone* zone) {
322   DCHECK(limit > 0);
323   // Collect indices of pattern in subject.
324   // Stop after finding at most limit values.
325   int pattern_length = pattern.length();
326   int index = 0;
327   StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
328   while (limit > 0) {
329     index = search.Search(subject, index);
330     if (index < 0) return;
331     indices->Add(index, zone);
332     index += pattern_length;
333     limit--;
334   }
335 }
336 
337 
FindStringIndicesDispatch(Isolate * isolate,String * subject,String * pattern,ZoneList<int> * indices,unsigned int limit,Zone * zone)338 void FindStringIndicesDispatch(Isolate* isolate, String* subject,
339                                String* pattern, ZoneList<int>* indices,
340                                unsigned int limit, Zone* zone) {
341   {
342     DisallowHeapAllocation no_gc;
343     String::FlatContent subject_content = subject->GetFlatContent();
344     String::FlatContent pattern_content = pattern->GetFlatContent();
345     DCHECK(subject_content.IsFlat());
346     DCHECK(pattern_content.IsFlat());
347     if (subject_content.IsOneByte()) {
348       Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
349       if (pattern_content.IsOneByte()) {
350         Vector<const uint8_t> pattern_vector =
351             pattern_content.ToOneByteVector();
352         if (pattern_vector.length() == 1) {
353           FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
354                                    limit, zone);
355         } else {
356           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
357                             limit, zone);
358         }
359       } else {
360         FindStringIndices(isolate, subject_vector,
361                           pattern_content.ToUC16Vector(), indices, limit, zone);
362       }
363     } else {
364       Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
365       if (pattern_content.IsOneByte()) {
366         Vector<const uint8_t> pattern_vector =
367             pattern_content.ToOneByteVector();
368         if (pattern_vector.length() == 1) {
369           FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
370                                    limit, zone);
371         } else {
372           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
373                             limit, zone);
374         }
375       } else {
376         Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
377         if (pattern_vector.length() == 1) {
378           FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
379                                    limit, zone);
380         } else {
381           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
382                             limit, zone);
383         }
384       }
385     }
386   }
387 }
388 
389 
390 template <typename ResultSeqString>
StringReplaceGlobalAtomRegExpWithString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> pattern_regexp,Handle<String> replacement,Handle<JSArray> last_match_info)391 MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString(
392     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
393     Handle<String> replacement, Handle<JSArray> last_match_info) {
394   DCHECK(subject->IsFlat());
395   DCHECK(replacement->IsFlat());
396 
397   ZoneScope zone_scope(isolate->runtime_zone());
398   ZoneList<int> indices(8, zone_scope.zone());
399   DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
400   String* pattern =
401       String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
402   int subject_len = subject->length();
403   int pattern_len = pattern->length();
404   int replacement_len = replacement->length();
405 
406   FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff,
407                             zone_scope.zone());
408 
409   int matches = indices.length();
410   if (matches == 0) return *subject;
411 
412   // Detect integer overflow.
413   int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
414                            static_cast<int64_t>(pattern_len)) *
415                               static_cast<int64_t>(matches) +
416                           static_cast<int64_t>(subject_len);
417   int result_len;
418   if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
419     STATIC_ASSERT(String::kMaxLength < kMaxInt);
420     result_len = kMaxInt;  // Provoke exception.
421   } else {
422     result_len = static_cast<int>(result_len_64);
423   }
424 
425   int subject_pos = 0;
426   int result_pos = 0;
427 
428   MaybeHandle<SeqString> maybe_res;
429   if (ResultSeqString::kHasOneByteEncoding) {
430     maybe_res = isolate->factory()->NewRawOneByteString(result_len);
431   } else {
432     maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
433   }
434   Handle<SeqString> untyped_res;
435   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
436   Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
437 
438   for (int i = 0; i < matches; i++) {
439     // Copy non-matched subject content.
440     if (subject_pos < indices.at(i)) {
441       String::WriteToFlat(*subject, result->GetChars() + result_pos,
442                           subject_pos, indices.at(i));
443       result_pos += indices.at(i) - subject_pos;
444     }
445 
446     // Replace match.
447     if (replacement_len > 0) {
448       String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0,
449                           replacement_len);
450       result_pos += replacement_len;
451     }
452 
453     subject_pos = indices.at(i) + pattern_len;
454   }
455   // Add remaining subject content at the end.
456   if (subject_pos < subject_len) {
457     String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos,
458                         subject_len);
459   }
460 
461   int32_t match_indices[] = {indices.at(matches - 1),
462                              indices.at(matches - 1) + pattern_len};
463   RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices);
464 
465   return *result;
466 }
467 
468 
StringReplaceGlobalRegExpWithString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<String> replacement,Handle<JSArray> last_match_info)469 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString(
470     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
471     Handle<String> replacement, Handle<JSArray> last_match_info) {
472   DCHECK(subject->IsFlat());
473   DCHECK(replacement->IsFlat());
474 
475   int capture_count = regexp->CaptureCount();
476   int subject_length = subject->length();
477 
478   // CompiledReplacement uses zone allocation.
479   ZoneScope zone_scope(isolate->runtime_zone());
480   CompiledReplacement compiled_replacement(zone_scope.zone());
481   bool simple_replace =
482       compiled_replacement.Compile(replacement, capture_count, subject_length);
483 
484   // Shortcut for simple non-regexp global replacements
485   if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) {
486     if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
487       return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
488           isolate, subject, regexp, replacement, last_match_info);
489     } else {
490       return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
491           isolate, subject, regexp, replacement, last_match_info);
492     }
493   }
494 
495   RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
496   if (global_cache.HasException()) return isolate->heap()->exception();
497 
498   int32_t* current_match = global_cache.FetchNext();
499   if (current_match == NULL) {
500     if (global_cache.HasException()) return isolate->heap()->exception();
501     return *subject;
502   }
503 
504   // Guessing the number of parts that the final result string is built
505   // from. Global regexps can match any number of times, so we guess
506   // conservatively.
507   int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
508   ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
509 
510   // Number of parts added by compiled replacement plus preceeding
511   // string and possibly suffix after last match.  It is possible for
512   // all components to use two elements when encoded as two smis.
513   const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
514 
515   int prev = 0;
516 
517   do {
518     builder.EnsureCapacity(parts_added_per_loop);
519 
520     int start = current_match[0];
521     int end = current_match[1];
522 
523     if (prev < start) {
524       builder.AddSubjectSlice(prev, start);
525     }
526 
527     if (simple_replace) {
528       builder.AddString(replacement);
529     } else {
530       compiled_replacement.Apply(&builder, start, end, current_match);
531     }
532     prev = end;
533 
534     current_match = global_cache.FetchNext();
535   } while (current_match != NULL);
536 
537   if (global_cache.HasException()) return isolate->heap()->exception();
538 
539   if (prev < subject_length) {
540     builder.EnsureCapacity(2);
541     builder.AddSubjectSlice(prev, subject_length);
542   }
543 
544   RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
545                                global_cache.LastSuccessfulMatch());
546 
547   Handle<String> result;
548   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, result, builder.ToString());
549   return *result;
550 }
551 
552 
553 template <typename ResultSeqString>
StringReplaceGlobalRegExpWithEmptyString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<JSArray> last_match_info)554 MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString(
555     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
556     Handle<JSArray> last_match_info) {
557   DCHECK(subject->IsFlat());
558 
559   // Shortcut for simple non-regexp global replacements
560   if (regexp->TypeTag() == JSRegExp::ATOM) {
561     Handle<String> empty_string = isolate->factory()->empty_string();
562     if (subject->IsOneByteRepresentation()) {
563       return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
564           isolate, subject, regexp, empty_string, last_match_info);
565     } else {
566       return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
567           isolate, subject, regexp, empty_string, last_match_info);
568     }
569   }
570 
571   RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
572   if (global_cache.HasException()) return isolate->heap()->exception();
573 
574   int32_t* current_match = global_cache.FetchNext();
575   if (current_match == NULL) {
576     if (global_cache.HasException()) return isolate->heap()->exception();
577     return *subject;
578   }
579 
580   int start = current_match[0];
581   int end = current_match[1];
582   int capture_count = regexp->CaptureCount();
583   int subject_length = subject->length();
584 
585   int new_length = subject_length - (end - start);
586   if (new_length == 0) return isolate->heap()->empty_string();
587 
588   Handle<ResultSeqString> answer;
589   if (ResultSeqString::kHasOneByteEncoding) {
590     answer = Handle<ResultSeqString>::cast(
591         isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
592   } else {
593     answer = Handle<ResultSeqString>::cast(
594         isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
595   }
596 
597   int prev = 0;
598   int position = 0;
599 
600   do {
601     start = current_match[0];
602     end = current_match[1];
603     if (prev < start) {
604       // Add substring subject[prev;start] to answer string.
605       String::WriteToFlat(*subject, answer->GetChars() + position, prev, start);
606       position += start - prev;
607     }
608     prev = end;
609 
610     current_match = global_cache.FetchNext();
611   } while (current_match != NULL);
612 
613   if (global_cache.HasException()) return isolate->heap()->exception();
614 
615   RegExpImpl::SetLastMatchInfo(last_match_info, subject, capture_count,
616                                global_cache.LastSuccessfulMatch());
617 
618   if (prev < subject_length) {
619     // Add substring subject[prev;length] to answer string.
620     String::WriteToFlat(*subject, answer->GetChars() + position, prev,
621                         subject_length);
622     position += subject_length - prev;
623   }
624 
625   if (position == 0) return isolate->heap()->empty_string();
626 
627   // Shorten string and fill
628   int string_size = ResultSeqString::SizeFor(position);
629   int allocated_string_size = ResultSeqString::SizeFor(new_length);
630   int delta = allocated_string_size - string_size;
631 
632   answer->set_length(position);
633   if (delta == 0) return *answer;
634 
635   Address end_of_string = answer->address() + string_size;
636   Heap* heap = isolate->heap();
637 
638   // The trimming is performed on a newly allocated object, which is on a
639   // fresly allocated page or on an already swept page. Hence, the sweeper
640   // thread can not get confused with the filler creation. No synchronization
641   // needed.
642   // TODO(hpayer): We should shrink the large object page if the size
643   // of the object changed significantly.
644   if (!heap->lo_space()->Contains(*answer)) {
645     heap->CreateFillerObjectAt(end_of_string, delta);
646   }
647   heap->AdjustLiveBytes(*answer, -delta, Heap::CONCURRENT_TO_SWEEPER);
648   return *answer;
649 }
650 
651 
RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString)652 RUNTIME_FUNCTION(Runtime_StringReplaceGlobalRegExpWithString) {
653   HandleScope scope(isolate);
654   DCHECK(args.length() == 4);
655 
656   CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
657   CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
658   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
659   CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
660 
661   RUNTIME_ASSERT(regexp->GetFlags() & JSRegExp::kGlobal);
662   RUNTIME_ASSERT(last_match_info->HasFastObjectElements());
663 
664   subject = String::Flatten(subject);
665 
666   if (replacement->length() == 0) {
667     if (subject->HasOnlyOneByteChars()) {
668       return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
669           isolate, subject, regexp, last_match_info);
670     } else {
671       return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
672           isolate, subject, regexp, last_match_info);
673     }
674   }
675 
676   replacement = String::Flatten(replacement);
677 
678   return StringReplaceGlobalRegExpWithString(isolate, subject, regexp,
679                                              replacement, last_match_info);
680 }
681 
682 
RUNTIME_FUNCTION(Runtime_StringSplit)683 RUNTIME_FUNCTION(Runtime_StringSplit) {
684   HandleScope handle_scope(isolate);
685   DCHECK(args.length() == 3);
686   CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
687   CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
688   CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
689   RUNTIME_ASSERT(limit > 0);
690 
691   int subject_length = subject->length();
692   int pattern_length = pattern->length();
693   RUNTIME_ASSERT(pattern_length > 0);
694 
695   if (limit == 0xffffffffu) {
696     FixedArray* last_match_cache_unused;
697     Handle<Object> cached_answer(
698         RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
699                                    &last_match_cache_unused,
700                                    RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
701         isolate);
702     if (*cached_answer != Smi::FromInt(0)) {
703       // The cache FixedArray is a COW-array and can therefore be reused.
704       Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
705           Handle<FixedArray>::cast(cached_answer));
706       return *result;
707     }
708   }
709 
710   // The limit can be very large (0xffffffffu), but since the pattern
711   // isn't empty, we can never create more parts than ~half the length
712   // of the subject.
713 
714   subject = String::Flatten(subject);
715   pattern = String::Flatten(pattern);
716 
717   static const int kMaxInitialListCapacity = 16;
718 
719   ZoneScope zone_scope(isolate->runtime_zone());
720 
721   // Find (up to limit) indices of separator and end-of-string in subject
722   int initial_capacity = Min<uint32_t>(kMaxInitialListCapacity, limit);
723   ZoneList<int> indices(initial_capacity, zone_scope.zone());
724 
725   FindStringIndicesDispatch(isolate, *subject, *pattern, &indices, limit,
726                             zone_scope.zone());
727 
728   if (static_cast<uint32_t>(indices.length()) < limit) {
729     indices.Add(subject_length, zone_scope.zone());
730   }
731 
732   // The list indices now contains the end of each part to create.
733 
734   // Create JSArray of substrings separated by separator.
735   int part_count = indices.length();
736 
737   Handle<JSArray> result = isolate->factory()->NewJSArray(part_count);
738   JSObject::EnsureCanContainHeapObjectElements(result);
739   result->set_length(Smi::FromInt(part_count));
740 
741   DCHECK(result->HasFastObjectElements());
742 
743   Handle<FixedArray> elements(FixedArray::cast(result->elements()));
744 
745   if (part_count == 1 && indices.at(0) == subject_length) {
746     elements->set(0, *subject);
747   } else {
748     int part_start = 0;
749     for (int i = 0; i < part_count; i++) {
750       HandleScope local_loop_handle(isolate);
751       int part_end = indices.at(i);
752       Handle<String> substring =
753           isolate->factory()->NewProperSubString(subject, part_start, part_end);
754       elements->set(i, *substring);
755       part_start = part_end + pattern_length;
756     }
757   }
758 
759   if (limit == 0xffffffffu) {
760     if (result->HasFastObjectElements()) {
761       RegExpResultsCache::Enter(isolate, subject, pattern, elements,
762                                 isolate->factory()->empty_fixed_array(),
763                                 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
764     }
765   }
766 
767   return *result;
768 }
769 
770 
RUNTIME_FUNCTION(Runtime_RegExpExec)771 RUNTIME_FUNCTION(Runtime_RegExpExec) {
772   HandleScope scope(isolate);
773   DCHECK(args.length() == 4);
774   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
775   CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
776   CONVERT_INT32_ARG_CHECKED(index, 2);
777   CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3);
778   // Due to the way the JS calls are constructed this must be less than the
779   // length of a string, i.e. it is always a Smi.  We check anyway for security.
780   RUNTIME_ASSERT(index >= 0);
781   RUNTIME_ASSERT(index <= subject->length());
782   isolate->counters()->regexp_entry_runtime()->Increment();
783   Handle<Object> result;
784   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
785       isolate, result,
786       RegExpImpl::Exec(regexp, subject, index, last_match_info));
787   return *result;
788 }
789 
790 
RUNTIME_FUNCTION(Runtime_RegExpFlags)791 RUNTIME_FUNCTION(Runtime_RegExpFlags) {
792   SealHandleScope shs(isolate);
793   DCHECK(args.length() == 1);
794   CONVERT_ARG_CHECKED(JSRegExp, regexp, 0);
795   return regexp->flags();
796 }
797 
798 
RUNTIME_FUNCTION(Runtime_RegExpSource)799 RUNTIME_FUNCTION(Runtime_RegExpSource) {
800   SealHandleScope shs(isolate);
801   DCHECK(args.length() == 1);
802   CONVERT_ARG_CHECKED(JSRegExp, regexp, 0);
803   return regexp->source();
804 }
805 
806 
RUNTIME_FUNCTION(Runtime_RegExpConstructResult)807 RUNTIME_FUNCTION(Runtime_RegExpConstructResult) {
808   HandleScope handle_scope(isolate);
809   DCHECK(args.length() == 3);
810   CONVERT_SMI_ARG_CHECKED(size, 0);
811   RUNTIME_ASSERT(size >= 0 && size <= FixedArray::kMaxLength);
812   CONVERT_ARG_HANDLE_CHECKED(Object, index, 1);
813   CONVERT_ARG_HANDLE_CHECKED(Object, input, 2);
814   Handle<FixedArray> elements = isolate->factory()->NewFixedArray(size);
815   Handle<Map> regexp_map(isolate->native_context()->regexp_result_map());
816   Handle<JSObject> object =
817       isolate->factory()->NewJSObjectFromMap(regexp_map, NOT_TENURED);
818   Handle<JSArray> array = Handle<JSArray>::cast(object);
819   array->set_elements(*elements);
820   array->set_length(Smi::FromInt(size));
821   // Write in-object properties after the length of the array.
822   array->InObjectPropertyAtPut(JSRegExpResult::kIndexIndex, *index);
823   array->InObjectPropertyAtPut(JSRegExpResult::kInputIndex, *input);
824   return *array;
825 }
826 
827 
RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile)828 RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
829   HandleScope scope(isolate);
830   DCHECK(args.length() == 3);
831   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
832   CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
833   CONVERT_ARG_HANDLE_CHECKED(String, flags, 2);
834 
835   RETURN_FAILURE_ON_EXCEPTION(isolate,
836                               JSRegExp::Initialize(regexp, source, flags));
837 
838   return *regexp;
839 }
840 
841 
842 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
843 // separate last match info.  See comment on that function.
844 template <bool has_capture>
SearchRegExpMultiple(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<JSArray> last_match_array,Handle<JSArray> result_array)845 static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
846                                     Handle<JSRegExp> regexp,
847                                     Handle<JSArray> last_match_array,
848                                     Handle<JSArray> result_array) {
849   DCHECK(subject->IsFlat());
850   DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
851 
852   int capture_count = regexp->CaptureCount();
853   int subject_length = subject->length();
854 
855   static const int kMinLengthToCache = 0x1000;
856 
857   if (subject_length > kMinLengthToCache) {
858     FixedArray* last_match_cache;
859     Object* cached_answer = RegExpResultsCache::Lookup(
860         isolate->heap(), *subject, regexp->data(), &last_match_cache,
861         RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
862     if (cached_answer->IsFixedArray()) {
863       int capture_registers = (capture_count + 1) * 2;
864       int32_t* last_match = NewArray<int32_t>(capture_registers);
865       for (int i = 0; i < capture_registers; i++) {
866         last_match[i] = Smi::cast(last_match_cache->get(i))->value();
867       }
868       Handle<FixedArray> cached_fixed_array =
869           Handle<FixedArray>(FixedArray::cast(cached_answer));
870       // The cache FixedArray is a COW-array and can therefore be reused.
871       JSArray::SetContent(result_array, cached_fixed_array);
872       RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
873                                    last_match);
874       DeleteArray(last_match);
875       return *result_array;
876     }
877   }
878 
879   RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
880   if (global_cache.HasException()) return isolate->heap()->exception();
881 
882   // Ensured in Runtime_RegExpExecMultiple.
883   DCHECK(result_array->HasFastObjectElements());
884   Handle<FixedArray> result_elements(
885       FixedArray::cast(result_array->elements()));
886   if (result_elements->length() < 16) {
887     result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
888   }
889 
890   FixedArrayBuilder builder(result_elements);
891 
892   // Position to search from.
893   int match_start = -1;
894   int match_end = 0;
895   bool first = true;
896 
897   // Two smis before and after the match, for very long strings.
898   static const int kMaxBuilderEntriesPerRegExpMatch = 5;
899 
900   while (true) {
901     int32_t* current_match = global_cache.FetchNext();
902     if (current_match == NULL) break;
903     match_start = current_match[0];
904     builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
905     if (match_end < match_start) {
906       ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
907                                                 match_start);
908     }
909     match_end = current_match[1];
910     {
911       // Avoid accumulating new handles inside loop.
912       HandleScope temp_scope(isolate);
913       Handle<String> match;
914       if (!first) {
915         match = isolate->factory()->NewProperSubString(subject, match_start,
916                                                        match_end);
917       } else {
918         match =
919             isolate->factory()->NewSubString(subject, match_start, match_end);
920         first = false;
921       }
922 
923       if (has_capture) {
924         // Arguments array to replace function is match, captures, index and
925         // subject, i.e., 3 + capture count in total.
926         Handle<FixedArray> elements =
927             isolate->factory()->NewFixedArray(3 + capture_count);
928 
929         elements->set(0, *match);
930         for (int i = 1; i <= capture_count; i++) {
931           int start = current_match[i * 2];
932           if (start >= 0) {
933             int end = current_match[i * 2 + 1];
934             DCHECK(start <= end);
935             Handle<String> substring =
936                 isolate->factory()->NewSubString(subject, start, end);
937             elements->set(i, *substring);
938           } else {
939             DCHECK(current_match[i * 2 + 1] < 0);
940             elements->set(i, isolate->heap()->undefined_value());
941           }
942         }
943         elements->set(capture_count + 1, Smi::FromInt(match_start));
944         elements->set(capture_count + 2, *subject);
945         builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
946       } else {
947         builder.Add(*match);
948       }
949     }
950   }
951 
952   if (global_cache.HasException()) return isolate->heap()->exception();
953 
954   if (match_start >= 0) {
955     // Finished matching, with at least one match.
956     if (match_end < subject_length) {
957       ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
958                                                 subject_length);
959     }
960 
961     RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
962                                  global_cache.LastSuccessfulMatch());
963 
964     if (subject_length > kMinLengthToCache) {
965       // Store the last successful match into the array for caching.
966       // TODO(yangguo): do not expose last match to JS and simplify caching.
967       int capture_registers = (capture_count + 1) * 2;
968       Handle<FixedArray> last_match_cache =
969           isolate->factory()->NewFixedArray(capture_registers);
970       int32_t* last_match = global_cache.LastSuccessfulMatch();
971       for (int i = 0; i < capture_registers; i++) {
972         last_match_cache->set(i, Smi::FromInt(last_match[i]));
973       }
974       Handle<FixedArray> result_fixed_array = builder.array();
975       result_fixed_array->Shrink(builder.length());
976       // Cache the result and turn the FixedArray into a COW array.
977       RegExpResultsCache::Enter(
978           isolate, subject, handle(regexp->data(), isolate), result_fixed_array,
979           last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
980     }
981     return *builder.ToJSArray(result_array);
982   } else {
983     return isolate->heap()->null_value();  // No matches at all.
984   }
985 }
986 
987 
988 // This is only called for StringReplaceGlobalRegExpWithFunction.  This sets
989 // lastMatchInfoOverride to maintain the last match info, so we don't need to
990 // set any other last match array info.
RUNTIME_FUNCTION(Runtime_RegExpExecMultiple)991 RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
992   HandleScope handles(isolate);
993   DCHECK(args.length() == 4);
994 
995   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
996   CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
997   CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2);
998   CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
999   RUNTIME_ASSERT(last_match_info->HasFastObjectElements());
1000   RUNTIME_ASSERT(result_array->HasFastObjectElements());
1001 
1002   subject = String::Flatten(subject);
1003   RUNTIME_ASSERT(regexp->GetFlags() & JSRegExp::kGlobal);
1004 
1005   if (regexp->CaptureCount() == 0) {
1006     return SearchRegExpMultiple<false>(isolate, subject, regexp,
1007                                        last_match_info, result_array);
1008   } else {
1009     return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info,
1010                                       result_array);
1011   }
1012 }
1013 
1014 
RUNTIME_FUNCTION(Runtime_RegExpExecReThrow)1015 RUNTIME_FUNCTION(Runtime_RegExpExecReThrow) {
1016   SealHandleScope shs(isolate);
1017   DCHECK(args.length() == 4);
1018   Object* exception = isolate->pending_exception();
1019   isolate->clear_pending_exception();
1020   return isolate->ReThrow(exception);
1021 }
1022 
1023 
RUNTIME_FUNCTION(Runtime_IsRegExp)1024 RUNTIME_FUNCTION(Runtime_IsRegExp) {
1025   SealHandleScope shs(isolate);
1026   DCHECK(args.length() == 1);
1027   CONVERT_ARG_CHECKED(Object, obj, 0);
1028   return isolate->heap()->ToBoolean(obj->IsJSRegExp());
1029 }
1030 }  // namespace internal
1031 }  // namespace v8
1032