1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <functional>
6
7 #include "src/arguments-inl.h"
8 #include "src/conversions-inl.h"
9 #include "src/isolate-inl.h"
10 #include "src/messages.h"
11 #include "src/objects/js-array-inl.h"
12 #include "src/regexp/jsregexp-inl.h"
13 #include "src/regexp/jsregexp.h"
14 #include "src/regexp/regexp-utils.h"
15 #include "src/runtime/runtime-utils.h"
16 #include "src/string-builder-inl.h"
17 #include "src/string-search.h"
18 #include "src/zone/zone-chunk-list.h"
19
20 namespace v8 {
21 namespace internal {
22
23 namespace {
24
25 // Returns -1 for failure.
GetArgcForReplaceCallable(uint32_t num_captures,bool has_named_captures)26 uint32_t GetArgcForReplaceCallable(uint32_t num_captures,
27 bool has_named_captures) {
28 const uint32_t kAdditionalArgsWithoutNamedCaptures = 2;
29 const uint32_t kAdditionalArgsWithNamedCaptures = 3;
30 if (num_captures > Code::kMaxArguments) return -1;
31 uint32_t argc = has_named_captures
32 ? num_captures + kAdditionalArgsWithNamedCaptures
33 : num_captures + kAdditionalArgsWithoutNamedCaptures;
34 STATIC_ASSERT(Code::kMaxArguments < std::numeric_limits<uint32_t>::max() -
35 kAdditionalArgsWithNamedCaptures);
36 return (argc > Code::kMaxArguments) ? -1 : argc;
37 }
38
39 // Looks up the capture of the given name. Returns the (1-based) numbered
40 // capture index or -1 on failure.
LookupNamedCapture(std::function<bool (String *)> name_matches,FixedArray * capture_name_map)41 int LookupNamedCapture(std::function<bool(String*)> name_matches,
42 FixedArray* capture_name_map) {
43 // TODO(jgruber): Sort capture_name_map and do binary search via
44 // internalized strings.
45
46 int maybe_capture_index = -1;
47 const int named_capture_count = capture_name_map->length() >> 1;
48 for (int j = 0; j < named_capture_count; j++) {
49 // The format of {capture_name_map} is documented at
50 // JSRegExp::kIrregexpCaptureNameMapIndex.
51 const int name_ix = j * 2;
52 const int index_ix = j * 2 + 1;
53
54 String* capture_name = String::cast(capture_name_map->get(name_ix));
55 if (!name_matches(capture_name)) continue;
56
57 maybe_capture_index = Smi::ToInt(capture_name_map->get(index_ix));
58 break;
59 }
60
61 return maybe_capture_index;
62 }
63
64 } // namespace
65
66 class CompiledReplacement {
67 public:
CompiledReplacement(Zone * zone)68 explicit CompiledReplacement(Zone* zone)
69 : parts_(zone), replacement_substrings_(zone) {}
70
71 // Return whether the replacement is simple.
72 bool Compile(Isolate* isolate, Handle<JSRegExp> regexp,
73 Handle<String> replacement, int capture_count,
74 int subject_length);
75
76 // Use Apply only if Compile returned false.
77 void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
78 int32_t* match);
79
80 // Number of distinct parts of the replacement pattern.
parts()81 int parts() { return static_cast<int>(parts_.size()); }
82
83 private:
84 enum PartType {
85 SUBJECT_PREFIX = 1,
86 SUBJECT_SUFFIX,
87 SUBJECT_CAPTURE,
88 REPLACEMENT_SUBSTRING,
89 REPLACEMENT_STRING,
90 EMPTY_REPLACEMENT,
91 NUMBER_OF_PART_TYPES
92 };
93
94 struct ReplacementPart {
SubjectMatchv8::internal::CompiledReplacement::ReplacementPart95 static inline ReplacementPart SubjectMatch() {
96 return ReplacementPart(SUBJECT_CAPTURE, 0);
97 }
SubjectCapturev8::internal::CompiledReplacement::ReplacementPart98 static inline ReplacementPart SubjectCapture(int capture_index) {
99 return ReplacementPart(SUBJECT_CAPTURE, capture_index);
100 }
SubjectPrefixv8::internal::CompiledReplacement::ReplacementPart101 static inline ReplacementPart SubjectPrefix() {
102 return ReplacementPart(SUBJECT_PREFIX, 0);
103 }
SubjectSuffixv8::internal::CompiledReplacement::ReplacementPart104 static inline ReplacementPart SubjectSuffix(int subject_length) {
105 return ReplacementPart(SUBJECT_SUFFIX, subject_length);
106 }
ReplacementStringv8::internal::CompiledReplacement::ReplacementPart107 static inline ReplacementPart ReplacementString() {
108 return ReplacementPart(REPLACEMENT_STRING, 0);
109 }
EmptyReplacementv8::internal::CompiledReplacement::ReplacementPart110 static inline ReplacementPart EmptyReplacement() {
111 return ReplacementPart(EMPTY_REPLACEMENT, 0);
112 }
ReplacementSubStringv8::internal::CompiledReplacement::ReplacementPart113 static inline ReplacementPart ReplacementSubString(int from, int to) {
114 DCHECK_LE(0, from);
115 DCHECK_GT(to, from);
116 return ReplacementPart(-from, to);
117 }
118
119 // If tag <= 0 then it is the negation of a start index of a substring of
120 // the replacement pattern, otherwise it's a value from PartType.
ReplacementPartv8::internal::CompiledReplacement::ReplacementPart121 ReplacementPart(int tag, int data) : tag(tag), data(data) {
122 // Must be non-positive or a PartType value.
123 DCHECK(tag < NUMBER_OF_PART_TYPES);
124 }
125 // Either a value of PartType or a non-positive number that is
126 // the negation of an index into the replacement string.
127 int tag;
128 // The data value's interpretation depends on the value of tag:
129 // tag == SUBJECT_PREFIX ||
130 // tag == SUBJECT_SUFFIX: data is unused.
131 // tag == SUBJECT_CAPTURE: data is the number of the capture.
132 // tag == REPLACEMENT_SUBSTRING ||
133 // tag == REPLACEMENT_STRING: data is index into array of substrings
134 // of the replacement string.
135 // tag == EMPTY_REPLACEMENT: data is unused.
136 // tag <= 0: Temporary representation of the substring of the replacement
137 // string ranging over -tag .. data.
138 // Is replaced by REPLACEMENT_{SUB,}STRING when we create the
139 // substring objects.
140 int data;
141 };
142
143 template <typename Char>
ParseReplacementPattern(ZoneChunkList<ReplacementPart> * parts,Vector<Char> characters,FixedArray * capture_name_map,int capture_count,int subject_length)144 bool ParseReplacementPattern(ZoneChunkList<ReplacementPart>* parts,
145 Vector<Char> characters,
146 FixedArray* capture_name_map, int capture_count,
147 int subject_length) {
148 // Equivalent to String::GetSubstitution, except that this method converts
149 // the replacement string into an internal representation that avoids
150 // repeated parsing when used repeatedly.
151 int length = characters.length();
152 int last = 0;
153 for (int i = 0; i < length; i++) {
154 Char c = characters[i];
155 if (c == '$') {
156 int next_index = i + 1;
157 if (next_index == length) { // No next character!
158 break;
159 }
160 Char c2 = characters[next_index];
161 switch (c2) {
162 case '$':
163 if (i > last) {
164 // There is a substring before. Include the first "$".
165 parts->push_back(
166 ReplacementPart::ReplacementSubString(last, next_index));
167 last = next_index + 1; // Continue after the second "$".
168 } else {
169 // Let the next substring start with the second "$".
170 last = next_index;
171 }
172 i = next_index;
173 break;
174 case '`':
175 if (i > last) {
176 parts->push_back(ReplacementPart::ReplacementSubString(last, i));
177 }
178 parts->push_back(ReplacementPart::SubjectPrefix());
179 i = next_index;
180 last = i + 1;
181 break;
182 case '\'':
183 if (i > last) {
184 parts->push_back(ReplacementPart::ReplacementSubString(last, i));
185 }
186 parts->push_back(ReplacementPart::SubjectSuffix(subject_length));
187 i = next_index;
188 last = i + 1;
189 break;
190 case '&':
191 if (i > last) {
192 parts->push_back(ReplacementPart::ReplacementSubString(last, i));
193 }
194 parts->push_back(ReplacementPart::SubjectMatch());
195 i = next_index;
196 last = i + 1;
197 break;
198 case '0':
199 case '1':
200 case '2':
201 case '3':
202 case '4':
203 case '5':
204 case '6':
205 case '7':
206 case '8':
207 case '9': {
208 int capture_ref = c2 - '0';
209 if (capture_ref > capture_count) {
210 i = next_index;
211 continue;
212 }
213 int second_digit_index = next_index + 1;
214 if (second_digit_index < length) {
215 // Peek ahead to see if we have two digits.
216 Char c3 = characters[second_digit_index];
217 if ('0' <= c3 && c3 <= '9') { // Double digits.
218 int double_digit_ref = capture_ref * 10 + c3 - '0';
219 if (double_digit_ref <= capture_count) {
220 next_index = second_digit_index;
221 capture_ref = double_digit_ref;
222 }
223 }
224 }
225 if (capture_ref > 0) {
226 if (i > last) {
227 parts->push_back(
228 ReplacementPart::ReplacementSubString(last, i));
229 }
230 DCHECK(capture_ref <= capture_count);
231 parts->push_back(ReplacementPart::SubjectCapture(capture_ref));
232 last = next_index + 1;
233 }
234 i = next_index;
235 break;
236 }
237 case '<': {
238 if (capture_name_map == nullptr) {
239 i = next_index;
240 break;
241 }
242
243 // Scan until the next '>', and let the enclosed substring be the
244 // groupName.
245
246 const int name_start_index = next_index + 1;
247 int closing_bracket_index = -1;
248 for (int j = name_start_index; j < length; j++) {
249 if (characters[j] == '>') {
250 closing_bracket_index = j;
251 break;
252 }
253 }
254
255 // If no closing bracket is found, '$<' is treated as a string
256 // literal.
257 if (closing_bracket_index == -1) {
258 i = next_index;
259 break;
260 }
261
262 Vector<Char> requested_name =
263 characters.SubVector(name_start_index, closing_bracket_index);
264
265 // Let capture be ? Get(namedCaptures, groupName).
266
267 const int capture_index = LookupNamedCapture(
268 [=](String* capture_name) {
269 return capture_name->IsEqualTo(requested_name);
270 },
271 capture_name_map);
272
273 // If capture is undefined or does not exist, replace the text
274 // through the following '>' with the empty string.
275 // Otherwise, replace the text through the following '>' with
276 // ? ToString(capture).
277
278 DCHECK(capture_index == -1 ||
279 (1 <= capture_index && capture_index <= capture_count));
280
281 if (i > last) {
282 parts->push_back(ReplacementPart::ReplacementSubString(last, i));
283 }
284 parts->push_back(
285 (capture_index == -1)
286 ? ReplacementPart::EmptyReplacement()
287 : ReplacementPart::SubjectCapture(capture_index));
288 last = closing_bracket_index + 1;
289 i = closing_bracket_index;
290 break;
291 }
292 default:
293 i = next_index;
294 break;
295 }
296 }
297 }
298 if (length > last) {
299 if (last == 0) {
300 // Replacement is simple. Do not use Apply to do the replacement.
301 return true;
302 } else {
303 parts->push_back(ReplacementPart::ReplacementSubString(last, length));
304 }
305 }
306 return false;
307 }
308
309 ZoneChunkList<ReplacementPart> parts_;
310 ZoneVector<Handle<String>> replacement_substrings_;
311 };
312
Compile(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> replacement,int capture_count,int subject_length)313 bool CompiledReplacement::Compile(Isolate* isolate, Handle<JSRegExp> regexp,
314 Handle<String> replacement, int capture_count,
315 int subject_length) {
316 {
317 DisallowHeapAllocation no_gc;
318 String::FlatContent content = replacement->GetFlatContent();
319 DCHECK(content.IsFlat());
320
321 FixedArray* capture_name_map = nullptr;
322 if (capture_count > 0) {
323 DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
324 Object* maybe_capture_name_map = regexp->CaptureNameMap();
325 if (maybe_capture_name_map->IsFixedArray()) {
326 capture_name_map = FixedArray::cast(maybe_capture_name_map);
327 }
328 }
329
330 bool simple;
331 if (content.IsOneByte()) {
332 simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
333 capture_name_map, capture_count,
334 subject_length);
335 } else {
336 DCHECK(content.IsTwoByte());
337 simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
338 capture_name_map, capture_count,
339 subject_length);
340 }
341 if (simple) return true;
342 }
343
344 // Find substrings of replacement string and create them as String objects.
345 int substring_index = 0;
346 for (ReplacementPart& part : parts_) {
347 int tag = part.tag;
348 if (tag <= 0) { // A replacement string slice.
349 int from = -tag;
350 int to = part.data;
351 replacement_substrings_.push_back(
352 isolate->factory()->NewSubString(replacement, from, to));
353 part.tag = REPLACEMENT_SUBSTRING;
354 part.data = substring_index;
355 substring_index++;
356 } else if (tag == REPLACEMENT_STRING) {
357 replacement_substrings_.push_back(replacement);
358 part.data = substring_index;
359 substring_index++;
360 }
361 }
362 return false;
363 }
364
365
Apply(ReplacementStringBuilder * builder,int match_from,int match_to,int32_t * match)366 void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
367 int match_from, int match_to, int32_t* match) {
368 DCHECK_LT(0, parts_.size());
369 for (ReplacementPart& part : parts_) {
370 switch (part.tag) {
371 case SUBJECT_PREFIX:
372 if (match_from > 0) builder->AddSubjectSlice(0, match_from);
373 break;
374 case SUBJECT_SUFFIX: {
375 int subject_length = part.data;
376 if (match_to < subject_length) {
377 builder->AddSubjectSlice(match_to, subject_length);
378 }
379 break;
380 }
381 case SUBJECT_CAPTURE: {
382 int capture = part.data;
383 int from = match[capture * 2];
384 int to = match[capture * 2 + 1];
385 if (from >= 0 && to > from) {
386 builder->AddSubjectSlice(from, to);
387 }
388 break;
389 }
390 case REPLACEMENT_SUBSTRING:
391 case REPLACEMENT_STRING:
392 builder->AddString(replacement_substrings_[part.data]);
393 break;
394 case EMPTY_REPLACEMENT:
395 break;
396 default:
397 UNREACHABLE();
398 }
399 }
400 }
401
FindOneByteStringIndices(Vector<const uint8_t> subject,uint8_t pattern,std::vector<int> * indices,unsigned int limit)402 void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern,
403 std::vector<int>* indices, unsigned int limit) {
404 DCHECK_LT(0, limit);
405 // Collect indices of pattern in subject using memchr.
406 // Stop after finding at most limit values.
407 const uint8_t* subject_start = subject.start();
408 const uint8_t* subject_end = subject_start + subject.length();
409 const uint8_t* pos = subject_start;
410 while (limit > 0) {
411 pos = reinterpret_cast<const uint8_t*>(
412 memchr(pos, pattern, subject_end - pos));
413 if (pos == nullptr) return;
414 indices->push_back(static_cast<int>(pos - subject_start));
415 pos++;
416 limit--;
417 }
418 }
419
FindTwoByteStringIndices(const Vector<const uc16> subject,uc16 pattern,std::vector<int> * indices,unsigned int limit)420 void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
421 std::vector<int>* indices, unsigned int limit) {
422 DCHECK_LT(0, limit);
423 const uc16* subject_start = subject.start();
424 const uc16* subject_end = subject_start + subject.length();
425 for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
426 if (*pos == pattern) {
427 indices->push_back(static_cast<int>(pos - subject_start));
428 limit--;
429 }
430 }
431 }
432
433 template <typename SubjectChar, typename PatternChar>
FindStringIndices(Isolate * isolate,Vector<const SubjectChar> subject,Vector<const PatternChar> pattern,std::vector<int> * indices,unsigned int limit)434 void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
435 Vector<const PatternChar> pattern,
436 std::vector<int>* indices, unsigned int limit) {
437 DCHECK_LT(0, limit);
438 // Collect indices of pattern in subject.
439 // Stop after finding at most limit values.
440 int pattern_length = pattern.length();
441 int index = 0;
442 StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
443 while (limit > 0) {
444 index = search.Search(subject, index);
445 if (index < 0) return;
446 indices->push_back(index);
447 index += pattern_length;
448 limit--;
449 }
450 }
451
FindStringIndicesDispatch(Isolate * isolate,String * subject,String * pattern,std::vector<int> * indices,unsigned int limit)452 void FindStringIndicesDispatch(Isolate* isolate, String* subject,
453 String* pattern, std::vector<int>* indices,
454 unsigned int limit) {
455 {
456 DisallowHeapAllocation no_gc;
457 String::FlatContent subject_content = subject->GetFlatContent();
458 String::FlatContent pattern_content = pattern->GetFlatContent();
459 DCHECK(subject_content.IsFlat());
460 DCHECK(pattern_content.IsFlat());
461 if (subject_content.IsOneByte()) {
462 Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
463 if (pattern_content.IsOneByte()) {
464 Vector<const uint8_t> pattern_vector =
465 pattern_content.ToOneByteVector();
466 if (pattern_vector.length() == 1) {
467 FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
468 limit);
469 } else {
470 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
471 limit);
472 }
473 } else {
474 FindStringIndices(isolate, subject_vector,
475 pattern_content.ToUC16Vector(), indices, limit);
476 }
477 } else {
478 Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
479 if (pattern_content.IsOneByte()) {
480 Vector<const uint8_t> pattern_vector =
481 pattern_content.ToOneByteVector();
482 if (pattern_vector.length() == 1) {
483 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
484 limit);
485 } else {
486 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
487 limit);
488 }
489 } else {
490 Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
491 if (pattern_vector.length() == 1) {
492 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
493 limit);
494 } else {
495 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
496 limit);
497 }
498 }
499 }
500 }
501 }
502
503 namespace {
GetRewoundRegexpIndicesList(Isolate * isolate)504 std::vector<int>* GetRewoundRegexpIndicesList(Isolate* isolate) {
505 std::vector<int>* list = isolate->regexp_indices();
506 list->clear();
507 return list;
508 }
509
TruncateRegexpIndicesList(Isolate * isolate)510 void TruncateRegexpIndicesList(Isolate* isolate) {
511 // Same size as smallest zone segment, preserving behavior from the
512 // runtime zone.
513 static const int kMaxRegexpIndicesListCapacity = 8 * KB;
514 std::vector<int>* indicies = isolate->regexp_indices();
515 if (indicies->capacity() > kMaxRegexpIndicesListCapacity) {
516 // Throw away backing storage.
517 indicies->clear();
518 indicies->shrink_to_fit();
519 }
520 }
521 } // namespace
522
523 template <typename ResultSeqString>
StringReplaceGlobalAtomRegExpWithString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> pattern_regexp,Handle<String> replacement,Handle<RegExpMatchInfo> last_match_info)524 V8_WARN_UNUSED_RESULT static Object* StringReplaceGlobalAtomRegExpWithString(
525 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
526 Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
527 DCHECK(subject->IsFlat());
528 DCHECK(replacement->IsFlat());
529
530 std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
531
532 DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
533 String* pattern =
534 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
535 int subject_len = subject->length();
536 int pattern_len = pattern->length();
537 int replacement_len = replacement->length();
538
539 FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xFFFFFFFF);
540
541 if (indices->empty()) return *subject;
542
543 // Detect integer overflow.
544 int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
545 static_cast<int64_t>(pattern_len)) *
546 static_cast<int64_t>(indices->size()) +
547 static_cast<int64_t>(subject_len);
548 int result_len;
549 if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
550 STATIC_ASSERT(String::kMaxLength < kMaxInt);
551 result_len = kMaxInt; // Provoke exception.
552 } else {
553 result_len = static_cast<int>(result_len_64);
554 }
555 if (result_len == 0) {
556 return ReadOnlyRoots(isolate).empty_string();
557 }
558
559 int subject_pos = 0;
560 int result_pos = 0;
561
562 MaybeHandle<SeqString> maybe_res;
563 if (ResultSeqString::kHasOneByteEncoding) {
564 maybe_res = isolate->factory()->NewRawOneByteString(result_len);
565 } else {
566 maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
567 }
568 Handle<SeqString> untyped_res;
569 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
570 Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
571
572 for (int index : *indices) {
573 // Copy non-matched subject content.
574 if (subject_pos < index) {
575 String::WriteToFlat(*subject, result->GetChars() + result_pos,
576 subject_pos, index);
577 result_pos += index - subject_pos;
578 }
579
580 // Replace match.
581 if (replacement_len > 0) {
582 String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0,
583 replacement_len);
584 result_pos += replacement_len;
585 }
586
587 subject_pos = index + pattern_len;
588 }
589 // Add remaining subject content at the end.
590 if (subject_pos < subject_len) {
591 String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos,
592 subject_len);
593 }
594
595 int32_t match_indices[] = {indices->back(), indices->back() + pattern_len};
596 RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, 0,
597 match_indices);
598
599 TruncateRegexpIndicesList(isolate);
600
601 return *result;
602 }
603
StringReplaceGlobalRegExpWithString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<String> replacement,Handle<RegExpMatchInfo> last_match_info)604 V8_WARN_UNUSED_RESULT static Object* StringReplaceGlobalRegExpWithString(
605 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
606 Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
607 DCHECK(subject->IsFlat());
608 DCHECK(replacement->IsFlat());
609
610 int capture_count = regexp->CaptureCount();
611 int subject_length = subject->length();
612
613 JSRegExp::Type typeTag = regexp->TypeTag();
614 if (typeTag == JSRegExp::IRREGEXP) {
615 // Ensure the RegExp is compiled so we can access the capture-name map.
616 if (RegExpImpl::IrregexpPrepare(isolate, regexp, subject) == -1) {
617 DCHECK(isolate->has_pending_exception());
618 return ReadOnlyRoots(isolate).exception();
619 }
620 }
621
622 // CompiledReplacement uses zone allocation.
623 Zone zone(isolate->allocator(), ZONE_NAME);
624 CompiledReplacement compiled_replacement(&zone);
625 const bool simple_replace = compiled_replacement.Compile(
626 isolate, regexp, replacement, capture_count, subject_length);
627
628 // Shortcut for simple non-regexp global replacements
629 if (typeTag == JSRegExp::ATOM && simple_replace) {
630 if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
631 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
632 isolate, subject, regexp, replacement, last_match_info);
633 } else {
634 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
635 isolate, subject, regexp, replacement, last_match_info);
636 }
637 }
638
639 RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
640 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
641
642 int32_t* current_match = global_cache.FetchNext();
643 if (current_match == nullptr) {
644 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
645 return *subject;
646 }
647
648 // Guessing the number of parts that the final result string is built
649 // from. Global regexps can match any number of times, so we guess
650 // conservatively.
651 int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
652 ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
653
654 // Number of parts added by compiled replacement plus preceding
655 // string and possibly suffix after last match. It is possible for
656 // all components to use two elements when encoded as two smis.
657 const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
658
659 int prev = 0;
660
661 do {
662 builder.EnsureCapacity(parts_added_per_loop);
663
664 int start = current_match[0];
665 int end = current_match[1];
666
667 if (prev < start) {
668 builder.AddSubjectSlice(prev, start);
669 }
670
671 if (simple_replace) {
672 builder.AddString(replacement);
673 } else {
674 compiled_replacement.Apply(&builder, start, end, current_match);
675 }
676 prev = end;
677
678 current_match = global_cache.FetchNext();
679 } while (current_match != nullptr);
680
681 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
682
683 if (prev < subject_length) {
684 builder.EnsureCapacity(2);
685 builder.AddSubjectSlice(prev, subject_length);
686 }
687
688 RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
689 global_cache.LastSuccessfulMatch());
690
691 RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
692 }
693
694 template <typename ResultSeqString>
StringReplaceGlobalRegExpWithEmptyString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<RegExpMatchInfo> last_match_info)695 V8_WARN_UNUSED_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString(
696 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
697 Handle<RegExpMatchInfo> last_match_info) {
698 DCHECK(subject->IsFlat());
699
700 // Shortcut for simple non-regexp global replacements
701 if (regexp->TypeTag() == JSRegExp::ATOM) {
702 Handle<String> empty_string = isolate->factory()->empty_string();
703 if (subject->IsOneByteRepresentation()) {
704 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
705 isolate, subject, regexp, empty_string, last_match_info);
706 } else {
707 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
708 isolate, subject, regexp, empty_string, last_match_info);
709 }
710 }
711
712 RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
713 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
714
715 int32_t* current_match = global_cache.FetchNext();
716 if (current_match == nullptr) {
717 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
718 return *subject;
719 }
720
721 int start = current_match[0];
722 int end = current_match[1];
723 int capture_count = regexp->CaptureCount();
724 int subject_length = subject->length();
725
726 int new_length = subject_length - (end - start);
727 if (new_length == 0) return ReadOnlyRoots(isolate).empty_string();
728
729 Handle<ResultSeqString> answer;
730 if (ResultSeqString::kHasOneByteEncoding) {
731 answer = Handle<ResultSeqString>::cast(
732 isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
733 } else {
734 answer = Handle<ResultSeqString>::cast(
735 isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
736 }
737
738 int prev = 0;
739 int position = 0;
740
741 do {
742 start = current_match[0];
743 end = current_match[1];
744 if (prev < start) {
745 // Add substring subject[prev;start] to answer string.
746 String::WriteToFlat(*subject, answer->GetChars() + position, prev, start);
747 position += start - prev;
748 }
749 prev = end;
750
751 current_match = global_cache.FetchNext();
752 } while (current_match != nullptr);
753
754 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
755
756 RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
757 global_cache.LastSuccessfulMatch());
758
759 if (prev < subject_length) {
760 // Add substring subject[prev;length] to answer string.
761 String::WriteToFlat(*subject, answer->GetChars() + position, prev,
762 subject_length);
763 position += subject_length - prev;
764 }
765
766 if (position == 0) return ReadOnlyRoots(isolate).empty_string();
767
768 // Shorten string and fill
769 int string_size = ResultSeqString::SizeFor(position);
770 int allocated_string_size = ResultSeqString::SizeFor(new_length);
771 int delta = allocated_string_size - string_size;
772
773 answer->set_length(position);
774 if (delta == 0) return *answer;
775
776 Address end_of_string = answer->address() + string_size;
777 Heap* heap = isolate->heap();
778
779 // The trimming is performed on a newly allocated object, which is on a
780 // freshly allocated page or on an already swept page. Hence, the sweeper
781 // thread can not get confused with the filler creation. No synchronization
782 // needed.
783 // TODO(hpayer): We should shrink the large object page if the size
784 // of the object changed significantly.
785 if (!heap->lo_space()->Contains(*answer)) {
786 heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
787 }
788 return *answer;
789 }
790
791 namespace {
792
StringReplaceGlobalRegExpWithStringHelper(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> subject,Handle<String> replacement,Handle<RegExpMatchInfo> last_match_info)793 Object* StringReplaceGlobalRegExpWithStringHelper(
794 Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
795 Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
796 CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
797
798 subject = String::Flatten(isolate, subject);
799
800 if (replacement->length() == 0) {
801 if (subject->HasOnlyOneByteChars()) {
802 return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
803 isolate, subject, regexp, last_match_info);
804 } else {
805 return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
806 isolate, subject, regexp, last_match_info);
807 }
808 }
809
810 replacement = String::Flatten(isolate, replacement);
811
812 return StringReplaceGlobalRegExpWithString(isolate, subject, regexp,
813 replacement, last_match_info);
814 }
815
816 } // namespace
817
// Runtime entry: splits the string args[0] at every occurrence of the
// non-empty separator string args[1], producing at most args[2] (a positive
// uint32) parts, and returns them as a JSArray. Results of unlimited splits
// (limit 0xFFFFFFFF) are looked up in and entered into RegExpResultsCache.
RUNTIME_FUNCTION(Runtime_StringSplit) {
  HandleScope handle_scope(isolate);
  DCHECK_EQ(3, args.length());
  CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
  CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
  CHECK_LT(0, limit);

  const int subject_length = subject->length();
  const int pattern_length = pattern->length();
  CHECK_LT(0, pattern_length);

  const bool is_unlimited = (limit == 0xFFFFFFFFu);

  if (is_unlimited) {
    FixedArray* last_match_cache_unused;
    Handle<Object> cached_answer(
        RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
                                   &last_match_cache_unused,
                                   RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
        isolate);
    if (*cached_answer != Smi::kZero) {
      // The cache FixedArray is a COW-array and can therefore be reused.
      Handle<JSArray> cached_result =
          isolate->factory()->NewJSArrayWithElements(
              Handle<FixedArray>::cast(cached_answer));
      return *cached_result;
    }
  }

  // The limit can be very large (0xFFFFFFFFu), but since the pattern
  // isn't empty, we can never create more parts than ~half the length
  // of the subject.

  subject = String::Flatten(isolate, subject);
  pattern = String::Flatten(isolate, pattern);

  std::vector<int>* part_ends = GetRewoundRegexpIndicesList(isolate);

  FindStringIndicesDispatch(isolate, *subject, *pattern, part_ends, limit);

  // Unless the limit is exhausted, the text after the last separator forms
  // one more part, which ends at the end of the subject.
  if (static_cast<uint32_t>(part_ends->size()) < limit) {
    part_ends->push_back(subject_length);
  }

  // The list now contains the end of each part to create.

  // Create JSArray of substrings separated by separator.
  const int part_count = static_cast<int>(part_ends->size());

  Handle<JSArray> result =
      isolate->factory()->NewJSArray(PACKED_ELEMENTS, part_count, part_count,
                                     INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE);

  DCHECK(result->HasObjectElements());

  Handle<FixedArray> elements(FixedArray::cast(result->elements()), isolate);

  const bool separator_not_found =
      part_count == 1 && part_ends->at(0) == subject_length;
  if (separator_not_found) {
    // The single part is the subject itself.
    elements->set(0, *subject);
  } else {
    int part_start = 0;
    FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
      const int part_end = part_ends->at(i);
      Handle<String> part =
          isolate->factory()->NewProperSubString(subject, part_start, part_end);
      elements->set(i, *part);
      // Skip over the separator.
      part_start = part_end + pattern_length;
    });
  }

  if (is_unlimited && result->HasObjectElements()) {
    RegExpResultsCache::Enter(isolate, subject, pattern, elements,
                              isolate->factory()->empty_fixed_array(),
                              RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
  }

  TruncateRegexpIndicesList(isolate);

  return *result;
}
898
// Runtime entry point for executing |regexp| on |subject| starting at |index|.
// Arguments: (JSRegExp regexp, String subject, int32 index,
//             RegExpMatchInfo last_match_info).
// Returns the result of RegExpImpl::Exec, or the exception sentinel if that
// call fails.
RUNTIME_FUNCTION(Runtime_RegExpExec) {
  HandleScope scope(isolate);
  DCHECK_EQ(4, args.length());
  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
  CONVERT_INT32_ARG_CHECKED(index, 2);
  CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);

  // The JS call sites only ever pass an offset within the string (so it is
  // always a Smi); the bounds are verified anyway for security.
  CHECK_LE(0, index);
  CHECK_GE(subject->length(), index);

  isolate->counters()->regexp_entry_runtime()->Increment();

  MaybeHandle<Object> exec_result =
      RegExpImpl::Exec(isolate, regexp, subject, index, last_match_info);
  RETURN_RESULT_OR_FAILURE(isolate, exec_result);
}
914
// Runtime entry point that forwards (regexp, subject, replacement) to
// StringReplaceGlobalRegExpWithStringHelper, passing the isolate's internal
// match info as the match-info argument.
RUNTIME_FUNCTION(Runtime_RegExpInternalReplace) {
  HandleScope scope(isolate);
  DCHECK_EQ(3, args.length());
  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
  CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);

  return StringReplaceGlobalRegExpWithStringHelper(
      isolate, regexp, subject, replacement,
      isolate->regexp_internal_match_info());
}
928
929 namespace {
930
931 class MatchInfoBackedMatch : public String::Match {
932 public:
MatchInfoBackedMatch(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> subject,Handle<RegExpMatchInfo> match_info)933 MatchInfoBackedMatch(Isolate* isolate, Handle<JSRegExp> regexp,
934 Handle<String> subject,
935 Handle<RegExpMatchInfo> match_info)
936 : isolate_(isolate), match_info_(match_info) {
937 subject_ = String::Flatten(isolate, subject);
938
939 if (regexp->TypeTag() == JSRegExp::IRREGEXP) {
940 Object* o = regexp->CaptureNameMap();
941 has_named_captures_ = o->IsFixedArray();
942 if (has_named_captures_) {
943 capture_name_map_ = handle(FixedArray::cast(o), isolate);
944 }
945 } else {
946 has_named_captures_ = false;
947 }
948 }
949
GetMatch()950 Handle<String> GetMatch() override {
951 return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr);
952 }
953
GetPrefix()954 Handle<String> GetPrefix() override {
955 const int match_start = match_info_->Capture(0);
956 return isolate_->factory()->NewSubString(subject_, 0, match_start);
957 }
958
GetSuffix()959 Handle<String> GetSuffix() override {
960 const int match_end = match_info_->Capture(1);
961 return isolate_->factory()->NewSubString(subject_, match_end,
962 subject_->length());
963 }
964
HasNamedCaptures()965 bool HasNamedCaptures() override { return has_named_captures_; }
966
CaptureCount()967 int CaptureCount() override {
968 return match_info_->NumberOfCaptureRegisters() / 2;
969 }
970
GetCapture(int i,bool * capture_exists)971 MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
972 Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
973 isolate_, match_info_, i, capture_exists);
974 return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
975 : isolate_->factory()->empty_string();
976 }
977
GetNamedCapture(Handle<String> name,CaptureState * state)978 MaybeHandle<String> GetNamedCapture(Handle<String> name,
979 CaptureState* state) override {
980 DCHECK(has_named_captures_);
981 const int capture_index = LookupNamedCapture(
982 [=](String* capture_name) { return capture_name->Equals(*name); },
983 *capture_name_map_);
984
985 if (capture_index == -1) {
986 *state = INVALID;
987 return name; // Arbitrary string handle.
988 }
989
990 DCHECK(1 <= capture_index && capture_index <= CaptureCount());
991
992 bool capture_exists;
993 Handle<String> capture_value;
994 ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_value,
995 GetCapture(capture_index, &capture_exists),
996 String);
997
998 if (!capture_exists) {
999 *state = UNMATCHED;
1000 return isolate_->factory()->empty_string();
1001 } else {
1002 *state = MATCHED;
1003 return capture_value;
1004 }
1005 }
1006
1007 private:
1008 Isolate* isolate_;
1009 Handle<String> subject_;
1010 Handle<RegExpMatchInfo> match_info_;
1011
1012 bool has_named_captures_;
1013 Handle<FixedArray> capture_name_map_;
1014 };
1015
1016 class VectorBackedMatch : public String::Match {
1017 public:
VectorBackedMatch(Isolate * isolate,Handle<String> subject,Handle<String> match,int match_position,ZoneVector<Handle<Object>> * captures,Handle<Object> groups_obj)1018 VectorBackedMatch(Isolate* isolate, Handle<String> subject,
1019 Handle<String> match, int match_position,
1020 ZoneVector<Handle<Object>>* captures,
1021 Handle<Object> groups_obj)
1022 : isolate_(isolate),
1023 match_(match),
1024 match_position_(match_position),
1025 captures_(captures) {
1026 subject_ = String::Flatten(isolate, subject);
1027
1028 DCHECK(groups_obj->IsUndefined(isolate) || groups_obj->IsJSReceiver());
1029 has_named_captures_ = !groups_obj->IsUndefined(isolate);
1030 if (has_named_captures_) groups_obj_ = Handle<JSReceiver>::cast(groups_obj);
1031 }
1032
GetMatch()1033 Handle<String> GetMatch() override { return match_; }
1034
GetPrefix()1035 Handle<String> GetPrefix() override {
1036 return isolate_->factory()->NewSubString(subject_, 0, match_position_);
1037 }
1038
GetSuffix()1039 Handle<String> GetSuffix() override {
1040 const int match_end_position = match_position_ + match_->length();
1041 return isolate_->factory()->NewSubString(subject_, match_end_position,
1042 subject_->length());
1043 }
1044
HasNamedCaptures()1045 bool HasNamedCaptures() override { return has_named_captures_; }
1046
CaptureCount()1047 int CaptureCount() override { return static_cast<int>(captures_->size()); }
1048
GetCapture(int i,bool * capture_exists)1049 MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
1050 Handle<Object> capture_obj = captures_->at(i);
1051 if (capture_obj->IsUndefined(isolate_)) {
1052 *capture_exists = false;
1053 return isolate_->factory()->empty_string();
1054 }
1055 *capture_exists = true;
1056 return Object::ToString(isolate_, capture_obj);
1057 }
1058
GetNamedCapture(Handle<String> name,CaptureState * state)1059 MaybeHandle<String> GetNamedCapture(Handle<String> name,
1060 CaptureState* state) override {
1061 DCHECK(has_named_captures_);
1062
1063 Maybe<bool> maybe_capture_exists =
1064 JSReceiver::HasProperty(groups_obj_, name);
1065 if (maybe_capture_exists.IsNothing()) return MaybeHandle<String>();
1066
1067 if (!maybe_capture_exists.FromJust()) {
1068 *state = INVALID;
1069 return name; // Arbitrary string handle.
1070 }
1071
1072 Handle<Object> capture_obj;
1073 ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_obj,
1074 Object::GetProperty(isolate_, groups_obj_, name),
1075 String);
1076 if (capture_obj->IsUndefined(isolate_)) {
1077 *state = UNMATCHED;
1078 return isolate_->factory()->empty_string();
1079 } else {
1080 *state = MATCHED;
1081 return Object::ToString(isolate_, capture_obj);
1082 }
1083 }
1084
1085 private:
1086 Isolate* isolate_;
1087 Handle<String> subject_;
1088 Handle<String> match_;
1089 const int match_position_;
1090 ZoneVector<Handle<Object>>* captures_;
1091
1092 bool has_named_captures_;
1093 Handle<JSReceiver> groups_obj_;
1094 };
1095
1096 // Create the groups object (see also the RegExp result creation in
1097 // RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo).
ConstructNamedCaptureGroupsObject(Isolate * isolate,Handle<FixedArray> capture_map,std::function<Object * (int)> f_get_capture)1098 Handle<JSObject> ConstructNamedCaptureGroupsObject(
1099 Isolate* isolate, Handle<FixedArray> capture_map,
1100 std::function<Object*(int)> f_get_capture) {
1101 Handle<JSObject> groups = isolate->factory()->NewJSObjectWithNullProto();
1102
1103 const int capture_count = capture_map->length() >> 1;
1104 for (int i = 0; i < capture_count; i++) {
1105 const int name_ix = i * 2;
1106 const int index_ix = i * 2 + 1;
1107
1108 Handle<String> capture_name(String::cast(capture_map->get(name_ix)),
1109 isolate);
1110 const int capture_ix = Smi::ToInt(capture_map->get(index_ix));
1111 DCHECK(1 <= capture_ix && capture_ix <= capture_count);
1112
1113 Handle<Object> capture_value(f_get_capture(capture_ix), isolate);
1114 DCHECK(capture_value->IsUndefined(isolate) || capture_value->IsString());
1115
1116 JSObject::AddProperty(isolate, groups, capture_name, capture_value, NONE);
1117 }
1118
1119 return groups;
1120 }
1121
// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
// separate last match info.  See comment on that function.
//
// Runs the global |regexp| over the flat |subject| and collects the results
// into |result_array|. |has_capture| must be true exactly when the regexp has
// at least one capture group.
//
// For each match the builder receives either the matched string (no captures)
// or a JSArray holding [match, capture_1..capture_n, match_start, subject] plus
// a trailing groups object if the regexp has named captures. Non-empty subject
// ranges between matches (and after the last one) are added as subject slices.
//
// Returns |result_array| on success, null if there was no match at all, or the
// exception sentinel if the global cache reports an exception.
template <bool has_capture>
static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
                                    Handle<JSRegExp> regexp,
                                    Handle<RegExpMatchInfo> last_match_array,
                                    Handle<JSArray> result_array) {
  DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
  DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
  DCHECK(subject->IsFlat());

  int capture_count = regexp->CaptureCount();
  int subject_length = subject->length();

  // Results are only looked up in / entered into the results cache for
  // subjects longer than this.
  static const int kMinLengthToCache = 0x1000;

  if (subject_length > kMinLengthToCache) {
    FixedArray* last_match_cache;
    Object* cached_answer = RegExpResultsCache::Lookup(
        isolate->heap(), *subject, regexp->data(), &last_match_cache,
        RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
    if (cached_answer->IsFixedArray()) {
      // Cache hit: copy the cached last-match registers out of the heap array
      // into a temporary native buffer so they can be passed to
      // SetLastMatchInfo below.
      int capture_registers = (capture_count + 1) * 2;
      int32_t* last_match = NewArray<int32_t>(capture_registers);
      for (int i = 0; i < capture_registers; i++) {
        last_match[i] = Smi::ToInt(last_match_cache->get(i));
      }
      Handle<FixedArray> cached_fixed_array =
          Handle<FixedArray>(FixedArray::cast(cached_answer), isolate);
      // The cache FixedArray is a COW-array and we need to return a copy.
      Handle<FixedArray> copied_fixed_array =
          isolate->factory()->CopyFixedArrayWithMap(
              cached_fixed_array, isolate->factory()->fixed_array_map());
      JSArray::SetContent(result_array, copied_fixed_array);
      RegExpImpl::SetLastMatchInfo(isolate, last_match_array, subject,
                                   capture_count, last_match);
      DeleteArray(last_match);
      return *result_array;
    }
  }

  RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();

  // Ensured in Runtime_RegExpExecMultiple.
  DCHECK(result_array->HasObjectElements());
  Handle<FixedArray> result_elements(FixedArray::cast(result_array->elements()),
                                     isolate);
  // Start from a backing store of at least 16 slots.
  if (result_elements->length() < 16) {
    result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
  }

  FixedArrayBuilder builder(result_elements);

  // Position to search from.
  // |match_start| stays at -1 until the first match is found; this is what the
  // "no matches at all" check after the loop relies on.
  int match_start = -1;
  int match_end = 0;
  bool first = true;

  // Two smis before and after the match, for very long strings.
  static const int kMaxBuilderEntriesPerRegExpMatch = 5;

  while (true) {
    int32_t* current_match = global_cache.FetchNext();
    if (current_match == nullptr) break;
    match_start = current_match[0];
    builder.EnsureCapacity(isolate, kMaxBuilderEntriesPerRegExpMatch);
    // Record the unmatched text between the previous match and this one.
    if (match_end < match_start) {
      ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
                                                match_start);
    }
    match_end = current_match[1];
    {
      // Avoid accumulating new handles inside loop.
      HandleScope temp_scope(isolate);
      Handle<String> match;
      // Only the first match goes through NewSubString; all later matches use
      // NewProperSubString.
      if (!first) {
        match = isolate->factory()->NewProperSubString(subject, match_start,
                                                       match_end);
      } else {
        match =
            isolate->factory()->NewSubString(subject, match_start, match_end);
        first = false;
      }

      if (has_capture) {
        // Arguments array to replace function is match, captures, index and
        // subject, i.e., 3 + capture count in total. If the RegExp contains
        // named captures, they are also passed as the last argument.

        Handle<Object> maybe_capture_map(regexp->CaptureNameMap(), isolate);
        const bool has_named_captures = maybe_capture_map->IsFixedArray();

        const int argc =
            has_named_captures ? 4 + capture_count : 3 + capture_count;

        Handle<FixedArray> elements = isolate->factory()->NewFixedArray(argc);
        int cursor = 0;

        elements->set(cursor++, *match);
        for (int i = 1; i <= capture_count; i++) {
          // A negative start register marks a capture that did not
          // participate in the match; it is stored as undefined.
          int start = current_match[i * 2];
          if (start >= 0) {
            int end = current_match[i * 2 + 1];
            DCHECK(start <= end);
            Handle<String> substring =
                isolate->factory()->NewSubString(subject, start, end);
            elements->set(cursor++, *substring);
          } else {
            DCHECK_GT(0, current_match[i * 2 + 1]);
            elements->set(cursor++, ReadOnlyRoots(isolate).undefined_value());
          }
        }

        elements->set(cursor++, Smi::FromInt(match_start));
        elements->set(cursor++, *subject);

        if (has_named_captures) {
          Handle<FixedArray> capture_map =
              Handle<FixedArray>::cast(maybe_capture_map);
          // Capture i was stored at elements[i] above, so the capture index
          // can be used directly as the element index.
          Handle<JSObject> groups = ConstructNamedCaptureGroupsObject(
              isolate, capture_map, [=](int ix) { return elements->get(ix); });
          elements->set(cursor++, *groups);
        }

        DCHECK_EQ(cursor, argc);
        builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
      } else {
        builder.Add(*match);
      }
    }
  }

  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();

  if (match_start >= 0) {
    // Finished matching, with at least one match.
    if (match_end < subject_length) {
      ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
                                                subject_length);
    }

    RegExpImpl::SetLastMatchInfo(isolate, last_match_array, subject,
                                 capture_count,
                                 global_cache.LastSuccessfulMatch());

    if (subject_length > kMinLengthToCache) {
      // Store the last successful match into the array for caching.
      // TODO(yangguo): do not expose last match to JS and simplify caching.
      int capture_registers = (capture_count + 1) * 2;
      Handle<FixedArray> last_match_cache =
          isolate->factory()->NewFixedArray(capture_registers);
      int32_t* last_match = global_cache.LastSuccessfulMatch();
      for (int i = 0; i < capture_registers; i++) {
        last_match_cache->set(i, Smi::FromInt(last_match[i]));
      }
      Handle<FixedArray> result_fixed_array =
          FixedArray::ShrinkOrEmpty(isolate, builder.array(), builder.length());
      // Cache the result and copy the FixedArray into a COW array.
      Handle<FixedArray> copied_fixed_array =
          isolate->factory()->CopyFixedArrayWithMap(
              result_fixed_array, isolate->factory()->fixed_array_map());
      RegExpResultsCache::Enter(
          isolate, subject, handle(regexp->data(), isolate), copied_fixed_array,
          last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
    }
    return *builder.ToJSArray(result_array);
  } else {
    return ReadOnlyRoots(isolate).null_value();  // No matches at all.
  }
}
1293
1294 // Legacy implementation of RegExp.prototype[Symbol.replace] which
1295 // doesn't properly call the underlying exec method.
RegExpReplace(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> string,Handle<Object> replace_obj)1296 V8_WARN_UNUSED_RESULT MaybeHandle<String> RegExpReplace(
1297 Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> string,
1298 Handle<Object> replace_obj) {
1299 // Functional fast-paths are dispatched directly by replace builtin.
1300 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1301 DCHECK(!replace_obj->IsCallable());
1302
1303 Factory* factory = isolate->factory();
1304
1305 const int flags = regexp->GetFlags();
1306 const bool global = (flags & JSRegExp::kGlobal) != 0;
1307 const bool sticky = (flags & JSRegExp::kSticky) != 0;
1308
1309 Handle<String> replace;
1310 ASSIGN_RETURN_ON_EXCEPTION(isolate, replace,
1311 Object::ToString(isolate, replace_obj), String);
1312 replace = String::Flatten(isolate, replace);
1313
1314 Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1315
1316 if (!global) {
1317 // Non-global regexp search, string replace.
1318
1319 uint32_t last_index = 0;
1320 if (sticky) {
1321 Handle<Object> last_index_obj(regexp->last_index(), isolate);
1322 ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
1323 Object::ToLength(isolate, last_index_obj),
1324 String);
1325 last_index = PositiveNumberToUint32(*last_index_obj);
1326 }
1327
1328 Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
1329 isolate);
1330
1331 // A lastIndex exceeding the string length always always returns null
1332 // (signalling failure) in RegExpBuiltinExec, thus we can skip the call.
1333 if (last_index <= static_cast<uint32_t>(string->length())) {
1334 ASSIGN_RETURN_ON_EXCEPTION(isolate, match_indices_obj,
1335 RegExpImpl::Exec(isolate, regexp, string,
1336 last_index, last_match_info),
1337 String);
1338 }
1339
1340 if (match_indices_obj->IsNull(isolate)) {
1341 if (sticky) regexp->set_last_index(Smi::kZero, SKIP_WRITE_BARRIER);
1342 return string;
1343 }
1344
1345 auto match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj);
1346
1347 const int start_index = match_indices->Capture(0);
1348 const int end_index = match_indices->Capture(1);
1349
1350 if (sticky)
1351 regexp->set_last_index(Smi::FromInt(end_index), SKIP_WRITE_BARRIER);
1352
1353 IncrementalStringBuilder builder(isolate);
1354 builder.AppendString(factory->NewSubString(string, 0, start_index));
1355
1356 if (replace->length() > 0) {
1357 MatchInfoBackedMatch m(isolate, regexp, string, match_indices);
1358 Handle<String> replacement;
1359 ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
1360 String::GetSubstitution(isolate, &m, replace),
1361 String);
1362 builder.AppendString(replacement);
1363 }
1364
1365 builder.AppendString(
1366 factory->NewSubString(string, end_index, string->length()));
1367 return builder.Finish();
1368 } else {
1369 // Global regexp search, string replace.
1370 DCHECK(global);
1371 RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0),
1372 String);
1373
1374 if (replace->length() == 0) {
1375 if (string->HasOnlyOneByteChars()) {
1376 Object* result =
1377 StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
1378 isolate, string, regexp, last_match_info);
1379 return handle(String::cast(result), isolate);
1380 } else {
1381 Object* result =
1382 StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
1383 isolate, string, regexp, last_match_info);
1384 return handle(String::cast(result), isolate);
1385 }
1386 }
1387
1388 Object* result = StringReplaceGlobalRegExpWithString(
1389 isolate, string, regexp, replace, last_match_info);
1390 if (result->IsString()) {
1391 return handle(String::cast(result), isolate);
1392 } else {
1393 return MaybeHandle<String>();
1394 }
1395 }
1396
1397 UNREACHABLE();
1398 }
1399
1400 } // namespace
1401
1402 // This is only called for StringReplaceGlobalRegExpWithFunction.
RUNTIME_FUNCTION(Runtime_RegExpExecMultiple)1403 RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
1404 HandleScope handles(isolate);
1405 DCHECK_EQ(4, args.length());
1406
1407 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1408 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
1409 CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 2);
1410 CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
1411 CHECK(result_array->HasObjectElements());
1412
1413 subject = String::Flatten(isolate, subject);
1414 CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
1415
1416 if (regexp->CaptureCount() == 0) {
1417 return SearchRegExpMultiple<false>(isolate, subject, regexp,
1418 last_match_info, result_array);
1419 } else {
1420 return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info,
1421 result_array);
1422 }
1423 }
1424
// Replaces the first match of a non-global, unmodified |regexp| in |subject|
// with the result of calling |replace_obj|.
//
// Arguments: (String subject, JSRegExp regexp, JSReceiver replace_obj), where
// replace_obj must be callable. The callable receives the match, each capture
// (undefined for captures that did not participate), the match index, the
// subject, and - if the regexp has named captures - a groups object.
//
// Returns the new string, |subject| itself if there is no match, or the
// exception sentinel on failure. For sticky regexps, lastIndex is updated to
// the end of the match, or reset to 0 if there is no match.
RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
  HandleScope scope(isolate);
  DCHECK_EQ(3, args.length());
  CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSReceiver, replace_obj, 2);

  DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
  DCHECK(replace_obj->map()->is_callable());

  Factory* factory = isolate->factory();
  Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();

  const int flags = regexp->GetFlags();
  DCHECK_EQ(flags & JSRegExp::kGlobal, 0);

  // TODO(jgruber): This should be an easy port to CSA with massive payback.

  // Only sticky regexps honor lastIndex here; otherwise matching starts at 0.
  const bool sticky = (flags & JSRegExp::kSticky) != 0;
  uint32_t last_index = 0;
  if (sticky) {
    Handle<Object> last_index_obj(regexp->last_index(), isolate);
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
    last_index = PositiveNumberToUint32(*last_index_obj);
  }

  Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
                                   isolate);

  // A lastIndex exceeding the string length always returns null (signalling
  // failure) in RegExpBuiltinExec, thus we can skip the call. In particular
  // we must not retry the match from index 0, which would let a sticky
  // regexp match at a position other than its lastIndex.
  if (last_index <= static_cast<uint32_t>(subject->length())) {
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, match_indices_obj,
        RegExpImpl::Exec(isolate, regexp, subject, last_index,
                         last_match_info));
  }

  if (match_indices_obj->IsNull(isolate)) {
    if (sticky) regexp->set_last_index(Smi::kZero, SKIP_WRITE_BARRIER);
    return *subject;
  }

  Handle<RegExpMatchInfo> match_indices =
      Handle<RegExpMatchInfo>::cast(match_indices_obj);

  const int index = match_indices->Capture(0);
  const int end_of_match = match_indices->Capture(1);

  if (sticky)
    regexp->set_last_index(Smi::FromInt(end_of_match), SKIP_WRITE_BARRIER);

  IncrementalStringBuilder builder(isolate);
  builder.AppendString(factory->NewSubString(subject, 0, index));

  // Compute the parameter list consisting of the match, captures, index,
  // and subject for the replace function invocation. If the RegExp contains
  // named captures, they are also passed as the last argument.

  // The number of captures plus one for the match.
  const int m = match_indices->NumberOfCaptureRegisters() / 2;

  bool has_named_captures = false;
  Handle<FixedArray> capture_map;
  if (m > 1) {
    // The existence of capture groups implies IRREGEXP kind.
    DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);

    Object* maybe_capture_map = regexp->CaptureNameMap();
    if (maybe_capture_map->IsFixedArray()) {
      has_named_captures = true;
      capture_map = handle(FixedArray::cast(maybe_capture_map), isolate);
    }
  }

  // GetArgcForReplaceCallable signals "too many arguments" with -1.
  const uint32_t argc = GetArgcForReplaceCallable(m, has_named_captures);
  if (argc == static_cast<uint32_t>(-1)) {
    THROW_NEW_ERROR_RETURN_FAILURE(
        isolate, NewRangeError(MessageTemplate::kTooManyArguments));
  }
  ScopedVector<Handle<Object>> argv(argc);

  int cursor = 0;
  for (int j = 0; j < m; j++) {
    bool ok;
    Handle<String> capture =
        RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok);
    if (ok) {
      argv[cursor++] = capture;
    } else {
      argv[cursor++] = factory->undefined_value();
    }
  }

  argv[cursor++] = handle(Smi::FromInt(index), isolate);
  argv[cursor++] = subject;

  if (has_named_captures) {
    // Capture j was stored at argv[j] above, so the capture index doubles as
    // the argv index.
    argv[cursor++] = ConstructNamedCaptureGroupsObject(
        isolate, capture_map, [&argv](int ix) { return *argv[ix]; });
  }

  DCHECK_EQ(cursor, argc);

  Handle<Object> replacement_obj;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, replacement_obj,
      Execution::Call(isolate, replace_obj, factory->undefined_value(), argc,
                      argv.start()));

  Handle<String> replacement;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, replacement, Object::ToString(isolate, replacement_obj));

  builder.AppendString(replacement);
  builder.AppendString(
      factory->NewSubString(subject, end_of_match, subject->length()));

  RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
}
1541
1542 namespace {
1543
ToUint32(Isolate * isolate,Handle<Object> object,uint32_t * out)1544 V8_WARN_UNUSED_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate,
1545 Handle<Object> object,
1546 uint32_t* out) {
1547 if (object->IsUndefined(isolate)) {
1548 *out = kMaxUInt32;
1549 return object;
1550 }
1551
1552 Handle<Object> number;
1553 ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(isolate, object),
1554 Object);
1555 *out = NumberToUint32(*number);
1556 return object;
1557 }
1558
NewJSArrayWithElements(Isolate * isolate,Handle<FixedArray> elems,int num_elems)1559 Handle<JSArray> NewJSArrayWithElements(Isolate* isolate,
1560 Handle<FixedArray> elems,
1561 int num_elems) {
1562 return isolate->factory()->NewJSArrayWithElements(
1563 FixedArray::ShrinkOrEmpty(isolate, elems, num_elems));
1564 }
1565
1566 } // namespace
1567
// Slow path for:
// ES#sec-regexp.prototype-@@split
// RegExp.prototype [ @@split ] ( string, limit )
// Arguments: (JSReceiver recv, String string, Object limit_obj).
// Builds a sticky "splitter" from |recv| via its species constructor, then
// walks |string| collecting the pieces between matches (plus any captures)
// until the end of the string or |limit| elements are reached.
// Returns the resulting JSArray, or the exception sentinel on failure.
RUNTIME_FUNCTION(Runtime_RegExpSplit) {
  HandleScope scope(isolate);
  DCHECK_EQ(3, args.length());

  DCHECK(args[1]->IsString());

  CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
  CONVERT_ARG_HANDLE_CHECKED(Object, limit_obj, 2);

  Factory* factory = isolate->factory();

  // Constructor used to create the splitter; defaults to the RegExp function.
  Handle<JSFunction> default_ctor = isolate->regexp_function();
  Handle<Object> ctor;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, ctor, Object::SpeciesConstructor(isolate, recv, default_ctor));

  // Read the receiver's flags as a string.
  Handle<Object> flags_obj;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, flags_obj,
      JSObject::GetProperty(isolate, recv, factory->flags_string()));

  Handle<String> flags;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags,
                                     Object::ToString(isolate, flags_obj));

  Handle<String> unicode_flag =
      factory->LookupSingleCharacterStringFromCode('u');
  const bool unicode = String::IndexOf(isolate, flags, unicode_flag, 0) >= 0;

  Handle<String> sticky_flag =
      factory->LookupSingleCharacterStringFromCode('y');
  const bool sticky = String::IndexOf(isolate, flags, sticky_flag, 0) >= 0;

  // The splitter must be sticky; append 'y' unless it is already present.
  Handle<String> splitter_flags = flags;
  if (!sticky) {
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, splitter_flags, factory->NewConsString(flags, sticky_flag));
  }

  Handle<JSReceiver> splitter;
  {
    const int ctor_argc = 2;

    ScopedVector<Handle<Object>> ctor_argv(ctor_argc);
    ctor_argv[0] = recv;
    ctor_argv[1] = splitter_flags;

    Handle<Object> constructed;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, constructed,
        Execution::New(isolate, ctor, ctor_argc, ctor_argv.start()));

    splitter = Handle<JSReceiver>::cast(constructed);
  }

  uint32_t limit;
  RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));

  const uint32_t length = string->length();

  if (limit == 0) return *factory->NewJSArray(0);

  if (length == 0) {
    // Empty input: the result is [] if the splitter matches the empty string
    // and [string] otherwise.
    Handle<Object> exec_result;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, exec_result,
        RegExpUtils::RegExpExec(isolate, splitter, string,
                                factory->undefined_value()));

    if (!exec_result->IsNull(isolate)) return *factory->NewJSArray(0);

    Handle<FixedArray> single = factory->NewUninitializedFixedArray(1);
    single->set(0, *string);
    return *factory->NewJSArrayWithElements(single);
  }

  static const int kInitialArraySize = 8;
  Handle<FixedArray> parts = factory->NewFixedArrayWithHoles(kInitialArraySize);
  uint32_t part_count = 0;

  // |search_from| is the position at which the next match attempt starts;
  // |segment_start| is where the not-yet-emitted piece of |string| begins.
  uint32_t search_from = 0;
  uint32_t segment_start = 0;
  while (search_from < length) {
    RETURN_FAILURE_ON_EXCEPTION(
        isolate, RegExpUtils::SetLastIndex(isolate, splitter, search_from));

    Handle<Object> exec_result;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, exec_result,
        RegExpUtils::RegExpExec(isolate, splitter, string,
                                factory->undefined_value()));

    if (exec_result->IsNull(isolate)) {
      // No match at this position; try the next one.
      search_from = static_cast<uint32_t>(
          RegExpUtils::AdvanceStringIndex(string, search_from, unicode));
      continue;
    }

    Handle<Object> last_index_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter));

    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));

    const uint32_t match_end =
        std::min(PositiveNumberToUint32(*last_index_obj), length);
    if (match_end == segment_start) {
      // The match ends exactly where the pending piece starts; skip it and
      // advance.
      search_from = static_cast<uint32_t>(
          RegExpUtils::AdvanceStringIndex(string, search_from, unicode));
      continue;
    }

    {
      // Emit the piece preceding the match.
      Handle<String> piece =
          factory->NewSubString(string, segment_start, search_from);
      parts = FixedArray::SetAndGrow(isolate, parts, part_count++, piece);
      if (part_count == limit) {
        return *NewJSArrayWithElements(isolate, parts, part_count);
      }
    }

    segment_start = match_end;

    Handle<Object> result_length_obj;
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, result_length_obj,
        Object::GetProperty(isolate, exec_result, factory->length_string()));

    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, result_length_obj,
        Object::ToLength(isolate, result_length_obj));
    const uint32_t result_length = PositiveNumberToUint32(*result_length_obj);

    // Element 0 of the exec result is skipped; elements 1..length-1 are
    // appended to the output.
    for (uint32_t capture_ix = 1; capture_ix < result_length; capture_ix++) {
      Handle<Object> capture;
      ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
          isolate, capture,
          Object::GetElement(isolate, exec_result, capture_ix));
      parts = FixedArray::SetAndGrow(isolate, parts, part_count++, capture);
      if (part_count == limit) {
        return *NewJSArrayWithElements(isolate, parts, part_count);
      }
    }

    search_from = segment_start;
  }

  {
    // Emit the remainder of the string after the last match.
    Handle<String> tail = factory->NewSubString(string, segment_start, length);
    parts = FixedArray::SetAndGrow(isolate, parts, part_count++, tail);
  }

  return *NewJSArrayWithElements(isolate, parts, part_count);
}
1723
1724 // Slow path for:
1725 // ES#sec-regexp.prototype-@@replace
1726 // RegExp.prototype [ @@replace ] ( string, replaceValue )
RUNTIME_FUNCTION(Runtime_RegExpReplace)1727 RUNTIME_FUNCTION(Runtime_RegExpReplace) {
1728 HandleScope scope(isolate);
1729 DCHECK_EQ(3, args.length());
1730
1731 CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1732 CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1733 Handle<Object> replace_obj = args.at(2);
1734
1735 Factory* factory = isolate->factory();
1736
1737 string = String::Flatten(isolate, string);
1738
1739 // Fast-path for unmodified JSRegExps.
1740 if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) {
1741 RETURN_RESULT_OR_FAILURE(
1742 isolate, RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string,
1743 replace_obj));
1744 }
1745
1746 const uint32_t length = string->length();
1747 const bool functional_replace = replace_obj->IsCallable();
1748
1749 Handle<String> replace;
1750 if (!functional_replace) {
1751 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace,
1752 Object::ToString(isolate, replace_obj));
1753 }
1754
1755 Handle<Object> global_obj;
1756 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1757 isolate, global_obj,
1758 JSReceiver::GetProperty(isolate, recv, factory->global_string()));
1759 const bool global = global_obj->BooleanValue(isolate);
1760
1761 bool unicode = false;
1762 if (global) {
1763 Handle<Object> unicode_obj;
1764 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1765 isolate, unicode_obj,
1766 JSReceiver::GetProperty(isolate, recv, factory->unicode_string()));
1767 unicode = unicode_obj->BooleanValue(isolate);
1768
1769 RETURN_FAILURE_ON_EXCEPTION(isolate,
1770 RegExpUtils::SetLastIndex(isolate, recv, 0));
1771 }
1772
1773 Zone zone(isolate->allocator(), ZONE_NAME);
1774 ZoneVector<Handle<Object>> results(&zone);
1775
1776 while (true) {
1777 Handle<Object> result;
1778 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1779 isolate, result, RegExpUtils::RegExpExec(isolate, recv, string,
1780 factory->undefined_value()));
1781
1782 if (result->IsNull(isolate)) break;
1783
1784 results.push_back(result);
1785 if (!global) break;
1786
1787 Handle<Object> match_obj;
1788 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1789 Object::GetElement(isolate, result, 0));
1790
1791 Handle<String> match;
1792 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1793 Object::ToString(isolate, match_obj));
1794
1795 if (match->length() == 0) {
1796 RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetAdvancedStringIndex(
1797 isolate, recv, string, unicode));
1798 }
1799 }
1800
1801 // TODO(jgruber): Look into ReplacementStringBuilder instead.
1802 IncrementalStringBuilder builder(isolate);
1803 uint32_t next_source_position = 0;
1804
1805 for (const auto& result : results) {
1806 HandleScope handle_scope(isolate);
1807 Handle<Object> captures_length_obj;
1808 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1809 isolate, captures_length_obj,
1810 Object::GetProperty(isolate, result, factory->length_string()));
1811
1812 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1813 isolate, captures_length_obj,
1814 Object::ToLength(isolate, captures_length_obj));
1815 const uint32_t captures_length =
1816 PositiveNumberToUint32(*captures_length_obj);
1817
1818 Handle<Object> match_obj;
1819 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1820 Object::GetElement(isolate, result, 0));
1821
1822 Handle<String> match;
1823 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1824 Object::ToString(isolate, match_obj));
1825
1826 const int match_length = match->length();
1827
1828 Handle<Object> position_obj;
1829 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1830 isolate, position_obj,
1831 Object::GetProperty(isolate, result, factory->index_string()));
1832
1833 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1834 isolate, position_obj, Object::ToInteger(isolate, position_obj));
1835 const uint32_t position =
1836 std::min(PositiveNumberToUint32(*position_obj), length);
1837
1838 // Do not reserve capacity since captures_length is user-controlled.
1839 ZoneVector<Handle<Object>> captures(&zone);
1840
1841 for (uint32_t n = 0; n < captures_length; n++) {
1842 Handle<Object> capture;
1843 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1844 isolate, capture, Object::GetElement(isolate, result, n));
1845
1846 if (!capture->IsUndefined(isolate)) {
1847 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture,
1848 Object::ToString(isolate, capture));
1849 }
1850 captures.push_back(capture);
1851 }
1852
1853 Handle<Object> groups_obj = isolate->factory()->undefined_value();
1854 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1855 isolate, groups_obj,
1856 Object::GetProperty(isolate, result, factory->groups_string()));
1857
1858 const bool has_named_captures = !groups_obj->IsUndefined(isolate);
1859
1860 Handle<String> replacement;
1861 if (functional_replace) {
1862 const uint32_t argc =
1863 GetArgcForReplaceCallable(captures_length, has_named_captures);
1864 if (argc == static_cast<uint32_t>(-1)) {
1865 THROW_NEW_ERROR_RETURN_FAILURE(
1866 isolate, NewRangeError(MessageTemplate::kTooManyArguments));
1867 }
1868
1869 ScopedVector<Handle<Object>> argv(argc);
1870
1871 int cursor = 0;
1872 for (uint32_t j = 0; j < captures_length; j++) {
1873 argv[cursor++] = captures[j];
1874 }
1875
1876 argv[cursor++] = handle(Smi::FromInt(position), isolate);
1877 argv[cursor++] = string;
1878 if (has_named_captures) argv[cursor++] = groups_obj;
1879
1880 DCHECK_EQ(cursor, argc);
1881
1882 Handle<Object> replacement_obj;
1883 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1884 isolate, replacement_obj,
1885 Execution::Call(isolate, replace_obj, factory->undefined_value(),
1886 argc, argv.start()));
1887
1888 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1889 isolate, replacement, Object::ToString(isolate, replacement_obj));
1890 } else {
1891 DCHECK(!functional_replace);
1892 if (!groups_obj->IsUndefined(isolate)) {
1893 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1894 isolate, groups_obj, JSReceiver::ToObject(isolate, groups_obj));
1895 }
1896 VectorBackedMatch m(isolate, string, match, position, &captures,
1897 groups_obj);
1898 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1899 isolate, replacement, String::GetSubstitution(isolate, &m, replace));
1900 }
1901
1902 if (position >= next_source_position) {
1903 builder.AppendString(
1904 factory->NewSubString(string, next_source_position, position));
1905 builder.AppendString(replacement);
1906
1907 next_source_position = position + match_length;
1908 }
1909 }
1910
1911 if (next_source_position < length) {
1912 builder.AppendString(
1913 factory->NewSubString(string, next_source_position, length));
1914 }
1915
1916 RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1917 }
1918
// Initializes the JSRegExp passed as argument 0 from the pattern (argument 1)
// and flags (argument 2) strings via JSRegExp::Initialize. Returns the regexp
// itself on success, or a failure if initialization left an exception pending.
RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
  HandleScope scope(isolate);
  DCHECK_EQ(3, args.length());
  // TODO(pwong): To follow the spec more closely and simplify calling code,
  // this could handle the canonicalization of pattern and flags. See
  // https://tc39.github.io/ecma262/#sec-regexpinitialize
  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, target, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
  CONVERT_ARG_HANDLE_CHECKED(String, flag_string, 2);

  RETURN_FAILURE_ON_EXCEPTION(
      isolate, JSRegExp::Initialize(target, pattern, flag_string));

  return *target;
}
1934
// Takes a single argument and returns the heap boolean that reflects whether
// that argument is a JSRegExp. No handles are created (the scope is sealed).
RUNTIME_FUNCTION(Runtime_IsRegExp) {
  SealHandleScope shs(isolate);
  DCHECK_EQ(1, args.length());
  CONVERT_ARG_CHECKED(Object, candidate, 0);
  const bool is_regexp = candidate->IsJSRegExp();
  return isolate->heap()->ToBoolean(is_regexp);
}
1941
1942 } // namespace internal
1943 } // namespace v8
1944