1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/regexp/regexp-utils.h"
6
7 #include "src/heap/factory.h"
8 #include "src/isolate.h"
9 #include "src/objects-inl.h"
10 #include "src/objects/js-regexp-inl.h"
11 #include "src/regexp/jsregexp.h"
12
13 namespace v8 {
14 namespace internal {
15
GenericCaptureGetter(Isolate * isolate,Handle<RegExpMatchInfo> match_info,int capture,bool * ok)16 Handle<String> RegExpUtils::GenericCaptureGetter(
17 Isolate* isolate, Handle<RegExpMatchInfo> match_info, int capture,
18 bool* ok) {
19 const int index = capture * 2;
20 if (index >= match_info->NumberOfCaptureRegisters()) {
21 if (ok != nullptr) *ok = false;
22 return isolate->factory()->empty_string();
23 }
24
25 const int match_start = match_info->Capture(index);
26 const int match_end = match_info->Capture(index + 1);
27 if (match_start == -1 || match_end == -1) {
28 if (ok != nullptr) *ok = false;
29 return isolate->factory()->empty_string();
30 }
31
32 if (ok != nullptr) *ok = true;
33 Handle<String> last_subject(match_info->LastSubject(), isolate);
34 return isolate->factory()->NewSubString(last_subject, match_start, match_end);
35 }
36
37 namespace {
38
HasInitialRegExpMap(Isolate * isolate,Handle<JSReceiver> recv)39 V8_INLINE bool HasInitialRegExpMap(Isolate* isolate, Handle<JSReceiver> recv) {
40 return recv->map() == isolate->regexp_function()->initial_map();
41 }
42
43 } // namespace
44
SetLastIndex(Isolate * isolate,Handle<JSReceiver> recv,uint64_t value)45 MaybeHandle<Object> RegExpUtils::SetLastIndex(Isolate* isolate,
46 Handle<JSReceiver> recv,
47 uint64_t value) {
48 Handle<Object> value_as_object =
49 isolate->factory()->NewNumberFromInt64(value);
50 if (HasInitialRegExpMap(isolate, recv)) {
51 JSRegExp::cast(*recv)->set_last_index(*value_as_object, SKIP_WRITE_BARRIER);
52 return recv;
53 } else {
54 return Object::SetProperty(isolate, recv,
55 isolate->factory()->lastIndex_string(),
56 value_as_object, LanguageMode::kStrict);
57 }
58 }
59
GetLastIndex(Isolate * isolate,Handle<JSReceiver> recv)60 MaybeHandle<Object> RegExpUtils::GetLastIndex(Isolate* isolate,
61 Handle<JSReceiver> recv) {
62 if (HasInitialRegExpMap(isolate, recv)) {
63 return handle(JSRegExp::cast(*recv)->last_index(), isolate);
64 } else {
65 return Object::GetProperty(isolate, recv,
66 isolate->factory()->lastIndex_string());
67 }
68 }
69
70 // ES#sec-regexpexec Runtime Semantics: RegExpExec ( R, S )
71 // Also takes an optional exec method in case our caller
72 // has already fetched exec.
RegExpExec(Isolate * isolate,Handle<JSReceiver> regexp,Handle<String> string,Handle<Object> exec)73 MaybeHandle<Object> RegExpUtils::RegExpExec(Isolate* isolate,
74 Handle<JSReceiver> regexp,
75 Handle<String> string,
76 Handle<Object> exec) {
77 if (exec->IsUndefined(isolate)) {
78 ASSIGN_RETURN_ON_EXCEPTION(
79 isolate, exec,
80 Object::GetProperty(isolate, regexp, isolate->factory()->exec_string()),
81 Object);
82 }
83
84 if (exec->IsCallable()) {
85 const int argc = 1;
86 ScopedVector<Handle<Object>> argv(argc);
87 argv[0] = string;
88
89 Handle<Object> result;
90 ASSIGN_RETURN_ON_EXCEPTION(
91 isolate, result,
92 Execution::Call(isolate, exec, regexp, argc, argv.start()), Object);
93
94 if (!result->IsJSReceiver() && !result->IsNull(isolate)) {
95 THROW_NEW_ERROR(isolate,
96 NewTypeError(MessageTemplate::kInvalidRegExpExecResult),
97 Object);
98 }
99 return result;
100 }
101
102 if (!regexp->IsJSRegExp()) {
103 THROW_NEW_ERROR(isolate,
104 NewTypeError(MessageTemplate::kIncompatibleMethodReceiver,
105 isolate->factory()->NewStringFromAsciiChecked(
106 "RegExp.prototype.exec"),
107 regexp),
108 Object);
109 }
110
111 {
112 Handle<JSFunction> regexp_exec = isolate->regexp_exec_function();
113
114 const int argc = 1;
115 ScopedVector<Handle<Object>> argv(argc);
116 argv[0] = string;
117
118 return Execution::Call(isolate, regexp_exec, regexp, argc, argv.start());
119 }
120 }
121
IsRegExp(Isolate * isolate,Handle<Object> object)122 Maybe<bool> RegExpUtils::IsRegExp(Isolate* isolate, Handle<Object> object) {
123 if (!object->IsJSReceiver()) return Just(false);
124
125 Handle<JSReceiver> receiver = Handle<JSReceiver>::cast(object);
126
127 Handle<Object> match;
128 ASSIGN_RETURN_ON_EXCEPTION_VALUE(
129 isolate, match,
130 JSObject::GetProperty(isolate, receiver,
131 isolate->factory()->match_symbol()),
132 Nothing<bool>());
133
134 if (!match->IsUndefined(isolate)) return Just(match->BooleanValue(isolate));
135 return Just(object->IsJSRegExp());
136 }
137
IsUnmodifiedRegExp(Isolate * isolate,Handle<Object> obj)138 bool RegExpUtils::IsUnmodifiedRegExp(Isolate* isolate, Handle<Object> obj) {
139 // TODO(ishell): Update this check once map changes for constant field
140 // tracking are landing.
141
142 #ifdef V8_ENABLE_FORCE_SLOW_PATH
143 if (isolate->force_slow_path()) return false;
144 #endif
145
146 if (!obj->IsJSReceiver()) return false;
147
148 JSReceiver* recv = JSReceiver::cast(*obj);
149
150 // Check the receiver's map.
151 Handle<JSFunction> regexp_function = isolate->regexp_function();
152 if (recv->map() != regexp_function->initial_map()) return false;
153
154 // Check the receiver's prototype's map.
155 Object* proto = recv->map()->prototype();
156 if (!proto->IsJSReceiver()) return false;
157
158 Handle<Map> initial_proto_initial_map = isolate->regexp_prototype_map();
159 if (JSReceiver::cast(proto)->map() != *initial_proto_initial_map) {
160 return false;
161 }
162
163 // The smi check is required to omit ToLength(lastIndex) calls with possible
164 // user-code execution on the fast path.
165 Object* last_index = JSRegExp::cast(recv)->last_index();
166 return last_index->IsSmi() && Smi::ToInt(last_index) >= 0;
167 }
168
AdvanceStringIndex(Handle<String> string,uint64_t index,bool unicode)169 uint64_t RegExpUtils::AdvanceStringIndex(Handle<String> string, uint64_t index,
170 bool unicode) {
171 DCHECK_LE(static_cast<double>(index), kMaxSafeInteger);
172 const uint64_t string_length = static_cast<uint64_t>(string->length());
173 if (unicode && index < string_length) {
174 const uint16_t first = string->Get(static_cast<uint32_t>(index));
175 if (first >= 0xD800 && first <= 0xDBFF && index + 1 < string_length) {
176 DCHECK_LT(index, std::numeric_limits<uint64_t>::max());
177 const uint16_t second = string->Get(static_cast<uint32_t>(index + 1));
178 if (second >= 0xDC00 && second <= 0xDFFF) {
179 return index + 2;
180 }
181 }
182 }
183
184 return index + 1;
185 }
186
SetAdvancedStringIndex(Isolate * isolate,Handle<JSReceiver> regexp,Handle<String> string,bool unicode)187 MaybeHandle<Object> RegExpUtils::SetAdvancedStringIndex(
188 Isolate* isolate, Handle<JSReceiver> regexp, Handle<String> string,
189 bool unicode) {
190 Handle<Object> last_index_obj;
191 ASSIGN_RETURN_ON_EXCEPTION(
192 isolate, last_index_obj,
193 Object::GetProperty(isolate, regexp,
194 isolate->factory()->lastIndex_string()),
195 Object);
196
197 ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
198 Object::ToLength(isolate, last_index_obj), Object);
199 const uint64_t last_index = PositiveNumberToUint64(*last_index_obj);
200 const uint64_t new_last_index =
201 AdvanceStringIndex(string, last_index, unicode);
202
203 return SetLastIndex(isolate, regexp, new_last_index);
204 }
205
206 } // namespace internal
207 } // namespace v8
208