1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/regexp/regexp-utils.h"
6 
7 #include "src/heap/factory.h"
8 #include "src/isolate.h"
9 #include "src/objects-inl.h"
10 #include "src/objects/js-regexp-inl.h"
11 #include "src/regexp/jsregexp.h"
12 
13 namespace v8 {
14 namespace internal {
15 
GenericCaptureGetter(Isolate * isolate,Handle<RegExpMatchInfo> match_info,int capture,bool * ok)16 Handle<String> RegExpUtils::GenericCaptureGetter(
17     Isolate* isolate, Handle<RegExpMatchInfo> match_info, int capture,
18     bool* ok) {
19   const int index = capture * 2;
20   if (index >= match_info->NumberOfCaptureRegisters()) {
21     if (ok != nullptr) *ok = false;
22     return isolate->factory()->empty_string();
23   }
24 
25   const int match_start = match_info->Capture(index);
26   const int match_end = match_info->Capture(index + 1);
27   if (match_start == -1 || match_end == -1) {
28     if (ok != nullptr) *ok = false;
29     return isolate->factory()->empty_string();
30   }
31 
32   if (ok != nullptr) *ok = true;
33   Handle<String> last_subject(match_info->LastSubject(), isolate);
34   return isolate->factory()->NewSubString(last_subject, match_start, match_end);
35 }
36 
37 namespace {
38 
HasInitialRegExpMap(Isolate * isolate,Handle<JSReceiver> recv)39 V8_INLINE bool HasInitialRegExpMap(Isolate* isolate, Handle<JSReceiver> recv) {
40   return recv->map() == isolate->regexp_function()->initial_map();
41 }
42 
43 }  // namespace
44 
SetLastIndex(Isolate * isolate,Handle<JSReceiver> recv,uint64_t value)45 MaybeHandle<Object> RegExpUtils::SetLastIndex(Isolate* isolate,
46                                               Handle<JSReceiver> recv,
47                                               uint64_t value) {
48   Handle<Object> value_as_object =
49       isolate->factory()->NewNumberFromInt64(value);
50   if (HasInitialRegExpMap(isolate, recv)) {
51     JSRegExp::cast(*recv)->set_last_index(*value_as_object, SKIP_WRITE_BARRIER);
52     return recv;
53   } else {
54     return Object::SetProperty(isolate, recv,
55                                isolate->factory()->lastIndex_string(),
56                                value_as_object, LanguageMode::kStrict);
57   }
58 }
59 
GetLastIndex(Isolate * isolate,Handle<JSReceiver> recv)60 MaybeHandle<Object> RegExpUtils::GetLastIndex(Isolate* isolate,
61                                               Handle<JSReceiver> recv) {
62   if (HasInitialRegExpMap(isolate, recv)) {
63     return handle(JSRegExp::cast(*recv)->last_index(), isolate);
64   } else {
65     return Object::GetProperty(isolate, recv,
66                                isolate->factory()->lastIndex_string());
67   }
68 }
69 
70 // ES#sec-regexpexec Runtime Semantics: RegExpExec ( R, S )
71 // Also takes an optional exec method in case our caller
72 // has already fetched exec.
RegExpExec(Isolate * isolate,Handle<JSReceiver> regexp,Handle<String> string,Handle<Object> exec)73 MaybeHandle<Object> RegExpUtils::RegExpExec(Isolate* isolate,
74                                             Handle<JSReceiver> regexp,
75                                             Handle<String> string,
76                                             Handle<Object> exec) {
77   if (exec->IsUndefined(isolate)) {
78     ASSIGN_RETURN_ON_EXCEPTION(
79         isolate, exec,
80         Object::GetProperty(isolate, regexp, isolate->factory()->exec_string()),
81         Object);
82   }
83 
84   if (exec->IsCallable()) {
85     const int argc = 1;
86     ScopedVector<Handle<Object>> argv(argc);
87     argv[0] = string;
88 
89     Handle<Object> result;
90     ASSIGN_RETURN_ON_EXCEPTION(
91         isolate, result,
92         Execution::Call(isolate, exec, regexp, argc, argv.start()), Object);
93 
94     if (!result->IsJSReceiver() && !result->IsNull(isolate)) {
95       THROW_NEW_ERROR(isolate,
96                       NewTypeError(MessageTemplate::kInvalidRegExpExecResult),
97                       Object);
98     }
99     return result;
100   }
101 
102   if (!regexp->IsJSRegExp()) {
103     THROW_NEW_ERROR(isolate,
104                     NewTypeError(MessageTemplate::kIncompatibleMethodReceiver,
105                                  isolate->factory()->NewStringFromAsciiChecked(
106                                      "RegExp.prototype.exec"),
107                                  regexp),
108                     Object);
109   }
110 
111   {
112     Handle<JSFunction> regexp_exec = isolate->regexp_exec_function();
113 
114     const int argc = 1;
115     ScopedVector<Handle<Object>> argv(argc);
116     argv[0] = string;
117 
118     return Execution::Call(isolate, regexp_exec, regexp, argc, argv.start());
119   }
120 }
121 
IsRegExp(Isolate * isolate,Handle<Object> object)122 Maybe<bool> RegExpUtils::IsRegExp(Isolate* isolate, Handle<Object> object) {
123   if (!object->IsJSReceiver()) return Just(false);
124 
125   Handle<JSReceiver> receiver = Handle<JSReceiver>::cast(object);
126 
127   Handle<Object> match;
128   ASSIGN_RETURN_ON_EXCEPTION_VALUE(
129       isolate, match,
130       JSObject::GetProperty(isolate, receiver,
131                             isolate->factory()->match_symbol()),
132       Nothing<bool>());
133 
134   if (!match->IsUndefined(isolate)) return Just(match->BooleanValue(isolate));
135   return Just(object->IsJSRegExp());
136 }
137 
IsUnmodifiedRegExp(Isolate * isolate,Handle<Object> obj)138 bool RegExpUtils::IsUnmodifiedRegExp(Isolate* isolate, Handle<Object> obj) {
139   // TODO(ishell): Update this check once map changes for constant field
140   // tracking are landing.
141 
142 #ifdef V8_ENABLE_FORCE_SLOW_PATH
143   if (isolate->force_slow_path()) return false;
144 #endif
145 
146   if (!obj->IsJSReceiver()) return false;
147 
148   JSReceiver* recv = JSReceiver::cast(*obj);
149 
150   // Check the receiver's map.
151   Handle<JSFunction> regexp_function = isolate->regexp_function();
152   if (recv->map() != regexp_function->initial_map()) return false;
153 
154   // Check the receiver's prototype's map.
155   Object* proto = recv->map()->prototype();
156   if (!proto->IsJSReceiver()) return false;
157 
158   Handle<Map> initial_proto_initial_map = isolate->regexp_prototype_map();
159   if (JSReceiver::cast(proto)->map() != *initial_proto_initial_map) {
160     return false;
161   }
162 
163   // The smi check is required to omit ToLength(lastIndex) calls with possible
164   // user-code execution on the fast path.
165   Object* last_index = JSRegExp::cast(recv)->last_index();
166   return last_index->IsSmi() && Smi::ToInt(last_index) >= 0;
167 }
168 
AdvanceStringIndex(Handle<String> string,uint64_t index,bool unicode)169 uint64_t RegExpUtils::AdvanceStringIndex(Handle<String> string, uint64_t index,
170                                          bool unicode) {
171   DCHECK_LE(static_cast<double>(index), kMaxSafeInteger);
172   const uint64_t string_length = static_cast<uint64_t>(string->length());
173   if (unicode && index < string_length) {
174     const uint16_t first = string->Get(static_cast<uint32_t>(index));
175     if (first >= 0xD800 && first <= 0xDBFF && index + 1 < string_length) {
176       DCHECK_LT(index, std::numeric_limits<uint64_t>::max());
177       const uint16_t second = string->Get(static_cast<uint32_t>(index + 1));
178       if (second >= 0xDC00 && second <= 0xDFFF) {
179         return index + 2;
180       }
181     }
182   }
183 
184   return index + 1;
185 }
186 
SetAdvancedStringIndex(Isolate * isolate,Handle<JSReceiver> regexp,Handle<String> string,bool unicode)187 MaybeHandle<Object> RegExpUtils::SetAdvancedStringIndex(
188     Isolate* isolate, Handle<JSReceiver> regexp, Handle<String> string,
189     bool unicode) {
190   Handle<Object> last_index_obj;
191   ASSIGN_RETURN_ON_EXCEPTION(
192       isolate, last_index_obj,
193       Object::GetProperty(isolate, regexp,
194                           isolate->factory()->lastIndex_string()),
195       Object);
196 
197   ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
198                              Object::ToLength(isolate, last_index_obj), Object);
199   const uint64_t last_index = PositiveNumberToUint64(*last_index_obj);
200   const uint64_t new_last_index =
201       AdvanceStringIndex(string, last_index, unicode);
202 
203   return SetLastIndex(isolate, regexp, new_last_index);
204 }
205 
206 }  // namespace internal
207 }  // namespace v8
208