1// Copyright 2012 the V8 project authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5// This file relies on the fact that the following declaration has been made 6// in runtime.js: 7// var $Object = global.Object; 8// var $Array = global.Array; 9 10var $RegExp = global.RegExp; 11 12// ------------------------------------------------------------------- 13 14// A recursive descent parser for Patterns according to the grammar of 15// ECMA-262 15.10.1, with deviations noted below. 16function DoConstructRegExp(object, pattern, flags) { 17 // RegExp : Called as constructor; see ECMA-262, section 15.10.4. 18 if (IS_REGEXP(pattern)) { 19 if (!IS_UNDEFINED(flags)) { 20 throw MakeTypeError('regexp_flags', []); 21 } 22 flags = (pattern.global ? 'g' : '') 23 + (pattern.ignoreCase ? 'i' : '') 24 + (pattern.multiline ? 'm' : ''); 25 if (harmony_regexps) 26 flags += (pattern.sticky ? 'y' : ''); 27 pattern = pattern.source; 28 } 29 30 pattern = IS_UNDEFINED(pattern) ? '' : ToString(pattern); 31 flags = IS_UNDEFINED(flags) ? '' : ToString(flags); 32 33 var global = false; 34 var ignoreCase = false; 35 var multiline = false; 36 var sticky = false; 37 for (var i = 0; i < flags.length; i++) { 38 var c = %_CallFunction(flags, i, StringCharAt); 39 switch (c) { 40 case 'g': 41 if (global) { 42 throw MakeSyntaxError("invalid_regexp_flags", [flags]); 43 } 44 global = true; 45 break; 46 case 'i': 47 if (ignoreCase) { 48 throw MakeSyntaxError("invalid_regexp_flags", [flags]); 49 } 50 ignoreCase = true; 51 break; 52 case 'm': 53 if (multiline) { 54 throw MakeSyntaxError("invalid_regexp_flags", [flags]); 55 } 56 multiline = true; 57 break; 58 case 'y': 59 if (!harmony_regexps || sticky) { 60 throw MakeSyntaxError("invalid_regexp_flags", [flags]); 61 } 62 sticky = true; 63 break; 64 default: 65 throw MakeSyntaxError("invalid_regexp_flags", [flags]); 66 } 67 } 68 69 %RegExpInitializeObject(object, pattern, global, ignoreCase, multiline, sticky); 70 71 // Call internal function to compile the pattern. 72 %RegExpCompile(object, pattern, flags); 73} 74 75 76function RegExpConstructor(pattern, flags) { 77 if (%_IsConstructCall()) { 78 DoConstructRegExp(this, pattern, flags); 79 } else { 80 // RegExp : Called as function; see ECMA-262, section 15.10.3.1. 81 if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) { 82 return pattern; 83 } 84 return new $RegExp(pattern, flags); 85 } 86} 87 88// Deprecated RegExp.prototype.compile method. We behave like the constructor 89// were called again. In SpiderMonkey, this method returns the regexp object. 90// In JSC, it returns undefined. For compatibility with JSC, we match their 91// behavior. 92function RegExpCompileJS(pattern, flags) { 93 // Both JSC and SpiderMonkey treat a missing pattern argument as the 94 // empty subject string, and an actual undefined value passed as the 95 // pattern as the string 'undefined'. Note that JSC is inconsistent 96 // here, treating undefined values differently in 97 // RegExp.prototype.compile and in the constructor, where they are 98 // the empty string. For compatibility with JSC, we match their 99 // behavior. 100 if (this == $RegExp.prototype) { 101 // We don't allow recompiling RegExp.prototype. 102 throw MakeTypeError('incompatible_method_receiver', 103 ['RegExp.prototype.compile', this]); 104 } 105 if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) { 106 DoConstructRegExp(this, 'undefined', flags); 107 } else { 108 DoConstructRegExp(this, pattern, flags); 109 } 110} 111 112 113function DoRegExpExec(regexp, string, index) { 114 var result = %_RegExpExec(regexp, string, index, lastMatchInfo); 115 if (result !== null) lastMatchInfoOverride = null; 116 return result; 117} 118 119 120// This is kind of performance sensitive, so we want to avoid unnecessary 121// type checks on inputs. But we also don't want to inline it several times 122// manually, so we use a macro :-) 123macro RETURN_NEW_RESULT_FROM_MATCH_INFO(MATCHINFO, STRING) 124 var numResults = NUMBER_OF_CAPTURES(MATCHINFO) >> 1; 125 var start = MATCHINFO[CAPTURE0]; 126 var end = MATCHINFO[CAPTURE1]; 127 // Calculate the substring of the first match before creating the result array 128 // to avoid an unnecessary write barrier storing the first result. 129 var first = %_SubString(STRING, start, end); 130 var result = %_RegExpConstructResult(numResults, start, STRING); 131 result[0] = first; 132 if (numResults == 1) return result; 133 var j = REGEXP_FIRST_CAPTURE + 2; 134 for (var i = 1; i < numResults; i++) { 135 start = MATCHINFO[j++]; 136 if (start != -1) { 137 end = MATCHINFO[j]; 138 result[i] = %_SubString(STRING, start, end); 139 } 140 j++; 141 } 142 return result; 143endmacro 144 145 146function RegExpExecNoTests(regexp, string, start) { 147 // Must be called with RegExp, string and positive integer as arguments. 148 var matchInfo = %_RegExpExec(regexp, string, start, lastMatchInfo); 149 if (matchInfo !== null) { 150 lastMatchInfoOverride = null; 151 RETURN_NEW_RESULT_FROM_MATCH_INFO(matchInfo, string); 152 } 153 regexp.lastIndex = 0; 154 return null; 155} 156 157 158function RegExpExec(string) { 159 if (!IS_REGEXP(this)) { 160 throw MakeTypeError('incompatible_method_receiver', 161 ['RegExp.prototype.exec', this]); 162 } 163 164 string = TO_STRING_INLINE(string); 165 var lastIndex = this.lastIndex; 166 167 // Conversion is required by the ES5 specification (RegExp.prototype.exec 168 // algorithm, step 5) even if the value is discarded for non-global RegExps. 169 var i = TO_INTEGER(lastIndex); 170 171 var updateLastIndex = this.global || (harmony_regexps && this.sticky); 172 if (updateLastIndex) { 173 if (i < 0 || i > string.length) { 174 this.lastIndex = 0; 175 return null; 176 } 177 } else { 178 i = 0; 179 } 180 181 // matchIndices is either null or the lastMatchInfo array. 182 var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo); 183 184 if (IS_NULL(matchIndices)) { 185 this.lastIndex = 0; 186 return null; 187 } 188 189 // Successful match. 190 lastMatchInfoOverride = null; 191 if (updateLastIndex) { 192 this.lastIndex = lastMatchInfo[CAPTURE1]; 193 } 194 RETURN_NEW_RESULT_FROM_MATCH_INFO(matchIndices, string); 195} 196 197 198// One-element cache for the simplified test regexp. 199var regexp_key; 200var regexp_val; 201 202// Section 15.10.6.3 doesn't actually make sense, but the intention seems to be 203// that test is defined in terms of String.prototype.exec. However, it probably 204// means the original value of String.prototype.exec, which is what everybody 205// else implements. 206function RegExpTest(string) { 207 if (!IS_REGEXP(this)) { 208 throw MakeTypeError('incompatible_method_receiver', 209 ['RegExp.prototype.test', this]); 210 } 211 string = TO_STRING_INLINE(string); 212 213 var lastIndex = this.lastIndex; 214 215 // Conversion is required by the ES5 specification (RegExp.prototype.exec 216 // algorithm, step 5) even if the value is discarded for non-global RegExps. 217 var i = TO_INTEGER(lastIndex); 218 219 if (this.global || (harmony_regexps && this.sticky)) { 220 if (i < 0 || i > string.length) { 221 this.lastIndex = 0; 222 return false; 223 } 224 // matchIndices is either null or the lastMatchInfo array. 225 var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo); 226 if (IS_NULL(matchIndices)) { 227 this.lastIndex = 0; 228 return false; 229 } 230 lastMatchInfoOverride = null; 231 this.lastIndex = lastMatchInfo[CAPTURE1]; 232 return true; 233 } else { 234 // Non-global, non-sticky regexp. 235 // Remove irrelevant preceeding '.*' in a test regexp. The expression 236 // checks whether this.source starts with '.*' and that the third char is 237 // not a '?'. But see https://code.google.com/p/v8/issues/detail?id=3560 238 var regexp = this; 239 if (regexp.source.length >= 3 && 240 %_StringCharCodeAt(regexp.source, 0) == 46 && // '.' 241 %_StringCharCodeAt(regexp.source, 1) == 42 && // '*' 242 %_StringCharCodeAt(regexp.source, 2) != 63) { // '?' 243 regexp = TrimRegExp(regexp); 244 } 245 // matchIndices is either null or the lastMatchInfo array. 246 var matchIndices = %_RegExpExec(regexp, string, 0, lastMatchInfo); 247 if (IS_NULL(matchIndices)) { 248 this.lastIndex = 0; 249 return false; 250 } 251 lastMatchInfoOverride = null; 252 return true; 253 } 254} 255 256function TrimRegExp(regexp) { 257 if (!%_ObjectEquals(regexp_key, regexp)) { 258 regexp_key = regexp; 259 regexp_val = 260 new $RegExp(%_SubString(regexp.source, 2, regexp.source.length), 261 (regexp.ignoreCase ? regexp.multiline ? "im" : "i" 262 : regexp.multiline ? "m" : "")); 263 } 264 return regexp_val; 265} 266 267 268function RegExpToString() { 269 if (!IS_REGEXP(this)) { 270 throw MakeTypeError('incompatible_method_receiver', 271 ['RegExp.prototype.toString', this]); 272 } 273 var result = '/' + this.source + '/'; 274 if (this.global) result += 'g'; 275 if (this.ignoreCase) result += 'i'; 276 if (this.multiline) result += 'm'; 277 if (harmony_regexps && this.sticky) result += 'y'; 278 return result; 279} 280 281 282// Getters for the static properties lastMatch, lastParen, leftContext, and 283// rightContext of the RegExp constructor. The properties are computed based 284// on the captures array of the last successful match and the subject string 285// of the last successful match. 286function RegExpGetLastMatch() { 287 if (lastMatchInfoOverride !== null) { 288 return OVERRIDE_MATCH(lastMatchInfoOverride); 289 } 290 var regExpSubject = LAST_SUBJECT(lastMatchInfo); 291 return %_SubString(regExpSubject, 292 lastMatchInfo[CAPTURE0], 293 lastMatchInfo[CAPTURE1]); 294} 295 296 297function RegExpGetLastParen() { 298 if (lastMatchInfoOverride) { 299 var override = lastMatchInfoOverride; 300 if (override.length <= 3) return ''; 301 return override[override.length - 3]; 302 } 303 var length = NUMBER_OF_CAPTURES(lastMatchInfo); 304 if (length <= 2) return ''; // There were no captures. 305 // We match the SpiderMonkey behavior: return the substring defined by the 306 // last pair (after the first pair) of elements of the capture array even if 307 // it is empty. 308 var regExpSubject = LAST_SUBJECT(lastMatchInfo); 309 var start = lastMatchInfo[CAPTURE(length - 2)]; 310 var end = lastMatchInfo[CAPTURE(length - 1)]; 311 if (start != -1 && end != -1) { 312 return %_SubString(regExpSubject, start, end); 313 } 314 return ""; 315} 316 317 318function RegExpGetLeftContext() { 319 var start_index; 320 var subject; 321 if (!lastMatchInfoOverride) { 322 start_index = lastMatchInfo[CAPTURE0]; 323 subject = LAST_SUBJECT(lastMatchInfo); 324 } else { 325 var override = lastMatchInfoOverride; 326 start_index = OVERRIDE_POS(override); 327 subject = OVERRIDE_SUBJECT(override); 328 } 329 return %_SubString(subject, 0, start_index); 330} 331 332 333function RegExpGetRightContext() { 334 var start_index; 335 var subject; 336 if (!lastMatchInfoOverride) { 337 start_index = lastMatchInfo[CAPTURE1]; 338 subject = LAST_SUBJECT(lastMatchInfo); 339 } else { 340 var override = lastMatchInfoOverride; 341 subject = OVERRIDE_SUBJECT(override); 342 var match = OVERRIDE_MATCH(override); 343 start_index = OVERRIDE_POS(override) + match.length; 344 } 345 return %_SubString(subject, start_index, subject.length); 346} 347 348 349// The properties $1..$9 are the first nine capturing substrings of the last 350// successful match, or ''. The function RegExpMakeCaptureGetter will be 351// called with indices from 1 to 9. 352function RegExpMakeCaptureGetter(n) { 353 return function() { 354 if (lastMatchInfoOverride) { 355 if (n < lastMatchInfoOverride.length - 2) { 356 return OVERRIDE_CAPTURE(lastMatchInfoOverride, n); 357 } 358 return ''; 359 } 360 var index = n * 2; 361 if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return ''; 362 var matchStart = lastMatchInfo[CAPTURE(index)]; 363 var matchEnd = lastMatchInfo[CAPTURE(index + 1)]; 364 if (matchStart == -1 || matchEnd == -1) return ''; 365 return %_SubString(LAST_SUBJECT(lastMatchInfo), matchStart, matchEnd); 366 }; 367} 368 369 370// Property of the builtins object for recording the result of the last 371// regexp match. The property lastMatchInfo includes the matchIndices 372// array of the last successful regexp match (an array of start/end index 373// pairs for the match and all the captured substrings), the invariant is 374// that there are at least two capture indeces. The array also contains 375// the subject string for the last successful match. 376var lastMatchInfo = new InternalPackedArray( 377 2, // REGEXP_NUMBER_OF_CAPTURES 378 "", // Last subject. 379 UNDEFINED, // Last input - settable with RegExpSetInput. 380 0, // REGEXP_FIRST_CAPTURE + 0 381 0 // REGEXP_FIRST_CAPTURE + 1 382); 383 384// Override last match info with an array of actual substrings. 385// Used internally by replace regexp with function. 386// The array has the format of an "apply" argument for a replacement 387// function. 388var lastMatchInfoOverride = null; 389 390// ------------------------------------------------------------------- 391 392function SetUpRegExp() { 393 %CheckIsBootstrapping(); 394 %FunctionSetInstanceClassName($RegExp, 'RegExp'); 395 %AddNamedProperty($RegExp.prototype, 'constructor', $RegExp, DONT_ENUM); 396 %SetCode($RegExp, RegExpConstructor); 397 398 InstallFunctions($RegExp.prototype, DONT_ENUM, $Array( 399 "exec", RegExpExec, 400 "test", RegExpTest, 401 "toString", RegExpToString, 402 "compile", RegExpCompileJS 403 )); 404 405 // The length of compile is 1 in SpiderMonkey. 406 %FunctionSetLength($RegExp.prototype.compile, 1); 407 408 // The properties `input` and `$_` are aliases for each other. When this 409 // value is set the value it is set to is coerced to a string. 410 // Getter and setter for the input. 411 var RegExpGetInput = function() { 412 var regExpInput = LAST_INPUT(lastMatchInfo); 413 return IS_UNDEFINED(regExpInput) ? "" : regExpInput; 414 }; 415 var RegExpSetInput = function(string) { 416 LAST_INPUT(lastMatchInfo) = ToString(string); 417 }; 418 419 %OptimizeObjectForAddingMultipleProperties($RegExp, 22); 420 %DefineAccessorPropertyUnchecked($RegExp, 'input', RegExpGetInput, 421 RegExpSetInput, DONT_DELETE); 422 %DefineAccessorPropertyUnchecked($RegExp, '$_', RegExpGetInput, 423 RegExpSetInput, DONT_ENUM | DONT_DELETE); 424 425 // The properties multiline and $* are aliases for each other. When this 426 // value is set in SpiderMonkey, the value it is set to is coerced to a 427 // boolean. We mimic that behavior with a slight difference: in SpiderMonkey 428 // the value of the expression 'RegExp.multiline = null' (for instance) is the 429 // boolean false (i.e., the value after coercion), while in V8 it is the value 430 // null (i.e., the value before coercion). 431 432 // Getter and setter for multiline. 433 var multiline = false; 434 var RegExpGetMultiline = function() { return multiline; }; 435 var RegExpSetMultiline = function(flag) { multiline = flag ? true : false; }; 436 437 %DefineAccessorPropertyUnchecked($RegExp, 'multiline', RegExpGetMultiline, 438 RegExpSetMultiline, DONT_DELETE); 439 %DefineAccessorPropertyUnchecked($RegExp, '$*', RegExpGetMultiline, 440 RegExpSetMultiline, 441 DONT_ENUM | DONT_DELETE); 442 443 444 var NoOpSetter = function(ignored) {}; 445 446 447 // Static properties set by a successful match. 448 %DefineAccessorPropertyUnchecked($RegExp, 'lastMatch', RegExpGetLastMatch, 449 NoOpSetter, DONT_DELETE); 450 %DefineAccessorPropertyUnchecked($RegExp, '$&', RegExpGetLastMatch, 451 NoOpSetter, DONT_ENUM | DONT_DELETE); 452 %DefineAccessorPropertyUnchecked($RegExp, 'lastParen', RegExpGetLastParen, 453 NoOpSetter, DONT_DELETE); 454 %DefineAccessorPropertyUnchecked($RegExp, '$+', RegExpGetLastParen, 455 NoOpSetter, DONT_ENUM | DONT_DELETE); 456 %DefineAccessorPropertyUnchecked($RegExp, 'leftContext', 457 RegExpGetLeftContext, NoOpSetter, 458 DONT_DELETE); 459 %DefineAccessorPropertyUnchecked($RegExp, '$`', RegExpGetLeftContext, 460 NoOpSetter, DONT_ENUM | DONT_DELETE); 461 %DefineAccessorPropertyUnchecked($RegExp, 'rightContext', 462 RegExpGetRightContext, NoOpSetter, 463 DONT_DELETE); 464 %DefineAccessorPropertyUnchecked($RegExp, "$'", RegExpGetRightContext, 465 NoOpSetter, DONT_ENUM | DONT_DELETE); 466 467 for (var i = 1; i < 10; ++i) { 468 %DefineAccessorPropertyUnchecked($RegExp, '$' + i, 469 RegExpMakeCaptureGetter(i), NoOpSetter, 470 DONT_DELETE); 471 } 472 %ToFastProperties($RegExp); 473} 474 475SetUpRegExp(); 476