1// Copyright 2012 the V8 project authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5(function(global, utils) { 6 7%CheckIsBootstrapping(); 8 9// ------------------------------------------------------------------- 10// Imports 11 12var FLAG_harmony_tolength; 13var GlobalObject = global.Object; 14var GlobalRegExp = global.RegExp; 15var GlobalRegExpPrototype; 16var InternalArray = utils.InternalArray; 17var InternalPackedArray = utils.InternalPackedArray; 18var MakeTypeError; 19var matchSymbol = utils.ImportNow("match_symbol"); 20var searchSymbol = utils.ImportNow("search_symbol"); 21var splitSymbol = utils.ImportNow("split_symbol"); 22 23utils.ImportFromExperimental(function(from) { 24 FLAG_harmony_tolength = from.FLAG_harmony_tolength; 25}); 26 27utils.Import(function(from) { 28 MakeTypeError = from.MakeTypeError; 29}); 30 31// ------------------------------------------------------------------- 32 33// Property of the builtins object for recording the result of the last 34// regexp match. The property RegExpLastMatchInfo includes the matchIndices 35// array of the last successful regexp match (an array of start/end index 36// pairs for the match and all the captured substrings), the invariant is 37// that there are at least two capture indeces. The array also contains 38// the subject string for the last successful match. 39var RegExpLastMatchInfo = new InternalPackedArray( 40 2, // REGEXP_NUMBER_OF_CAPTURES 41 "", // Last subject. 42 UNDEFINED, // Last input - settable with RegExpSetInput. 43 0, // REGEXP_FIRST_CAPTURE + 0 44 0 // REGEXP_FIRST_CAPTURE + 1 45); 46 47// ------------------------------------------------------------------- 48 49function IsRegExp(o) { 50 if (!IS_RECEIVER(o)) return false; 51 var is_regexp = o[matchSymbol]; 52 if (!IS_UNDEFINED(is_regexp)) return TO_BOOLEAN(is_regexp); 53 return IS_REGEXP(o); 54} 55 56 57// ES6 section 21.2.3.2.2 58function RegExpInitialize(object, pattern, flags) { 59 pattern = IS_UNDEFINED(pattern) ? '' : TO_STRING(pattern); 60 flags = IS_UNDEFINED(flags) ? '' : TO_STRING(flags); 61 %RegExpInitializeAndCompile(object, pattern, flags); 62 return object; 63} 64 65 66function PatternFlags(pattern) { 67 return (REGEXP_GLOBAL(pattern) ? 'g' : '') + 68 (REGEXP_IGNORE_CASE(pattern) ? 'i' : '') + 69 (REGEXP_MULTILINE(pattern) ? 'm' : '') + 70 (REGEXP_UNICODE(pattern) ? 'u' : '') + 71 (REGEXP_STICKY(pattern) ? 'y' : ''); 72} 73 74 75function RegExpConstructor(pattern, flags) { 76 var newtarget = new.target; 77 var pattern_is_regexp = IsRegExp(pattern); 78 79 if (IS_UNDEFINED(newtarget)) { 80 newtarget = GlobalRegExp; 81 82 // ES6 section 21.2.3.1 step 3.b 83 if (pattern_is_regexp && IS_UNDEFINED(flags) && 84 pattern.constructor === newtarget) { 85 return pattern; 86 } 87 } 88 89 if (IS_REGEXP(pattern)) { 90 if (IS_UNDEFINED(flags)) flags = PatternFlags(pattern); 91 pattern = REGEXP_SOURCE(pattern); 92 93 } else if (pattern_is_regexp) { 94 var input_pattern = pattern; 95 pattern = pattern.source; 96 if (IS_UNDEFINED(flags)) flags = input_pattern.flags; 97 } 98 99 var object = %NewObject(GlobalRegExp, newtarget); 100 return RegExpInitialize(object, pattern, flags); 101} 102 103 104function RegExpCompileJS(pattern, flags) { 105 if (!IS_REGEXP(this)) { 106 throw MakeTypeError(kIncompatibleMethodReceiver, 107 "RegExp.prototype.compile", this); 108 } 109 110 if (IS_REGEXP(pattern)) { 111 if (!IS_UNDEFINED(flags)) throw MakeTypeError(kRegExpFlags); 112 113 flags = PatternFlags(pattern); 114 pattern = REGEXP_SOURCE(pattern); 115 } 116 117 RegExpInitialize(this, pattern, flags); 118 119 // Return undefined for compatibility with JSC. 120 // See http://crbug.com/585775 for web compat details. 121} 122 123 124function DoRegExpExec(regexp, string, index) { 125 return %_RegExpExec(regexp, string, index, RegExpLastMatchInfo); 126} 127 128 129// This is kind of performance sensitive, so we want to avoid unnecessary 130// type checks on inputs. But we also don't want to inline it several times 131// manually, so we use a macro :-) 132macro RETURN_NEW_RESULT_FROM_MATCH_INFO(MATCHINFO, STRING) 133 var numResults = NUMBER_OF_CAPTURES(MATCHINFO) >> 1; 134 var start = MATCHINFO[CAPTURE0]; 135 var end = MATCHINFO[CAPTURE1]; 136 // Calculate the substring of the first match before creating the result array 137 // to avoid an unnecessary write barrier storing the first result. 138 var first = %_SubString(STRING, start, end); 139 var result = %_RegExpConstructResult(numResults, start, STRING); 140 result[0] = first; 141 if (numResults == 1) return result; 142 var j = REGEXP_FIRST_CAPTURE + 2; 143 for (var i = 1; i < numResults; i++) { 144 start = MATCHINFO[j++]; 145 if (start != -1) { 146 end = MATCHINFO[j]; 147 result[i] = %_SubString(STRING, start, end); 148 } 149 j++; 150 } 151 return result; 152endmacro 153 154 155function RegExpExecNoTests(regexp, string, start) { 156 // Must be called with RegExp, string and positive integer as arguments. 157 var matchInfo = %_RegExpExec(regexp, string, start, RegExpLastMatchInfo); 158 if (matchInfo !== null) { 159 // ES6 21.2.5.2.2 step 18. 160 if (REGEXP_STICKY(regexp)) regexp.lastIndex = matchInfo[CAPTURE1]; 161 RETURN_NEW_RESULT_FROM_MATCH_INFO(matchInfo, string); 162 } 163 regexp.lastIndex = 0; 164 return null; 165} 166 167 168function RegExpExecJS(string) { 169 if (!IS_REGEXP(this)) { 170 throw MakeTypeError(kIncompatibleMethodReceiver, 171 'RegExp.prototype.exec', this); 172 } 173 174 string = TO_STRING(string); 175 var lastIndex = this.lastIndex; 176 177 // Conversion is required by the ES2015 specification (RegExpBuiltinExec 178 // algorithm, step 4) even if the value is discarded for non-global RegExps. 179 var i = TO_LENGTH_OR_INTEGER(lastIndex); 180 181 var updateLastIndex = REGEXP_GLOBAL(this) || REGEXP_STICKY(this); 182 if (updateLastIndex) { 183 if (i < 0 || i > string.length) { 184 this.lastIndex = 0; 185 return null; 186 } 187 } else { 188 i = 0; 189 } 190 191 // matchIndices is either null or the RegExpLastMatchInfo array. 192 var matchIndices = %_RegExpExec(this, string, i, RegExpLastMatchInfo); 193 194 if (IS_NULL(matchIndices)) { 195 this.lastIndex = 0; 196 return null; 197 } 198 199 // Successful match. 200 if (updateLastIndex) { 201 this.lastIndex = RegExpLastMatchInfo[CAPTURE1]; 202 } 203 RETURN_NEW_RESULT_FROM_MATCH_INFO(matchIndices, string); 204} 205 206 207// One-element cache for the simplified test regexp. 208var regexp_key; 209var regexp_val; 210 211// Section 15.10.6.3 doesn't actually make sense, but the intention seems to be 212// that test is defined in terms of String.prototype.exec. However, it probably 213// means the original value of String.prototype.exec, which is what everybody 214// else implements. 215function RegExpTest(string) { 216 if (!IS_REGEXP(this)) { 217 throw MakeTypeError(kIncompatibleMethodReceiver, 218 'RegExp.prototype.test', this); 219 } 220 string = TO_STRING(string); 221 222 var lastIndex = this.lastIndex; 223 224 // Conversion is required by the ES2015 specification (RegExpBuiltinExec 225 // algorithm, step 4) even if the value is discarded for non-global RegExps. 226 var i = TO_LENGTH_OR_INTEGER(lastIndex); 227 228 if (REGEXP_GLOBAL(this) || REGEXP_STICKY(this)) { 229 if (i < 0 || i > string.length) { 230 this.lastIndex = 0; 231 return false; 232 } 233 // matchIndices is either null or the RegExpLastMatchInfo array. 234 var matchIndices = %_RegExpExec(this, string, i, RegExpLastMatchInfo); 235 if (IS_NULL(matchIndices)) { 236 this.lastIndex = 0; 237 return false; 238 } 239 this.lastIndex = RegExpLastMatchInfo[CAPTURE1]; 240 return true; 241 } else { 242 // Non-global, non-sticky regexp. 243 // Remove irrelevant preceeding '.*' in a test regexp. The expression 244 // checks whether this.source starts with '.*' and that the third char is 245 // not a '?'. But see https://code.google.com/p/v8/issues/detail?id=3560 246 var regexp = this; 247 var source = REGEXP_SOURCE(regexp); 248 if (regexp.length >= 3 && 249 %_StringCharCodeAt(regexp, 0) == 46 && // '.' 250 %_StringCharCodeAt(regexp, 1) == 42 && // '*' 251 %_StringCharCodeAt(regexp, 2) != 63) { // '?' 252 regexp = TrimRegExp(regexp); 253 } 254 // matchIndices is either null or the RegExpLastMatchInfo array. 255 var matchIndices = %_RegExpExec(regexp, string, 0, RegExpLastMatchInfo); 256 if (IS_NULL(matchIndices)) { 257 this.lastIndex = 0; 258 return false; 259 } 260 return true; 261 } 262} 263 264function TrimRegExp(regexp) { 265 if (!%_ObjectEquals(regexp_key, regexp)) { 266 regexp_key = regexp; 267 regexp_val = 268 new GlobalRegExp( 269 %_SubString(REGEXP_SOURCE(regexp), 2, REGEXP_SOURCE(regexp).length), 270 (REGEXP_IGNORE_CASE(regexp) ? REGEXP_MULTILINE(regexp) ? "im" : "i" 271 : REGEXP_MULTILINE(regexp) ? "m" : "")); 272 } 273 return regexp_val; 274} 275 276 277function RegExpToString() { 278 if (!IS_REGEXP(this)) { 279 // RegExp.prototype.toString() returns '/(?:)/' as a compatibility fix; 280 // a UseCounter is incremented to track it. 281 // TODO(littledan): Remove this workaround or standardize it 282 if (this === GlobalRegExpPrototype) { 283 %IncrementUseCounter(kRegExpPrototypeToString); 284 return '/(?:)/'; 285 } 286 throw MakeTypeError(kIncompatibleMethodReceiver, 287 'RegExp.prototype.toString', this); 288 } 289 var result = '/' + REGEXP_SOURCE(this) + '/'; 290 if (REGEXP_GLOBAL(this)) result += 'g'; 291 if (REGEXP_IGNORE_CASE(this)) result += 'i'; 292 if (REGEXP_MULTILINE(this)) result += 'm'; 293 if (REGEXP_UNICODE(this)) result += 'u'; 294 if (REGEXP_STICKY(this)) result += 'y'; 295 return result; 296} 297 298 299// ES6 21.2.5.11. 300function RegExpSplit(string, limit) { 301 // TODO(yangguo): allow non-regexp receivers. 302 if (!IS_REGEXP(this)) { 303 throw MakeTypeError(kIncompatibleMethodReceiver, 304 "RegExp.prototype.@@split", this); 305 } 306 var separator = this; 307 var subject = TO_STRING(string); 308 309 limit = (IS_UNDEFINED(limit)) ? kMaxUint32 : TO_UINT32(limit); 310 var length = subject.length; 311 312 if (limit === 0) return []; 313 314 if (length === 0) { 315 if (DoRegExpExec(separator, subject, 0, 0) !== null) return []; 316 return [subject]; 317 } 318 319 var currentIndex = 0; 320 var startIndex = 0; 321 var startMatch = 0; 322 var result = new InternalArray(); 323 324 outer_loop: 325 while (true) { 326 if (startIndex === length) { 327 result[result.length] = %_SubString(subject, currentIndex, length); 328 break; 329 } 330 331 var matchInfo = DoRegExpExec(separator, subject, startIndex); 332 if (matchInfo === null || length === (startMatch = matchInfo[CAPTURE0])) { 333 result[result.length] = %_SubString(subject, currentIndex, length); 334 break; 335 } 336 var endIndex = matchInfo[CAPTURE1]; 337 338 // We ignore a zero-length match at the currentIndex. 339 if (startIndex === endIndex && endIndex === currentIndex) { 340 startIndex++; 341 continue; 342 } 343 344 result[result.length] = %_SubString(subject, currentIndex, startMatch); 345 346 if (result.length === limit) break; 347 348 var matchinfo_len = NUMBER_OF_CAPTURES(matchInfo) + REGEXP_FIRST_CAPTURE; 349 for (var i = REGEXP_FIRST_CAPTURE + 2; i < matchinfo_len; ) { 350 var start = matchInfo[i++]; 351 var end = matchInfo[i++]; 352 if (end != -1) { 353 result[result.length] = %_SubString(subject, start, end); 354 } else { 355 result[result.length] = UNDEFINED; 356 } 357 if (result.length === limit) break outer_loop; 358 } 359 360 startIndex = currentIndex = endIndex; 361 } 362 363 var array_result = []; 364 %MoveArrayContents(result, array_result); 365 return array_result; 366} 367 368 369// ES6 21.2.5.6. 370function RegExpMatch(string) { 371 // TODO(yangguo): allow non-regexp receivers. 372 if (!IS_REGEXP(this)) { 373 throw MakeTypeError(kIncompatibleMethodReceiver, 374 "RegExp.prototype.@@match", this); 375 } 376 var subject = TO_STRING(string); 377 378 if (!REGEXP_GLOBAL(this)) return RegExpExecNoTests(this, subject, 0); 379 this.lastIndex = 0; 380 var result = %StringMatch(subject, this, RegExpLastMatchInfo); 381 return result; 382} 383 384 385// ES6 21.2.5.9. 386function RegExpSearch(string) { 387 // TODO(yangguo): allow non-regexp receivers. 388 if (!IS_REGEXP(this)) { 389 throw MakeTypeError(kIncompatibleMethodReceiver, 390 "RegExp.prototype.@@search", this); 391 } 392 var match = DoRegExpExec(this, TO_STRING(string), 0); 393 if (match) return match[CAPTURE0]; 394 return -1; 395} 396 397 398// Getters for the static properties lastMatch, lastParen, leftContext, and 399// rightContext of the RegExp constructor. The properties are computed based 400// on the captures array of the last successful match and the subject string 401// of the last successful match. 402function RegExpGetLastMatch() { 403 var regExpSubject = LAST_SUBJECT(RegExpLastMatchInfo); 404 return %_SubString(regExpSubject, 405 RegExpLastMatchInfo[CAPTURE0], 406 RegExpLastMatchInfo[CAPTURE1]); 407} 408 409 410function RegExpGetLastParen() { 411 var length = NUMBER_OF_CAPTURES(RegExpLastMatchInfo); 412 if (length <= 2) return ''; // There were no captures. 413 // We match the SpiderMonkey behavior: return the substring defined by the 414 // last pair (after the first pair) of elements of the capture array even if 415 // it is empty. 416 var regExpSubject = LAST_SUBJECT(RegExpLastMatchInfo); 417 var start = RegExpLastMatchInfo[CAPTURE(length - 2)]; 418 var end = RegExpLastMatchInfo[CAPTURE(length - 1)]; 419 if (start != -1 && end != -1) { 420 return %_SubString(regExpSubject, start, end); 421 } 422 return ""; 423} 424 425 426function RegExpGetLeftContext() { 427 var start_index; 428 var subject; 429 start_index = RegExpLastMatchInfo[CAPTURE0]; 430 subject = LAST_SUBJECT(RegExpLastMatchInfo); 431 return %_SubString(subject, 0, start_index); 432} 433 434 435function RegExpGetRightContext() { 436 var start_index; 437 var subject; 438 start_index = RegExpLastMatchInfo[CAPTURE1]; 439 subject = LAST_SUBJECT(RegExpLastMatchInfo); 440 return %_SubString(subject, start_index, subject.length); 441} 442 443 444// The properties $1..$9 are the first nine capturing substrings of the last 445// successful match, or ''. The function RegExpMakeCaptureGetter will be 446// called with indices from 1 to 9. 447function RegExpMakeCaptureGetter(n) { 448 return function foo() { 449 var index = n * 2; 450 if (index >= NUMBER_OF_CAPTURES(RegExpLastMatchInfo)) return ''; 451 var matchStart = RegExpLastMatchInfo[CAPTURE(index)]; 452 var matchEnd = RegExpLastMatchInfo[CAPTURE(index + 1)]; 453 if (matchStart == -1 || matchEnd == -1) return ''; 454 return %_SubString(LAST_SUBJECT(RegExpLastMatchInfo), matchStart, matchEnd); 455 }; 456} 457 458 459// ES6 21.2.5.4. 460function RegExpGetGlobal() { 461 if (!IS_REGEXP(this)) { 462 // TODO(littledan): Remove this RegExp compat workaround 463 if (this === GlobalRegExpPrototype) { 464 return UNDEFINED; 465 } 466 throw MakeTypeError(kRegExpNonRegExp, "RegExp.prototype.global"); 467 } 468 return !!REGEXP_GLOBAL(this); 469} 470%FunctionSetName(RegExpGetGlobal, "RegExp.prototype.global"); 471%SetNativeFlag(RegExpGetGlobal); 472 473 474// ES6 21.2.5.5. 475function RegExpGetIgnoreCase() { 476 if (!IS_REGEXP(this)) { 477 // TODO(littledan): Remove this RegExp compat workaround 478 if (this === GlobalRegExpPrototype) { 479 return UNDEFINED; 480 } 481 throw MakeTypeError(kRegExpNonRegExp, "RegExp.prototype.ignoreCase"); 482 } 483 return !!REGEXP_IGNORE_CASE(this); 484} 485%FunctionSetName(RegExpGetIgnoreCase, "RegExp.prototype.ignoreCase"); 486%SetNativeFlag(RegExpGetIgnoreCase); 487 488 489// ES6 21.2.5.7. 490function RegExpGetMultiline() { 491 if (!IS_REGEXP(this)) { 492 // TODO(littledan): Remove this RegExp compat workaround 493 if (this === GlobalRegExpPrototype) { 494 return UNDEFINED; 495 } 496 throw MakeTypeError(kRegExpNonRegExp, "RegExp.prototype.multiline"); 497 } 498 return !!REGEXP_MULTILINE(this); 499} 500%FunctionSetName(RegExpGetMultiline, "RegExp.prototype.multiline"); 501%SetNativeFlag(RegExpGetMultiline); 502 503 504// ES6 21.2.5.10. 505function RegExpGetSource() { 506 if (!IS_REGEXP(this)) { 507 // TODO(littledan): Remove this RegExp compat workaround 508 if (this === GlobalRegExpPrototype) { 509 return UNDEFINED; 510 } 511 throw MakeTypeError(kRegExpNonRegExp, "RegExp.prototype.source"); 512 } 513 return REGEXP_SOURCE(this); 514} 515%FunctionSetName(RegExpGetSource, "RegExp.prototype.source"); 516%SetNativeFlag(RegExpGetSource); 517 518// ------------------------------------------------------------------- 519 520%FunctionSetInstanceClassName(GlobalRegExp, 'RegExp'); 521GlobalRegExpPrototype = new GlobalObject(); 522%FunctionSetPrototype(GlobalRegExp, GlobalRegExpPrototype); 523%AddNamedProperty( 524 GlobalRegExp.prototype, 'constructor', GlobalRegExp, DONT_ENUM); 525%SetCode(GlobalRegExp, RegExpConstructor); 526 527utils.InstallFunctions(GlobalRegExp.prototype, DONT_ENUM, [ 528 "exec", RegExpExecJS, 529 "test", RegExpTest, 530 "toString", RegExpToString, 531 "compile", RegExpCompileJS, 532 matchSymbol, RegExpMatch, 533 searchSymbol, RegExpSearch, 534 splitSymbol, RegExpSplit, 535]); 536 537utils.InstallGetter(GlobalRegExp.prototype, 'global', RegExpGetGlobal); 538utils.InstallGetter(GlobalRegExp.prototype, 'ignoreCase', RegExpGetIgnoreCase); 539utils.InstallGetter(GlobalRegExp.prototype, 'multiline', RegExpGetMultiline); 540utils.InstallGetter(GlobalRegExp.prototype, 'source', RegExpGetSource); 541 542// The length of compile is 1 in SpiderMonkey. 543%FunctionSetLength(GlobalRegExp.prototype.compile, 1); 544 545// The properties `input` and `$_` are aliases for each other. When this 546// value is set the value it is set to is coerced to a string. 547// Getter and setter for the input. 548var RegExpGetInput = function() { 549 var regExpInput = LAST_INPUT(RegExpLastMatchInfo); 550 return IS_UNDEFINED(regExpInput) ? "" : regExpInput; 551}; 552var RegExpSetInput = function(string) { 553 LAST_INPUT(RegExpLastMatchInfo) = TO_STRING(string); 554}; 555 556%OptimizeObjectForAddingMultipleProperties(GlobalRegExp, 22); 557utils.InstallGetterSetter(GlobalRegExp, 'input', RegExpGetInput, RegExpSetInput, 558 DONT_DELETE); 559utils.InstallGetterSetter(GlobalRegExp, '$_', RegExpGetInput, RegExpSetInput, 560 DONT_ENUM | DONT_DELETE); 561 562 563var NoOpSetter = function(ignored) {}; 564 565 566// Static properties set by a successful match. 567utils.InstallGetterSetter(GlobalRegExp, 'lastMatch', RegExpGetLastMatch, 568 NoOpSetter, DONT_DELETE); 569utils.InstallGetterSetter(GlobalRegExp, '$&', RegExpGetLastMatch, NoOpSetter, 570 DONT_ENUM | DONT_DELETE); 571utils.InstallGetterSetter(GlobalRegExp, 'lastParen', RegExpGetLastParen, 572 NoOpSetter, DONT_DELETE); 573utils.InstallGetterSetter(GlobalRegExp, '$+', RegExpGetLastParen, NoOpSetter, 574 DONT_ENUM | DONT_DELETE); 575utils.InstallGetterSetter(GlobalRegExp, 'leftContext', RegExpGetLeftContext, 576 NoOpSetter, DONT_DELETE); 577utils.InstallGetterSetter(GlobalRegExp, '$`', RegExpGetLeftContext, NoOpSetter, 578 DONT_ENUM | DONT_DELETE); 579utils.InstallGetterSetter(GlobalRegExp, 'rightContext', RegExpGetRightContext, 580 NoOpSetter, DONT_DELETE); 581utils.InstallGetterSetter(GlobalRegExp, "$'", RegExpGetRightContext, NoOpSetter, 582 DONT_ENUM | DONT_DELETE); 583 584for (var i = 1; i < 10; ++i) { 585 utils.InstallGetterSetter(GlobalRegExp, '$' + i, RegExpMakeCaptureGetter(i), 586 NoOpSetter, DONT_DELETE); 587} 588%ToFastProperties(GlobalRegExp); 589 590// ------------------------------------------------------------------- 591// Exports 592 593utils.Export(function(to) { 594 to.RegExpExec = DoRegExpExec; 595 to.RegExpExecNoTests = RegExpExecNoTests; 596 to.RegExpLastMatchInfo = RegExpLastMatchInfo; 597 to.RegExpTest = RegExpTest; 598}); 599 600}) 601