1// Copyright 2012 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5(function(global, utils) {
6
7%CheckIsBootstrapping();
8
9// -------------------------------------------------------------------
10// Imports
11
12var FLAG_harmony_tolength;
13var GlobalObject = global.Object;
14var GlobalRegExp = global.RegExp;
15var GlobalRegExpPrototype;
16var InternalArray = utils.InternalArray;
17var InternalPackedArray = utils.InternalPackedArray;
18var MakeTypeError;
19var matchSymbol = utils.ImportNow("match_symbol");
20var searchSymbol = utils.ImportNow("search_symbol");
21var splitSymbol = utils.ImportNow("split_symbol");
22
23utils.ImportFromExperimental(function(from) {
24  FLAG_harmony_tolength = from.FLAG_harmony_tolength;
25});
26
27utils.Import(function(from) {
28  MakeTypeError = from.MakeTypeError;
29});
30
31// -------------------------------------------------------------------
32
// Property of the builtins object for recording the result of the last
// regexp match.  The property RegExpLastMatchInfo includes the matchIndices
// array of the last successful regexp match (an array of start/end index
// pairs for the match and all the captured substrings); the invariant is
// that there are at least two capture indices.  The array also contains
// the subject string for the last successful match.
//
// Slot layout, as initialized here: the number of capture indices, the last
// subject, the last input, and then the start/end index pairs beginning at
// REGEXP_FIRST_CAPTURE.
var RegExpLastMatchInfo = new InternalPackedArray(
 2,                 // REGEXP_NUMBER_OF_CAPTURES
 "",                // Last subject.
 UNDEFINED,         // Last input - settable with RegExpSetInput.
 0,                 // REGEXP_FIRST_CAPTURE + 0
 0                  // REGEXP_FIRST_CAPTURE + 1
);
46
47// -------------------------------------------------------------------
48
// Decides whether |o| should be treated as a regular expression.  Anything
// that is not a receiver is rejected outright.  For a receiver, a defined
// @@match property answers through its truthiness; when that property is
// undefined the answer is whether |o| is an actual RegExp object.
function IsRegExp(o) {
  if (IS_RECEIVER(o)) {
    var matcher = o[matchSymbol];
    return IS_UNDEFINED(matcher) ? IS_REGEXP(o) : TO_BOOLEAN(matcher);
  }
  return false;
}
55
56
57// ES6 section 21.2.3.2.2
58function RegExpInitialize(object, pattern, flags) {
59  pattern = IS_UNDEFINED(pattern) ? '' : TO_STRING(pattern);
60  flags = IS_UNDEFINED(flags) ? '' : TO_STRING(flags);
61  %RegExpInitializeAndCompile(object, pattern, flags);
62  return object;
63}
64
65
// Builds the flag string of the regexp |pattern|: one character for each
// flag that is set, always in the order "g", "i", "m", "u", "y".
function PatternFlags(pattern) {
  var flags = '';
  if (REGEXP_GLOBAL(pattern)) flags += 'g';
  if (REGEXP_IGNORE_CASE(pattern)) flags += 'i';
  if (REGEXP_MULTILINE(pattern)) flags += 'm';
  if (REGEXP_UNICODE(pattern)) flags += 'u';
  if (REGEXP_STICKY(pattern)) flags += 'y';
  return flags;
}
73
74
75function RegExpConstructor(pattern, flags) {
76  var newtarget = new.target;
77  var pattern_is_regexp = IsRegExp(pattern);
78
79  if (IS_UNDEFINED(newtarget)) {
80    newtarget = GlobalRegExp;
81
82    // ES6 section 21.2.3.1 step 3.b
83    if (pattern_is_regexp && IS_UNDEFINED(flags) &&
84        pattern.constructor === newtarget) {
85      return pattern;
86    }
87  }
88
89  if (IS_REGEXP(pattern)) {
90    if (IS_UNDEFINED(flags)) flags = PatternFlags(pattern);
91    pattern = REGEXP_SOURCE(pattern);
92
93  } else if (pattern_is_regexp) {
94    var input_pattern = pattern;
95    pattern = pattern.source;
96    if (IS_UNDEFINED(flags)) flags = input_pattern.flags;
97  }
98
99  var object = %NewObject(GlobalRegExp, newtarget);
100  return RegExpInitialize(object, pattern, flags);
101}
102
103
// RegExp.prototype.compile: re-initializes the receiver in place.
// Throws a TypeError if the receiver is not a RegExp, or if |pattern| is a
// RegExp and |flags| is also given.  When |pattern| is a RegExp its source
// and flags are copied.
function RegExpCompileJS(pattern, flags) {
  if (!IS_REGEXP(this)) {
    throw MakeTypeError(kIncompatibleMethodReceiver,
                        "RegExp.prototype.compile", this);
  }

  var source = pattern;
  var flagString = flags;

  if (IS_REGEXP(pattern)) {
    if (!IS_UNDEFINED(flags)) throw MakeTypeError(kRegExpFlags);

    source = REGEXP_SOURCE(pattern);
    flagString = PatternFlags(pattern);
  }

  RegExpInitialize(this, source, flagString);

  // Return undefined for compatibility with JSC.
  // See http://crbug.com/585775 for web compat details.
}
122
123
124function DoRegExpExec(regexp, string, index) {
125  return %_RegExpExec(regexp, string, index, RegExpLastMatchInfo);
126}
127
128
// This is kind of performance sensitive, so we want to avoid unnecessary
// type checks on inputs. But we also don't want to inline it several times
// manually, so we use a macro :-)
//
// Expands to statements that build the exec-style result from MATCHINFO and
// STRING and then return it from the enclosing function; every path through
// the expansion ends in a return.  The result holds one entry per capture
// pair (NUMBER_OF_CAPTURES / 2); entry 0 is the whole match.  For a capture
// whose start index is -1 no entry is written by this code.
// The expansion declares the locals numResults, start, end, first, result,
// j and i in the enclosing function.
macro RETURN_NEW_RESULT_FROM_MATCH_INFO(MATCHINFO, STRING)
  var numResults = NUMBER_OF_CAPTURES(MATCHINFO) >> 1;
  var start = MATCHINFO[CAPTURE0];
  var end = MATCHINFO[CAPTURE1];
  // Calculate the substring of the first match before creating the result array
  // to avoid an unnecessary write barrier storing the first result.
  var first = %_SubString(STRING, start, end);
  var result = %_RegExpConstructResult(numResults, start, STRING);
  result[0] = first;
  if (numResults == 1) return result;
  var j = REGEXP_FIRST_CAPTURE + 2;
  for (var i = 1; i < numResults; i++) {
    start = MATCHINFO[j++];
    if (start != -1) {
      end = MATCHINFO[j];
      result[i] = %_SubString(STRING, start, end);
    }
    j++;
  }
  return result;
endmacro
153
154
155function RegExpExecNoTests(regexp, string, start) {
156  // Must be called with RegExp, string and positive integer as arguments.
157  var matchInfo = %_RegExpExec(regexp, string, start, RegExpLastMatchInfo);
158  if (matchInfo !== null) {
159    // ES6 21.2.5.2.2 step 18.
160    if (REGEXP_STICKY(regexp)) regexp.lastIndex = matchInfo[CAPTURE1];
161    RETURN_NEW_RESULT_FROM_MATCH_INFO(matchInfo, string);
162  }
163  regexp.lastIndex = 0;
164  return null;
165}
166
167
168function RegExpExecJS(string) {
169  if (!IS_REGEXP(this)) {
170    throw MakeTypeError(kIncompatibleMethodReceiver,
171                        'RegExp.prototype.exec', this);
172  }
173
174  string = TO_STRING(string);
175  var lastIndex = this.lastIndex;
176
177  // Conversion is required by the ES2015 specification (RegExpBuiltinExec
178  // algorithm, step 4) even if the value is discarded for non-global RegExps.
179  var i = TO_LENGTH_OR_INTEGER(lastIndex);
180
181  var updateLastIndex = REGEXP_GLOBAL(this) || REGEXP_STICKY(this);
182  if (updateLastIndex) {
183    if (i < 0 || i > string.length) {
184      this.lastIndex = 0;
185      return null;
186    }
187  } else {
188    i = 0;
189  }
190
191  // matchIndices is either null or the RegExpLastMatchInfo array.
192  var matchIndices = %_RegExpExec(this, string, i, RegExpLastMatchInfo);
193
194  if (IS_NULL(matchIndices)) {
195    this.lastIndex = 0;
196    return null;
197  }
198
199  // Successful match.
200  if (updateLastIndex) {
201    this.lastIndex = RegExpLastMatchInfo[CAPTURE1];
202  }
203  RETURN_NEW_RESULT_FROM_MATCH_INFO(matchIndices, string);
204}
205
206
207// One-element cache for the simplified test regexp.
208var regexp_key;
209var regexp_val;
210
211// Section 15.10.6.3 doesn't actually make sense, but the intention seems to be
212// that test is defined in terms of String.prototype.exec. However, it probably
213// means the original value of String.prototype.exec, which is what everybody
214// else implements.
215function RegExpTest(string) {
216  if (!IS_REGEXP(this)) {
217    throw MakeTypeError(kIncompatibleMethodReceiver,
218                        'RegExp.prototype.test', this);
219  }
220  string = TO_STRING(string);
221
222  var lastIndex = this.lastIndex;
223
224  // Conversion is required by the ES2015 specification (RegExpBuiltinExec
225  // algorithm, step 4) even if the value is discarded for non-global RegExps.
226  var i = TO_LENGTH_OR_INTEGER(lastIndex);
227
228  if (REGEXP_GLOBAL(this) || REGEXP_STICKY(this)) {
229    if (i < 0 || i > string.length) {
230      this.lastIndex = 0;
231      return false;
232    }
233    // matchIndices is either null or the RegExpLastMatchInfo array.
234    var matchIndices = %_RegExpExec(this, string, i, RegExpLastMatchInfo);
235    if (IS_NULL(matchIndices)) {
236      this.lastIndex = 0;
237      return false;
238    }
239    this.lastIndex = RegExpLastMatchInfo[CAPTURE1];
240    return true;
241  } else {
242    // Non-global, non-sticky regexp.
243    // Remove irrelevant preceeding '.*' in a test regexp.  The expression
244    // checks whether this.source starts with '.*' and that the third char is
245    // not a '?'.  But see https://code.google.com/p/v8/issues/detail?id=3560
246    var regexp = this;
247    var source = REGEXP_SOURCE(regexp);
248    if (regexp.length >= 3 &&
249        %_StringCharCodeAt(regexp, 0) == 46 &&  // '.'
250        %_StringCharCodeAt(regexp, 1) == 42 &&  // '*'
251        %_StringCharCodeAt(regexp, 2) != 63) {  // '?'
252      regexp = TrimRegExp(regexp);
253    }
254    // matchIndices is either null or the RegExpLastMatchInfo array.
255    var matchIndices = %_RegExpExec(regexp, string, 0, RegExpLastMatchInfo);
256    if (IS_NULL(matchIndices)) {
257      this.lastIndex = 0;
258      return false;
259    }
260    return true;
261  }
262}
263
264function TrimRegExp(regexp) {
265  if (!%_ObjectEquals(regexp_key, regexp)) {
266    regexp_key = regexp;
267    regexp_val =
268      new GlobalRegExp(
269          %_SubString(REGEXP_SOURCE(regexp), 2, REGEXP_SOURCE(regexp).length),
270          (REGEXP_IGNORE_CASE(regexp) ? REGEXP_MULTILINE(regexp) ? "im" : "i"
271                                      : REGEXP_MULTILINE(regexp) ? "m" : ""));
272  }
273  return regexp_val;
274}
275
276
277function RegExpToString() {
278  if (!IS_REGEXP(this)) {
279    // RegExp.prototype.toString() returns '/(?:)/' as a compatibility fix;
280    // a UseCounter is incremented to track it.
281    // TODO(littledan): Remove this workaround or standardize it
282    if (this === GlobalRegExpPrototype) {
283      %IncrementUseCounter(kRegExpPrototypeToString);
284      return '/(?:)/';
285    }
286    throw MakeTypeError(kIncompatibleMethodReceiver,
287                        'RegExp.prototype.toString', this);
288  }
289  var result = '/' + REGEXP_SOURCE(this) + '/';
290  if (REGEXP_GLOBAL(this)) result += 'g';
291  if (REGEXP_IGNORE_CASE(this)) result += 'i';
292  if (REGEXP_MULTILINE(this)) result += 'm';
293  if (REGEXP_UNICODE(this)) result += 'u';
294  if (REGEXP_STICKY(this)) result += 'y';
295  return result;
296}
297
298
299// ES6 21.2.5.11.
300function RegExpSplit(string, limit) {
301  // TODO(yangguo): allow non-regexp receivers.
302  if (!IS_REGEXP(this)) {
303    throw MakeTypeError(kIncompatibleMethodReceiver,
304                        "RegExp.prototype.@@split", this);
305  }
306  var separator = this;
307  var subject = TO_STRING(string);
308
309  limit = (IS_UNDEFINED(limit)) ? kMaxUint32 : TO_UINT32(limit);
310  var length = subject.length;
311
312  if (limit === 0) return [];
313
314  if (length === 0) {
315    if (DoRegExpExec(separator, subject, 0, 0) !== null) return [];
316    return [subject];
317  }
318
319  var currentIndex = 0;
320  var startIndex = 0;
321  var startMatch = 0;
322  var result = new InternalArray();
323
324  outer_loop:
325  while (true) {
326    if (startIndex === length) {
327      result[result.length] = %_SubString(subject, currentIndex, length);
328      break;
329    }
330
331    var matchInfo = DoRegExpExec(separator, subject, startIndex);
332    if (matchInfo === null || length === (startMatch = matchInfo[CAPTURE0])) {
333      result[result.length] = %_SubString(subject, currentIndex, length);
334      break;
335    }
336    var endIndex = matchInfo[CAPTURE1];
337
338    // We ignore a zero-length match at the currentIndex.
339    if (startIndex === endIndex && endIndex === currentIndex) {
340      startIndex++;
341      continue;
342    }
343
344    result[result.length] = %_SubString(subject, currentIndex, startMatch);
345
346    if (result.length === limit) break;
347
348    var matchinfo_len = NUMBER_OF_CAPTURES(matchInfo) + REGEXP_FIRST_CAPTURE;
349    for (var i = REGEXP_FIRST_CAPTURE + 2; i < matchinfo_len; ) {
350      var start = matchInfo[i++];
351      var end = matchInfo[i++];
352      if (end != -1) {
353        result[result.length] = %_SubString(subject, start, end);
354      } else {
355        result[result.length] = UNDEFINED;
356      }
357      if (result.length === limit) break outer_loop;
358    }
359
360    startIndex = currentIndex = endIndex;
361  }
362
363  var array_result = [];
364  %MoveArrayContents(result, array_result);
365  return array_result;
366}
367
368
369// ES6 21.2.5.6.
370function RegExpMatch(string) {
371  // TODO(yangguo): allow non-regexp receivers.
372  if (!IS_REGEXP(this)) {
373    throw MakeTypeError(kIncompatibleMethodReceiver,
374                        "RegExp.prototype.@@match", this);
375  }
376  var subject = TO_STRING(string);
377
378  if (!REGEXP_GLOBAL(this)) return RegExpExecNoTests(this, subject, 0);
379  this.lastIndex = 0;
380  var result = %StringMatch(subject, this, RegExpLastMatchInfo);
381  return result;
382}
383
384
// ES6 21.2.5.9.
// Returns the start index of the first match of the receiver in |string|,
// searching from index 0, or -1 if there is no match.
function RegExpSearch(string) {
  // TODO(yangguo): allow non-regexp receivers.
  if (!IS_REGEXP(this)) {
    throw MakeTypeError(kIncompatibleMethodReceiver,
                        "RegExp.prototype.@@search", this);
  }
  var matchInfo = DoRegExpExec(this, TO_STRING(string), 0);
  return matchInfo ? matchInfo[CAPTURE0] : -1;
}
396
397
398// Getters for the static properties lastMatch, lastParen, leftContext, and
399// rightContext of the RegExp constructor.  The properties are computed based
400// on the captures array of the last successful match and the subject string
401// of the last successful match.
402function RegExpGetLastMatch() {
403  var regExpSubject = LAST_SUBJECT(RegExpLastMatchInfo);
404  return %_SubString(regExpSubject,
405                     RegExpLastMatchInfo[CAPTURE0],
406                     RegExpLastMatchInfo[CAPTURE1]);
407}
408
409
410function RegExpGetLastParen() {
411  var length = NUMBER_OF_CAPTURES(RegExpLastMatchInfo);
412  if (length <= 2) return '';  // There were no captures.
413  // We match the SpiderMonkey behavior: return the substring defined by the
414  // last pair (after the first pair) of elements of the capture array even if
415  // it is empty.
416  var regExpSubject = LAST_SUBJECT(RegExpLastMatchInfo);
417  var start = RegExpLastMatchInfo[CAPTURE(length - 2)];
418  var end = RegExpLastMatchInfo[CAPTURE(length - 1)];
419  if (start != -1 && end != -1) {
420    return %_SubString(regExpSubject, start, end);
421  }
422  return "";
423}
424
425
426function RegExpGetLeftContext() {
427  var start_index;
428  var subject;
429  start_index = RegExpLastMatchInfo[CAPTURE0];
430  subject = LAST_SUBJECT(RegExpLastMatchInfo);
431  return %_SubString(subject, 0, start_index);
432}
433
434
435function RegExpGetRightContext() {
436  var start_index;
437  var subject;
438  start_index = RegExpLastMatchInfo[CAPTURE1];
439  subject = LAST_SUBJECT(RegExpLastMatchInfo);
440  return %_SubString(subject, start_index, subject.length);
441}
442
443
444// The properties $1..$9 are the first nine capturing substrings of the last
445// successful match, or ''.  The function RegExpMakeCaptureGetter will be
446// called with indices from 1 to 9.
447function RegExpMakeCaptureGetter(n) {
448  return function foo() {
449    var index = n * 2;
450    if (index >= NUMBER_OF_CAPTURES(RegExpLastMatchInfo)) return '';
451    var matchStart = RegExpLastMatchInfo[CAPTURE(index)];
452    var matchEnd = RegExpLastMatchInfo[CAPTURE(index + 1)];
453    if (matchStart == -1 || matchEnd == -1) return '';
454    return %_SubString(LAST_SUBJECT(RegExpLastMatchInfo), matchStart, matchEnd);
455  };
456}
457
458
459// ES6 21.2.5.4.
460function RegExpGetGlobal() {
461  if (!IS_REGEXP(this)) {
462    // TODO(littledan): Remove this RegExp compat workaround
463    if (this === GlobalRegExpPrototype) {
464      return UNDEFINED;
465    }
466    throw MakeTypeError(kRegExpNonRegExp, "RegExp.prototype.global");
467  }
468  return !!REGEXP_GLOBAL(this);
469}
470%FunctionSetName(RegExpGetGlobal, "RegExp.prototype.global");
471%SetNativeFlag(RegExpGetGlobal);
472
473
474// ES6 21.2.5.5.
475function RegExpGetIgnoreCase() {
476  if (!IS_REGEXP(this)) {
477    // TODO(littledan): Remove this RegExp compat workaround
478    if (this === GlobalRegExpPrototype) {
479      return UNDEFINED;
480    }
481    throw MakeTypeError(kRegExpNonRegExp, "RegExp.prototype.ignoreCase");
482  }
483  return !!REGEXP_IGNORE_CASE(this);
484}
485%FunctionSetName(RegExpGetIgnoreCase, "RegExp.prototype.ignoreCase");
486%SetNativeFlag(RegExpGetIgnoreCase);
487
488
489// ES6 21.2.5.7.
490function RegExpGetMultiline() {
491  if (!IS_REGEXP(this)) {
492    // TODO(littledan): Remove this RegExp compat workaround
493    if (this === GlobalRegExpPrototype) {
494      return UNDEFINED;
495    }
496    throw MakeTypeError(kRegExpNonRegExp, "RegExp.prototype.multiline");
497  }
498  return !!REGEXP_MULTILINE(this);
499}
500%FunctionSetName(RegExpGetMultiline, "RegExp.prototype.multiline");
501%SetNativeFlag(RegExpGetMultiline);
502
503
504// ES6 21.2.5.10.
505function RegExpGetSource() {
506  if (!IS_REGEXP(this)) {
507    // TODO(littledan): Remove this RegExp compat workaround
508    if (this === GlobalRegExpPrototype) {
509      return UNDEFINED;
510    }
511    throw MakeTypeError(kRegExpNonRegExp, "RegExp.prototype.source");
512  }
513  return REGEXP_SOURCE(this);
514}
515%FunctionSetName(RegExpGetSource, "RegExp.prototype.source");
516%SetNativeFlag(RegExpGetSource);
517
518// -------------------------------------------------------------------
519
// Set up the RegExp function: give it a fresh plain object as prototype,
// point that prototype's 'constructor' back at RegExp, and install
// RegExpConstructor as the code of the function.
%FunctionSetInstanceClassName(GlobalRegExp, 'RegExp');
GlobalRegExpPrototype = new GlobalObject();
%FunctionSetPrototype(GlobalRegExp, GlobalRegExpPrototype);
%AddNamedProperty(
    GlobalRegExp.prototype, 'constructor', GlobalRegExp, DONT_ENUM);
%SetCode(GlobalRegExp, RegExpConstructor);

// Methods of RegExp.prototype, keyed by name or by well-known symbol.
utils.InstallFunctions(GlobalRegExp.prototype, DONT_ENUM, [
  "exec", RegExpExecJS,
  "test", RegExpTest,
  "toString", RegExpToString,
  "compile", RegExpCompileJS,
  matchSymbol, RegExpMatch,
  searchSymbol, RegExpSearch,
  splitSymbol, RegExpSplit,
]);

// Accessors of RegExp.prototype; each getter is defined above.
utils.InstallGetter(GlobalRegExp.prototype, 'global', RegExpGetGlobal);
utils.InstallGetter(GlobalRegExp.prototype, 'ignoreCase', RegExpGetIgnoreCase);
utils.InstallGetter(GlobalRegExp.prototype, 'multiline', RegExpGetMultiline);
utils.InstallGetter(GlobalRegExp.prototype, 'source', RegExpGetSource);

// The length of compile is 1 in SpiderMonkey.
%FunctionSetLength(GlobalRegExp.prototype.compile, 1);
544
// The properties `input` and `$_` are aliases for each other.  When this
// value is set the value it is set to is coerced to a string.
// Getter and setter for the input.
var RegExpGetInput = function() {
  var lastInput = LAST_INPUT(RegExpLastMatchInfo);
  // An input that was never set reads as the empty string.
  if (IS_UNDEFINED(lastInput)) return "";
  return lastInput;
};
var RegExpSetInput = function(string) {
  var coerced = TO_STRING(string);
  LAST_INPUT(RegExpLastMatchInfo) = coerced;
};
555
// Install the static accessor properties on the RegExp function.
// NOTE(review): the statements below install 19 properties (10 named ones
// plus $1..$9) while the count passed here is 22 - confirm whether the value
// should be updated.
%OptimizeObjectForAddingMultipleProperties(GlobalRegExp, 22);
utils.InstallGetterSetter(GlobalRegExp, 'input', RegExpGetInput, RegExpSetInput,
                          DONT_DELETE);
utils.InstallGetterSetter(GlobalRegExp, '$_', RegExpGetInput, RegExpSetInput,
                          DONT_ENUM | DONT_DELETE);


// Setter that ignores the value; used for all properties below, which are
// derived from the last match and cannot be assigned.
var NoOpSetter = function(ignored) {};


// Static properties set by a successful match.
// Each long name is installed with DONT_DELETE and its short `$` alias with
// DONT_ENUM | DONT_DELETE.
utils.InstallGetterSetter(GlobalRegExp, 'lastMatch', RegExpGetLastMatch,
                          NoOpSetter, DONT_DELETE);
utils.InstallGetterSetter(GlobalRegExp, '$&', RegExpGetLastMatch, NoOpSetter,
                          DONT_ENUM | DONT_DELETE);
utils.InstallGetterSetter(GlobalRegExp, 'lastParen', RegExpGetLastParen,
                          NoOpSetter, DONT_DELETE);
utils.InstallGetterSetter(GlobalRegExp, '$+', RegExpGetLastParen, NoOpSetter,
                          DONT_ENUM | DONT_DELETE);
utils.InstallGetterSetter(GlobalRegExp, 'leftContext', RegExpGetLeftContext,
                          NoOpSetter, DONT_DELETE);
utils.InstallGetterSetter(GlobalRegExp, '$`', RegExpGetLeftContext, NoOpSetter,
                          DONT_ENUM | DONT_DELETE);
utils.InstallGetterSetter(GlobalRegExp, 'rightContext', RegExpGetRightContext,
                          NoOpSetter, DONT_DELETE);
utils.InstallGetterSetter(GlobalRegExp, "$'", RegExpGetRightContext, NoOpSetter,
                          DONT_ENUM | DONT_DELETE);

// $1 through $9, each with its own getter for that capture group.
for (var i = 1; i < 10; ++i) {
  utils.InstallGetterSetter(GlobalRegExp, '$' + i, RegExpMakeCaptureGetter(i),
                            NoOpSetter, DONT_DELETE);
}
%ToFastProperties(GlobalRegExp);
589
590// -------------------------------------------------------------------
591// Exports
592
// Make the regexp helpers and the shared last-match info array available to
// other natives files.  Note that DoRegExpExec is exported under the name
// RegExpExec.
utils.Export(function(to) {
  to.RegExpExec = DoRegExpExec;
  to.RegExpExecNoTests = RegExpExecNoTests;
  to.RegExpLastMatchInfo = RegExpLastMatchInfo;
  to.RegExpTest = RegExpTest;
});
599
600})
601