1// Copyright 2012 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// This file relies on the fact that the following declaration has been made
6// in runtime.js:
7// var $Object = global.Object;
8// var $Array = global.Array;
9
// Alias for the RegExp constructor of the global object, following the same
// $-prefix convention as the $Object and $Array declarations mentioned above.
var $RegExp = global.RegExp;
11
12// -------------------------------------------------------------------
13
14// A recursive descent parser for Patterns according to the grammar of
15// ECMA-262 15.10.1, with deviations noted below.
16function DoConstructRegExp(object, pattern, flags) {
17  // RegExp : Called as constructor; see ECMA-262, section 15.10.4.
18  if (IS_REGEXP(pattern)) {
19    if (!IS_UNDEFINED(flags)) {
20      throw MakeTypeError('regexp_flags', []);
21    }
22    flags = (pattern.global ? 'g' : '')
23        + (pattern.ignoreCase ? 'i' : '')
24        + (pattern.multiline ? 'm' : '');
25    if (harmony_regexps)
26        flags += (pattern.sticky ? 'y' : '');
27    pattern = pattern.source;
28  }
29
30  pattern = IS_UNDEFINED(pattern) ? '' : ToString(pattern);
31  flags = IS_UNDEFINED(flags) ? '' : ToString(flags);
32
33  var global = false;
34  var ignoreCase = false;
35  var multiline = false;
36  var sticky = false;
37  for (var i = 0; i < flags.length; i++) {
38    var c = %_CallFunction(flags, i, StringCharAt);
39    switch (c) {
40      case 'g':
41        if (global) {
42          throw MakeSyntaxError("invalid_regexp_flags", [flags]);
43        }
44        global = true;
45        break;
46      case 'i':
47        if (ignoreCase) {
48          throw MakeSyntaxError("invalid_regexp_flags", [flags]);
49        }
50        ignoreCase = true;
51        break;
52      case 'm':
53        if (multiline) {
54          throw MakeSyntaxError("invalid_regexp_flags", [flags]);
55        }
56        multiline = true;
57        break;
58      case 'y':
59        if (!harmony_regexps || sticky) {
60          throw MakeSyntaxError("invalid_regexp_flags", [flags]);
61        }
62        sticky = true;
63        break;
64      default:
65        throw MakeSyntaxError("invalid_regexp_flags", [flags]);
66    }
67  }
68
69  %RegExpInitializeObject(object, pattern, global, ignoreCase, multiline, sticky);
70
71  // Call internal function to compile the pattern.
72  %RegExpCompile(object, pattern, flags);
73}
74
75
76function RegExpConstructor(pattern, flags) {
77  if (%_IsConstructCall()) {
78    DoConstructRegExp(this, pattern, flags);
79  } else {
80    // RegExp : Called as function; see ECMA-262, section 15.10.3.1.
81    if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) {
82      return pattern;
83    }
84    return new $RegExp(pattern, flags);
85  }
86}
87
88// Deprecated RegExp.prototype.compile method.  We behave like the constructor
89// were called again.  In SpiderMonkey, this method returns the regexp object.
90// In JSC, it returns undefined.  For compatibility with JSC, we match their
91// behavior.
92function RegExpCompileJS(pattern, flags) {
93  // Both JSC and SpiderMonkey treat a missing pattern argument as the
94  // empty subject string, and an actual undefined value passed as the
95  // pattern as the string 'undefined'.  Note that JSC is inconsistent
96  // here, treating undefined values differently in
97  // RegExp.prototype.compile and in the constructor, where they are
98  // the empty string.  For compatibility with JSC, we match their
99  // behavior.
100  if (this == $RegExp.prototype) {
101    // We don't allow recompiling RegExp.prototype.
102    throw MakeTypeError('incompatible_method_receiver',
103                        ['RegExp.prototype.compile', this]);
104  }
105  if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) {
106    DoConstructRegExp(this, 'undefined', flags);
107  } else {
108    DoConstructRegExp(this, pattern, flags);
109  }
110}
111
112
113function DoRegExpExec(regexp, string, index) {
114  var result = %_RegExpExec(regexp, string, index, lastMatchInfo);
115  if (result !== null) lastMatchInfoOverride = null;
116  return result;
117}
118
119
// This is kind of performance sensitive, so we want to avoid unnecessary
// type checks on inputs. But we also don't want to inline it several times
// manually, so we use a macro :-)
//
// Builds the result array for a successful match and returns it from the
// function in which it is used.  MATCHINFO is the match info array (capture
// slot count plus start/end index pairs); STRING is the subject string.
// Element 0 of the result is the whole match; element k is only written when
// the start index of capture k is not -1.
// NOTE(review): presumably the body is pasted textually into the caller, so
// its var names (numResults, start, end, first, result, i, j) share the
// caller's scope -- confirm before renaming anything here or in callers.
macro RETURN_NEW_RESULT_FROM_MATCH_INFO(MATCHINFO, STRING)
  // Each capture takes two slots (start and end), hence the halving.
  var numResults = NUMBER_OF_CAPTURES(MATCHINFO) >> 1;
  var start = MATCHINFO[CAPTURE0];
  var end = MATCHINFO[CAPTURE1];
  // Calculate the substring of the first match before creating the result array
  // to avoid an unnecessary write barrier storing the first result.
  var first = %_SubString(STRING, start, end);
  var result = %_RegExpConstructResult(numResults, start, STRING);
  result[0] = first;
  if (numResults == 1) return result;
  // j indexes the start/end slots of the captures after the whole match.
  var j = REGEXP_FIRST_CAPTURE + 2;
  for (var i = 1; i < numResults; i++) {
    start = MATCHINFO[j++];
    // A start index of -1 means no substring is stored for this capture.
    if (start != -1) {
      end = MATCHINFO[j];
      result[i] = %_SubString(STRING, start, end);
    }
    // Step over the end slot whether or not it was read.
    j++;
  }
  return result;
endmacro
144
145
146function RegExpExecNoTests(regexp, string, start) {
147  // Must be called with RegExp, string and positive integer as arguments.
148  var matchInfo = %_RegExpExec(regexp, string, start, lastMatchInfo);
149  if (matchInfo !== null) {
150    lastMatchInfoOverride = null;
151    RETURN_NEW_RESULT_FROM_MATCH_INFO(matchInfo, string);
152  }
153  regexp.lastIndex = 0;
154  return null;
155}
156
157
158function RegExpExec(string) {
159  if (!IS_REGEXP(this)) {
160    throw MakeTypeError('incompatible_method_receiver',
161                        ['RegExp.prototype.exec', this]);
162  }
163
164  string = TO_STRING_INLINE(string);
165  var lastIndex = this.lastIndex;
166
167  // Conversion is required by the ES5 specification (RegExp.prototype.exec
168  // algorithm, step 5) even if the value is discarded for non-global RegExps.
169  var i = TO_INTEGER(lastIndex);
170
171  var updateLastIndex = this.global || (harmony_regexps && this.sticky);
172  if (updateLastIndex) {
173    if (i < 0 || i > string.length) {
174      this.lastIndex = 0;
175      return null;
176    }
177  } else {
178    i = 0;
179  }
180
181  // matchIndices is either null or the lastMatchInfo array.
182  var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo);
183
184  if (IS_NULL(matchIndices)) {
185    this.lastIndex = 0;
186    return null;
187  }
188
189  // Successful match.
190  lastMatchInfoOverride = null;
191  if (updateLastIndex) {
192    this.lastIndex = lastMatchInfo[CAPTURE1];
193  }
194  RETURN_NEW_RESULT_FROM_MATCH_INFO(matchIndices, string);
195}
196
197
// One-element cache for the simplified test regexp.  TrimRegExp stores the
// regexp it was last called with in regexp_key and the trimmed regexp it
// built from it in regexp_val.
var regexp_key;
var regexp_val;
201
202// Section 15.10.6.3 doesn't actually make sense, but the intention seems to be
203// that test is defined in terms of String.prototype.exec. However, it probably
204// means the original value of String.prototype.exec, which is what everybody
205// else implements.
206function RegExpTest(string) {
207  if (!IS_REGEXP(this)) {
208    throw MakeTypeError('incompatible_method_receiver',
209                        ['RegExp.prototype.test', this]);
210  }
211  string = TO_STRING_INLINE(string);
212
213  var lastIndex = this.lastIndex;
214
215  // Conversion is required by the ES5 specification (RegExp.prototype.exec
216  // algorithm, step 5) even if the value is discarded for non-global RegExps.
217  var i = TO_INTEGER(lastIndex);
218
219  if (this.global || (harmony_regexps && this.sticky)) {
220    if (i < 0 || i > string.length) {
221      this.lastIndex = 0;
222      return false;
223    }
224    // matchIndices is either null or the lastMatchInfo array.
225    var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo);
226    if (IS_NULL(matchIndices)) {
227      this.lastIndex = 0;
228      return false;
229    }
230    lastMatchInfoOverride = null;
231    this.lastIndex = lastMatchInfo[CAPTURE1];
232    return true;
233  } else {
234    // Non-global, non-sticky regexp.
235    // Remove irrelevant preceeding '.*' in a test regexp.  The expression
236    // checks whether this.source starts with '.*' and that the third char is
237    // not a '?'.  But see https://code.google.com/p/v8/issues/detail?id=3560
238    var regexp = this;
239    if (regexp.source.length >= 3 &&
240        %_StringCharCodeAt(regexp.source, 0) == 46 &&  // '.'
241        %_StringCharCodeAt(regexp.source, 1) == 42 &&  // '*'
242        %_StringCharCodeAt(regexp.source, 2) != 63) {  // '?'
243      regexp = TrimRegExp(regexp);
244    }
245    // matchIndices is either null or the lastMatchInfo array.
246    var matchIndices = %_RegExpExec(regexp, string, 0, lastMatchInfo);
247    if (IS_NULL(matchIndices)) {
248      this.lastIndex = 0;
249      return false;
250    }
251    lastMatchInfoOverride = null;
252    return true;
253  }
254}
255
256function TrimRegExp(regexp) {
257  if (!%_ObjectEquals(regexp_key, regexp)) {
258    regexp_key = regexp;
259    regexp_val =
260      new $RegExp(%_SubString(regexp.source, 2, regexp.source.length),
261                  (regexp.ignoreCase ? regexp.multiline ? "im" : "i"
262                                     : regexp.multiline ? "m" : ""));
263  }
264  return regexp_val;
265}
266
267
// RegExp.prototype.toString.  Returns '/' + source + '/' followed by the
// flag letters in the order g, i, m and (with harmony_regexps) y.  Throws a
// TypeError ('incompatible_method_receiver') for a non-regexp receiver.
function RegExpToString() {
  if (!IS_REGEXP(this)) {
    throw MakeTypeError('incompatible_method_receiver',
                        ['RegExp.prototype.toString', this]);
  }
  var source = this.source;
  var flagLetters = (this.global ? 'g' : '') +
                    (this.ignoreCase ? 'i' : '') +
                    (this.multiline ? 'm' : '') +
                    ((harmony_regexps && this.sticky) ? 'y' : '');
  return '/' + source + '/' + flagLetters;
}
280
281
282// Getters for the static properties lastMatch, lastParen, leftContext, and
283// rightContext of the RegExp constructor.  The properties are computed based
284// on the captures array of the last successful match and the subject string
285// of the last successful match.
286function RegExpGetLastMatch() {
287  if (lastMatchInfoOverride !== null) {
288    return OVERRIDE_MATCH(lastMatchInfoOverride);
289  }
290  var regExpSubject = LAST_SUBJECT(lastMatchInfo);
291  return %_SubString(regExpSubject,
292                     lastMatchInfo[CAPTURE0],
293                     lastMatchInfo[CAPTURE1]);
294}
295
296
297function RegExpGetLastParen() {
298  if (lastMatchInfoOverride) {
299    var override = lastMatchInfoOverride;
300    if (override.length <= 3) return '';
301    return override[override.length - 3];
302  }
303  var length = NUMBER_OF_CAPTURES(lastMatchInfo);
304  if (length <= 2) return '';  // There were no captures.
305  // We match the SpiderMonkey behavior: return the substring defined by the
306  // last pair (after the first pair) of elements of the capture array even if
307  // it is empty.
308  var regExpSubject = LAST_SUBJECT(lastMatchInfo);
309  var start = lastMatchInfo[CAPTURE(length - 2)];
310  var end = lastMatchInfo[CAPTURE(length - 1)];
311  if (start != -1 && end != -1) {
312    return %_SubString(regExpSubject, start, end);
313  }
314  return "";
315}
316
317
318function RegExpGetLeftContext() {
319  var start_index;
320  var subject;
321  if (!lastMatchInfoOverride) {
322    start_index = lastMatchInfo[CAPTURE0];
323    subject = LAST_SUBJECT(lastMatchInfo);
324  } else {
325    var override = lastMatchInfoOverride;
326    start_index = OVERRIDE_POS(override);
327    subject = OVERRIDE_SUBJECT(override);
328  }
329  return %_SubString(subject, 0, start_index);
330}
331
332
333function RegExpGetRightContext() {
334  var start_index;
335  var subject;
336  if (!lastMatchInfoOverride) {
337    start_index = lastMatchInfo[CAPTURE1];
338    subject = LAST_SUBJECT(lastMatchInfo);
339  } else {
340    var override = lastMatchInfoOverride;
341    subject = OVERRIDE_SUBJECT(override);
342    var match = OVERRIDE_MATCH(override);
343    start_index = OVERRIDE_POS(override) + match.length;
344  }
345  return %_SubString(subject, start_index, subject.length);
346}
347
348
349// The properties $1..$9 are the first nine capturing substrings of the last
350// successful match, or ''.  The function RegExpMakeCaptureGetter will be
351// called with indices from 1 to 9.
352function RegExpMakeCaptureGetter(n) {
353  return function() {
354    if (lastMatchInfoOverride) {
355      if (n < lastMatchInfoOverride.length - 2) {
356        return OVERRIDE_CAPTURE(lastMatchInfoOverride, n);
357      }
358      return '';
359    }
360    var index = n * 2;
361    if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return '';
362    var matchStart = lastMatchInfo[CAPTURE(index)];
363    var matchEnd = lastMatchInfo[CAPTURE(index + 1)];
364    if (matchStart == -1 || matchEnd == -1) return '';
365    return %_SubString(LAST_SUBJECT(lastMatchInfo), matchStart, matchEnd);
366  };
367}
368
369
// Property of the builtins object for recording the result of the last
// regexp match.  The property lastMatchInfo includes the matchIndices
// array of the last successful regexp match (an array of start/end index
// pairs for the match and all the captured substrings); the invariant is
// that there are at least two capture indices.  The array also contains
// the subject string for the last successful match.
//
// Initial layout: capture slot count, last subject, last input, then the
// start and end index of the whole match.
var lastMatchInfo = new InternalPackedArray(
    2,                 // REGEXP_NUMBER_OF_CAPTURES
    "",                // Last subject.
    UNDEFINED,         // Last input - settable with RegExpSetInput.
    0,                 // REGEXP_FIRST_CAPTURE + 0
    0                  // REGEXP_FIRST_CAPTURE + 1
);

// Override last match info with an array of actual substrings.
// Used internally by replace regexp with function.
// The array has the format of an "apply" argument for a replacement
// function.  It is null when no override is active; the exec/test functions
// in this file reset it to null after every successful match.
var lastMatchInfoOverride = null;
389
390// -------------------------------------------------------------------
391
// Wires up the RegExp constructor: installs RegExpConstructor as its code,
// adds exec/test/toString/compile to RegExp.prototype, and defines the legacy
// static accessor properties (input, multiline, lastMatch, lastParen,
// leftContext, rightContext, their $-aliases and $1..$9) on RegExp itself.
// Called exactly once, directly below.
function SetUpRegExp() {
  // NOTE(review): presumably asserts that this only runs during bootstrap.
  %CheckIsBootstrapping();
  %FunctionSetInstanceClassName($RegExp, 'RegExp');
  %AddNamedProperty($RegExp.prototype, 'constructor', $RegExp, DONT_ENUM);
  %SetCode($RegExp, RegExpConstructor);

  InstallFunctions($RegExp.prototype, DONT_ENUM, $Array(
    "exec", RegExpExec,
    "test", RegExpTest,
    "toString", RegExpToString,
    "compile", RegExpCompileJS
  ));

  // The length of compile is 1 in SpiderMonkey.
  %FunctionSetLength($RegExp.prototype.compile, 1);

  // The properties `input` and `$_` are aliases for each other.  When this
  // value is set the value it is set to is coerced to a string.
  // Getter and setter for the input.
  var RegExpGetInput = function() {
    var regExpInput = LAST_INPUT(lastMatchInfo);
    // The input slot starts out undefined; report that as the empty string.
    return IS_UNDEFINED(regExpInput) ? "" : regExpInput;
  };
  var RegExpSetInput = function(string) {
    LAST_INPUT(lastMatchInfo) = ToString(string);
  };

  // NOTE(review): 22 looks like a size hint for the properties added below
  // (12 named accessors plus $1..$9 are defined here) -- confirm.
  %OptimizeObjectForAddingMultipleProperties($RegExp, 22);
  // Each long name is defined with DONT_DELETE only; its $-alias is also
  // DONT_ENUM.
  %DefineAccessorPropertyUnchecked($RegExp, 'input', RegExpGetInput,
                                   RegExpSetInput, DONT_DELETE);
  %DefineAccessorPropertyUnchecked($RegExp, '$_', RegExpGetInput,
                                   RegExpSetInput, DONT_ENUM | DONT_DELETE);

  // The properties multiline and $* are aliases for each other.  When this
  // value is set in SpiderMonkey, the value it is set to is coerced to a
  // boolean.  We mimic that behavior with a slight difference: in SpiderMonkey
  // the value of the expression 'RegExp.multiline = null' (for instance) is the
  // boolean false (i.e., the value after coercion), while in V8 it is the value
  // null (i.e., the value before coercion).

  // Getter and setter for multiline.  The value lives in this closure
  // variable and is shared by both aliases.
  var multiline = false;
  var RegExpGetMultiline = function() { return multiline; };
  var RegExpSetMultiline = function(flag) { multiline = flag ? true : false; };

  %DefineAccessorPropertyUnchecked($RegExp, 'multiline', RegExpGetMultiline,
                                   RegExpSetMultiline, DONT_DELETE);
  %DefineAccessorPropertyUnchecked($RegExp, '$*', RegExpGetMultiline,
                                   RegExpSetMultiline,
                                   DONT_ENUM | DONT_DELETE);


  // Setter for the match-derived properties below: assignments are accepted
  // but have no effect.
  var NoOpSetter = function(ignored) {};


  // Static properties set by a successful match.
  %DefineAccessorPropertyUnchecked($RegExp, 'lastMatch', RegExpGetLastMatch,
                                   NoOpSetter, DONT_DELETE);
  %DefineAccessorPropertyUnchecked($RegExp, '$&', RegExpGetLastMatch,
                                   NoOpSetter, DONT_ENUM | DONT_DELETE);
  %DefineAccessorPropertyUnchecked($RegExp, 'lastParen', RegExpGetLastParen,
                                   NoOpSetter, DONT_DELETE);
  %DefineAccessorPropertyUnchecked($RegExp, '$+', RegExpGetLastParen,
                                   NoOpSetter, DONT_ENUM | DONT_DELETE);
  %DefineAccessorPropertyUnchecked($RegExp, 'leftContext',
                                   RegExpGetLeftContext, NoOpSetter,
                                   DONT_DELETE);
  %DefineAccessorPropertyUnchecked($RegExp, '$`', RegExpGetLeftContext,
                                   NoOpSetter, DONT_ENUM | DONT_DELETE);
  %DefineAccessorPropertyUnchecked($RegExp, 'rightContext',
                                   RegExpGetRightContext, NoOpSetter,
                                   DONT_DELETE);
  %DefineAccessorPropertyUnchecked($RegExp, "$'", RegExpGetRightContext,
                                   NoOpSetter, DONT_ENUM | DONT_DELETE);

  // $1 .. $9: one getter per capture index, each closing over its own index.
  for (var i = 1; i < 10; ++i) {
    %DefineAccessorPropertyUnchecked($RegExp, '$' + i,
                                     RegExpMakeCaptureGetter(i), NoOpSetter,
                                     DONT_DELETE);
  }
  %ToFastProperties($RegExp);
}

SetUpRegExp();
476