1// Copyright 2014 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// ES6 extends the \uxxxx escape and also allows \u{xxxxx}.
6
7// Flags: --harmony-unicode-regexps --harmony-regexps
8
9function testRegexpHelper(r) {
10  assertTrue(r.test("foo"));
11  assertTrue(r.test("boo"));
12  assertFalse(r.test("moo"));
13}
14
15
16(function TestUnicodeEscapes() {
17  testRegexpHelper(/(\u0066|\u0062)oo/);
18  testRegexpHelper(/(\u0066|\u0062)oo/u);
19  testRegexpHelper(/(\u{0066}|\u{0062})oo/u);
20  testRegexpHelper(/(\u{66}|\u{000062})oo/u);
21
22  // Note that we need \\ inside a string, otherwise it's interpreted as a
23  // unicode escape inside a string.
24  testRegexpHelper(new RegExp("(\\u0066|\\u0062)oo"));
25  testRegexpHelper(new RegExp("(\\u0066|\\u0062)oo", "u"));
26  testRegexpHelper(new RegExp("(\\u{0066}|\\u{0062})oo", "u"));
27  testRegexpHelper(new RegExp("(\\u{66}|\\u{000062})oo", "u"));
28
29  // Though, unicode escapes via strings should work too.
30  testRegexpHelper(new RegExp("(\u0066|\u0062)oo"));
31  testRegexpHelper(new RegExp("(\u0066|\u0062)oo", "u"));
32  testRegexpHelper(new RegExp("(\u{0066}|\u{0062})oo", "u"));
33  testRegexpHelper(new RegExp("(\u{66}|\u{000062})oo", "u"));
34})();
35
36
37(function TestUnicodeEscapesInCharacterClasses() {
38  testRegexpHelper(/[\u0062-\u0066]oo/);
39  testRegexpHelper(/[\u0062-\u0066]oo/u);
40  testRegexpHelper(/[\u{0062}-\u{0066}]oo/u);
41  testRegexpHelper(/[\u{62}-\u{00000066}]oo/u);
42
43  // Note that we need \\ inside a string, otherwise it's interpreted as a
44  // unicode escape inside a string.
45  testRegexpHelper(new RegExp("[\\u0062-\\u0066]oo"));
46  testRegexpHelper(new RegExp("[\\u0062-\\u0066]oo", "u"));
47  testRegexpHelper(new RegExp("[\\u{0062}-\\u{0066}]oo", "u"));
48  testRegexpHelper(new RegExp("[\\u{62}-\\u{00000066}]oo", "u"));
49
50  // Though, unicode escapes via strings should work too.
51  testRegexpHelper(new RegExp("[\u0062-\u0066]oo"));
52  testRegexpHelper(new RegExp("[\u0062-\u0066]oo", "u"));
53  testRegexpHelper(new RegExp("[\u{0062}-\u{0066}]oo", "u"));
54  testRegexpHelper(new RegExp("[\u{62}-\u{00000066}]oo", "u"));
55})();
56
57
58(function TestBraceEscapesWithoutUnicodeFlag() {
59  // \u followed by illegal escape will be parsed as u. {x} will be the
60  // character count.
61  function helper1(r) {
62    assertFalse(r.test("fbar"));
63    assertFalse(r.test("fubar"));
64    assertTrue(r.test("fuubar"));
65    assertFalse(r.test("fuuubar"));
66  }
67  helper1(/f\u{2}bar/);
68  helper1(new RegExp("f\\u{2}bar"));
69
70  function helper2(r) {
71    assertFalse(r.test("fbar"));
72    assertTrue(r.test("fubar"));
73    assertTrue(r.test("fuubar"));
74    assertFalse(r.test("fuuubar"));
75  }
76
77  helper2(/f\u{1,2}bar/);
78  helper2(new RegExp("f\\u{1,2}bar"));
79
80  function helper3(r) {
81    assertTrue(r.test("u"));
82    assertTrue(r.test("{"));
83    assertTrue(r.test("2"));
84    assertTrue(r.test("}"));
85    assertFalse(r.test("q"));
86    assertFalse(r.test("("));
87    assertFalse(r.test(")"));
88  }
89  helper3(/[\u{2}]/);
90  helper3(new RegExp("[\\u{2}]"));
91})();
92
93
94(function TestInvalidEscapes() {
95  // Without the u flag, invalid unicode escapes and other invalid escapes are
96  // treated as identity escapes.
97  function helper1(r) {
98    assertTrue(r.test("firstuxz89second"));
99  }
100  helper1(/first\u\x\z\8\9second/);
101  helper1(new RegExp("first\\u\\x\\z\\8\\9second"));
102
103  function helper2(r) {
104    assertTrue(r.test("u"));
105    assertTrue(r.test("x"));
106    assertTrue(r.test("z"));
107    assertTrue(r.test("8"));
108    assertTrue(r.test("9"));
109    assertFalse(r.test("q"));
110    assertFalse(r.test("7"));
111  }
112  helper2(/[\u\x\z\8\9]/);
113  helper2(new RegExp("[\\u\\x\\z\\8\\9]"));
114
115  // However, with the u flag, these are treated as invalid escapes.
116  assertThrows("/\\u/u", SyntaxError);
117  assertThrows("/\\u12/u", SyntaxError);
118  assertThrows("/\\ufoo/u", SyntaxError);
119  assertThrows("/\\x/u", SyntaxError);
120  assertThrows("/\\xfoo/u", SyntaxError);
121  assertThrows("/\\z/u", SyntaxError);
122  assertThrows("/\\8/u", SyntaxError);
123  assertThrows("/\\9/u", SyntaxError);
124
125  assertThrows("new RegExp('\\\\u', 'u')", SyntaxError);
126  assertThrows("new RegExp('\\\\u12', 'u')", SyntaxError);
127  assertThrows("new RegExp('\\\\ufoo', 'u')", SyntaxError);
128  assertThrows("new RegExp('\\\\x', 'u')", SyntaxError);
129  assertThrows("new RegExp('\\\\xfoo', 'u')", SyntaxError);
130  assertThrows("new RegExp('\\\\z', 'u')", SyntaxError);
131  assertThrows("new RegExp('\\\\8', 'u')", SyntaxError);
132  assertThrows("new RegExp('\\\\9', 'u')", SyntaxError);
133})();
134
135
136(function TestTooBigHexEscape() {
137  // The hex number inside \u{} has a maximum value.
138  /\u{10ffff}/u
139  new RegExp("\\u{10ffff}", "u")
140  assertThrows("/\\u{110000}/u", SyntaxError);
141  assertThrows("new RegExp('\\\\u{110000}', 'u')", SyntaxError);
142
143  // Without the u flag, they're of course fine ({x} is the count).
144  /\u{110000}/
145  new RegExp("\\u{110000}")
146})();
147
148
149(function TestSyntaxEscapes() {
150  // Syntax escapes work the same with or without the u flag.
151  function helper(r) {
152    assertTrue(r.test("foo[bar"));
153    assertFalse(r.test("foo]bar"));
154  }
155  helper(/foo\[bar/);
156  helper(new RegExp("foo\\[bar"));
157  helper(/foo\[bar/u);
158  helper(new RegExp("foo\\[bar", "u"));
159})();
160
161
162(function TestUnicodeSurrogates() {
163  // U+10E6D corresponds to the surrogate pair [U+D803, U+DE6D].
164  function helper(r) {
165    assertTrue(r.test("foo\u{10e6d}bar"));
166  }
167  helper(/foo\ud803\ude6dbar/u);
168  helper(new RegExp("foo\\ud803\\ude6dbar", "u"));
169})();
170
171
172(function AllFlags() {
173  // Test that we can pass all possible regexp flags and they work properly.
174  function helper1(r) {
175    assertTrue(r.global);
176    assertTrue(r.ignoreCase);
177    assertTrue(r.multiline);
178    assertTrue(r.sticky);
179    assertTrue(r.unicode);
180  }
181
182  helper1(/foo/gimyu);
183  helper1(new RegExp("foo", "gimyu"));
184
185  function helper2(r) {
186    assertFalse(r.global);
187    assertFalse(r.ignoreCase);
188    assertFalse(r.multiline);
189    assertFalse(r.sticky);
190    assertFalse(r.unicode);
191  }
192
193  helper2(/foo/);
194  helper2(new RegExp("foo"));
195})();
196
197
198(function DuplicatedFlags() {
199  // Test that duplicating the u flag is not allowed.
200  assertThrows("/foo/ugu");
201  assertThrows("new RegExp('foo', 'ugu')");
202})();
203
204
205(function ToString() {
206  // Test that the u flag is included in the string representation of regexps.
207  function helper(r) {
208    assertEquals(r.toString(), "/foo/u");
209  }
210  helper(/foo/u);
211  helper(new RegExp("foo", "u"));
212})();
213
214// Non-BMP patterns.
215// Single character atom.
216assertTrue(new RegExp("\u{12345}", "u").test("\u{12345}"));
217assertTrue(/\u{12345}/u.test("\u{12345}"));
218assertTrue(new RegExp("\u{12345}", "u").test("\ud808\udf45"));
219assertTrue(/\u{12345}/u.test("\ud808\udf45"));
220assertFalse(new RegExp("\u{12345}", "u").test("\udf45"));
221assertFalse(/\u{12345}/u.test("\udf45"));
222
223// Multi-character atom.
224assertTrue(new RegExp("\u{12345}\u{23456}", "u").test("a\u{12345}\u{23456}b"));
225assertTrue(/\u{12345}\u{23456}/u.test("b\u{12345}\u{23456}c"));
226assertFalse(new RegExp("\u{12345}\u{23456}", "u").test("a\udf45\u{23456}b"));
227assertFalse(/\u{12345}\u{23456}/u.test("b\udf45\u{23456}c"));
228
229// Disjunction.
230assertTrue(new RegExp("\u{12345}(?:\u{23456})", "u").test(
231    "a\u{12345}\u{23456}b"));
232assertTrue(/\u{12345}(?:\u{23456})/u.test("b\u{12345}\u{23456}c"));
233assertFalse(new RegExp("\u{12345}(?:\u{23456})", "u").test(
234    "a\udf45\u{23456}b"));
235assertFalse(/\u{12345}(?:\u{23456})/u.test("b\udf45\u{23456}c"));
236
237// Alternative.
238assertTrue(new RegExp("\u{12345}|\u{23456}", "u").test("a\u{12345}b"));
239assertTrue(/\u{12345}|\u{23456}/u.test("b\u{23456}c"));
240assertFalse(new RegExp("\u{12345}|\u{23456}", "u").test("a\udf45\ud84db"));
241assertFalse(/\u{12345}|\u{23456}/u.test("b\udf45\ud808c"));
242
243// Capture.
244assertTrue(new RegExp("(\u{12345}|\u{23456}).\\1", "u").test(
245    "\u{12345}b\u{12345}"));
246assertTrue(/(\u{12345}|\u{23456}).\1/u.test("\u{12345}b\u{12345}"));
247assertFalse(new RegExp("(\u{12345}|\u{23456}).\\1", "u").test(
248    "\u{12345}b\u{23456}"));
249assertFalse(/(\u{12345}|\u{23456}).\1/u.test("\u{12345}b\u{23456}"));
250
251// Quantifier.
252assertTrue(new RegExp("\u{12345}{3}", "u").test("\u{12345}\u{12345}\u{12345}"));
253assertTrue(/\u{12345}{3}/u.test("\u{12345}\u{12345}\u{12345}"));
254assertTrue(new RegExp("\u{12345}{3}").test("\u{12345}\udf45\udf45"));
255assertTrue(/\ud808\udf45{3}/u.test("\u{12345}\udf45\udf45"));
256assertFalse(new RegExp("\u{12345}{3}", "u").test("\u{12345}\udf45\udf45"));
257assertFalse(/\u{12345}{3}/u.test("\u{12345}\udf45\udf45"));
258