1 /*************************************************
2 *      Perl-Compatible Regular Expressions       *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8                        Written by Philip Hazel
9      Original API code Copyright (c) 1997-2012 University of Cambridge
10          New API code Copyright (c) 2016 University of Cambridge
11 
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15 
16     * Redistributions of source code must retain the above copyright notice,
17       this list of conditions and the following disclaimer.
18 
19     * Redistributions in binary form must reproduce the above copyright
20       notice, this list of conditions and the following disclaimer in the
21       documentation and/or other materials provided with the distribution.
22 
23     * Neither the name of the University of Cambridge nor the names of its
24       contributors may be used to endorse or promote products derived from
25       this software without specific prior written permission.
26 
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40 
41 #ifdef HAVE_CONFIG_H
42 #include "config.h"
43 #endif
44 
45 #include <stdio.h>
46 #include <string.h>
47 
48 #define PCRE2_CODE_UNIT_WIDTH 0
49 #include "pcre2.h"
50 
51 /*
52  Letter characters:
53    \xe6\x92\xad = 0x64ad = 25773 (kanji)
54  Non-letter characters:
55    \xc2\xa1 = 0xa1 = 161 (Inverted Exclamation Mark)
56    \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
57    \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
58    \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
59  Newlines:
60    \xc2\x85 = 0x85 = 133 (Next Line = NEL)
61    \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
62  Othercase pairs:
63    \xc3\xa9 = 0xe9 = 233 (e')
64       \xc3\x89 = 0xc9 = 201 (E')
65    \xc3\xa1 = 0xe1 = 225 (a')
66       \xc3\x81 = 0xc1 = 193 (A')
67    \x53 = 0x53 = S
68      \x73 = 0x73 = s
69      \xc5\xbf = 0x17f = 383 (long S)
70    \xc8\xba = 0x23a = 570
71       \xe2\xb1\xa5 = 0x2c65 = 11365
72    \xe1\xbd\xb8 = 0x1f78 = 8056
73       \xe1\xbf\xb8 = 0x1ff8 = 8184
74    \xf0\x90\x90\x80 = 0x10400 = 66560
75       \xf0\x90\x90\xa8 = 0x10428 = 66600
76    \xc7\x84 = 0x1c4 = 452
77      \xc7\x85 = 0x1c5 = 453
78      \xc7\x86 = 0x1c6 = 454
79  Caseless sets:
80    ucp_Armenian - \x{531}-\x{556} -> \x{561}-\x{586}
81    ucp_Coptic - \x{2c80}-\x{2ce3} -> caseless: XOR 0x1
82    ucp_Latin - \x{ff21}-\x{ff3a} -> \x{ff41}-\x{ff5a}
83 
84  Mark property:
85    \xcc\x8d = 0x30d = 781
86  Special:
87    \xc2\x80 = 0x80 = 128 (lowest 2 byte character)
88    \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
89    \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
90    \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
91    \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
92    \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
93 */
94 
/* Test suite entry points. They are static, so their definitions live
   elsewhere in this translation unit. Each one returns an int status that
   main() folds into the process exit code. */
static int regression_tests(void);
static int invalid_utf8_regression_tests(void);
static int invalid_utf16_regression_tests(void);
static int invalid_utf32_regression_tests(void);

/*
 * Program entry point.
 *
 * Asks the first available library flavour (8, 16 or 32 bit, in that order
 * of preference) for the PCRE2_CONFIG_JIT setting. When the reported value
 * is zero, a message is printed and 1 is returned without running anything.
 *
 * Otherwise the four suites are run and their return values are OR-ed
 * together, so the exit code is non-zero if any suite returned non-zero.
 */
int main(void)
{
	int jit = 0;
	int status = 0;
#if defined SUPPORT_PCRE2_8
	pcre2_config_8(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_16
	pcre2_config_16(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_32
	pcre2_config_32(PCRE2_CONFIG_JIT, &jit);
#endif
	if (!jit) {
		printf("JIT must be enabled to run pcre_jit_test\n");
		return 1;
	}

	/* The operands of a single '|' expression may be evaluated in any
	   order. Separate statements make the suites run (and print) in a
	   fixed order, while every suite still runs even after a failure. */
	status |= regression_tests();
	status |= invalid_utf8_regression_tests();
	status |= invalid_utf16_regression_tests();
	status |= invalid_utf32_regression_tests();
	return status;
}
119 
120 /* --------------------------------------------------------------------------------------- */
121 
122 #if !(defined SUPPORT_PCRE2_8) && !(defined SUPPORT_PCRE2_16) && !(defined SUPPORT_PCRE2_32)
123 #error SUPPORT_PCRE2_8 or SUPPORT_PCRE2_16 or SUPPORT_PCRE2_32 must be defined
124 #endif
125 
/* Shorthands for the compile option combinations used in the test table.
   The letters stand for: C = PCRE2_CASELESS, M = PCRE2_MULTILINE,
   U = PCRE2_UTF, P = PCRE2_UCP. */
#define M	(PCRE2_MULTILINE)
#define U	(PCRE2_UTF)
#define MU	(M | U)
#define MP	(M | PCRE2_UCP)
#define MUP	(MU | PCRE2_UCP)
#define CM	(PCRE2_CASELESS | M)
#define CMU	(PCRE2_CASELESS | MU)
#define CMUP	(PCRE2_CASELESS | MUP)

/* The "newline" field of a test case packs two values into one int:
   the newline setting in the low 16 bits and a BSR setting above them.
   BSR() moves a value into the upper part; GET_NEWLINE() and GET_BSR()
   split the packed value again. A is the newline setting most rows use. */
#define A	PCRE2_NEWLINE_ANYCRLF
#define BSR(x)	((x) << 16)

#define GET_NEWLINE(x)	((x) & 0xffff)
#define GET_BSR(x)	((x) >> 16)

/* The table ORs the F_* flags into the "start_offset" field. The flags
   occupy bits 16 and up; OFFSET_MASK covers the low 16 bits below them.
   NOTE(review): F_NO16 and F_NO32 share the same bit, so they cannot be
   told apart - confirm against the code that tests them that this is
   intended. */
#define OFFSET_MASK	0x00ffff
#define F_NO8		(1 << 16)
#define F_NO16		(1 << 17)
#define F_NO32		(1 << 17)
#define F_NOMATCH	(1 << 18)
#define F_DIFF		(1 << 19)
#define F_FORCECONV	(1 << 20)
#define F_PROPERTY	(1 << 21)
149 
/*
 * One row of the regression test table.
 *
 * Rows are written as positional initializers, so the order of the
 * members below must not change.
 */
struct regression_test_case {
	/* Options for compiling the pattern; rows use the MU / CMU / ...
	   shorthands or PCRE2_* flags directly. */
	int compile_options;

	/* Newline setting, optionally OR-ed with BSR(...) in the upper bits
	   (see GET_NEWLINE / GET_BSR). */
	int newline;

	/* Options for the match call, e.g. PCRE2_NOTBOL; 0 when none. */
	int match_options;

	/* Offset at which matching starts, OR-ed with F_* flags that sit
	   above OFFSET_MASK. */
	int start_offset;

	/* The regular expression, as a C string. */
	const char *pattern;

	/* The subject text the pattern is run against, as a C string. */
	const char *input;
};
158 
159 static struct regression_test_case regression_test_cases[] = {
160 	/* Constant strings. */
161 	{ MU, A, 0, 0, "AbC", "AbAbC" },
162 	{ MU, A, 0, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
163 	{ CMU, A, 0, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
164 	{ M, A, 0, 0, "[^a]", "aAbB" },
165 	{ CM, A, 0, 0, "[^m]", "mMnN" },
166 	{ M, A, 0, 0, "a[^b][^#]", "abacd" },
167 	{ CM, A, 0, 0, "A[^B][^E]", "abacd" },
168 	{ CMU, A, 0, 0, "[^x][^#]", "XxBll" },
169 	{ MU, A, 0, 0, "[^a]", "aaa\xc3\xa1#Ab" },
170 	{ CMU, A, 0, 0, "[^A]", "aA\xe6\x92\xad" },
171 	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\r\n+bc" },
172 	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\n\r+bc" },
173 	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\r\r+bc" },
174 	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\n\n+bc" },
175 	{ MU, A, 0, 0, "[axd]", "sAXd" },
176 	{ CMU, A, 0, 0, "[axd]", "sAXd" },
177 	{ CMU, A, 0, 0 | F_NOMATCH, "[^axd]", "DxA" },
178 	{ MU, A, 0, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
179 	{ MU, A, 0, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
180 	{ CMU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
181 	{ MU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
182 	{ MU, A, 0, 0, "[^a]", "\xc2\x80[]" },
183 	{ CMU, A, 0, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
184 	{ CM, A, 0, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
185 	{ PCRE2_CASELESS, 0, 0, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
186 	{ PCRE2_CASELESS, 0, 0, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
187 	{ PCRE2_CASELESS, 0, 0, 0, "a1", "Aa1" },
188 #ifndef NEVER_BACKSLASH_C
189 	{ M, A, 0, 0, "\\Ca", "cda" },
190 	{ CM, A, 0, 0, "\\Ca", "CDA" },
191 	{ M, A, 0, 0 | F_NOMATCH, "\\Cx", "cda" },
192 	{ CM, A, 0, 0 | F_NOMATCH, "\\Cx", "CDA" },
193 #endif /* !NEVER_BACKSLASH_C */
194 	{ CMUP, A, 0, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
195 	{ CMUP, A, 0, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
196 	{ CMUP, A, 0, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
197 	{ CMUP, A, 0, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
198 	{ M, A, 0, 0, "[3-57-9]", "5" },
199 	{ PCRE2_AUTO_CALLOUT, A, 0, 0, "12345678901234567890123456789012345678901234567890123456789012345678901234567890",
200 		"12345678901234567890123456789012345678901234567890123456789012345678901234567890" },
201 
202 	/* Assertions. */
203 	{ MU, A, 0, 0, "\\b[^A]", "A_B#" },
204 	{ M, A, 0, 0 | F_NOMATCH, "\\b\\W", "\n*" },
205 	{ MU, A, 0, 0, "\\B[^,]\\b[^s]\\b", "#X" },
206 	{ MP, A, 0, 0, "\\B", "_\xa1" },
207 	{ MP, A, 0, 0 | F_PROPERTY, "\\b_\\b[,A]\\B", "_," },
208 	{ MUP, A, 0, 0, "\\b", "\xe6\x92\xad!" },
209 	{ MUP, A, 0, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
210 	{ MUP, A, 0, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
211 	{ MUP, A, 0, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
212 	{ MU, A, 0, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
213 	{ CMUP, A, 0, 0, "\\By", "\xf0\x90\x90\xa8y" },
214 	{ M, A, 0, 0 | F_NOMATCH, "\\R^", "\n" },
215 	{ M, A, 0, 1 | F_NOMATCH, "^", "\n" },
216 	{ 0, 0, 0, 0, "^ab", "ab" },
217 	{ 0, 0, 0, 0 | F_NOMATCH, "^ab", "aab" },
218 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "^a", "\r\raa\n\naa\r\naa" },
219 	{ MU, A, 0, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
220 	{ M, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--b--\x85--" },
221 	{ MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xe2\x80\xa8--" },
222 	{ MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xc2\x85--" },
223 	{ 0, 0, 0, 0, "ab$", "ab" },
224 	{ 0, 0, 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
225 	{ PCRE2_DOLLAR_ENDONLY, 0, 0, 0 | F_NOMATCH, "ab$", "abab\r\n" },
226 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "a$", "\r\raa\n\naa\r\naa" },
227 	{ M, PCRE2_NEWLINE_ANY, 0, 0, "a$", "aaa" },
228 	{ MU, PCRE2_NEWLINE_ANYCRLF, 0, 0, "#$", "#\xc2\x85###\r#" },
229 	{ MU, PCRE2_NEWLINE_ANY, 0, 0, "#$", "#\xe2\x80\xa9" },
230 	{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0 | F_NOMATCH, "^a", "aa\naa" },
231 	{ M, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0, "^a", "aa\naa" },
232 	{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\naa" },
233 	{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\r\n" },
234 	{ U | PCRE2_DOLLAR_ENDONLY, PCRE2_NEWLINE_ANY, 0, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
235 	{ M, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0, "a$", "aa\naa" },
236 	{ 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa" },
237 	{ U, PCRE2_NEWLINE_CR, 0, 0, "a\\Z", "aaa\r" },
238 	{ 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa\n" },
239 	{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r" },
240 	{ U, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\n" },
241 	{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r\n" },
242 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
243 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
244 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
245 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
246 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
247 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
248 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
249 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
250 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
251 	{ U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xc2\x85" },
252 	{ U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
253 	{ M, A, 0, 0, "\\Aa", "aaa" },
254 	{ M, A, 0, 1 | F_NOMATCH, "\\Aa", "aaa" },
255 	{ M, A, 0, 1, "\\Ga", "aaa" },
256 	{ M, A, 0, 1 | F_NOMATCH, "\\Ga", "aba" },
257 	{ M, A, 0, 0, "a\\z", "aaa" },
258 	{ M, A, 0, 0 | F_NOMATCH, "a\\z", "aab" },
259 
260 	/* Brackets and alternatives. */
261 	{ MU, A, 0, 0, "(ab|bb|cd)", "bacde" },
262 	{ MU, A, 0, 0, "(?:ab|a)(bc|c)", "ababc" },
263 	{ MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
264 	{ CMU, A, 0, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
265 	{ MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
266 	{ MU, A, 0, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
267 	{ MU, A, 0, 0, "\xc7\x82|\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
268 	{ MU, A, 0, 0, "=\xc7\x82|#\xc6\x82", "\xf1\x83\x82\x82=\xc7\x82\xc7\x83" },
269 	{ MU, A, 0, 0, "\xc7\x82\xc7\x83|\xc6\x82\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
270 	{ MU, A, 0, 0, "\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84", "\xf1\x83\x82\x82\xc8\x84\xc8\x84" },
271 	{ U, A, 0, 0, "\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80", "\xdf\xbf\xc2\x80\xe4\x84\x80" },
272 	{ U, A, 0, 0, "(?:\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80)#", "\xdf\xbf\xc2\x80#\xe4\x84\x80#" },
273 	{ CM, A, 0, 0, "ab|cd", "CD" },
274 	{ CM, A, 0, 0, "a1277|a1377|bX487", "bx487" },
275 	{ CM, A, 0, 0, "a1277|a1377|bx487", "bX487" },
276 
277 	/* Greedy and non-greedy ? operators. */
278 	{ MU, A, 0, 0, "(?:a)?a", "laab" },
279 	{ CMU, A, 0, 0, "(A)?A", "llaab" },
280 	{ MU, A, 0, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
281 	{ MU, A, 0, 0, "(a)?a", "manm" },
282 	{ CMU, A, 0, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
283 	{ MU, A, 0, 0, "(a|b)?\?d((?:e)?)", "abcde" },
284 	{ MU, A, 0, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
285 
286 	/* Greedy and non-greedy + operators */
287 	{ MU, A, 0, 0, "(aa)+aa", "aaaaaaa" },
288 	{ MU, A, 0, 0, "(aa)+?aa", "aaaaaaa" },
289 	{ MU, A, 0, 0, "(?:aba|ab|a)+l", "ababamababal" },
290 	{ MU, A, 0, 0, "(?:aba|ab|a)+?l", "ababamababal" },
291 	{ MU, A, 0, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
292 	{ MU, A, 0, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
293 	{ MU, A, 0, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
294 
295 	/* Greedy and non-greedy * operators */
296 	{ CMU, A, 0, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
297 	{ MU, A, 0, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
298 	{ MU, A, 0, 0, "(aa|ab)*ab", "aaabaaab" },
299 	{ CMU, A, 0, 0, "(aa|Ab)*?aB", "aaabaaab" },
300 	{ MU, A, 0, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
301 	{ MU, A, 0, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
302 	{ M, A, 0, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
303 	{ M, A, 0, 0, "((?:a|)*){0}a", "a" },
304 
305 	/* Combining ? + * operators */
306 	{ MU, A, 0, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
307 	{ MU, A, 0, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
308 	{ MU, A, 0, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
309 	{ MU, A, 0, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
310 	{ MU, A, 0, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
311 
312 	/* Single character iterators. */
313 	{ MU, A, 0, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
314 	{ MU, A, 0, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
315 	{ MU, A, 0, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
316 	{ MU, A, 0, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
317 	{ MU, A, 0, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
318 	{ MU, A, 0, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
319 	{ MU, A, 0, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
320 	{ MU, A, 0, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
321 	{ MU, A, 0, 0, "(ba{2})+c", "baabaaabacbaabaac" },
322 	{ MU, A, 0, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
323 	{ MU, A, 0, 0, "(a?+[^b])+", "babaacacb" },
324 	{ MU, A, 0, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
325 	{ CMU, A, 0, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
326 	{ CMU, A, 0, 0, "[c-f]+k", "DemmFke" },
327 	{ MU, A, 0, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
328 	{ MU, A, 0, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
329 	{ CMU, A, 0, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
330 	{ CMU, A, 0, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
331 	{ CMU, A, 0, 0, "[ace]{3,}", "AcbDAcEEcEd" },
332 	{ CMU, A, 0, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
333 	{ MU, A, 0, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
334 	{ CMU, A, 0, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
335 	{ MU, A, 0, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
336 	{ MU, A, 0, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
337 	{ MU, A, 0, 0, "\\b\\w+\\B", "x,a_cd" },
338 	{ MUP, A, 0, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
339 	{ CMU, A, 0, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
340 	{ CMUP, A, 0, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
341 	{ CMU, A, 0, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
342 	{ CMU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
343 	{ MU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
344 	{ MU, A, 0, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
345 	{ MU, A, 0, 0, "\\d+123", "987654321,01234" },
346 	{ MU, A, 0, 0, "abcd*|\\w+xy", "aaaaa,abxyz" },
347 	{ MU, A, 0, 0, "(?:abc|((?:amc|\\b\\w*xy)))", "aaaaa,abxyz" },
348 	{ MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.abcd#."},
349 	{ MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.mbcd#."},
350 	{ MU, A, 0, 0, ".[ab]*.", "xx" },
351 	{ MU, A, 0, 0, ".[ab]*a", "xxa" },
352 	{ MU, A, 0, 0, ".[ab]?.", "xx" },
353 	{ MU, A, 0, 0, "_[ab]+_*a", "_aa" },
354 
355 	/* Bracket repeats with limit. */
356 	{ MU, A, 0, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
357 	{ MU, A, 0, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
358 	{ MU, A, 0, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
359 	{ MU, A, 0, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
360 	{ MU, A, 0, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
361 	{ MU, A, 0, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
362 	{ MU, A, 0, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
363 	{ MU, A, 0, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
364 	{ MU, A, 0, 0, "(ab){4,6}?M", "abababababababM" },
365 
366 	/* Basic character sets. */
367 	{ MU, A, 0, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
368 	{ MU, A, 0, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
369 	{ MU, A, 0, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
370 	{ MU, A, 0, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
371 	{ MU, A, 0, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
372 	{ MU, A, 0, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
373 	{ MU, A, 0, 0, "x[bcef]+", "xaxdxecbfg" },
374 	{ MU, A, 0, 0, "x[bcdghij]+", "xaxexfxdgbjk" },
375 	{ MU, A, 0, 0, "x[^befg]+", "xbxexacdhg" },
376 	{ MU, A, 0, 0, "x[^bcdl]+", "xlxbxaekmd" },
377 	{ MU, A, 0, 0, "x[^bcdghi]+", "xbxdxgxaefji" },
378 	{ MU, A, 0, 0, "x[B-Fb-f]+", "xaxAxgxbfBFG" },
379 	{ CMU, A, 0, 0, "\\x{e9}+", "#\xf0\x90\x90\xa8\xc3\xa8\xc3\xa9\xc3\x89\xc3\x88" },
380 	{ CMU, A, 0, 0, "[^\\x{e9}]+", "\xc3\xa9#\xf0\x90\x90\xa8\xc3\xa8\xc3\x88\xc3\x89" },
381 	{ MU, A, 0, 0, "[\\x02\\x7e]+", "\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x02\x7e\x7f" },
382 	{ MU, A, 0, 0, "[^\\x02\\x7e]+", "\x02\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x7f\x7e" },
383 	{ MU, A, 0, 0, "[\\x{81}-\\x{7fe}]+", "#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xc2\x81\xdf\xbe\xdf\xbf" },
384 	{ MU, A, 0, 0, "[^\\x{81}-\\x{7fe}]+", "\xc2\x81#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xdf\xbf\xdf\xbe" },
385 	{ MU, A, 0, 0, "[\\x{801}-\\x{fffe}]+", "#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xe0\xa0\x81\xef\xbf\xbe\xef\xbf\xbf" },
386 	{ MU, A, 0, 0, "[^\\x{801}-\\x{fffe}]+", "\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" },
387 	{ MU, A, 0, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" },
388 	{ MU, A, 0, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" },
389 	{ CMU, A, 0, 0 | F_NOMATCH, "^[\\x{0100}-\\x{017f}]", " " },
390 
391 	/* Unicode properties. */
392 	{ MUP, A, 0, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
393 	{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
394 	{ MUP, A, 0, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
395 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
396 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
397 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
398 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
399 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
400 	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
401 	{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
402 	{ MUP, A, 0, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
403 	{ MUP, A, 0, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
404 	{ CMUP, A, 0, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
405 	{ MUP, A, 0, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
406 	{ MUP, A, 0, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
407 	{ MU, A, 0, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
408 	{ CMUP, A, 0, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
409 	{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
410 	{ MUP, A, 0, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
411 	{ PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "[a-b\\s]{2,5}[^a]", "AB  baaa" },
412 	{ MUP, 0, 0, 0 | F_NOMATCH, "[^\\p{Hangul}\\p{Z}]", " " },
413 
414 	/* Possible empty brackets. */
415 	{ MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
416 	{ MU, A, 0, 0, "(|ab||bc|a)+d", "abcxabcabd" },
417 	{ MU, A, 0, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
418 	{ MU, A, 0, 0, "(|ab||bc|a)*d", "abcxabcabd" },
419 	{ MU, A, 0, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
420 	{ MU, A, 0, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
421 	{ MU, A, 0, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
422 	{ MU, A, 0, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
423 	{ MU, A, 0, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
424 	{ MU, A, 0, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
425 
426 	/* Start offset. */
427 	{ MU, A, 0, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
428 	{ MU, A, 0, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
429 	{ MU, A, 0, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
430 	{ MU, A, 0, 1, "(\\w\\W\\w)+", "ab#d" },
431 
432 	/* Newline. */
433 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
434 	{ M, PCRE2_NEWLINE_CR, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
435 	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{1,3}[^#]", "\r\n##...." },
436 	{ MU, A, PCRE2_NO_UTF_CHECK, 1, "^.a", "\n\x80\nxa" },
437 	{ MU, A, 0, 1, "^", "\r\n" },
438 	{ M, PCRE2_NEWLINE_CRLF, 0, 1 | F_NOMATCH, "^", "\r\n" },
439 	{ M, PCRE2_NEWLINE_CRLF, 0, 1, "^", "\r\na" },
440 
441 	/* Any character except newline or any newline. */
442 	{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
443 	{ U, PCRE2_NEWLINE_CRLF, 0, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
444 	{ 0, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
445 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
446 	{ U, PCRE2_NEWLINE_ANY, 0, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
447 	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
448 	{ 0, PCRE2_NEWLINE_ANY, 0, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
449 	{ U, PCRE2_NEWLINE_ANY, 0, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
450 	{ 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\r" },
451 	{ 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\x85#\r\n#" },
452 	{ U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\xe2\x80\xa8#c" },
453 	{ U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\r\nc" },
454 	{ U, PCRE2_NEWLINE_CRLF | BSR(PCRE2_BSR_UNICODE), 0, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
455 	{ MU, A, 0, 0 | F_NOMATCH, "\\R+", "ab" },
456 	{ MU, A, 0, 0, "\\R+", "ab\r\n\r" },
457 	{ MU, A, 0, 0, "\\R*", "ab\r\n\r" },
458 	{ MU, A, 0, 0, "\\R*", "\r\n\r" },
459 	{ MU, A, 0, 0, "\\R{2,4}", "\r\nab\r\r" },
460 	{ MU, A, 0, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
461 	{ MU, A, 0, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
462 	{ MU, A, 0, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
463 	{ MU, A, 0, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
464 	{ MU, A, 0, 0, "\\R+\\R\\R", "\r\r\r" },
465 	{ MU, A, 0, 0, "\\R*\\R\\R", "\n\r" },
466 	{ MU, A, 0, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
467 	{ MU, A, 0, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
468 
469 	/* Atomic groups (no fallback from "next" direction). */
470 	{ MU, A, 0, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
471 	{ MU, A, 0, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
472 	{ MU, A, 0, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
473 			"bababcdedefgheijijklmlmnop" },
474 	{ MU, A, 0, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
475 	{ MU, A, 0, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
476 	{ MU, A, 0, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
477 	{ MU, A, 0, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
478 	{ MU, A, 0, 0, "((?>a|)+?)b", "aaacaaab" },
479 	{ MU, A, 0, 0, "(?>x|)*$", "aaa" },
480 	{ MU, A, 0, 0, "(?>(x)|)*$", "aaa" },
481 	{ MU, A, 0, 0, "(?>x|())*$", "aaa" },
482 	{ MU, A, 0, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
483 	{ MU, A, 0, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
484 	{ MU, A, 0, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
485 	{ MU, A, 0, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
486 	{ MU, A, 0, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
487 	{ MU, A, 0, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
488 	{ MU, A, 0, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
489 	{ MU, A, 0, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
490 	{ MU, A, 0, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
491 	{ MU, A, 0, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
492 	{ MU, A, 0, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
493 	{ MU, A, 0, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
494 	{ MU, A, 0, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
495 	{ MU, A, 0, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
496 	{ CM, A, 0, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
497 	{ MU, A, 0, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
498 	{ MU, A, 0, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
499 	{ MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
500 	{ MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
501 	{ MU, A, 0, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
502 	{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
503 	{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
504 	{ MU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
505 	{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
506 	{ MU, A, 0, 0, "(c(ab)?+ab)+", "cabcababcab" },
507 	{ MU, A, 0, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
508 
509 	/* Possessive quantifiers. */
510 	{ MU, A, 0, 0, "(?:a|b)++m", "mababbaaxababbaam" },
511 	{ MU, A, 0, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
512 	{ MU, A, 0, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
513 	{ MU, A, 0, 0, "(a|b)++m", "mababbaaxababbaam" },
514 	{ MU, A, 0, 0, "(a|b)*+m", "mababbaaxababbaam" },
515 	{ MU, A, 0, 0, "(a|b)*+m", "ababbaaxababbaam" },
516 	{ MU, A, 0, 0, "(a|b(*ACCEPT))++m", "maaxab" },
517 	{ MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxm" },
518 	{ MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
519 	{ MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxm" },
520 	{ MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
521 	{ MU, A, 0, 0, "(b*)++m", "bxbbxbbbxm" },
522 	{ MU, A, 0, 0, "(b*)++m", "bxbbxbbbxbbm" },
523 	{ MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxm" },
524 	{ MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxbbm" },
525 	{ MU, A, 0, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
526 	{ MU, A, 0, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
527 	{ MU, A, 0, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
528 	{ MU, A, 0, 0, "(a|(b))++m", "mababbaaxababbaam" },
529 	{ MU, A, 0, 0, "((a)|b)*+m", "mababbaaxababbaam" },
530 	{ MU, A, 0, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
531 	{ MU, A, 0, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
532 	{ MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxm" },
533 	{ MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
534 	{ MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
535 	{ MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
536 	{ MU, A, 0, 0, "((b*))++m", "bxbbxbbbxm" },
537 	{ MU, A, 0, 0, "((b*))++m", "bxbbxbbbxbbm" },
538 	{ MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxm" },
539 	{ MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxbbm" },
540 	{ MU, A, 0, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
541 	{ MU, A, 0, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
542 	{ MU, A, 0, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
543 	{ MU, A, 0, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
544 	{ MU, A, 0, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
545 
546 	/* Back references. */
547 	{ MU, A, 0, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
548 	{ CMU, A, 0, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
549 	{ CM, A, 0, 0, "(a{2,4})\\1", "AaAaaAaA" },
550 	{ MU, A, 0, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
551 	{ MU, A, 0, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
552 	{ MU, A, 0, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
553 	{ MU, A, 0, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
554 	{ MU, A, 0, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
555 	{ MU, A, 0, 0, "(?:(aa)|b)\\1?b", "bb" },
556 	{ CMU, A, 0, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
557 	{ MU, A, 0, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
558 	{ CMU, A, 0, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
559 	{ MU, A, 0, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
560 	{ CM, A, 0, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
561 	{ MU, A, 0, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
562 	{ MU, A, 0, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
563 	{ M, A, 0, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
564 	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
565 	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
566 	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
567 	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
568 	{ PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
569 	{ CMUP, A, 0, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
570 	{ MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
571 	{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
572 	{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" },
573 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" },
574 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" },
575 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" },
576 	{ MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
577 	{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
578 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" },
579 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
580 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
581 	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
582 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" },
583 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
584 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
585 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
586 
587 	/* Assertions. */
588 	{ MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
589 	{ MU, A, 0, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
590 	{ MU, A, 0, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
591 	{ MU, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
592 	{ MU, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
593 	{ M, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
594 	{ M, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
595 	{ MU, A, 0, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
596 	{ MU, A, 0, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
597 	{ MU, A, 0, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
598 	{ MU, A, 0, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
599 	{ MU, A, 0, 0, "((?(?=(a))a)+k)", "bbak" },
600 	{ MU, A, 0, 0, "((?(?=a)a)+k)", "bbak" },
601 	{ MU, A, 0, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
602 	{ MU, A, 0, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
603 	{ MU, A, 0, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
604 	{ MU, A, 0, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
605 	{ MU, A, 0, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
606 	{ MU, A, 0, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
607 	{ MU, A, 0, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
608 	{ MU, A, 0, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
609 	{ MU, A, 0, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
610 	{ MU, A, 0, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
611 	{ MU, A, 0, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
612 	{ MU, A, 0, 0, "a(?=(?C)\\B(?C`x`))b", "ab" },
613 	{ MU, A, 0, 0, "a(?!(?C)\\B(?C`x`))bb|ab", "abb" },
614 	{ MU, A, 0, 0, "a(?=\\b|(?C)\\B(?C`x`))b", "ab" },
615 	{ MU, A, 0, 0, "a(?!\\b|(?C)\\B(?C`x`))bb|ab", "abb" },
616 	{ MU, A, 0, 0, "c(?(?=(?C)\\B(?C`x`))ab|a)", "cab" },
617 	{ MU, A, 0, 0, "c(?(?!(?C)\\B(?C`x`))ab|a)", "cab" },
618 	{ MU, A, 0, 0, "c(?(?=\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
619 	{ MU, A, 0, 0, "c(?(?!\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
620 	{ MU, A, 0, 0, "a(?=)b", "ab" },
621 	{ MU, A, 0, 0 | F_NOMATCH, "a(?!)b", "ab" },
622 
623 	/* Not empty, ACCEPT, FAIL */
624 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
625 	{ MU, A, PCRE2_NOTEMPTY, 0, "a*", "bcaad" },
626 	{ MU, A, PCRE2_NOTEMPTY, 0, "a*?", "bcaad" },
627 	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
628 	{ MU, A, 0, 0, "a(*ACCEPT)b", "ab" },
629 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
630 	{ MU, A, PCRE2_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
631 	{ MU, A, PCRE2_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
632 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
633 	{ MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
634 	{ MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
635 	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
636 	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
637 	{ MU, A, 0, 0, "((a(*ACCEPT)b))", "ab" },
638 	{ MU, A, 0, 0, "(a(*FAIL)a|a)", "aaa" },
639 	{ MU, A, 0, 0, "(?=ab(*ACCEPT)b)a", "ab" },
640 	{ MU, A, 0, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
641 	{ MU, A, 0, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
642 	{ MU, A, PCRE2_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
643 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?=A)", "AB" },
644 
645 	/* Conditional blocks. */
646 	{ MU, A, 0, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
647 	{ MU, A, 0, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
648 	{ MU, A, 0, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
649 	{ MU, A, 0, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
650 	{ MU, A, 0, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
651 	{ MU, A, 0, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
652 	{ MU, A, 0, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
653 	{ MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
654 	{ MU, A, 0, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
655 	{ MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
656 	{ MU, A, 0, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
657 	{ MU, A, 0, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
658 	{ MU, A, 0, 0, "(?(?=a)ab)", "a" },
659 	{ MU, A, 0, 0, "(?(?<!b)c)", "b" },
660 	{ MU, A, 0, 0, "(?(DEFINE)a(b))", "a" },
661 	{ MU, A, 0, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
662 	{ MU, A, 0, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
663 	{ MU, A, 0, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
664 	{ MU, A, 0, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
665 	{ MU, A, 0, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
666 	{ MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
667 	{ MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cbb" },
668 	{ MU, A, 0, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
669 	{ MU, A, 0, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
670 	{ MU, A, 0, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
671 	{ MU, A, 0, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
672 	{ MU, A, 0, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
673 	{ MU, A, 0, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
674 	{ MU, A, 0, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
675 	{ MU, A, 0, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
676 	{ MU, A, 0, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
677 	{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
678 	{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
679 	{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
680 	{ MU, A, 0, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
681 	{ MU, A, 0, 0, "(?(?!)a|b)", "ab" },
682 	{ MU, A, 0, 0, "(?(?!)a)", "ab" },
683 	{ MU, A, 0, 0 | F_NOMATCH, "(?(?!)a|b)", "ac" },
684 
685 	/* Set start of match. */
686 	{ MU, A, 0, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
687 	{ MU, A, 0, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
688 	{ MU, A, 0, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
689 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
690 	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
691 
692 	/* First line. */
693 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
694 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
695 	{ MU | PCRE2_FIRSTLINE, A, 0, 0, "(?<=a)", "a" },
696 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[^a][^b]", "ab" },
697 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "a", "\na" },
698 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[abc]", "\na" },
699 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^a", "\na" },
700 	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
701 	{ MU | PCRE2_FIRSTLINE, A, 0, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
702 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\xc2\x85#" },
703 	{ M | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\x85#" },
704 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
705 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
706 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
707 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, "a", "\ra" },
708 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
709 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
710 	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 1, ".", "\r\n" },
711 	{ PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_LF, 0, 0 | F_NOMATCH, "ab.", "ab" },
712 	{ MU | PCRE2_FIRSTLINE, A, 0, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },
713 	{ PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_ANY, 0, 0, "....a", "012\n0a" },
714 	{ MU | PCRE2_FIRSTLINE, A, 0, 0, "[aC]", "a" },
715 
716 	/* Recurse. */
717 	{ MU, A, 0, 0, "(a)(?1)", "aa" },
718 	{ MU, A, 0, 0, "((a))(?1)", "aa" },
719 	{ MU, A, 0, 0, "(b|a)(?1)", "aa" },
720 	{ MU, A, 0, 0, "(b|(a))(?1)", "aa" },
721 	{ MU, A, 0, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
722 	{ MU, A, 0, 0, "((a)(b)(?:a*))(?1)", "abab" },
723 	{ MU, A, 0, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
724 	{ MU, A, 0, 0, "((?2)b|(a)){2}(?1)", "aabab" },
725 	{ MU, A, 0, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
726 	{ MU, A, 0, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
727 	{ MU, A, 0, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
728 	{ MU, A, 0, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
729 	{ MU, A, 0, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
730 	{ MU, A, 0, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
731 	{ MU, A, 0, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
732 	{ MU, A, 0, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
733 	{ MU, A, 0, 0, "b|<(?R)*>", "<<b>" },
734 	{ MU, A, 0, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
735 	{ MU, A, 0, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
736 	{ MU, A, 0, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
737 	{ MU, A, 0, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
738 	{ MU, A, 0, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
739 	{ MU, A, 0, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
740 	{ MU, A, 0, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
741 	{ MU, A, 0, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
742 	{ MU, A, 0, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
743 	{ MU, A, 0, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
744 	{ MU, A, 0, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
745 	{ MU, A, 0, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
746 	{ MU, A, 0, 0, "((.)(?:.|\\2(?1))){0}#(?1)#", "#aabbccdde# #aabbccddee#" },
747 	{ MU, A, 0, 0, "((.)(?:\\2|\\2{4}b)){0}#(?:(?1))+#", "#aaaab# #aaaaab#" },
748 
749 	/* 16 bit specific tests. */
750 	{ CM, A, 0, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
751 	{ CM, A, 0, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
752 	{ CM, A, 0, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
753 	{ CM, A, 0, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
754 	{ CM, A, 0, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
755 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
756 	{ CM, A, 0, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
757 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
758 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
759 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
760 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
761 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
762 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
763 	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
764 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
765 	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
766 	{ M, A, 0, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
767 	{ M, A, 0, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
768 	{ CM, A, 0, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
769 	{ CM, A, 0, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
770 	{ CM, A, 0, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
771 	{ CM, A, 0, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
772 	{ CM | PCRE2_EXTENDED, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
773 	{ CM, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
774 	{ CM, A, 0, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
775 	{ M, PCRE2_NEWLINE_ANY, 0, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
776 	{ 0, BSR(PCRE2_BSR_UNICODE), 0, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
777 	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
778 	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
779 	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
780 	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
781 
782 	/* Partial matching. */
783 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "ab", "a" },
784 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "ab|a", "a" },
785 	{ MU, A, PCRE2_PARTIAL_HARD, 0, "ab|a", "a" },
786 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "\\b#", "a" },
787 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
788 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
789 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a\\B", "a" },
790 	{ MU, A, PCRE2_PARTIAL_HARD, 0, "a\\b", "a" },
791 
792 	/* (*MARK) verb. */
793 	{ MU, A, 0, 0, "a(*MARK:aa)a", "ababaa" },
794 	{ MU, A, 0, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
795 	{ MU, A, 0, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
796 	{ MU, A, 0, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
797 	{ MU, A, 0, 0, "(?>a(*:aa))b|ac", "ac" },
798 	{ MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
799 	{ MU, A, 0, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
800 	{ MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
801 	{ MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
802 	{ MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
803 	{ MU, A, 0, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
804 	{ MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
805 	{ MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
806 	{ MU, A, 0, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
807 	{ MU, A, 0, 0 | F_NOMATCH, "(*:mark)m", "a" },
808 
809 	/* (*COMMIT) verb. */
810 	{ MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
811 	{ MU, A, 0, 0, "aa(*COMMIT)b", "xaxaab" },
812 	{ MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
813 	{ MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
814 	{ MU, A, 0, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
815 	{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
816 
817 	/* (*PRUNE) verb. */
818 	{ MU, A, 0, 0, "aa\\K(*PRUNE)b", "aaab" },
819 	{ MU, A, 0, 0, "aa(*PRUNE:bb)b|a", "aa" },
820 	{ MU, A, 0, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
821 	{ MU, A, 0, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
822 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
823 	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
824 	{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
825 	{ MU, A, 0, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
826 	{ MU, A, 0, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
827 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
828 	{ MU, A, 0, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
829 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
830 	{ MU, A, 0, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
831 	{ MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
832 	{ MU, A, 0, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
833 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
834 	{ MU, A, 0, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
835 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
836 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
837 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
838 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
839 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
840 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
841 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
842 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
843 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
844 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
845 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
846 	{ MU, A, 0, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
847 	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
848 
849 	/* (*SKIP) verb. */
850 	{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
851 	{ MU, A, 0, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," },
852 	{ MU, A, 0, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," },
853 	{ MU, A, 0, 0 | F_NOMATCH, "b+(?<=(*SKIP)#c)|b+", "#bbb" },
854 
855 	/* (*THEN) verb. */
856 	{ MU, A, 0, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
857 	{ MU, A, 0, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
858 	{ MU, A, 0, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
859 	{ MU, A, 0, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
860 	{ MU, A, 0, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
861 	{ MU, A, 0, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
862 	{ MU, A, 0, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
863 	{ MU, A, 0, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
864 	{ MU, A, 0, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
865 	{ MU, A, 0, 0, "(?=(*THEN: ))* ", " " },
866 	{ MU, A, 0, 0, "a(*THEN)(?R) |", "a" },
867 
868 	/* Recurse and control verbs. */
869 	{ MU, A, 0, 0, "(a(*ACCEPT)b){0}a(?1)b", "aacaabb" },
870 	{ MU, A, 0, 0, "((a)\\2(*ACCEPT)b){0}a(?1)b", "aaacaaabb" },
871 	{ MU, A, 0, 0, "((ab|a(*ACCEPT)x)+|ababababax){0}_(?1)_", "_ababababax_ _ababababa_" },
872 	{ MU, A, 0, 0, "((.)(?:A(*ACCEPT)|(?1)\\2)){0}_(?1)_", "_bcdaAdcb_bcdaAdcb_" },
873 	{ MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_", "_ab_" },
874 	{ MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_|(_aa_)", "_aa_" },
875 	{ MU, A, 0, 0, "(a(*COMMIT)(?:b|bb)|c(*ACCEPT)d|dd){0}_(?1)+_", "_ax_ _cd_ _abbb_ _abcd_ _abbcdd_" },
876 	{ MU, A, 0, 0, "((.)(?:.|(*COMMIT)\\2{3}(*ACCEPT).*|.*)){0}_(?1){0,4}_", "_aaaabbbbccccddd_ _aaaabbbbccccdddd_" },
877 
878 #ifdef SUPPORT_UNICODE
879 	/* Script runs and iterations. */
880 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
881 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
882 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
883 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
884 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
885 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)++#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
886 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)?#", "!ab!abc!ab!ab#" },
887 	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)??#", "!ab!abc!ab!ab#" },
888 #endif
889 
890 	/* Deep recursion. */
891 	{ MU, A, 0, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
892 	{ MU, A, 0, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
893 	{ MU, A, 0, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
894 
895 	/* Deep recursion: Stack limit reached. */
896 	{ M, A, 0, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
897 	{ M, A, 0, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
898 	{ M, A, 0, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
899 	{ M, A, 0, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
900 	{ M, A, 0, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
901 
902 	{ 0, 0, 0, 0, NULL, NULL }
903 };
904 
905 #ifdef SUPPORT_PCRE2_8
callback8(void * arg)906 static pcre2_jit_stack_8* callback8(void *arg)
907 {
908 	return (pcre2_jit_stack_8 *)arg;
909 }
910 #endif
911 
912 #ifdef SUPPORT_PCRE2_16
callback16(void * arg)913 static pcre2_jit_stack_16* callback16(void *arg)
914 {
915 	return (pcre2_jit_stack_16 *)arg;
916 }
917 #endif
918 
919 #ifdef SUPPORT_PCRE2_32
callback32(void * arg)920 static pcre2_jit_stack_32* callback32(void *arg)
921 {
922 	return (pcre2_jit_stack_32 *)arg;
923 }
924 #endif
925 
926 #ifdef SUPPORT_PCRE2_8
927 static pcre2_jit_stack_8 *stack8;
928 
getstack8(void)929 static pcre2_jit_stack_8 *getstack8(void)
930 {
931 	if (!stack8)
932 		stack8 = pcre2_jit_stack_create_8(1, 1024 * 1024, NULL);
933 	return stack8;
934 }
935 
/* Attach the shared 8-bit JIT stack to mcontext. Calling with a NULL context
is the tear-down request: the shared stack, if any, is freed and forgotten. */
static void setstack8(pcre2_match_context_8 *mcontext)
{
	if (mcontext != NULL) {
		pcre2_jit_stack_assign_8(mcontext, callback8, getstack8());
		return;
	}

	if (stack8 != NULL) {
		pcre2_jit_stack_free_8(stack8);
		stack8 = NULL;
	}
}
947 #endif /* SUPPORT_PCRE2_8 */
948 
949 #ifdef SUPPORT_PCRE2_16
950 static pcre2_jit_stack_16 *stack16;
951 
getstack16(void)952 static pcre2_jit_stack_16 *getstack16(void)
953 {
954 	if (!stack16)
955 		stack16 = pcre2_jit_stack_create_16(1, 1024 * 1024, NULL);
956 	return stack16;
957 }
958 
/* Attach the shared 16-bit JIT stack to mcontext. Calling with a NULL context
is the tear-down request: the shared stack, if any, is freed and forgotten. */
static void setstack16(pcre2_match_context_16 *mcontext)
{
	if (mcontext != NULL) {
		pcre2_jit_stack_assign_16(mcontext, callback16, getstack16());
		return;
	}

	if (stack16 != NULL) {
		pcre2_jit_stack_free_16(stack16);
		stack16 = NULL;
	}
}
970 #endif /* SUPPORT_PCRE2_16 */
971 
972 #ifdef SUPPORT_PCRE2_32
973 static pcre2_jit_stack_32 *stack32;
974 
getstack32(void)975 static pcre2_jit_stack_32 *getstack32(void)
976 {
977 	if (!stack32)
978 		stack32 = pcre2_jit_stack_create_32(1, 1024 * 1024, NULL);
979 	return stack32;
980 }
981 
/* Attach the shared 32-bit JIT stack to mcontext. Calling with a NULL context
is the tear-down request: the shared stack, if any, is freed and forgotten. */
static void setstack32(pcre2_match_context_32 *mcontext)
{
	if (mcontext != NULL) {
		pcre2_jit_stack_assign_32(mcontext, callback32, getstack32());
		return;
	}

	if (stack32 != NULL) {
		pcre2_jit_stack_free_32(stack32);
		stack32 = NULL;
	}
}
993 #endif /* SUPPORT_PCRE2_32 */
994 
995 #ifdef SUPPORT_PCRE2_16
996 
/* Convert a zero-terminated UTF-8 byte string to zero-terminated UTF-16.

Arguments:
  input       zero-terminated source bytes
  output      destination with room for max_length 16-bit units
  offsetmap   if not NULL, the entry at the index of each first output unit
              receives the byte offset in input of the character that
              produced it; the entry for the second half of a surrogate pair
              is skipped (left unwritten); one further entry receives the
              offset at which conversion stopped
  max_length  capacity of output in 16-bit units, terminator included;
              nothing is written when it is not positive

Returns:      number of 16-bit units stored, not counting the terminator

No validation is performed: over-long forms and encoded surrogates are decoded
arithmetically. A byte that cannot start a sequence (0x80-0xbf, 0xf8-0xff), or
a lead byte whose continuation bytes are cut short by the terminator, is
passed through as one code unit, so the scan always advances and never reads
beyond the end of the string. */
static int convert_utf8_to_utf16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int *offsetmap, int max_length)
{
	PCRE2_SPTR8 iptr = input;
	PCRE2_UCHAR16 *optr = output;
	unsigned int c;
	int extra;
	int i;

	if (max_length <= 0)
		return 0;

	while (*iptr && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(iptr - input);

		/* Number of continuation bytes announced by the lead byte. */
		c = *iptr;
		if (c < 0xc0 || c >= 0xf8)
			extra = 0;
		else if (c < 0xe0)
			extra = 1;
		else if (c < 0xf0)
			extra = 2;
		else
			extra = 3;

		/* Stop at the first missing continuation byte so that nothing after
		the terminator is ever inspected. */
		for (i = 1; i <= extra; i++) {
			if (!iptr[i]) {
				extra = 0;
				break;
			}
		}

		switch (extra) {
		case 1:
			c = ((c & 0x1f) << 6) | (iptr[1] & 0x3f);
			break;
		case 2:
			c = ((c & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
			break;
		case 3:
			c = ((c & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
			break;
		default:
			/* Single byte: c already holds its value. */
			break;
		}
		iptr += extra + 1;

		if (c < 65536) {
			*optr++ = (PCRE2_UCHAR16)c;
			max_length--;
		} else if (max_length <= 2) {
			/* A surrogate pair plus the terminator needs three units. */
			*optr = '\0';
			return (int)(optr - output);
		} else {
			c -= 0x10000;
			*optr++ = (PCRE2_UCHAR16)(0xd800 | ((c >> 10) & 0x3ff));
			*optr++ = (PCRE2_UCHAR16)(0xdc00 | (c & 0x3ff));
			max_length -= 2;
			/* The low surrogate has no source offset of its own. */
			if (offsetmap)
				offsetmap++;
		}
	}
	if (offsetmap)
		*offsetmap = (int)(iptr - input);
	*optr = '\0';
	return (int)(optr - output);
}
1044 
/* Widen a zero-terminated byte string to 16-bit units, one unit per byte, with
no decoding. At most max_length - 1 units are copied and a terminator is
appended. Returns the number of units copied; a max_length of zero writes
nothing and returns 0. */
static int copy_char8_to_char16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int max_length)
{
	int count = 0;

	if (max_length == 0)
		return 0;

	/* Always keep one slot free for the terminator. */
	for (; input[count] != 0 && count + 1 < max_length; count++)
		output[count] = input[count];

	output[count] = '\0';
	return count;
}
1060 
1061 #define REGTEST_MAX_LENGTH16 4096
1062 static PCRE2_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
1063 static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
1064 
1065 #endif /* SUPPORT_PCRE2_16 */
1066 
1067 #ifdef SUPPORT_PCRE2_32
1068 
/* Convert a zero-terminated UTF-8 byte string to zero-terminated UTF-32.

Arguments:
  input       zero-terminated source bytes
  output      destination with room for max_length 32-bit units
  offsetmap   if not NULL, entry i receives the byte offset in input of the
              character stored in output[i]; one further entry receives the
              offset at which conversion stopped
  max_length  capacity of output in 32-bit units, terminator included;
              nothing is written when it is not positive

Returns:      number of 32-bit units stored, not counting the terminator

No validation is performed: over-long forms and encoded surrogates are decoded
arithmetically. A byte that cannot start a sequence (0x80-0xbf, 0xf8-0xff), or
a lead byte whose continuation bytes are cut short by the terminator, is
passed through as one code unit, so the scan always advances and never reads
beyond the end of the string. */
static int convert_utf8_to_utf32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int *offsetmap, int max_length)
{
	PCRE2_SPTR8 iptr = input;
	PCRE2_UCHAR32 *optr = output;
	unsigned int c;
	int extra;
	int i;

	if (max_length <= 0)
		return 0;

	while (*iptr && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(iptr - input);

		/* Number of continuation bytes announced by the lead byte. */
		c = *iptr;
		if (c < 0xc0 || c >= 0xf8)
			extra = 0;
		else if (c < 0xe0)
			extra = 1;
		else if (c < 0xf0)
			extra = 2;
		else
			extra = 3;

		/* Stop at the first missing continuation byte so that nothing after
		the terminator is ever inspected. */
		for (i = 1; i <= extra; i++) {
			if (!iptr[i]) {
				extra = 0;
				break;
			}
		}

		switch (extra) {
		case 1:
			c = ((c & 0x1f) << 6) | (iptr[1] & 0x3f);
			break;
		case 2:
			c = ((c & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
			break;
		case 3:
			c = ((c & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
			break;
		default:
			/* Single byte: c already holds its value. */
			break;
		}
		iptr += extra + 1;

		*optr++ = c;
		max_length--;
	}
	if (offsetmap)
		*offsetmap = (int)(iptr - input);
	*optr = 0;
	return (int)(optr - output);
}
1104 
/* Widen a zero-terminated byte string to 32-bit units, one unit per byte, with
no decoding. At most max_length - 1 units are copied and a terminator is
appended. Returns the number of units copied; a max_length of zero writes
nothing and returns 0. */
static int copy_char8_to_char32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int max_length)
{
	int count = 0;

	if (max_length == 0)
		return 0;

	/* Always keep one slot free for the terminator. */
	for (; input[count] != 0 && count + 1 < max_length; count++)
		output[count] = input[count];

	output[count] = '\0';
	return count;
}
1120 
1121 #define REGTEST_MAX_LENGTH32 4096
1122 static PCRE2_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
1123 static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1124 
1125 #endif /* SUPPORT_PCRE2_32 */
1126 
/* Report whether a zero-terminated string is pure 7-bit ASCII.

Returns 1 when every byte before the terminator is below 0x80 (an empty string
qualifies), 0 as soon as a byte with the top bit set is found. */
static int check_ascii(const char *input)
{
	const unsigned char *cursor;

	for (cursor = (const unsigned char *)input; *cursor != 0; cursor++) {
		if (*cursor >= 0x80)
			return 0;
	}
	return 1;
}
1137 
1138 #define OVECTOR_SIZE 15
1139 
regression_tests(void)1140 static int regression_tests(void)
1141 {
1142 	struct regression_test_case *current = regression_test_cases;
1143 	int error;
1144 	PCRE2_SIZE err_offs;
1145 	int is_successful;
1146 	int is_ascii;
1147 	int total = 0;
1148 	int successful = 0;
1149 	int successful_row = 0;
1150 	int counter = 0;
1151 	int jit_compile_mode;
1152 	int utf = 0;
1153 	int disabled_options = 0;
1154 	int i;
1155 #ifdef SUPPORT_PCRE2_8
1156 	pcre2_code_8 *re8;
1157 	pcre2_compile_context_8 *ccontext8;
1158 	pcre2_match_data_8 *mdata8_1;
1159 	pcre2_match_data_8 *mdata8_2;
1160 	pcre2_match_context_8 *mcontext8;
1161 	PCRE2_SIZE *ovector8_1 = NULL;
1162 	PCRE2_SIZE *ovector8_2 = NULL;
1163 	int return_value8[2];
1164 #endif
1165 #ifdef SUPPORT_PCRE2_16
1166 	pcre2_code_16 *re16;
1167 	pcre2_compile_context_16 *ccontext16;
1168 	pcre2_match_data_16 *mdata16_1;
1169 	pcre2_match_data_16 *mdata16_2;
1170 	pcre2_match_context_16 *mcontext16;
1171 	PCRE2_SIZE *ovector16_1 = NULL;
1172 	PCRE2_SIZE *ovector16_2 = NULL;
1173 	int return_value16[2];
1174 	int length16;
1175 #endif
1176 #ifdef SUPPORT_PCRE2_32
1177 	pcre2_code_32 *re32;
1178 	pcre2_compile_context_32 *ccontext32;
1179 	pcre2_match_data_32 *mdata32_1;
1180 	pcre2_match_data_32 *mdata32_2;
1181 	pcre2_match_context_32 *mcontext32;
1182 	PCRE2_SIZE *ovector32_1 = NULL;
1183 	PCRE2_SIZE *ovector32_2 = NULL;
1184 	int return_value32[2];
1185 	int length32;
1186 #endif
1187 
1188 #if defined SUPPORT_PCRE2_8
1189 	PCRE2_UCHAR8 cpu_info[128];
1190 #elif defined SUPPORT_PCRE2_16
1191 	PCRE2_UCHAR16 cpu_info[128];
1192 #elif defined SUPPORT_PCRE2_32
1193 	PCRE2_UCHAR32 cpu_info[128];
1194 #endif
1195 #if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
1196 	int return_value;
1197 #endif
1198 
1199 	/* This test compares the behaviour of interpreter and JIT. Although disabling
1200 	utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
1201 	still considered successful from pcre_jit_test point of view. */
1202 
1203 #if defined SUPPORT_PCRE2_8
1204 	pcre2_config_8(PCRE2_CONFIG_JITTARGET, &cpu_info);
1205 #elif defined SUPPORT_PCRE2_16
1206 	pcre2_config_16(PCRE2_CONFIG_JITTARGET, &cpu_info);
1207 #elif defined SUPPORT_PCRE2_32
1208 	pcre2_config_32(PCRE2_CONFIG_JITTARGET, &cpu_info);
1209 #endif
1210 
1211 	printf("Running JIT regression tests\n");
1212 	printf("  target CPU of SLJIT compiler: ");
1213 	for (i = 0; cpu_info[i]; i++)
1214 		printf("%c", (char)(cpu_info[i]));
1215 	printf("\n");
1216 
1217 #if defined SUPPORT_PCRE2_8
1218 	pcre2_config_8(PCRE2_CONFIG_UNICODE, &utf);
1219 #elif defined SUPPORT_PCRE2_16
1220 	pcre2_config_16(PCRE2_CONFIG_UNICODE, &utf);
1221 #elif defined SUPPORT_PCRE2_32
1222 	pcre2_config_32(PCRE2_CONFIG_UNICODE, &utf);
1223 #endif
1224 
1225 	if (!utf)
1226 		disabled_options |= PCRE2_UTF;
1227 #ifdef SUPPORT_PCRE2_8
1228 	printf("  in  8 bit mode with UTF-8  %s:\n", utf ? "enabled" : "disabled");
1229 #endif
1230 #ifdef SUPPORT_PCRE2_16
1231 	printf("  in 16 bit mode with UTF-16 %s:\n", utf ? "enabled" : "disabled");
1232 #endif
1233 #ifdef SUPPORT_PCRE2_32
1234 	printf("  in 32 bit mode with UTF-32 %s:\n", utf ? "enabled" : "disabled");
1235 #endif
1236 
1237 	while (current->pattern) {
1238 		/* printf("\nPattern: %s :\n", current->pattern); */
1239 		total++;
1240 		is_ascii = 0;
1241 		if (!(current->start_offset & F_PROPERTY))
1242 			is_ascii = check_ascii(current->pattern) && check_ascii(current->input);
1243 
1244 		if (current->match_options & PCRE2_PARTIAL_SOFT)
1245 			jit_compile_mode = PCRE2_JIT_PARTIAL_SOFT;
1246 		else if (current->match_options & PCRE2_PARTIAL_HARD)
1247 			jit_compile_mode = PCRE2_JIT_PARTIAL_HARD;
1248 		else
1249 			jit_compile_mode = PCRE2_JIT_COMPLETE;
1250 		error = 0;
1251 #ifdef SUPPORT_PCRE2_8
1252 		re8 = NULL;
1253 		ccontext8 = pcre2_compile_context_create_8(NULL);
1254 		if (ccontext8) {
1255 			if (GET_NEWLINE(current->newline))
1256 				pcre2_set_newline_8(ccontext8, GET_NEWLINE(current->newline));
1257 			if (GET_BSR(current->newline))
1258 				pcre2_set_bsr_8(ccontext8, GET_BSR(current->newline));
1259 
1260 			if (!(current->start_offset & F_NO8)) {
1261 				re8 = pcre2_compile_8((PCRE2_SPTR8)current->pattern, PCRE2_ZERO_TERMINATED,
1262 					current->compile_options & ~disabled_options,
1263 					&error, &err_offs, ccontext8);
1264 
1265 				if (!re8 && (utf || is_ascii))
1266 					printf("\n8 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1267 			}
1268 			pcre2_compile_context_free_8(ccontext8);
1269 		}
1270 		else
1271 			printf("\n8 bit: Cannot allocate compile context\n");
1272 #endif
1273 #ifdef SUPPORT_PCRE2_16
1274 		if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1275 			convert_utf8_to_utf16((PCRE2_SPTR8)current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1276 		else
1277 			copy_char8_to_char16((PCRE2_SPTR8)current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1278 
1279 		re16 = NULL;
1280 		ccontext16 = pcre2_compile_context_create_16(NULL);
1281 		if (ccontext16) {
1282 			if (GET_NEWLINE(current->newline))
1283 				pcre2_set_newline_16(ccontext16, GET_NEWLINE(current->newline));
1284 			if (GET_BSR(current->newline))
1285 				pcre2_set_bsr_16(ccontext16, GET_BSR(current->newline));
1286 
1287 			if (!(current->start_offset & F_NO16)) {
1288 				re16 = pcre2_compile_16(regtest_buf16, PCRE2_ZERO_TERMINATED,
1289 					current->compile_options & ~disabled_options,
1290 					&error, &err_offs, ccontext16);
1291 
1292 				if (!re16 && (utf || is_ascii))
1293 					printf("\n16 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1294 			}
1295 			pcre2_compile_context_free_16(ccontext16);
1296 		}
1297 		else
1298 			printf("\n16 bit: Cannot allocate compile context\n");
1299 #endif
1300 #ifdef SUPPORT_PCRE2_32
1301 		if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1302 			convert_utf8_to_utf32((PCRE2_SPTR8)current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1303 		else
1304 			copy_char8_to_char32((PCRE2_SPTR8)current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1305 
1306 		re32 = NULL;
1307 		ccontext32 = pcre2_compile_context_create_32(NULL);
1308 		if (ccontext32) {
1309 			if (GET_NEWLINE(current->newline))
1310 				pcre2_set_newline_32(ccontext32, GET_NEWLINE(current->newline));
1311 			if (GET_BSR(current->newline))
1312 				pcre2_set_bsr_32(ccontext32, GET_BSR(current->newline));
1313 
1314 			if (!(current->start_offset & F_NO32)) {
1315 				re32 = pcre2_compile_32(regtest_buf32, PCRE2_ZERO_TERMINATED,
1316 					current->compile_options & ~disabled_options,
1317 					&error, &err_offs, ccontext32);
1318 
1319 				if (!re32 && (utf || is_ascii))
1320 					printf("\n32 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1321 			}
1322 			pcre2_compile_context_free_32(ccontext32);
1323 		}
1324 		else
1325 			printf("\n32 bit: Cannot allocate compile context\n");
1326 #endif
1327 
1328 		counter++;
1329 		if ((counter & 0x3) != 0) {
1330 #ifdef SUPPORT_PCRE2_8
1331 			setstack8(NULL);
1332 #endif
1333 #ifdef SUPPORT_PCRE2_16
1334 			setstack16(NULL);
1335 #endif
1336 #ifdef SUPPORT_PCRE2_32
1337 			setstack32(NULL);
1338 #endif
1339 		}
1340 
1341 #ifdef SUPPORT_PCRE2_8
1342 		return_value8[0] = -1000;
1343 		return_value8[1] = -1000;
1344 		mdata8_1 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
1345 		mdata8_2 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
1346 		mcontext8 = pcre2_match_context_create_8(NULL);
1347 		if (!mdata8_1 || !mdata8_2 || !mcontext8) {
1348 			printf("\n8 bit: Cannot allocate match data\n");
1349 			pcre2_match_data_free_8(mdata8_1);
1350 			pcre2_match_data_free_8(mdata8_2);
1351 			pcre2_match_context_free_8(mcontext8);
1352 			pcre2_code_free_8(re8);
1353 			re8 = NULL;
1354 		} else {
1355 			ovector8_1 = pcre2_get_ovector_pointer_8(mdata8_1);
1356 			ovector8_2 = pcre2_get_ovector_pointer_8(mdata8_2);
1357 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1358 				ovector8_1[i] = -2;
1359 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1360 				ovector8_2[i] = -2;
1361 			pcre2_set_match_limit_8(mcontext8, 10000000);
1362 		}
1363 		if (re8) {
1364 			return_value8[1] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1365 				current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, mcontext8);
1366 
1367 			if (pcre2_jit_compile_8(re8, jit_compile_mode)) {
1368 				printf("\n8 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1369 			} else if ((counter & 0x1) != 0) {
1370 				setstack8(mcontext8);
1371 				return_value8[0] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1372 					current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
1373 			} else {
1374 				pcre2_jit_stack_assign_8(mcontext8, NULL, getstack8());
1375 				return_value8[0] = pcre2_jit_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1376 					current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
1377 			}
1378 		}
1379 #endif
1380 
1381 #ifdef SUPPORT_PCRE2_16
1382 		return_value16[0] = -1000;
1383 		return_value16[1] = -1000;
1384 		mdata16_1 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
1385 		mdata16_2 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
1386 		mcontext16 = pcre2_match_context_create_16(NULL);
1387 		if (!mdata16_1 || !mdata16_2 || !mcontext16) {
1388 			printf("\n16 bit: Cannot allocate match data\n");
1389 			pcre2_match_data_free_16(mdata16_1);
1390 			pcre2_match_data_free_16(mdata16_2);
1391 			pcre2_match_context_free_16(mcontext16);
1392 			pcre2_code_free_16(re16);
1393 			re16 = NULL;
1394 		} else {
1395 			ovector16_1 = pcre2_get_ovector_pointer_16(mdata16_1);
1396 			ovector16_2 = pcre2_get_ovector_pointer_16(mdata16_2);
1397 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1398 				ovector16_1[i] = -2;
1399 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1400 				ovector16_2[i] = -2;
1401 			pcre2_set_match_limit_16(mcontext16, 10000000);
1402 		}
1403 		if (re16) {
1404 			if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1405 				length16 = convert_utf8_to_utf16((PCRE2_SPTR8)current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1406 			else
1407 				length16 = copy_char8_to_char16((PCRE2_SPTR8)current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1408 
1409 			return_value16[1] = pcre2_match_16(re16, regtest_buf16, length16,
1410 				current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, mcontext16);
1411 
1412 			if (pcre2_jit_compile_16(re16, jit_compile_mode)) {
1413 				printf("\n16 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1414 			} else if ((counter & 0x1) != 0) {
1415 				setstack16(mcontext16);
1416 				return_value16[0] = pcre2_match_16(re16, regtest_buf16, length16,
1417 					current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
1418 			} else {
1419 				pcre2_jit_stack_assign_16(mcontext16, NULL, getstack16());
1420 				return_value16[0] = pcre2_jit_match_16(re16, regtest_buf16, length16,
1421 					current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
1422 			}
1423 		}
1424 #endif
1425 
1426 #ifdef SUPPORT_PCRE2_32
1427 		return_value32[0] = -1000;
1428 		return_value32[1] = -1000;
1429 		mdata32_1 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
1430 		mdata32_2 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
1431 		mcontext32 = pcre2_match_context_create_32(NULL);
1432 		if (!mdata32_1 || !mdata32_2 || !mcontext32) {
1433 			printf("\n32 bit: Cannot allocate match data\n");
1434 			pcre2_match_data_free_32(mdata32_1);
1435 			pcre2_match_data_free_32(mdata32_2);
1436 			pcre2_match_context_free_32(mcontext32);
1437 			pcre2_code_free_32(re32);
1438 			re32 = NULL;
1439 		} else {
1440 			ovector32_1 = pcre2_get_ovector_pointer_32(mdata32_1);
1441 			ovector32_2 = pcre2_get_ovector_pointer_32(mdata32_2);
1442 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1443 				ovector32_1[i] = -2;
1444 			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1445 				ovector32_2[i] = -2;
1446 			pcre2_set_match_limit_32(mcontext32, 10000000);
1447 		}
1448 		if (re32) {
1449 			if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1450 				length32 = convert_utf8_to_utf32((PCRE2_SPTR8)current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1451 			else
1452 				length32 = copy_char8_to_char32((PCRE2_SPTR8)current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1453 
1454 			return_value32[1] = pcre2_match_32(re32, regtest_buf32, length32,
1455 				current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, mcontext32);
1456 
1457 			if (pcre2_jit_compile_32(re32, jit_compile_mode)) {
1458 				printf("\n32 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1459 			} else if ((counter & 0x1) != 0) {
1460 				setstack32(mcontext32);
1461 				return_value32[0] = pcre2_match_32(re32, regtest_buf32, length32,
1462 					current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
1463 			} else {
1464 				pcre2_jit_stack_assign_32(mcontext32, NULL, getstack32());
1465 				return_value32[0] = pcre2_jit_match_32(re32, regtest_buf32, length32,
1466 					current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
1467 			}
1468 		}
1469 #endif
1470 
1471 		/* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1472 			return_value8[0], return_value16[0], return_value32[0],
1473 			(int)ovector8_1[0], (int)ovector8_1[1],
1474 			(int)ovector16_1[0], (int)ovector16_1[1],
1475 			(int)ovector32_1[0], (int)ovector32_1[1],
1476 			(current->compile_options & PCRE2_CASELESS) ? "C" : ""); */
1477 
1478 		/* If F_DIFF is set, just run the test, but do not compare the results.
1479 		Segfaults can still be captured. */
1480 
1481 		is_successful = 1;
1482 		if (!(current->start_offset & F_DIFF)) {
1483 #if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
1484 			if (!(current->start_offset & F_FORCECONV)) {
1485 
1486 				/* All results must be the same. */
1487 #ifdef SUPPORT_PCRE2_8
1488 				if ((return_value = return_value8[0]) != return_value8[1]) {
1489 					printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1490 						return_value8[0], return_value8[1], total, current->pattern, current->input);
1491 					is_successful = 0;
1492 				} else
1493 #endif
1494 #ifdef SUPPORT_PCRE2_16
1495 				if ((return_value = return_value16[0]) != return_value16[1]) {
1496 					printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1497 						return_value16[0], return_value16[1], total, current->pattern, current->input);
1498 					is_successful = 0;
1499 				} else
1500 #endif
1501 #ifdef SUPPORT_PCRE2_32
1502 				if ((return_value = return_value32[0]) != return_value32[1]) {
1503 					printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1504 						return_value32[0], return_value32[1], total, current->pattern, current->input);
1505 					is_successful = 0;
1506 				} else
1507 #endif
1508 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
1509 				if (return_value8[0] != return_value16[0]) {
1510 					printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1511 						return_value8[0], return_value16[0],
1512 						total, current->pattern, current->input);
1513 					is_successful = 0;
1514 				} else
1515 #endif
1516 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
1517 				if (return_value8[0] != return_value32[0]) {
1518 					printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1519 						return_value8[0], return_value32[0],
1520 						total, current->pattern, current->input);
1521 					is_successful = 0;
1522 				} else
1523 #endif
1524 #if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
1525 				if (return_value16[0] != return_value32[0]) {
1526 					printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1527 						return_value16[0], return_value32[0],
1528 						total, current->pattern, current->input);
1529 					is_successful = 0;
1530 				} else
1531 #endif
1532 				if (return_value >= 0 || return_value == PCRE2_ERROR_PARTIAL) {
1533 					if (return_value == PCRE2_ERROR_PARTIAL) {
1534 						return_value = 2;
1535 					} else {
1536 						return_value *= 2;
1537 					}
1538 #ifdef SUPPORT_PCRE2_8
1539 					return_value8[0] = return_value;
1540 #endif
1541 #ifdef SUPPORT_PCRE2_16
1542 					return_value16[0] = return_value;
1543 #endif
1544 #ifdef SUPPORT_PCRE2_32
1545 					return_value32[0] = return_value;
1546 #endif
1547 					/* Transform back the results. */
1548 					if (current->compile_options & PCRE2_UTF) {
1549 #ifdef SUPPORT_PCRE2_16
1550 						for (i = 0; i < return_value; ++i) {
1551 							if (ovector16_1[i] != PCRE2_UNSET)
1552 								ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1553 							if (ovector16_2[i] != PCRE2_UNSET)
1554 								ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1555 						}
1556 #endif
1557 #ifdef SUPPORT_PCRE2_32
1558 						for (i = 0; i < return_value; ++i) {
1559 							if (ovector32_1[i] != PCRE2_UNSET)
1560 								ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1561 							if (ovector32_2[i] != PCRE2_UNSET)
1562 								ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1563 						}
1564 #endif
1565 					}
1566 
1567 					for (i = 0; i < return_value; ++i) {
1568 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
1569 						if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1570 							printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1571 								i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector16_1[i], (int)ovector16_2[i],
1572 								total, current->pattern, current->input);
1573 							is_successful = 0;
1574 						}
1575 #endif
1576 #if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
1577 						if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1578 							printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1579 								i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
1580 								total, current->pattern, current->input);
1581 							is_successful = 0;
1582 						}
1583 #endif
1584 #if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
1585 						if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) {
1586 							printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1587 								i, (int)ovector16_1[i], (int)ovector16_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
1588 								total, current->pattern, current->input);
1589 							is_successful = 0;
1590 						}
1591 #endif
1592 					}
1593 				}
1594 			} else
1595 #endif /* more than one of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16 and SUPPORT_PCRE2_32 */
1596 			{
1597 #ifdef SUPPORT_PCRE2_8
1598 				if (return_value8[0] != return_value8[1]) {
1599 					printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1600 						return_value8[0], return_value8[1], total, current->pattern, current->input);
1601 					is_successful = 0;
1602 				} else if (return_value8[0] >= 0 || return_value8[0] == PCRE2_ERROR_PARTIAL) {
1603 					if (return_value8[0] == PCRE2_ERROR_PARTIAL)
1604 						return_value8[0] = 2;
1605 					else
1606 						return_value8[0] *= 2;
1607 
1608 					for (i = 0; i < return_value8[0]; ++i)
1609 						if (ovector8_1[i] != ovector8_2[i]) {
1610 							printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1611 								i, (int)ovector8_1[i], (int)ovector8_2[i], total, current->pattern, current->input);
1612 							is_successful = 0;
1613 						}
1614 				}
1615 #endif
1616 
1617 #ifdef SUPPORT_PCRE2_16
1618 				if (return_value16[0] != return_value16[1]) {
1619 					printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1620 						return_value16[0], return_value16[1], total, current->pattern, current->input);
1621 					is_successful = 0;
1622 				} else if (return_value16[0] >= 0 || return_value16[0] == PCRE2_ERROR_PARTIAL) {
1623 					if (return_value16[0] == PCRE2_ERROR_PARTIAL)
1624 						return_value16[0] = 2;
1625 					else
1626 						return_value16[0] *= 2;
1627 
1628 					for (i = 0; i < return_value16[0]; ++i)
1629 						if (ovector16_1[i] != ovector16_2[i]) {
1630 							printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1631 								i, (int)ovector16_1[i], (int)ovector16_2[i], total, current->pattern, current->input);
1632 							is_successful = 0;
1633 						}
1634 				}
1635 #endif
1636 
1637 #ifdef SUPPORT_PCRE2_32
1638 				if (return_value32[0] != return_value32[1]) {
1639 					printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1640 						return_value32[0], return_value32[1], total, current->pattern, current->input);
1641 					is_successful = 0;
1642 				} else if (return_value32[0] >= 0 || return_value32[0] == PCRE2_ERROR_PARTIAL) {
1643 					if (return_value32[0] == PCRE2_ERROR_PARTIAL)
1644 						return_value32[0] = 2;
1645 					else
1646 						return_value32[0] *= 2;
1647 
1648 					for (i = 0; i < return_value32[0]; ++i)
1649 						if (ovector32_1[i] != ovector32_2[i]) {
1650 							printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1651 								i, (int)ovector32_1[i], (int)ovector32_2[i], total, current->pattern, current->input);
1652 							is_successful = 0;
1653 						}
1654 				}
1655 #endif
1656 			}
1657 		}
1658 
1659 		if (is_successful) {
1660 #ifdef SUPPORT_PCRE2_8
1661 			if (!(current->start_offset & F_NO8) && (utf || is_ascii)) {
1662 				if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1663 					printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1664 						total, current->pattern, current->input);
1665 					is_successful = 0;
1666 				}
1667 
1668 				if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1669 					printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1670 						total, current->pattern, current->input);
1671 					is_successful = 0;
1672 				}
1673 			}
1674 #endif
1675 #ifdef SUPPORT_PCRE2_16
1676 			if (!(current->start_offset & F_NO16) && (utf || is_ascii)) {
1677 				if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1678 					printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1679 						total, current->pattern, current->input);
1680 					is_successful = 0;
1681 				}
1682 
1683 				if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1684 					printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1685 						total, current->pattern, current->input);
1686 					is_successful = 0;
1687 				}
1688 			}
1689 #endif
1690 #ifdef SUPPORT_PCRE2_32
1691 			if (!(current->start_offset & F_NO32) && (utf || is_ascii)) {
1692 				if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1693 					printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1694 						total, current->pattern, current->input);
1695 					is_successful = 0;
1696 				}
1697 
1698 				if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1699 					printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1700 						total, current->pattern, current->input);
1701 					is_successful = 0;
1702 				}
1703 			}
1704 #endif
1705 		}
1706 
1707 		if (is_successful) {
1708 #ifdef SUPPORT_PCRE2_8
1709 			if (re8 && !(current->start_offset & F_NO8) && pcre2_get_mark_8(mdata8_1) != pcre2_get_mark_8(mdata8_2)) {
1710 				printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1711 					total, current->pattern, current->input);
1712 				is_successful = 0;
1713 			}
1714 #endif
1715 #ifdef SUPPORT_PCRE2_16
1716 			if (re16 && !(current->start_offset & F_NO16) && pcre2_get_mark_16(mdata16_1) != pcre2_get_mark_16(mdata16_2)) {
1717 				printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1718 					total, current->pattern, current->input);
1719 				is_successful = 0;
1720 			}
1721 #endif
1722 #ifdef SUPPORT_PCRE2_32
1723 			if (re32 && !(current->start_offset & F_NO32) && pcre2_get_mark_32(mdata32_1) != pcre2_get_mark_32(mdata32_2)) {
1724 				printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1725 					total, current->pattern, current->input);
1726 				is_successful = 0;
1727 			}
1728 #endif
1729 		}
1730 
1731 #ifdef SUPPORT_PCRE2_8
1732 		pcre2_code_free_8(re8);
1733 		pcre2_match_data_free_8(mdata8_1);
1734 		pcre2_match_data_free_8(mdata8_2);
1735 		pcre2_match_context_free_8(mcontext8);
1736 #endif
1737 #ifdef SUPPORT_PCRE2_16
1738 		pcre2_code_free_16(re16);
1739 		pcre2_match_data_free_16(mdata16_1);
1740 		pcre2_match_data_free_16(mdata16_2);
1741 		pcre2_match_context_free_16(mcontext16);
1742 #endif
1743 #ifdef SUPPORT_PCRE2_32
1744 		pcre2_code_free_32(re32);
1745 		pcre2_match_data_free_32(mdata32_1);
1746 		pcre2_match_data_free_32(mdata32_2);
1747 		pcre2_match_context_free_32(mcontext32);
1748 #endif
1749 
1750 		if (is_successful) {
1751 			successful++;
1752 			successful_row++;
1753 			printf(".");
1754 			if (successful_row >= 60) {
1755 				successful_row = 0;
1756 				printf("\n");
1757 			}
1758 		} else
1759 			successful_row = 0;
1760 
1761 		fflush(stdout);
1762 		current++;
1763 	}
1764 #ifdef SUPPORT_PCRE2_8
1765 	setstack8(NULL);
1766 #endif
1767 #ifdef SUPPORT_PCRE2_16
1768 	setstack16(NULL);
1769 #endif
1770 #ifdef SUPPORT_PCRE2_32
1771 	setstack32(NULL);
1772 #endif
1773 
1774 	if (total == successful) {
1775 		printf("\nAll JIT regression tests are successfully passed.\n");
1776 		return 0;
1777 	} else {
1778 		printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1779 		return 1;
1780 	}
1781 }
1782 
1783 #if defined SUPPORT_UNICODE
1784 
/* Check one match outcome against the expected result.

A negative match_start means the caller expects the result code -1; any other
result is reported. Otherwise result must be greater than zero and the first
ovector pair must equal (match_start, match_end). pattern_index and type are
used only to label the diagnostic that is printed on a mismatch.

Returns 0 when the outcome is as expected, 1 after printing a diagnostic. */
static int check_invalid_utf_result(int pattern_index, const char *type, int result,
	int match_start, int match_end, PCRE2_SIZE *ovector)
{
	int mismatch = 0;

	if (match_start < 0) {
		/* NOTE(review): -1 is presumably PCRE2_ERROR_NOMATCH - confirm. */
		if (result != -1) {
			printf("Pattern[%d] %s result is not -1.\n", pattern_index, type);
			mismatch = 1;
		}
	} else if (result <= 0) {
		printf("Pattern[%d] %s result (%d) is not greater than 0.\n", pattern_index, type, result);
		mismatch = 1;
	} else {
		/* ovector is only inspected once a successful match was reported. */
		const PCRE2_SIZE expected_start = (PCRE2_SIZE)match_start;
		const PCRE2_SIZE expected_end = (PCRE2_SIZE)match_end;

		if (ovector[0] != expected_start) {
			printf("Pattern[%d] %s ovector[0] is unexpected (%d instead of %d)\n",
				pattern_index, type, (int)ovector[0], match_start);
			mismatch = 1;
		} else if (ovector[1] != expected_end) {
			printf("Pattern[%d] %s ovector[1] is unexpected (%d instead of %d)\n",
				pattern_index, type, (int)ovector[1], match_end);
			mismatch = 1;
		}
	}

	return mismatch;
}
1815 
1816 #endif /* SUPPORT_UNICODE */
1817 
1818 #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_8
1819 
/* Shorthand option sets used by the entries of the invalid UTF-8 test table. */

/* Compile options: PCRE2_UTF, PCRE2_DOTALL and PCRE2_ANCHORED combined. */
#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
/* JIT compile options: PCRE2_JIT_COMPLETE together with PCRE2_JIT_INVALID_UTF. */
#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
/* JIT compile options: the CI flags plus PCRE2_JIT_PARTIAL_SOFT. */
#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)
1823 
/* One entry of the invalid UTF-8 regression table
(invalid_utf8_regression_test_cases). The table uses positional initializers,
so the order of the fields must not be changed. */
struct invalid_utf8_regression_test_case {
	/* PCRE2 compile option bits; the table entries use UDA or other
	PCRE2_UTF based combinations. */
	int compile_options;
	/* JIT compile option bits; the table entries use CI or CPI. */
	int jit_compile_options;
	/* NOTE(review): presumably the offset in input where matching starts -
	confirm against the code that consumes the table. */
	int start_offset;
	/* NOTE(review): presumably the number of code units trimmed from the
	left and right ends of input - confirm against the consumer. */
	int skip_left;
	int skip_right;
	/* Expected match bounds. Many entries carry -1, -1 here, presumably
	meaning that no match is expected (TODO confirm). */
	int match_start;
	int match_end;
	/* Up to two pattern strings; the second slot is NULL in many entries. */
	const char *pattern[2];
	/* Subject string; the table entries spell out raw bytes with hex
	escapes. */
	const char *input;
};
1835 
/* Declared without an initializer, so this is a zero-valued file-scope object.
NOTE(review): presumably only its address is used, as a marker value - confirm
against the users of this symbol. */
static const char invalid_utf8_newline_cr;
1837 
1838 static const struct invalid_utf8_regression_test_case invalid_utf8_regression_test_cases[] = {
1839 	{ UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
1840 	{ UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf0\x90\x80\x80" },
1841 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf4\x90\x80\x80" },
1842 	{ UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
1843 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\x7f" },
1844 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\xc0" },
1845 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x8f\xbf\xbf" },
1846 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf#" },
1847 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf" },
1848 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80#" },
1849 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80" },
1850 	{ UDA, CI, 0, 0, 2, -1, -1, { ".", NULL }, "\xef\xbf\xbf#" },
1851 	{ UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xef\xbf\xbf" },
1852 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\x7f#" },
1853 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\xc0" },
1854 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf#" },
1855 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf" },
1856 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xed\x9f\xbf#" },
1857 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xa0\x80#" },
1858 	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xee\x80\x80#" },
1859 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xbf\xbf#" },
1860 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf##" },
1861 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf#" },
1862 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf" },
1863 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80##" },
1864 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80#" },
1865 	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80" },
1866 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80##" },
1867 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0##" },
1868 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80" },
1869 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0" },
1870 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf##" },
1871 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf" },
1872 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80###" },
1873 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80" },
1874 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8###" },
1875 	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8" },
1876 	{ UDA, CI, 0, 0, 0, 0, 1, { ".", NULL }, "\x7f" },
1877 
1878 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf4\x8f\xbf\xbf#" },
1879 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\xa0\x80\x80\xf4\xa0\x80\x80" },
1880 	{ UDA, CPI, 4, 1, 1, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf" },
1881 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xef\xbf\xbf#" },
1882 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xe0\xa0\x80#" },
1883 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf0\x90\x80\x80#" },
1884 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf3\xbf\xbf\xbf#" },
1885 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf0\x8f\xbf\xbf\xf0\x8f\xbf\xbf" },
1886 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf5\x80\x80\x80\xf5\x80\x80\x80" },
1887 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x90\x80\x80\xf4\x90\x80\x80" },
1888 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xff\xf4\x8f\xbf\xff" },
1889 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xff\xbf\xf4\x8f\xff\xbf" },
1890 	{ UDA, CPI, 4, 0, 1, -1, -1, { "\\B", "\\b" }, "\xef\x80\x80\x80\xef\x80\x80" },
1891 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80\x80\x80\x80\x80\x80\x80" },
1892 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\x9f\xbf\xe0\x9f\xbf#" },
1893 	{ UDA, CPI, 4, 2, 2, -1, -1, { "\\B", "\\b" }, "#\xe0\xa0\x80\xe0\xa0\x80#" },
1894 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xf0\x80\x80\xf0\x80\x80#" },
1895 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xed\xa0\x80\xed\xa0\x80#" },
1896 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xdf\xbf#" },
1897 	{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xdf\xbf#" },
1898 	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xc2\x80#" },
1899 	{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xc2\x80#" },
1900 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xc1\xbf\xc1\xbf##" },
1901 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xdf\xc0\xdf\xc0##" },
1902 	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80\xe0\x80##" },
1903 
1904 	{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xef\xbf\xbf#" },
1905 	{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xe0\xa0\x80#" },
1906 	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x9f\xbf\xe0\x9f\xbf" },
1907 	{ UDA, CPI, 3, 1, 1, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xbf\xef\xbf\xbf" },
1908 	{ UDA, CPI, 3, 0, 1, -1, -1, { "\\B", "\\b" }, "\xdf\x80\x80\xdf\x80" },
1909 	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xff\xef\xbf\xff" },
1910 	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xff\xbf\xef\xff\xbf" },
1911 	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xed\xbf\xbf\xed\xbf\xbf" },
1912 
1913 	{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xdf\xbf#" },
1914 	{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xc2\x80#" },
1915 	{ UDA, CPI, 2, 1, 1, -1, -1, { "\\B", "\\b" }, "\xdf\xbf\xdf\xbf" },
1916 	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xc1\xbf\xc1\xbf" },
1917 	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x80\xe0\x80" },
1918 	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xff\xdf\xff" },
1919 	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xff\xbf\xff\xbf" },
1920 
1921 	{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x7f#" },
1922 	{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x01#" },
1923 	{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80" },
1924 	{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\xb0\xb0" },
1925 
1926 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { "(.)\\1", NULL }, "aA" },
1927 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "a\xff" },
1928 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
1929 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
1930 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "\xc2\x80\x80" },
1931 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 6, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
1932 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
1933 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 8, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
1934 	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
1935 
1936 	{ UDA, CPI, 0, 0, 0, 0, 1, { "\\X", NULL }, "A" },
1937 	{ UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xff" },
1938 	{ UDA, CPI, 0, 0, 0, 0, 2, { "\\X", NULL }, "\xc3\xa1" },
1939 	{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xc3\xa1" },
1940 	{ UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xc3\x7f" },
1941 	{ UDA, CPI, 0, 0, 0, 0, 3, { "\\X", NULL }, "\xe1\xbd\xb8" },
1942 	{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xe1\xbd\xb8" },
1943 	{ UDA, CPI, 0, 0, 0, 0, 4, { "\\X", NULL }, "\xf0\x90\x90\x80" },
1944 	{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xf0\x90\x90\x80" },
1945 
1946 	{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "#" },
1947 	{ UDA, CPI, 0, 0, 0, 0, 4, { "[^#]", NULL }, "\xf4\x8f\xbf\xbf" },
1948 	{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xf4\x90\x80\x80" },
1949 	{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xc1\x80" },
1950 
1951 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { "^\\W", NULL }, " \x0a#"},
1952 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 14, 15, { "^\\W", NULL }, " \xc0\x8a#\xe0\x80\x8a#\xf0\x80\x80\x8a#\x0a#"},
1953 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf8\x0a#"},
1954 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xc3\x0a#"},
1955 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf1\x0a#"},
1956 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xf2\xbf\x0a#"},
1957 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \xf2\xbf\xbf\x0a#"},
1958 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xef\x0a#"},
1959 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xef\xbf\x0a#"},
1960 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \x85#\xc2\x85#"},
1961 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 7, 8, { "^\\W", NULL }, " \xe2\x80\xf8\xe2\x80\xa8#"},
1962 
1963 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xe2\x80\xf8\xe2\x80\xa8#"},
1964 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 3, 4, { "#", NULL }, "\xe2\x80\xf8#\xe2\x80\xa8#"},
1965 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "abcd\xc2\x85#"},
1966 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 1, 2, { "#", NULL }, "\x85#\xc2\x85#"},
1967 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 5, 6, { "#", NULL }, "\xef,\x80,\xf8#\x0a"},
1968 	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xef,\x80,\xf8\x0a#"},
1969 
1970 	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
1971 	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
1972 	{ PCRE2_UTF, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
1973 	{ PCRE2_UTF, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
1974 
1975 	{ PCRE2_UTF | PCRE2_UCP, CI, 0, 0, 0, -1, -1, { "[\\s]", NULL }, "\xed\xa0\x80" },
1976 
1977 	/* These two are not invalid UTF tests, but this infrastructure fits better for them. */
1978 	{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\X{2}", NULL }, "\r\n\n" },
1979 	{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\R{2}", NULL }, "\r\n\n" },
1980 
1981 	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 0, 0, 0, -1, -1, { "^.a", &invalid_utf8_newline_cr }, "\xc3\xa7#a" },
1982 
1983 	{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
1984 };
1985 
1986 #undef UDA
1987 #undef CI
1988 #undef CPI
1989 
/* Run one pattern of an invalid UTF-8 test case through the JIT matcher.

Arguments:
  current        test case supplying the options, subject and the
                 match_start / match_end values to check against
  pattern_index  index of the test case, only used in diagnostic messages
  i              which entry of current->pattern to compile
  ccontext       compile context passed to pcre2_compile_8()
  mdata          match data block used for every match attempt

Returns:         1 if current->pattern[i] is NULL (nothing to run) or every
                 requested match mode passed check_invalid_utf_result();
                 0 if compilation, JIT compilation or a check failed
*/
static int run_invalid_utf8_test(const struct invalid_utf8_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_8 *ccontext, pcre2_match_data_8 *mdata)
{
	pcre2_code_8 *code;
	int result, errorcode;
	PCRE2_SIZE length, erroroffset;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_8(mdata);

	/* An unused pattern slot counts as a pass. */
	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_8((PCRE2_UCHAR8*)current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	if (!code) {
		/* Print the sub-pattern index as well, so that a failure of
		pattern[1] is not reported as a failure of pattern[0]. */
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_8(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		pcre2_code_free_8(code);
		return 0;
	}

	/* The subject is the input with skip_left bytes dropped from the front
	and skip_right bytes dropped from the back. start_offset is relative to
	the full input, hence the skip_left adjustment in the calls below. */
	length = (PCRE2_SIZE)(strlen(current->input) - current->skip_left - current->skip_right);

	/* Only the match modes requested at JIT compile time are exercised. */
	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
			length, current->start_offset - current->skip_left, 0, mdata, NULL);

		/* A non-zero return from the checker means the result was wrong. */
		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
			pcre2_code_free_8(code);
			return 0;
		}
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
			length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
			pcre2_code_free_8(code);
			return 0;
		}
	}

	pcre2_code_free_8(code);
	return 1;
}
2040 
invalid_utf8_regression_tests(void)2041 static int invalid_utf8_regression_tests(void)
2042 {
2043 	const struct invalid_utf8_regression_test_case *current;
2044 	pcre2_compile_context_8 *ccontext;
2045 	pcre2_match_data_8 *mdata;
2046 	int total = 0, successful = 0;
2047 	int result;
2048 
2049 	printf("\nRunning invalid-utf8 JIT regression tests\n");
2050 
2051 	ccontext = pcre2_compile_context_create_8(NULL);
2052 	pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY);
2053 	mdata = pcre2_match_data_create_8(4, NULL);
2054 
2055 	for (current = invalid_utf8_regression_test_cases; current->pattern[0]; current++) {
2056 		/* printf("\nPattern: %s :\n", current->pattern); */
2057 		total++;
2058 
2059 		result = 1;
2060 		if (current->pattern[1] != &invalid_utf8_newline_cr)
2061 		{
2062 			if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
2063 				result = 0;
2064 			if (!run_invalid_utf8_test(current, total - 1, 1, ccontext, mdata))
2065 				result = 0;
2066 		} else {
2067 			pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_CR);
2068 			if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
2069 				result = 0;
2070 			pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY);
2071 		}
2072 
2073 		if (result) {
2074 			successful++;
2075 		}
2076 
2077 		printf(".");
2078 		if ((total % 60) == 0)
2079 			printf("\n");
2080 	}
2081 
2082 	if ((total % 60) != 0)
2083 		printf("\n");
2084 
2085 	pcre2_match_data_free_8(mdata);
2086 	pcre2_compile_context_free_8(ccontext);
2087 
2088 	if (total == successful) {
2089 		printf("\nAll invalid UTF8 JIT regression tests are successfully passed.\n");
2090 		return 0;
2091 	} else {
2092 		printf("\nInvalid UTF8 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2093 		return 1;
2094 	}
2095 }
2096 
2097 #else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_8 */
2098 
/* Stand-in for builds that take the surrounding #else branch (no Unicode
support or no 8-bit library): there is nothing to run, so report success. */
static int invalid_utf8_regression_tests(void)
{
	const int no_failures = 0;

	return no_failures;
}
2103 
2104 #endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_8 */
2105 
2106 #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_16
2107 
/* Option shorthands for the invalid UTF-16 test table: UDA is used in the
compile_options column, CI and CPI in the jit_compile_options column. */
#define UDA (PCRE2_ANCHORED | PCRE2_DOTALL | PCRE2_UTF)
#define CI (PCRE2_JIT_INVALID_UTF | PCRE2_JIT_COMPLETE)
#define CPI (CI | PCRE2_JIT_PARTIAL_SOFT)
2111 
2112 struct invalid_utf16_regression_test_case {
2113 	int compile_options;
2114 	int jit_compile_options;
2115 	int start_offset;
2116 	int skip_left;
2117 	int skip_right;
2118 	int match_start;
2119 	int match_end;
2120 	const PCRE2_UCHAR16 *pattern[2];
2121 	const PCRE2_UCHAR16 *input;
2122 };
2123 
2124 static PCRE2_UCHAR16 allany16[] = { '.', 0 };
2125 static PCRE2_UCHAR16 non_word_boundary16[] = { '\\', 'B', 0 };
2126 static PCRE2_UCHAR16 word_boundary16[] = { '\\', 'b', 0 };
2127 static PCRE2_UCHAR16 backreference16[] = { '(', '.', ')', '\\', '1', 0 };
2128 static PCRE2_UCHAR16 grapheme16[] = { '\\', 'X', 0 };
2129 static PCRE2_UCHAR16 nothashmark16[] = { '[', '^', '#', ']', 0 };
2130 static PCRE2_UCHAR16 afternl16[] = { '^', '\\', 'W', 0 };
2131 static PCRE2_UCHAR16 generic16[] = { '#', 0xd800, 0xdc00, '#', 0 };
2132 static PCRE2_UCHAR16 test16_1[] = { 0xd7ff, 0xe000, 0xffff, 0x01, '#', 0 };
2133 static PCRE2_UCHAR16 test16_2[] = { 0xd800, 0xdc00, 0xd800, 0xdc00, 0 };
2134 static PCRE2_UCHAR16 test16_3[] = { 0xdbff, 0xdfff, 0xdbff, 0xdfff, 0 };
2135 static PCRE2_UCHAR16 test16_4[] = { 0xd800, 0xdbff, 0xd800, 0xdbff, 0 };
2136 static PCRE2_UCHAR16 test16_5[] = { '#', 0xd800, 0xdc00, '#', 0 };
2137 static PCRE2_UCHAR16 test16_6[] = { 'a', 'A', 0xdc28, 0 };
2138 static PCRE2_UCHAR16 test16_7[] = { 0xd801, 0xdc00, 0xd801, 0xdc28, 0 };
2139 static PCRE2_UCHAR16 test16_8[] = { '#', 0xd800, 0xdc00, 0 };
2140 static PCRE2_UCHAR16 test16_9[] = { ' ', 0x2028, '#', 0 };
2141 static PCRE2_UCHAR16 test16_10[] = { ' ', 0xdc00, 0xd800, 0x2028, '#', 0 };
2142 static PCRE2_UCHAR16 test16_11[] = { 0xdc00, 0xdc00, 0xd800, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
2143 static PCRE2_UCHAR16 test16_12[] = { '#', 0xd800, 0xdc00, 0xd800, '#', 0xd800, 0xdc00, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
2144 
/* Test cases for invalid_utf16_regression_tests(). Each row initializes the
members of struct invalid_utf16_regression_test_case positionally:

  compile_options, jit_compile_options, start_offset, skip_left, skip_right,
  match_start, match_end, { pattern[0], pattern[1] }, input

The last row, whose pattern[0] is NULL, terminates the list. */
static const struct invalid_utf16_regression_test_case invalid_utf16_regression_test_cases[] = {
	{ UDA, CI, 0, 0, 0, 0, 1, { allany16, NULL }, test16_1 },
	{ UDA, CI, 1, 0, 0, 1, 2, { allany16, NULL }, test16_1 },
	{ UDA, CI, 2, 0, 0, 2, 3, { allany16, NULL }, test16_1 },
	{ UDA, CI, 3, 0, 0, 3, 4, { allany16, NULL }, test16_1 },
	{ UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_2 },
	{ UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_2 },
	{ UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_2 },
	{ UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_3 },
	{ UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_3 },
	{ UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_3 },

	{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary16, NULL }, test16_1 },
	{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_1 },
	{ UDA, CPI, 3, 0, 0, 3, 3, { non_word_boundary16, NULL }, test16_1 },
	{ UDA, CPI, 4, 0, 0, 4, 4, { non_word_boundary16, NULL }, test16_1 },
	{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_2 },
	{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_3 },
	{ UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_2 },
	{ UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_3 },
	{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_4 },
	{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_5 },

	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference16, NULL }, test16_6 },
	{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference16, NULL }, test16_6 },
	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { backreference16, NULL }, test16_7 },
	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { backreference16, NULL }, test16_7 },

	{ UDA, CPI, 0, 0, 0, 0, 1, { grapheme16, NULL }, test16_6 },
	{ UDA, CPI, 1, 0, 0, 1, 2, { grapheme16, NULL }, test16_6 },
	{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme16, NULL }, test16_6 },
	{ UDA, CPI, 0, 0, 0, 0, 2, { grapheme16, NULL }, test16_7 },
	{ UDA, CPI, 2, 0, 0, 2, 4, { grapheme16, NULL }, test16_7 },
	{ UDA, CPI, 1, 0, 0, -1, -1, { grapheme16, NULL }, test16_7 },

	{ UDA, CPI, 0, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },
	{ UDA, CPI, 1, 0, 0, 1, 3, { nothashmark16, NULL }, test16_8 },
	{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },

	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl16, NULL }, test16_9 },
	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { afternl16, NULL }, test16_10 },

	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },
	{ PCRE2_UTF, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
	{ PCRE2_UTF, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },

	/* Terminator: the test loop stops at the first NULL pattern[0]. */
	{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
};
2194 
2195 #undef UDA
2196 #undef CI
2197 #undef CPI
2198 
/* Run one pattern of an invalid UTF-16 test case through the JIT matcher.

Arguments:
  current        test case supplying the options, subject and the
                 match_start / match_end values to check against
  pattern_index  index of the test case, only used in diagnostic messages
  i              which entry of current->pattern to compile
  ccontext       compile context passed to pcre2_compile_16()
  mdata          match data block used for every match attempt

Returns:         1 if current->pattern[i] is NULL (nothing to run) or every
                 requested match mode passed check_invalid_utf_result();
                 0 if compilation, JIT compilation or a check failed
*/
static int run_invalid_utf16_test(const struct invalid_utf16_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_16 *ccontext, pcre2_match_data_16 *mdata)
{
	pcre2_code_16 *code;
	int result, errorcode;
	PCRE2_SIZE length, erroroffset;
	const PCRE2_UCHAR16 *input;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(mdata);

	/* An unused pattern slot counts as a pass. */
	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_16(current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	if (!code) {
		/* Print the sub-pattern index as well, so that a failure of
		pattern[1] is not reported as a failure of pattern[0]. */
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_16(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		pcre2_code_free_16(code);
		return 0;
	}

	/* Count the code units of the zero-terminated input. */
	input = current->input;
	length = 0;

	while (*input++ != 0)
		length++;

	/* The subject is the input with skip_left units dropped from the front
	and skip_right units dropped from the back. start_offset is relative to
	the full input, hence the skip_left adjustment in the calls below. */
	length -= current->skip_left + current->skip_right;

	/* Only the match modes requested at JIT compile time are exercised. */
	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		result = pcre2_jit_match_16(code, (current->input + current->skip_left),
			length, current->start_offset - current->skip_left, 0, mdata, NULL);

		/* A non-zero return from the checker means the result was wrong. */
		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
			pcre2_code_free_16(code);
			return 0;
		}
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_16(code, (current->input + current->skip_left),
			length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
			pcre2_code_free_16(code);
			return 0;
		}
	}

	pcre2_code_free_16(code);
	return 1;
}
2256 
invalid_utf16_regression_tests(void)2257 static int invalid_utf16_regression_tests(void)
2258 {
2259 	const struct invalid_utf16_regression_test_case *current;
2260 	pcre2_compile_context_16 *ccontext;
2261 	pcre2_match_data_16 *mdata;
2262 	int total = 0, successful = 0;
2263 	int result;
2264 
2265 	printf("\nRunning invalid-utf16 JIT regression tests\n");
2266 
2267 	ccontext = pcre2_compile_context_create_16(NULL);
2268 	pcre2_set_newline_16(ccontext, PCRE2_NEWLINE_ANY);
2269 	mdata = pcre2_match_data_create_16(4, NULL);
2270 
2271 	for (current = invalid_utf16_regression_test_cases; current->pattern[0]; current++) {
2272 		/* printf("\nPattern: %s :\n", current->pattern); */
2273 		total++;
2274 
2275 		result = 1;
2276 		if (!run_invalid_utf16_test(current, total - 1, 0, ccontext, mdata))
2277 			result = 0;
2278 		if (!run_invalid_utf16_test(current, total - 1, 1, ccontext, mdata))
2279 			result = 0;
2280 
2281 		if (result) {
2282 			successful++;
2283 		}
2284 
2285 		printf(".");
2286 		if ((total % 60) == 0)
2287 			printf("\n");
2288 	}
2289 
2290 	if ((total % 60) != 0)
2291 		printf("\n");
2292 
2293 	pcre2_match_data_free_16(mdata);
2294 	pcre2_compile_context_free_16(ccontext);
2295 
2296 	if (total == successful) {
2297 		printf("\nAll invalid UTF16 JIT regression tests are successfully passed.\n");
2298 		return 0;
2299 	} else {
2300 		printf("\nInvalid UTF16 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2301 		return 1;
2302 	}
2303 }
2304 
2305 #else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_16 */
2306 
/* Stand-in for builds without Unicode support or without the 16-bit library:
there is nothing to run, so report success. */
static int invalid_utf16_regression_tests(void)
{
	const int no_failures = 0;

	return no_failures;
}
2311 
2312 #endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_16 */
2313 
2314 #if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_32
2315 
/* Option shorthands for the invalid UTF-32 test table: UDA is used in the
compile_options column, CI and CPI in the jit_compile_options column. */
#define UDA (PCRE2_ANCHORED | PCRE2_DOTALL | PCRE2_UTF)
#define CI (PCRE2_JIT_INVALID_UTF | PCRE2_JIT_COMPLETE)
#define CPI (CI | PCRE2_JIT_PARTIAL_SOFT)
2319 
2320 struct invalid_utf32_regression_test_case {
2321 	int compile_options;
2322 	int jit_compile_options;
2323 	int start_offset;
2324 	int skip_left;
2325 	int skip_right;
2326 	int match_start;
2327 	int match_end;
2328 	const PCRE2_UCHAR32 *pattern[2];
2329 	const PCRE2_UCHAR32 *input;
2330 };
2331 
2332 static PCRE2_UCHAR32 allany32[] = { '.', 0 };
2333 static PCRE2_UCHAR32 non_word_boundary32[] = { '\\', 'B', 0 };
2334 static PCRE2_UCHAR32 word_boundary32[] = { '\\', 'b', 0 };
2335 static PCRE2_UCHAR32 backreference32[] = { '(', '.', ')', '\\', '1', 0 };
2336 static PCRE2_UCHAR32 grapheme32[] = { '\\', 'X', 0 };
2337 static PCRE2_UCHAR32 nothashmark32[] = { '[', '^', '#', ']', 0 };
2338 static PCRE2_UCHAR32 afternl32[] = { '^', '\\', 'W', 0 };
2339 static PCRE2_UCHAR32 test32_1[] = { 0x10ffff, 0x10ffff, 0x110000, 0x110000, 0x10ffff, 0 };
2340 static PCRE2_UCHAR32 test32_2[] = { 0xd7ff, 0xe000, 0xd800, 0xdfff, 0xe000, 0xdfff, 0xd800, 0 };
2341 static PCRE2_UCHAR32 test32_3[] = { 'a', 'A', 0x110000, 0 };
2342 static PCRE2_UCHAR32 test32_4[] = { '#', 0x10ffff, 0x110000, 0 };
2343 static PCRE2_UCHAR32 test32_5[] = { ' ', 0x2028, '#', 0 };
2344 static PCRE2_UCHAR32 test32_6[] = { ' ', 0x110000, 0x2028, '#', 0 };
2345 
/* Test cases for invalid_utf32_regression_tests(). Each row initializes the
members of struct invalid_utf32_regression_test_case positionally:

  compile_options, jit_compile_options, start_offset, skip_left, skip_right,
  match_start, match_end, { pattern[0], pattern[1] }, input

The last row, whose pattern[0] is NULL, terminates the list. */
static const struct invalid_utf32_regression_test_case invalid_utf32_regression_test_cases[] = {
	{ UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_1 },
	{ UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_1 },
	{ UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_2 },
	{ UDA, CI, 1, 0, 0, 1, 2, { allany32, NULL }, test32_2 },
	{ UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_2 },
	{ UDA, CI, 3, 0, 0, -1, -1, { allany32, NULL }, test32_2 },

	{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_1 },
	{ UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_1 },
	{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_2 },
	{ UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
	{ UDA, CPI, 6, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },

	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference32, NULL }, test32_3 },
	{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference32, NULL }, test32_3 },

	{ UDA, CPI, 0, 0, 0, 0, 1, { grapheme32, NULL }, test32_1 },
	{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_1 },
	{ UDA, CPI, 1, 0, 0, 1, 2, { grapheme32, NULL }, test32_2 },
	{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 },
	{ UDA, CPI, 3, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 },
	{ UDA, CPI, 4, 0, 0, 4, 5, { grapheme32, NULL }, test32_2 },

	{ UDA, CPI, 0, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 },
	{ UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_4 },
	{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 },
	{ UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_2 },
	{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_2 },

	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl32, NULL }, test32_5 },
	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { afternl32, NULL }, test32_6 },

	/* Terminator: the test loop stops at the first NULL pattern[0]. */
	{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
};
2381 
2382 #undef UDA
2383 #undef CI
2384 #undef CPI
2385 
/* Run one pattern of an invalid UTF-32 test case through the JIT matcher.

Arguments:
  current        test case supplying the options, subject and the
                 match_start / match_end values to check against
  pattern_index  index of the test case, only used in diagnostic messages
  i              which entry of current->pattern to compile
  ccontext       compile context passed to pcre2_compile_32()
  mdata          match data block used for every match attempt

Returns:         1 if current->pattern[i] is NULL (nothing to run) or every
                 requested match mode passed check_invalid_utf_result();
                 0 if compilation, JIT compilation or a check failed
*/
static int run_invalid_utf32_test(const struct invalid_utf32_regression_test_case *current,
	int pattern_index, int i, pcre2_compile_context_32 *ccontext, pcre2_match_data_32 *mdata)
{
	pcre2_code_32 *code;
	int result, errorcode;
	PCRE2_SIZE length, erroroffset;
	const PCRE2_UCHAR32 *input;
	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_32(mdata);

	/* An unused pattern slot counts as a pass. */
	if (current->pattern[i] == NULL)
		return 1;

	code = pcre2_compile_32(current->pattern[i], PCRE2_ZERO_TERMINATED,
		current->compile_options, &errorcode, &erroroffset, ccontext);

	if (!code) {
		/* Print the sub-pattern index as well, so that a failure of
		pattern[1] is not reported as a failure of pattern[0]. */
		printf("Pattern[%d:%d] cannot be compiled. Error offset: %d\n", pattern_index, i, (int)erroroffset);
		return 0;
	}

	if (pcre2_jit_compile_32(code, current->jit_compile_options) != 0) {
		printf("Pattern[%d:%d] cannot be compiled by the JIT compiler.\n", pattern_index, i);
		pcre2_code_free_32(code);
		return 0;
	}

	/* Count the code units of the zero-terminated input. */
	input = current->input;
	length = 0;

	while (*input++ != 0)
		length++;

	/* The subject is the input with skip_left units dropped from the front
	and skip_right units dropped from the back. start_offset is relative to
	the full input, hence the skip_left adjustment in the calls below. */
	length -= current->skip_left + current->skip_right;

	/* Only the match modes requested at JIT compile time are exercised. */
	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
		result = pcre2_jit_match_32(code, (current->input + current->skip_left),
			length, current->start_offset - current->skip_left, 0, mdata, NULL);

		/* A non-zero return from the checker means the result was wrong. */
		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
			pcre2_code_free_32(code);
			return 0;
		}
	}

	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
		result = pcre2_jit_match_32(code, (current->input + current->skip_left),
			length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);

		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
			pcre2_code_free_32(code);
			return 0;
		}
	}

	pcre2_code_free_32(code);
	return 1;
}
2443 
invalid_utf32_regression_tests(void)2444 static int invalid_utf32_regression_tests(void)
2445 {
2446 	const struct invalid_utf32_regression_test_case *current;
2447 	pcre2_compile_context_32 *ccontext;
2448 	pcre2_match_data_32 *mdata;
2449 	int total = 0, successful = 0;
2450 	int result;
2451 
2452 	printf("\nRunning invalid-utf32 JIT regression tests\n");
2453 
2454 	ccontext = pcre2_compile_context_create_32(NULL);
2455 	pcre2_set_newline_32(ccontext, PCRE2_NEWLINE_ANY);
2456 	mdata = pcre2_match_data_create_32(4, NULL);
2457 
2458 	for (current = invalid_utf32_regression_test_cases; current->pattern[0]; current++) {
2459 		/* printf("\nPattern: %s :\n", current->pattern); */
2460 		total++;
2461 
2462 		result = 1;
2463 		if (!run_invalid_utf32_test(current, total - 1, 0, ccontext, mdata))
2464 			result = 0;
2465 		if (!run_invalid_utf32_test(current, total - 1, 1, ccontext, mdata))
2466 			result = 0;
2467 
2468 		if (result) {
2469 			successful++;
2470 		}
2471 
2472 		printf(".");
2473 		if ((total % 60) == 0)
2474 			printf("\n");
2475 	}
2476 
2477 	if ((total % 60) != 0)
2478 		printf("\n");
2479 
2480 	pcre2_match_data_free_32(mdata);
2481 	pcre2_compile_context_free_32(ccontext);
2482 
2483 	if (total == successful) {
2484 		printf("\nAll invalid UTF32 JIT regression tests are successfully passed.\n");
2485 		return 0;
2486 	} else {
2487 		printf("\nInvalid UTF32 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2488 		return 1;
2489 	}
2490 }
2491 
2492 #else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_32 */
2493 
/* Stand-in for builds without Unicode support or without the 32-bit library:
there is nothing to run, so report success. */
static int invalid_utf32_regression_tests(void)
{
	const int no_failures = 0;

	return no_failures;
}
2498 
2499 #endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_32 */
2500 
2501 /* End of pcre2_jit_test.c */
2502