1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 2004-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
9 *
10 * File reapits.c
11 *
12 *********************************************************************************/
13 /*C API TEST FOR Regular Expressions */
14 /**
15 * This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't
16 * try to test the full functionality. It just calls each function and verifies that it
17 * works on a basic level.
18 *
19 * More complete testing of regular expression functionality is done with the C++ tests.
20 **/
21
22 #include "unicode/utypes.h"
23
24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
25
26 #include <stdlib.h>
27 #include <string.h>
28 #include "unicode/uloc.h"
29 #include "unicode/uregex.h"
30 #include "unicode/ustring.h"
31 #include "unicode/utext.h"
32 #include "unicode/utf8.h"
33 #include "cintltst.h"
34 #include "cmemory.h"
35
/*
 * TEST_ASSERT_SUCCESS(status)
 *     Logs a data error, tagged with the invoking file and line and the
 *     symbolic name of the error, when 'status' is an ICU failure code.
 *     The macro only reports; it does not return or jump.
 *     It expands to a brace-enclosed block, so a trailing ';' is optional.
 *     Note that 'status' is evaluated more than once.
 */
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", \
                     __FILE__, __LINE__, u_errorName(status)); \
    } \
}
38
/*
 * TEST_ASSERT(expr)
 *     Logs a test failure, quoting the source text of 'expr' together with
 *     the invoking file and line, when 'expr' compares equal to FALSE.
 *     The macro only reports; it does not return or jump.
 *     It expands to a brace-enclosed block, so a trailing ';' is optional.
 */
#define TEST_ASSERT(expr) { \
    if ((expr) == FALSE) { \
        log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", \
                __FILE__, __LINE__, #expr); \
    } \
}
41
/*
 *  TEST_SETUP and TEST_TEARDOWN
 *         macros to handle the boilerplate around setting up regex test cases.
 *         parameters to setup:
 *              pattern:     The regex pattern, a (char *) null terminated C string.
 *              testString:  The string data, also a (char *) C string.
 *              flags:       Regex flags to set when compiling the pattern
 *
 *         Put arbitrary test code between SETUP and TEARDOWN.
 *         're' is the compiled, ready-to-go regular expression.
 *
 *         Both macros use the variables 'status' and 're' of the enclosing
 *         scope; they are assigned here, not declared.
 *
 *         TEST_SETUP opens two brace levels: an outer block that owns the
 *         heap copy of the test string ('srcString'), and an inner
 *         "if (U_SUCCESS(status))" block that holds the test code.
 *         TEST_TEARDOWN closes both, so the two macros must always be used
 *         as a pair. The test code is skipped when setup fails.
 *
 *         If the test string cannot be allocated, status is set to
 *         U_MEMORY_ALLOCATION_ERROR and the string copy is skipped, so the
 *         failure is reported instead of writing through a NULL pointer.
 */
#define TEST_SETUP(pattern, testString, flags) { \
    UChar *srcString = NULL; \
    status = U_ZERO_ERROR; \
    re = uregex_openC(pattern, flags, NULL, &status); \
    TEST_ASSERT_SUCCESS(status); \
    srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
    if (srcString == NULL) { \
        status = U_MEMORY_ALLOCATION_ERROR; \
    } else { \
        u_uastrncpy(srcString, testString, strlen(testString)+1); \
    } \
    uregex_setText(re, srcString, -1, &status); \
    TEST_ASSERT_SUCCESS(status); \
    if (U_SUCCESS(status)) {

/* Closes the blocks opened by TEST_SETUP, reports any error left in 'status'
 * by the test code, then releases the regular expression and the test string. */
#define TEST_TEARDOWN \
    } \
    TEST_ASSERT_SUCCESS(status); \
    uregex_close(re); \
    free(srcString); \
}
70
71
72 /**
73 * @param expected utf-8 array of bytes to be expected
74 */
test_assert_string(const char * expected,const UChar * actual,UBool nulTerm,const char * file,int line)75 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
76 char buf_inside_macro[120];
77 int32_t len = (int32_t)strlen(expected);
78 UBool success;
79 if (nulTerm) {
80 u_austrncpy(buf_inside_macro, (actual), len+1);
81 buf_inside_macro[len+2] = 0;
82 success = (strcmp((expected), buf_inside_macro) == 0);
83 } else {
84 u_austrncpy(buf_inside_macro, (actual), len);
85 buf_inside_macro[len+1] = 0;
86 success = (strncmp((expected), buf_inside_macro, len) == 0);
87 }
88 if (success == FALSE) {
89 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
90 file, line, (expected), buf_inside_macro);
91 }
92 }
93
94 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
95
96
equals_utf8_utext(const char * utf8,UText * utext)97 static UBool equals_utf8_utext(const char *utf8, UText *utext) {
98 int32_t u8i = 0;
99 UChar32 u8c = 0;
100 UChar32 utc = 0;
101 UBool stringsEqual = TRUE;
102 utext_setNativeIndex(utext, 0);
103 for (;;) {
104 U8_NEXT_UNSAFE(utf8, u8i, u8c);
105 utc = utext_next32(utext);
106 if (u8c == 0 && utc == U_SENTINEL) {
107 break;
108 }
109 if (u8c != utc || u8c == 0) {
110 stringsEqual = FALSE;
111 break;
112 }
113 }
114 return stringsEqual;
115 }
116
117
/*
 * Checks that the contents of a UText equal the expected UTF-8 string.
 * On mismatch, logs a failure attributed to the given file and line, followed
 * by the actual text: code points strictly between 0x20 and 0x7e are written
 * as characters, everything else as a hex value.
 */
static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
    UChar32 cp;

    utext_setNativeIndex(actual, 0);
    if (equals_utf8_utext(expected, actual)) {
        return;
    }

    log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
    for (cp = utext_next32From(actual, 0); cp != U_SENTINEL; cp = UTEXT_NEXT32(actual)) {
        if (cp > 0x20 && cp < 0x7e) {
            log_err("%c", cp);
        } else {
            log_err("%#x", cp);
        }
    }
    log_err("\"\n");
}

/*
 * TEST_ASSERT_UTEXT(const char *expected, const UText *actual)
 *     Wrapper for test_assert_utext() that supplies the call site's file and line.
 *     Note: Expected is a UTF-8 encoded string, _not_ the system code page.
 */
#define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
141
testUTextEqual(UText * uta,UText * utb)142 static UBool testUTextEqual(UText *uta, UText *utb) {
143 UChar32 ca = 0;
144 UChar32 cb = 0;
145 utext_setNativeIndex(uta, 0);
146 utext_setNativeIndex(utb, 0);
147 do {
148 ca = utext_next32(uta);
149 cb = utext_next32(utb);
150 if (ca != cb) {
151 break;
152 }
153 } while (ca != U_SENTINEL);
154 return ca == cb;
155 }
156
157
158
159
160 static void TestRegexCAPI(void);
161 static void TestBug4315(void);
162 static void TestUTextAPI(void);
163 static void TestRefreshInput(void);
164 static void TestBug8421(void);
165 static void TestBug10815(void);
166
167 void addURegexTest(TestNode** root);
168
/*
 * Registers the regular expression C API tests of this file under the
 * "regex/" path of the test tree rooted at 'root'.
 */
void addURegexTest(TestNode** root)
{
    /* Test functions and their tree paths, in registration order. */
    static const struct {
        void (*testFn)(void);
        const char *path;
    } regexTests[] = {
        { &TestRegexCAPI,    "regex/TestRegexCAPI" },
        { &TestBug4315,      "regex/TestBug4315" },
        { &TestUTextAPI,     "regex/TestUTextAPI" },
        { &TestRefreshInput, "regex/TestRefreshInput" },
        { &TestBug8421,      "regex/TestBug8421" },
        { &TestBug10815,     "regex/TestBug10815" }
    };
    int32_t i;

    for (i = 0; i < UPRV_LENGTHOF(regexTests); i++) {
        addTest(root, regexTests[i].testFn, regexTests[i].path);
    }
}
178
179 /*
180 * Call back function and context struct used for testing
181 * regular expression user callbacks. This test is mostly the same as
182 * the corresponding C++ test in intltest.
183 */
184 typedef struct callBackContext {
185 int32_t maxCalls;
186 int32_t numCalls;
187 int32_t lastSteps;
188 } callBackContext;
189
190 static UBool U_EXPORT2 U_CALLCONV
TestCallbackFn(const void * context,int32_t steps)191 TestCallbackFn(const void *context, int32_t steps) {
192 callBackContext *info = (callBackContext *)context;
193 if (info->lastSteps+1 != steps) {
194 log_err("incorrect steps in callback. Expected %d, got %d\n", info->lastSteps+1, steps);
195 }
196 info->lastSteps = steps;
197 info->numCalls++;
198 return (info->numCalls < info->maxCalls);
199 }
200
201 /*
202 * Regular Expression C API Tests
203 */
TestRegexCAPI(void)204 static void TestRegexCAPI(void) {
205 UErrorCode status = U_ZERO_ERROR;
206 URegularExpression *re;
207 UChar pat[200];
208 UChar *minus1;
209
210 memset(&minus1, -1, sizeof(minus1));
211
212 /* Minimalist open/close */
213 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
214 re = uregex_open(pat, -1, 0, 0, &status);
215 if (U_FAILURE(status)) {
216 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
217 return;
218 }
219 uregex_close(re);
220
221 /* Open with all flag values set */
222 status = U_ZERO_ERROR;
223 re = uregex_open(pat, -1,
224 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
225 0, &status);
226 TEST_ASSERT_SUCCESS(status);
227 uregex_close(re);
228
229 /* Open with an invalid flag */
230 status = U_ZERO_ERROR;
231 re = uregex_open(pat, -1, 0x40000000, 0, &status);
232 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
233 uregex_close(re);
234
235 /* Open with an unimplemented flag */
236 status = U_ZERO_ERROR;
237 re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
238 TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
239 uregex_close(re);
240
241 /* openC with an invalid parameter */
242 status = U_ZERO_ERROR;
243 re = uregex_openC(NULL,
244 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
245 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
246
247 /* openC with an invalid parameter and a pre-existing failure status: the incoming status must be left unchanged */
248 status = U_USELESS_COLLATOR_ERROR;
249 re = uregex_openC(NULL,
250 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
251 TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
252
253 /* openC open from a C string */
254 {
255 const UChar *p;
256 int32_t len;
257 status = U_ZERO_ERROR;
258 re = uregex_openC("abc*", 0, 0, &status);
259 TEST_ASSERT_SUCCESS(status);
260 p = uregex_pattern(re, &len, &status);
261 TEST_ASSERT_SUCCESS(status);
262
263 /* The TEST_ASSERT_SUCCESS above should change too... */
264 if(U_SUCCESS(status)) {
265 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
266 TEST_ASSERT(u_strcmp(pat, p) == 0);
267 TEST_ASSERT(len==(int32_t)strlen("abc*"));
268 }
269
270 uregex_close(re);
271
272 /* TODO: Open with ParseError parameter */
273 }
274
275 /*
276 * clone
277 */
278 {
279 URegularExpression *clone1;
280 URegularExpression *clone2;
281 URegularExpression *clone3;
282 UChar testString1[30];
283 UChar testString2[30];
284 UBool result;
285
286
287 status = U_ZERO_ERROR;
288 re = uregex_openC("abc*", 0, 0, &status);
289 TEST_ASSERT_SUCCESS(status);
290 clone1 = uregex_clone(re, &status);
291 TEST_ASSERT_SUCCESS(status);
292 TEST_ASSERT(clone1 != NULL);
293
294 status = U_ZERO_ERROR;
295 clone2 = uregex_clone(re, &status);
296 TEST_ASSERT_SUCCESS(status);
297 TEST_ASSERT(clone2 != NULL);
298 uregex_close(re);
299
300 status = U_ZERO_ERROR;
301 clone3 = uregex_clone(clone2, &status);
302 TEST_ASSERT_SUCCESS(status);
303 TEST_ASSERT(clone3 != NULL);
304
305 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
306 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
307
308 status = U_ZERO_ERROR;
309 uregex_setText(clone1, testString1, -1, &status);
310 TEST_ASSERT_SUCCESS(status);
311 result = uregex_lookingAt(clone1, 0, &status);
312 TEST_ASSERT_SUCCESS(status);
313 TEST_ASSERT(result==TRUE);
314
315 status = U_ZERO_ERROR;
316 uregex_setText(clone2, testString2, -1, &status);
317 TEST_ASSERT_SUCCESS(status);
318 result = uregex_lookingAt(clone2, 0, &status);
319 TEST_ASSERT_SUCCESS(status);
320 TEST_ASSERT(result==FALSE);
321 result = uregex_find(clone2, 0, &status);
322 TEST_ASSERT_SUCCESS(status);
323 TEST_ASSERT(result==TRUE);
324
325 uregex_close(clone1);
326 uregex_close(clone2);
327 uregex_close(clone3);
328
329 }
330
331 /*
332 * pattern()
333 */
334 {
335 const UChar *resultPat;
336 int32_t resultLen;
337 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat));
338 status = U_ZERO_ERROR;
339 re = uregex_open(pat, -1, 0, NULL, &status);
340 resultPat = uregex_pattern(re, &resultLen, &status);
341 TEST_ASSERT_SUCCESS(status);
342
343 /* The TEST_ASSERT_SUCCESS above should change too... */
344 if (U_SUCCESS(status)) {
345 TEST_ASSERT(resultLen == -1);
346 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
347 }
348
349 uregex_close(re);
350
351 status = U_ZERO_ERROR;
352 re = uregex_open(pat, 3, 0, NULL, &status);
353 resultPat = uregex_pattern(re, &resultLen, &status);
354 TEST_ASSERT_SUCCESS(status);
355 TEST_ASSERT_SUCCESS(status);
356
357 /* The TEST_ASSERT_SUCCESS above should change too... */
358 if (U_SUCCESS(status)) {
359 TEST_ASSERT(resultLen == 3);
360 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
361 TEST_ASSERT(u_strlen(resultPat) == 3);
362 }
363
364 uregex_close(re);
365 }
366
367 /*
368 * flags()
369 */
370 {
371 int32_t t;
372
373 status = U_ZERO_ERROR;
374 re = uregex_open(pat, -1, 0, NULL, &status);
375 t = uregex_flags(re, &status);
376 TEST_ASSERT_SUCCESS(status);
377 TEST_ASSERT(t == 0);
378 uregex_close(re);
379
380 status = U_ZERO_ERROR;
381 re = uregex_open(pat, -1, 0, NULL, &status);
382 t = uregex_flags(re, &status);
383 TEST_ASSERT_SUCCESS(status);
384 TEST_ASSERT(t == 0);
385 uregex_close(re);
386
387 status = U_ZERO_ERROR;
388 re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
389 t = uregex_flags(re, &status);
390 TEST_ASSERT_SUCCESS(status);
391 TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
392 uregex_close(re);
393 }
394
395 /*
396 * setText() and lookingAt()
397 */
398 {
399 UChar text1[50];
400 UChar text2[50];
401 UBool result;
402
403 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1));
404 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
405 status = U_ZERO_ERROR;
406 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
407 re = uregex_open(pat, -1, 0, NULL, &status);
408 TEST_ASSERT_SUCCESS(status);
409
410 /* Operation before doing a setText should fail... */
411 status = U_ZERO_ERROR;
412 uregex_lookingAt(re, 0, &status);
413 TEST_ASSERT( status== U_REGEX_INVALID_STATE);
414
415 status = U_ZERO_ERROR;
416 uregex_setText(re, text1, -1, &status);
417 result = uregex_lookingAt(re, 0, &status);
418 TEST_ASSERT(result == TRUE);
419 TEST_ASSERT_SUCCESS(status);
420
421 status = U_ZERO_ERROR;
422 uregex_setText(re, text2, -1, &status);
423 result = uregex_lookingAt(re, 0, &status);
424 TEST_ASSERT(result == FALSE);
425 TEST_ASSERT_SUCCESS(status);
426
427 status = U_ZERO_ERROR;
428 uregex_setText(re, text1, -1, &status);
429 result = uregex_lookingAt(re, 0, &status);
430 TEST_ASSERT(result == TRUE);
431 TEST_ASSERT_SUCCESS(status);
432
433 status = U_ZERO_ERROR;
434 uregex_setText(re, text1, 5, &status);
435 result = uregex_lookingAt(re, 0, &status);
436 TEST_ASSERT(result == FALSE);
437 TEST_ASSERT_SUCCESS(status);
438
439 status = U_ZERO_ERROR;
440 uregex_setText(re, text1, 6, &status);
441 result = uregex_lookingAt(re, 0, &status);
442 TEST_ASSERT(result == TRUE);
443 TEST_ASSERT_SUCCESS(status);
444
445 uregex_close(re);
446 }
447
448
449 /*
450 * getText()
451 */
452 {
453 UChar text1[50];
454 UChar text2[50];
455 const UChar *result;
456 int32_t textLength;
457
458 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1));
459 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
460 status = U_ZERO_ERROR;
461 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
462 re = uregex_open(pat, -1, 0, NULL, &status);
463
464 uregex_setText(re, text1, -1, &status);
465 result = uregex_getText(re, &textLength, &status);
466 TEST_ASSERT(result == text1);
467 TEST_ASSERT(textLength == -1);
468 TEST_ASSERT_SUCCESS(status);
469
470 status = U_ZERO_ERROR;
471 uregex_setText(re, text2, 7, &status);
472 result = uregex_getText(re, &textLength, &status);
473 TEST_ASSERT(result == text2);
474 TEST_ASSERT(textLength == 7);
475 TEST_ASSERT_SUCCESS(status);
476
477 status = U_ZERO_ERROR;
478 uregex_setText(re, text2, 4, &status);
479 result = uregex_getText(re, &textLength, &status);
480 TEST_ASSERT(result == text2);
481 TEST_ASSERT(textLength == 4);
482 TEST_ASSERT_SUCCESS(status);
483 uregex_close(re);
484 }
485
486 /*
487 * matches()
488 */
489 {
490 UChar text1[50];
491 UBool result;
492 int len;
493 UChar nullString[] = {0,0,0};
494
495 u_uastrncpy(text1, "abcccde", UPRV_LENGTHOF(text1));
496 status = U_ZERO_ERROR;
497 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
498 re = uregex_open(pat, -1, 0, NULL, &status);
499
500 uregex_setText(re, text1, -1, &status);
501 result = uregex_matches(re, 0, &status);
502 TEST_ASSERT(result == FALSE);
503 TEST_ASSERT_SUCCESS(status);
504
505 status = U_ZERO_ERROR;
506 uregex_setText(re, text1, 6, &status);
507 result = uregex_matches(re, 0, &status);
508 TEST_ASSERT(result == TRUE);
509 TEST_ASSERT_SUCCESS(status);
510
511 status = U_ZERO_ERROR;
512 uregex_setText(re, text1, 6, &status);
513 result = uregex_matches(re, 1, &status);
514 TEST_ASSERT(result == FALSE);
515 TEST_ASSERT_SUCCESS(status);
516 uregex_close(re);
517
518 status = U_ZERO_ERROR;
519 re = uregex_openC(".?", 0, NULL, &status);
520 uregex_setText(re, text1, -1, &status);
521 len = u_strlen(text1);
522 result = uregex_matches(re, len, &status);
523 TEST_ASSERT(result == TRUE);
524 TEST_ASSERT_SUCCESS(status);
525
526 status = U_ZERO_ERROR;
527 uregex_setText(re, nullString, -1, &status);
528 TEST_ASSERT_SUCCESS(status);
529 result = uregex_matches(re, 0, &status);
530 TEST_ASSERT(result == TRUE);
531 TEST_ASSERT_SUCCESS(status);
532 uregex_close(re);
533 }
534
535
536 /*
537 * lookingAt() Used in setText test.
538 */
539
540
541 /*
542 * find(), findNext, start, end, reset
543 */
544 {
545 UChar text1[50];
546 UBool result;
547 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1));
548 status = U_ZERO_ERROR;
549 re = uregex_openC("rx", 0, NULL, &status);
550
551 uregex_setText(re, text1, -1, &status);
552 result = uregex_find(re, 0, &status);
553 TEST_ASSERT(result == TRUE);
554 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
555 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
556 TEST_ASSERT_SUCCESS(status);
557
558 result = uregex_find(re, 9, &status);
559 TEST_ASSERT(result == TRUE);
560 TEST_ASSERT(uregex_start(re, 0, &status) == 11);
561 TEST_ASSERT(uregex_end(re, 0, &status) == 13);
562 TEST_ASSERT_SUCCESS(status);
563
564 result = uregex_find(re, 14, &status);
565 TEST_ASSERT(result == FALSE);
566 TEST_ASSERT_SUCCESS(status);
567
568 status = U_ZERO_ERROR;
569 uregex_reset(re, 0, &status);
570
571 result = uregex_findNext(re, &status);
572 TEST_ASSERT(result == TRUE);
573 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
574 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
575 TEST_ASSERT_SUCCESS(status);
576
577 result = uregex_findNext(re, &status);
578 TEST_ASSERT(result == TRUE);
579 TEST_ASSERT(uregex_start(re, 0, &status) == 6);
580 TEST_ASSERT(uregex_end(re, 0, &status) == 8);
581 TEST_ASSERT_SUCCESS(status);
582
583 status = U_ZERO_ERROR;
584 uregex_reset(re, 12, &status);
585
586 result = uregex_findNext(re, &status);
587 TEST_ASSERT(result == TRUE);
588 TEST_ASSERT(uregex_start(re, 0, &status) == 13);
589 TEST_ASSERT(uregex_end(re, 0, &status) == 15);
590 TEST_ASSERT_SUCCESS(status);
591
592 result = uregex_findNext(re, &status);
593 TEST_ASSERT(result == FALSE);
594 TEST_ASSERT_SUCCESS(status);
595
596 uregex_close(re);
597 }
598
599 /*
600 * groupCount
601 */
602 {
603 int32_t result;
604
605 status = U_ZERO_ERROR;
606 re = uregex_openC("abc", 0, NULL, &status);
607 result = uregex_groupCount(re, &status);
608 TEST_ASSERT_SUCCESS(status);
609 TEST_ASSERT(result == 0);
610 uregex_close(re);
611
612 status = U_ZERO_ERROR;
613 re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
614 result = uregex_groupCount(re, &status);
615 TEST_ASSERT_SUCCESS(status);
616 TEST_ASSERT(result == 3);
617 uregex_close(re);
618
619 }
620
621
622 /*
623 * group()
624 */
625 {
626 UChar text1[80];
627 UChar buf[80];
628 UBool result;
629 int32_t resultSz;
630 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));
631
632 status = U_ZERO_ERROR;
633 re = uregex_openC("abc(.*?)def", 0, NULL, &status);
634 TEST_ASSERT_SUCCESS(status);
635
636
637 uregex_setText(re, text1, -1, &status);
638 result = uregex_find(re, 0, &status);
639 TEST_ASSERT(result==TRUE);
640
641 /* Capture Group 0, the full match. Should succeed. */
642 status = U_ZERO_ERROR;
643 resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status);
644 TEST_ASSERT_SUCCESS(status);
645 TEST_ASSERT_STRING("abc interior def", buf, TRUE);
646 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
647
648 /* Capture group #1. Should succeed. */
649 status = U_ZERO_ERROR;
650 resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status);
651 TEST_ASSERT_SUCCESS(status);
652 TEST_ASSERT_STRING(" interior ", buf, TRUE);
653 TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
654
655 /* Capture group out of range. Error. */
656 status = U_ZERO_ERROR;
657 uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status);
658 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
659
660 /* NULL buffer, pure pre-flight */
661 status = U_ZERO_ERROR;
662 resultSz = uregex_group(re, 0, NULL, 0, &status);
663 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
664 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
665
666 /* Too small buffer, truncated string */
667 status = U_ZERO_ERROR;
668 memset(buf, -1, sizeof(buf));
669 resultSz = uregex_group(re, 0, buf, 5, &status);
670 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
671 TEST_ASSERT_STRING("abc i", buf, FALSE);
672 TEST_ASSERT(buf[5] == (UChar)0xffff);
673 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
674
675 /* Output string just fits buffer, no NUL term. */
676 status = U_ZERO_ERROR;
677 resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
678 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
679 TEST_ASSERT_STRING("abc interior def", buf, FALSE);
680 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
681 TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
682
683 uregex_close(re);
684
685 }
686
687 /*
688 * Regions
689 */
690
691
692 /* SetRegion(), getRegion() do something */
693 TEST_SETUP(".*", "0123456789ABCDEF", 0)
694 UChar resultString[40];
695 TEST_ASSERT(uregex_regionStart(re, &status) == 0);
696 TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
697 uregex_setRegion(re, 3, 6, &status);
698 TEST_ASSERT(uregex_regionStart(re, &status) == 3);
699 TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
700 TEST_ASSERT(uregex_findNext(re, &status));
701 TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3)
702 TEST_ASSERT_STRING("345", resultString, TRUE);
703 TEST_TEARDOWN;
704
705 /* find(start=-1) uses regions */
706 TEST_SETUP(".*", "0123456789ABCDEF", 0);
707 uregex_setRegion(re, 4, 6, &status);
708 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
709 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
710 TEST_ASSERT(uregex_end(re, 0, &status) == 6);
711 TEST_TEARDOWN;
712
713 /* find (start >=0) does not use regions */
714 TEST_SETUP(".*", "0123456789ABCDEF", 0);
715 uregex_setRegion(re, 4, 6, &status);
716 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
717 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
718 TEST_ASSERT(uregex_end(re, 0, &status) == 16);
719 TEST_TEARDOWN;
720
721 /* findNext() obeys regions */
722 TEST_SETUP(".", "0123456789ABCDEF", 0);
723 uregex_setRegion(re, 4, 6, &status);
724 TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
725 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
726 TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
727 TEST_ASSERT(uregex_start(re, 0, &status) == 5);
728 TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
729 TEST_TEARDOWN;
730
731 /* matches(start=-1) uses regions */
732 /* Also, verify that non-greedy *? succeeds in finding the full match. */
733 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
734 uregex_setRegion(re, 4, 6, &status);
735 TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
736 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
737 TEST_ASSERT(uregex_end(re, 0, &status) == 6);
738 TEST_TEARDOWN;
739
740 /* matches (start >=0) does not use regions */
741 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
742 uregex_setRegion(re, 4, 6, &status);
743 TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
744 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
745 TEST_ASSERT(uregex_end(re, 0, &status) == 16);
746 TEST_TEARDOWN;
747
748 /* lookingAt(start=-1) uses regions */
749 /* Also, verify that non-greedy *? finds the first (shortest) match. */
750 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
751 uregex_setRegion(re, 4, 6, &status);
752 TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
753 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
754 TEST_ASSERT(uregex_end(re, 0, &status) == 4);
755 TEST_TEARDOWN;
756
757 /* lookingAt (start >=0) does not use regions */
758 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
759 uregex_setRegion(re, 4, 6, &status);
760 TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
761 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
762 TEST_ASSERT(uregex_end(re, 0, &status) == 0);
763 TEST_TEARDOWN;
764
765 /* hitEnd() */
766 TEST_SETUP("[a-f]*", "abcdefghij", 0);
767 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
768 TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
769 TEST_TEARDOWN;
770
771 TEST_SETUP("[a-f]*", "abcdef", 0);
772 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
773 TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
774 TEST_TEARDOWN;
775
776 /* requireEnd */
777 TEST_SETUP("abcd", "abcd", 0);
778 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
779 TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
780 TEST_TEARDOWN;
781
782 TEST_SETUP("abcd$", "abcd", 0);
783 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
784 TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
785 TEST_TEARDOWN;
786
787 /* anchoringBounds */
788 TEST_SETUP("abc$", "abcdef", 0);
789 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
790 uregex_useAnchoringBounds(re, FALSE, &status);
791 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
792
793 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
794 uregex_useAnchoringBounds(re, TRUE, &status);
795 uregex_setRegion(re, 0, 3, &status);
796 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
797 TEST_ASSERT(uregex_end(re, 0, &status) == 3);
798 TEST_TEARDOWN;
799
800 /* Transparent Bounds */
801 TEST_SETUP("abc(?=def)", "abcdef", 0);
802 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
803 uregex_useTransparentBounds(re, TRUE, &status);
804 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
805
806 uregex_useTransparentBounds(re, FALSE, &status);
807 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* No Region */
808 uregex_setRegion(re, 0, 3, &status);
809 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); /* with region, opaque bounds */
810 uregex_useTransparentBounds(re, TRUE, &status);
811 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* with region, transparent bounds */
812 TEST_ASSERT(uregex_end(re, 0, &status) == 3);
813 TEST_TEARDOWN;
814
815
816 /*
817 * replaceFirst()
818 */
819 {
820 UChar text1[80];
821 UChar text2[80];
822 UChar replText[80];
823 UChar buf[80];
824 int32_t resultSz;
825 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
826 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
827 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
828
829 status = U_ZERO_ERROR;
830 re = uregex_openC("x(.*?)x", 0, NULL, &status);
831 TEST_ASSERT_SUCCESS(status);
832
833 /* Normal case, with match */
834 uregex_setText(re, text1, -1, &status);
835 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
836 TEST_ASSERT_SUCCESS(status);
837 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
838 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
839
840 /* No match. Text should copy to output with no changes. */
841 status = U_ZERO_ERROR;
842 uregex_setText(re, text2, -1, &status);
843 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
844 TEST_ASSERT_SUCCESS(status);
845 TEST_ASSERT_STRING("No match here.", buf, TRUE);
846 TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
847
848 /* Match, output just fills buffer, no termination warning. */
849 status = U_ZERO_ERROR;
850 uregex_setText(re, text1, -1, &status);
851 memset(buf, -1, sizeof(buf));
852 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
853 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
854 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
855 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
856 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
857
858 /* Do the replaceFirst again, without first resetting anything.
859 * Should give the same results.
860 */
861 status = U_ZERO_ERROR;
862 memset(buf, -1, sizeof(buf));
863 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
864 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
865 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
866 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
867 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
868
869 /* NULL buffer, zero buffer length */
870 status = U_ZERO_ERROR;
871 resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
872 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
873 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
874
875 /* Buffer too small by one */
876 status = U_ZERO_ERROR;
877 memset(buf, -1, sizeof(buf));
878 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
879 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
880 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
881 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
882 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
883
884 uregex_close(re);
885 }
886
887
888 /*
889 * replaceAll()
890 */
891 {
892 UChar text1[80]; /* "Replace xaax x1x x...x." */
893 UChar text2[80]; /* "No match Here" */
894 UChar replText[80]; /* "<$1>" */
895 UChar replText2[80]; /* "<<$1>>" */
896 const char * pattern = "x(.*?)x";
897 const char * expectedResult = "Replace <aa> <1> <...>.";
898 const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
899 UChar buf[80];
900 int32_t resultSize;
901 int32_t expectedResultSize;
902 int32_t expectedResultSize2;
903 int32_t i;
904
905 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
906 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
907 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
908 u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2));
909 expectedResultSize = strlen(expectedResult);
910 expectedResultSize2 = strlen(expectedResult2);
911
912 status = U_ZERO_ERROR;
913 re = uregex_openC(pattern, 0, NULL, &status);
914 TEST_ASSERT_SUCCESS(status);
915
916 /* Normal case, with match */
917 uregex_setText(re, text1, -1, &status);
918 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
919 TEST_ASSERT_SUCCESS(status);
920 TEST_ASSERT_STRING(expectedResult, buf, TRUE);
921 TEST_ASSERT(resultSize == expectedResultSize);
922
923 /* No match. Text should copy to output with no changes. */
924 status = U_ZERO_ERROR;
925 uregex_setText(re, text2, -1, &status);
926 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
927 TEST_ASSERT_SUCCESS(status);
928 TEST_ASSERT_STRING("No match here.", buf, TRUE);
929 TEST_ASSERT(resultSize == u_strlen(text2));
930
931 /* Match, output just fills buffer, no termination warning. */
932 status = U_ZERO_ERROR;
933 uregex_setText(re, text1, -1, &status);
934 memset(buf, -1, sizeof(buf));
935 resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
936 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
937 TEST_ASSERT_STRING(expectedResult, buf, FALSE);
938 TEST_ASSERT(resultSize == expectedResultSize);
939 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
940
941 /* Do the replaceAll again, without first resetting anything.
942 * Should give the same results.
943 */
944 status = U_ZERO_ERROR;
945 memset(buf, -1, sizeof(buf));
946 resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
947 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
948 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
949 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
950 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
951
952 /* NULL buffer, zero buffer length */
953 status = U_ZERO_ERROR;
954 resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
955 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
956 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
957
958 /* Buffer too small. Try every size, which will tickle edge cases
959 * in uregex_appendReplacement (used by replaceAll) */
960 for (i=0; i<expectedResultSize; i++) {
961 char expected[80];
962 status = U_ZERO_ERROR;
963 memset(buf, -1, sizeof(buf));
964 resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
965 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
966 strcpy(expected, expectedResult);
967 expected[i] = 0;
968 TEST_ASSERT_STRING(expected, buf, FALSE);
969 TEST_ASSERT(resultSize == expectedResultSize);
970 TEST_ASSERT(buf[i] == (UChar)0xffff);
971 }
972
973 /* Buffer too small. Same as previous test, except this time the replacement
974 * text is longer than the match capture group, making the length of the complete
975 * replacement longer than the original string.
976 */
977 for (i=0; i<expectedResultSize2; i++) {
978 char expected[80];
979 status = U_ZERO_ERROR;
980 memset(buf, -1, sizeof(buf));
981 resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
982 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
983 strcpy(expected, expectedResult2);
984 expected[i] = 0;
985 TEST_ASSERT_STRING(expected, buf, FALSE);
986 TEST_ASSERT(resultSize == expectedResultSize2);
987 TEST_ASSERT(buf[i] == (UChar)0xffff);
988 }
989
990
991 uregex_close(re);
992 }
993
994
995 /*
996 * appendReplacement()
997 */
998 {
999 UChar text[100];
1000 UChar repl[100];
1001 UChar buf[100];
1002 UChar *bufPtr;
1003 int32_t bufCap;
1004
1005
1006 status = U_ZERO_ERROR;
1007 re = uregex_openC(".*", 0, 0, &status);
1008 TEST_ASSERT_SUCCESS(status);
1009
1010 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text));
1011 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1012 uregex_setText(re, text, -1, &status);
1013
1014 /* match covers whole target string */
1015 uregex_find(re, 0, &status);
1016 TEST_ASSERT_SUCCESS(status);
1017 bufPtr = buf;
1018 bufCap = UPRV_LENGTHOF(buf);
1019 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1020 TEST_ASSERT_SUCCESS(status);
1021 TEST_ASSERT_STRING("some other", buf, TRUE);
1022
1023 /* Match has \u \U escapes */
1024 uregex_find(re, 0, &status);
1025 TEST_ASSERT_SUCCESS(status);
1026 bufPtr = buf;
1027 bufCap = UPRV_LENGTHOF(buf);
1028 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1029 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1030 TEST_ASSERT_SUCCESS(status);
1031 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1032
1033 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1034 status = U_ZERO_ERROR;
1035 uregex_find(re, 0, &status);
1036 TEST_ASSERT_SUCCESS(status);
1037 bufPtr = buf;
1038 status = U_BUFFER_OVERFLOW_ERROR;
1039 uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
1040 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1041
1042 uregex_close(re);
1043 }
1044
1045
1046 /*
1047 * appendTail(). Checked in ReplaceFirst(), replaceAll().
1048 */
1049
1050 /*
1051 * split()
1052 */
1053 {
1054 UChar textToSplit[80];
1055 UChar text2[80];
1056 UChar buf[200];
1057 UChar *fields[10];
1058 int32_t numFields;
1059 int32_t requiredCapacity;
1060 int32_t spaceNeeded;
1061 int32_t sz;
1062
1063 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit));
1064 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1065
1066 status = U_ZERO_ERROR;
1067 re = uregex_openC(":", 0, NULL, &status);
1068
1069
1070 /* Simple split */
1071
1072 uregex_setText(re, textToSplit, -1, &status);
1073 TEST_ASSERT_SUCCESS(status);
1074
1075 /* The TEST_ASSERT_SUCCESS call above should change too... */
1076 if (U_SUCCESS(status)) {
1077 memset(fields, -1, sizeof(fields));
1078 numFields =
1079 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1080 TEST_ASSERT_SUCCESS(status);
1081
1082 /* The TEST_ASSERT_SUCCESS call above should change too... */
1083 if(U_SUCCESS(status)) {
1084 TEST_ASSERT(numFields == 3);
1085 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1086 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1087 TEST_ASSERT_STRING(" third", fields[2], TRUE);
1088 TEST_ASSERT(fields[3] == NULL);
1089
1090 spaceNeeded = u_strlen(textToSplit) -
1091 (numFields - 1) + /* Field delimiters do not appear in output */
1092 numFields; /* Each field gets a NUL terminator */
1093
1094 TEST_ASSERT(spaceNeeded == requiredCapacity);
1095 }
1096 }
1097
1098 uregex_close(re);
1099
1100
1101 /* Split with too few output strings available */
1102 status = U_ZERO_ERROR;
1103 re = uregex_openC(":", 0, NULL, &status);
1104 uregex_setText(re, textToSplit, -1, &status);
1105 TEST_ASSERT_SUCCESS(status);
1106
1107 /* The TEST_ASSERT_SUCCESS call above should change too... */
1108 if(U_SUCCESS(status)) {
1109 memset(fields, -1, sizeof(fields));
1110 numFields =
1111 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1112 TEST_ASSERT_SUCCESS(status);
1113
1114 /* The TEST_ASSERT_SUCCESS call above should change too... */
1115 if(U_SUCCESS(status)) {
1116 TEST_ASSERT(numFields == 2);
1117 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1118 TEST_ASSERT_STRING(" second: third", fields[1], TRUE);
1119 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1120
1121 spaceNeeded = u_strlen(textToSplit) -
1122 (numFields - 1) + /* Field delimiters do not appear in output */
1123 numFields; /* Each field gets a NUL terminator */
1124
1125 TEST_ASSERT(spaceNeeded == requiredCapacity);
1126
1127 /* Split with a range of output buffer sizes. */
1128 spaceNeeded = u_strlen(textToSplit) -
1129 (numFields - 1) + /* Field delimiters do not appear in output */
1130 numFields; /* Each field gets a NUL terminator */
1131
1132 for (sz=0; sz < spaceNeeded+1; sz++) {
1133 memset(fields, -1, sizeof(fields));
1134 status = U_ZERO_ERROR;
1135 numFields =
1136 uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1137 if (sz >= spaceNeeded) {
1138 TEST_ASSERT_SUCCESS(status);
1139 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1140 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1141 TEST_ASSERT_STRING(" third", fields[2], TRUE);
1142 } else {
1143 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1144 }
1145 TEST_ASSERT(numFields == 3);
1146 TEST_ASSERT(fields[3] == NULL);
1147 TEST_ASSERT(spaceNeeded == requiredCapacity);
1148 }
1149 }
1150 }
1151
1152 uregex_close(re);
1153 }
1154
1155
1156
1157
1158 /* Split(), part 2. Patterns with capture groups. The capture group text
1159 * comes out as additional fields. */
1160 {
1161 UChar textToSplit[80];
1162 UChar buf[200];
1163 UChar *fields[10];
1164 int32_t numFields;
1165 int32_t requiredCapacity;
1166 int32_t spaceNeeded;
1167 int32_t sz;
1168
1169 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit));
1170
1171 status = U_ZERO_ERROR;
1172 re = uregex_openC("<(.*?)>", 0, NULL, &status);
1173
1174 uregex_setText(re, textToSplit, -1, &status);
1175 TEST_ASSERT_SUCCESS(status);
1176
1177 /* The TEST_ASSERT_SUCCESS call above should change too... */
1178 if(U_SUCCESS(status)) {
1179 memset(fields, -1, sizeof(fields));
1180 numFields =
1181 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1182 TEST_ASSERT_SUCCESS(status);
1183
1184 /* The TEST_ASSERT_SUCCESS call above should change too... */
1185 if(U_SUCCESS(status)) {
1186 TEST_ASSERT(numFields == 5);
1187 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1188 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1189 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1190 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1191 TEST_ASSERT_STRING(" third", fields[4], TRUE);
1192 TEST_ASSERT(fields[5] == NULL);
1193 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1194 TEST_ASSERT(spaceNeeded == requiredCapacity);
1195 }
1196 }
1197
1198 /* Split with too few output strings available (2) */
1199 status = U_ZERO_ERROR;
1200 memset(fields, -1, sizeof(fields));
1201 numFields =
1202 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1203 TEST_ASSERT_SUCCESS(status);
1204
1205 /* The TEST_ASSERT_SUCCESS call above should change too... */
1206 if(U_SUCCESS(status)) {
1207 TEST_ASSERT(numFields == 2);
1208 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1209 TEST_ASSERT_STRING(" second<tag-b> third", fields[1], TRUE);
1210 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1211
1212 spaceNeeded = strlen("first . second<tag-b> third."); /* "." at NUL positions */
1213 TEST_ASSERT(spaceNeeded == requiredCapacity);
1214 }
1215
1216 /* Split with too few output strings available (3) */
1217 status = U_ZERO_ERROR;
1218 memset(fields, -1, sizeof(fields));
1219 numFields =
1220 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status);
1221 TEST_ASSERT_SUCCESS(status);
1222
1223 /* The TEST_ASSERT_SUCCESS call above should change too... */
1224 if(U_SUCCESS(status)) {
1225 TEST_ASSERT(numFields == 3);
1226 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1227 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1228 TEST_ASSERT_STRING(" second<tag-b> third", fields[2], TRUE);
1229 TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1230
1231 spaceNeeded = strlen("first .tag-a. second<tag-b> third."); /* "." at NUL positions */
1232 TEST_ASSERT(spaceNeeded == requiredCapacity);
1233 }
1234
1235 /* Split with just enough output strings available (5) */
1236 status = U_ZERO_ERROR;
1237 memset(fields, -1, sizeof(fields));
1238 numFields =
1239 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status);
1240 TEST_ASSERT_SUCCESS(status);
1241
1242 /* The TEST_ASSERT_SUCCESS call above should change too... */
1243 if(U_SUCCESS(status)) {
1244 TEST_ASSERT(numFields == 5);
1245 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1246 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1247 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1248 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1249 TEST_ASSERT_STRING(" third", fields[4], TRUE);
1250 TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
1251
1252 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1253 TEST_ASSERT(spaceNeeded == requiredCapacity);
1254 }
1255
1256 /* Split, end of text is a field delimiter. */
1257 status = U_ZERO_ERROR;
1258 sz = strlen("first <tag-a> second<tag-b>");
1259 uregex_setText(re, textToSplit, sz, &status);
1260 TEST_ASSERT_SUCCESS(status);
1261
1262 /* The TEST_ASSERT_SUCCESS call above should change too... */
1263 if(U_SUCCESS(status)) {
1264 memset(fields, -1, sizeof(fields));
1265 numFields =
1266 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status);
1267 TEST_ASSERT_SUCCESS(status);
1268
1269 /* The TEST_ASSERT_SUCCESS call above should change too... */
1270 if(U_SUCCESS(status)) {
1271 TEST_ASSERT(numFields == 5);
1272 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1273 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1274 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1275 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1276 TEST_ASSERT_STRING("", fields[4], TRUE);
1277 TEST_ASSERT(fields[5] == NULL);
1278 TEST_ASSERT(fields[8] == NULL);
1279 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
1280 spaceNeeded = strlen("first .tag-a. second.tag-b.."); /* "." at NUL positions */
1281 TEST_ASSERT(spaceNeeded == requiredCapacity);
1282 }
1283 }
1284
1285 uregex_close(re);
1286 }
1287
1288 /*
1289 * set/getTimeLimit
1290 */
1291 TEST_SETUP("abc$", "abcdef", 0);
1292 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1293 uregex_setTimeLimit(re, 1000, &status);
1294 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1295 TEST_ASSERT_SUCCESS(status);
1296 uregex_setTimeLimit(re, -1, &status);
1297 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1298 status = U_ZERO_ERROR;
1299 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1300 TEST_TEARDOWN;
1301
1302 /*
1303 * set/get Stack Limit
1304 */
1305 TEST_SETUP("abc$", "abcdef", 0);
1306 TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1307 uregex_setStackLimit(re, 40000, &status);
1308 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1309 TEST_ASSERT_SUCCESS(status);
1310 uregex_setStackLimit(re, -1, &status);
1311 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1312 status = U_ZERO_ERROR;
1313 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1314 TEST_TEARDOWN;
1315
1316
1317 /*
1318 * Get/Set callback functions
1319 * This test is copied from intltest regex/Callbacks
1320 * The pattern and test data will run long enough to cause the callback
1321 * to be invoked. The nested '+' operators give exponential time
1322 * behavior with increasing string length.
1323 */
1324 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
1325 callBackContext cbInfo = {4, 0, 0};
1326 const void *pContext = &cbInfo;
1327 URegexMatchCallback *returnedFn = &TestCallbackFn;
1328
1329 /* Getting the callback fn when it hasn't been set must return NULL */
1330 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1331 TEST_ASSERT_SUCCESS(status);
1332 TEST_ASSERT(returnedFn == NULL);
1333 TEST_ASSERT(pContext == NULL);
1334
    /* Set the callback and do a match. */
1336 /* The callback function should record that it has been called. */
1337 uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1338 TEST_ASSERT_SUCCESS(status);
1339 TEST_ASSERT(cbInfo.numCalls == 0);
1340 TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
1341 TEST_ASSERT_SUCCESS(status);
1342 TEST_ASSERT(cbInfo.numCalls > 0);
1343
1344 /* Getting the callback should return the values that were set above. */
1345 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1346 TEST_ASSERT(returnedFn == &TestCallbackFn);
1347 TEST_ASSERT(pContext == &cbInfo);
1348
1349 TEST_TEARDOWN;
1350 }
1351
1352
1353
TestBug4315(void)1354 static void TestBug4315(void) {
1355 UErrorCode theICUError = U_ZERO_ERROR;
1356 URegularExpression *theRegEx;
1357 UChar *textBuff;
1358 const char *thePattern;
1359 UChar theString[100];
1360 UChar *destFields[24];
1361 int32_t neededLength1;
1362 int32_t neededLength2;
1363
1364 int32_t wordCount = 0;
1365 int32_t destFieldsSize = 24;
1366
1367 thePattern = "ck ";
1368 u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1369
1370 /* open a regex */
1371 theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1372 TEST_ASSERT_SUCCESS(theICUError);
1373
1374 /* set the input string */
1375 uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1376 TEST_ASSERT_SUCCESS(theICUError);
1377
1378 /* split */
1379 /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1380 * error occurs! */
1381 wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1382 destFieldsSize, &theICUError);
1383
1384 TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1385 TEST_ASSERT(wordCount==3);
1386
1387 if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1388 {
1389 theICUError = U_ZERO_ERROR;
1390 textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1391 wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1392 destFields, destFieldsSize, &theICUError);
1393 TEST_ASSERT(wordCount==3);
1394 TEST_ASSERT_SUCCESS(theICUError);
1395 TEST_ASSERT(neededLength1 == neededLength2);
1396 TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
1397 TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
1398 TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
1399 TEST_ASSERT(destFields[3] == NULL);
1400 free(textBuff);
1401 }
1402 uregex_close(theRegEx);
1403 }
1404
1405 /* Based on TestRegexCAPI() */
TestUTextAPI(void)1406 static void TestUTextAPI(void) {
1407 UErrorCode status = U_ZERO_ERROR;
1408 URegularExpression *re;
1409 UText patternText = UTEXT_INITIALIZER;
1410 UChar pat[200];
1411 const char patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1412
    /* Minimalist open/close */
1414 utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1415 re = uregex_openUText(&patternText, 0, 0, &status);
1416 if (U_FAILURE(status)) {
1417 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1418 utext_close(&patternText);
1419 return;
1420 }
1421 uregex_close(re);
1422
1423 /* Open with all flag values set */
1424 status = U_ZERO_ERROR;
1425 re = uregex_openUText(&patternText,
1426 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1427 0, &status);
1428 TEST_ASSERT_SUCCESS(status);
1429 uregex_close(re);
1430
1431 /* Open with an invalid flag */
1432 status = U_ZERO_ERROR;
1433 re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1434 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1435 uregex_close(re);
1436
1437 /* open with an invalid parameter */
1438 status = U_ZERO_ERROR;
1439 re = uregex_openUText(NULL,
1440 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1441 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1442
1443 /*
1444 * clone
1445 */
1446 {
1447 URegularExpression *clone1;
1448 URegularExpression *clone2;
1449 URegularExpression *clone3;
1450 UChar testString1[30];
1451 UChar testString2[30];
1452 UBool result;
1453
1454
1455 status = U_ZERO_ERROR;
1456 re = uregex_openUText(&patternText, 0, 0, &status);
1457 TEST_ASSERT_SUCCESS(status);
1458 clone1 = uregex_clone(re, &status);
1459 TEST_ASSERT_SUCCESS(status);
1460 TEST_ASSERT(clone1 != NULL);
1461
1462 status = U_ZERO_ERROR;
1463 clone2 = uregex_clone(re, &status);
1464 TEST_ASSERT_SUCCESS(status);
1465 TEST_ASSERT(clone2 != NULL);
1466 uregex_close(re);
1467
1468 status = U_ZERO_ERROR;
1469 clone3 = uregex_clone(clone2, &status);
1470 TEST_ASSERT_SUCCESS(status);
1471 TEST_ASSERT(clone3 != NULL);
1472
1473 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
1474 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
1475
1476 status = U_ZERO_ERROR;
1477 uregex_setText(clone1, testString1, -1, &status);
1478 TEST_ASSERT_SUCCESS(status);
1479 result = uregex_lookingAt(clone1, 0, &status);
1480 TEST_ASSERT_SUCCESS(status);
1481 TEST_ASSERT(result==TRUE);
1482
1483 status = U_ZERO_ERROR;
1484 uregex_setText(clone2, testString2, -1, &status);
1485 TEST_ASSERT_SUCCESS(status);
1486 result = uregex_lookingAt(clone2, 0, &status);
1487 TEST_ASSERT_SUCCESS(status);
1488 TEST_ASSERT(result==FALSE);
1489 result = uregex_find(clone2, 0, &status);
1490 TEST_ASSERT_SUCCESS(status);
1491 TEST_ASSERT(result==TRUE);
1492
1493 uregex_close(clone1);
1494 uregex_close(clone2);
1495 uregex_close(clone3);
1496
1497 }
1498
1499 /*
1500 * pattern() and patternText()
1501 */
1502 {
1503 const UChar *resultPat;
1504 int32_t resultLen;
1505 UText *resultText;
1506 const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1507 const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1508 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */
1509 status = U_ZERO_ERROR;
1510
1511 utext_openUTF8(&patternText, str_hello, -1, &status);
1512 re = uregex_open(pat, -1, 0, NULL, &status);
1513 resultPat = uregex_pattern(re, &resultLen, &status);
1514 TEST_ASSERT_SUCCESS(status);
1515
1516 /* The TEST_ASSERT_SUCCESS above should change too... */
1517 if (U_SUCCESS(status)) {
1518 TEST_ASSERT(resultLen == -1);
1519 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1520 }
1521
1522 resultText = uregex_patternUText(re, &status);
1523 TEST_ASSERT_SUCCESS(status);
1524 TEST_ASSERT_UTEXT(str_hello, resultText);
1525
1526 uregex_close(re);
1527
1528 status = U_ZERO_ERROR;
1529 re = uregex_open(pat, 3, 0, NULL, &status);
1530 resultPat = uregex_pattern(re, &resultLen, &status);
1531 TEST_ASSERT_SUCCESS(status);
1532
1533 /* The TEST_ASSERT_SUCCESS above should change too... */
1534 if (U_SUCCESS(status)) {
1535 TEST_ASSERT(resultLen == 3);
1536 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1537 TEST_ASSERT(u_strlen(resultPat) == 3);
1538 }
1539
1540 resultText = uregex_patternUText(re, &status);
1541 TEST_ASSERT_SUCCESS(status);
1542 TEST_ASSERT_UTEXT(str_hel, resultText);
1543
1544 uregex_close(re);
1545 }
1546
1547 /*
1548 * setUText() and lookingAt()
1549 */
1550 {
1551 UText text1 = UTEXT_INITIALIZER;
1552 UText text2 = UTEXT_INITIALIZER;
1553 UBool result;
1554 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1555 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1556 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1557 status = U_ZERO_ERROR;
1558 utext_openUTF8(&text1, str_abcccd, -1, &status);
1559 utext_openUTF8(&text2, str_abcccxd, -1, &status);
1560
1561 utext_openUTF8(&patternText, str_abcd, -1, &status);
1562 re = uregex_openUText(&patternText, 0, NULL, &status);
1563 TEST_ASSERT_SUCCESS(status);
1564
1565 /* Operation before doing a setText should fail... */
1566 status = U_ZERO_ERROR;
1567 uregex_lookingAt(re, 0, &status);
1568 TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1569
1570 status = U_ZERO_ERROR;
1571 uregex_setUText(re, &text1, &status);
1572 result = uregex_lookingAt(re, 0, &status);
1573 TEST_ASSERT(result == TRUE);
1574 TEST_ASSERT_SUCCESS(status);
1575
1576 status = U_ZERO_ERROR;
1577 uregex_setUText(re, &text2, &status);
1578 result = uregex_lookingAt(re, 0, &status);
1579 TEST_ASSERT(result == FALSE);
1580 TEST_ASSERT_SUCCESS(status);
1581
1582 status = U_ZERO_ERROR;
1583 uregex_setUText(re, &text1, &status);
1584 result = uregex_lookingAt(re, 0, &status);
1585 TEST_ASSERT(result == TRUE);
1586 TEST_ASSERT_SUCCESS(status);
1587
1588 uregex_close(re);
1589 utext_close(&text1);
1590 utext_close(&text2);
1591 }
1592
1593
1594 /*
1595 * getText() and getUText()
1596 */
1597 {
1598 UText text1 = UTEXT_INITIALIZER;
1599 UText text2 = UTEXT_INITIALIZER;
1600 UChar text2Chars[20];
1601 UText *resultText;
1602 const UChar *result;
1603 int32_t textLength;
1604 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1605 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1606 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1607
1608
1609 status = U_ZERO_ERROR;
1610 utext_openUTF8(&text1, str_abcccd, -1, &status);
1611 u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars));
1612 utext_openUChars(&text2, text2Chars, -1, &status);
1613
1614 utext_openUTF8(&patternText, str_abcd, -1, &status);
1615 re = uregex_openUText(&patternText, 0, NULL, &status);
1616
1617 /* First set a UText */
1618 uregex_setUText(re, &text1, &status);
1619 resultText = uregex_getUText(re, NULL, &status);
1620 TEST_ASSERT_SUCCESS(status);
1621 TEST_ASSERT(resultText != &text1);
1622 utext_setNativeIndex(resultText, 0);
1623 utext_setNativeIndex(&text1, 0);
1624 TEST_ASSERT(testUTextEqual(resultText, &text1));
1625 utext_close(resultText);
1626
1627 result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
1628 (void)result; /* Suppress set but not used warning. */
1629 TEST_ASSERT(textLength == -1 || textLength == 6);
1630 resultText = uregex_getUText(re, NULL, &status);
1631 TEST_ASSERT_SUCCESS(status);
1632 TEST_ASSERT(resultText != &text1);
1633 utext_setNativeIndex(resultText, 0);
1634 utext_setNativeIndex(&text1, 0);
1635 TEST_ASSERT(testUTextEqual(resultText, &text1));
1636 utext_close(resultText);
1637
1638 /* Then set a UChar * */
1639 uregex_setText(re, text2Chars, 7, &status);
1640 resultText = uregex_getUText(re, NULL, &status);
1641 TEST_ASSERT_SUCCESS(status);
1642 utext_setNativeIndex(resultText, 0);
1643 utext_setNativeIndex(&text2, 0);
1644 TEST_ASSERT(testUTextEqual(resultText, &text2));
1645 utext_close(resultText);
1646 result = uregex_getText(re, &textLength, &status);
1647 TEST_ASSERT(textLength == 7);
1648
1649 uregex_close(re);
1650 utext_close(&text1);
1651 utext_close(&text2);
1652 }
1653
1654 /*
1655 * matches()
1656 */
1657 {
1658 UText text1 = UTEXT_INITIALIZER;
1659 UBool result;
1660 UText nullText = UTEXT_INITIALIZER;
1661 const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1662 const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1663
1664 status = U_ZERO_ERROR;
1665 utext_openUTF8(&text1, str_abcccde, -1, &status);
1666 utext_openUTF8(&patternText, str_abcd, -1, &status);
1667 re = uregex_openUText(&patternText, 0, NULL, &status);
1668
1669 uregex_setUText(re, &text1, &status);
1670 result = uregex_matches(re, 0, &status);
1671 TEST_ASSERT(result == FALSE);
1672 TEST_ASSERT_SUCCESS(status);
1673 uregex_close(re);
1674
1675 status = U_ZERO_ERROR;
1676 re = uregex_openC(".?", 0, NULL, &status);
1677 uregex_setUText(re, &text1, &status);
1678 result = uregex_matches(re, 7, &status);
1679 TEST_ASSERT(result == TRUE);
1680 TEST_ASSERT_SUCCESS(status);
1681
1682 status = U_ZERO_ERROR;
1683 utext_openUTF8(&nullText, "", -1, &status);
1684 uregex_setUText(re, &nullText, &status);
1685 TEST_ASSERT_SUCCESS(status);
1686 result = uregex_matches(re, 0, &status);
1687 TEST_ASSERT(result == TRUE);
1688 TEST_ASSERT_SUCCESS(status);
1689
1690 uregex_close(re);
1691 utext_close(&text1);
1692 utext_close(&nullText);
1693 }
1694
1695
1696 /*
1697 * lookingAt() Used in setText test.
1698 */
1699
1700
1701 /*
1702 * find(), findNext, start, end, reset
1703 */
1704 {
1705 UChar text1[50];
1706 UBool result;
1707 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1));
1708 status = U_ZERO_ERROR;
1709 re = uregex_openC("rx", 0, NULL, &status);
1710
1711 uregex_setText(re, text1, -1, &status);
1712 result = uregex_find(re, 0, &status);
1713 TEST_ASSERT(result == TRUE);
1714 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1715 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1716 TEST_ASSERT_SUCCESS(status);
1717
1718 result = uregex_find(re, 9, &status);
1719 TEST_ASSERT(result == TRUE);
1720 TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1721 TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1722 TEST_ASSERT_SUCCESS(status);
1723
1724 result = uregex_find(re, 14, &status);
1725 TEST_ASSERT(result == FALSE);
1726 TEST_ASSERT_SUCCESS(status);
1727
1728 status = U_ZERO_ERROR;
1729 uregex_reset(re, 0, &status);
1730
1731 result = uregex_findNext(re, &status);
1732 TEST_ASSERT(result == TRUE);
1733 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1734 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1735 TEST_ASSERT_SUCCESS(status);
1736
1737 result = uregex_findNext(re, &status);
1738 TEST_ASSERT(result == TRUE);
1739 TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1740 TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1741 TEST_ASSERT_SUCCESS(status);
1742
1743 status = U_ZERO_ERROR;
1744 uregex_reset(re, 12, &status);
1745
1746 result = uregex_findNext(re, &status);
1747 TEST_ASSERT(result == TRUE);
1748 TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1749 TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1750 TEST_ASSERT_SUCCESS(status);
1751
1752 result = uregex_findNext(re, &status);
1753 TEST_ASSERT(result == FALSE);
1754 TEST_ASSERT_SUCCESS(status);
1755
1756 uregex_close(re);
1757 }
1758
1759 /*
1760 * groupUText()
1761 */
1762 {
1763 UChar text1[80];
1764 UText *actual;
1765 UBool result;
1766 int64_t groupLen = 0;
1767 UChar groupBuf[20];
1768
1769 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));
1770
1771 status = U_ZERO_ERROR;
1772 re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1773 TEST_ASSERT_SUCCESS(status);
1774
1775 uregex_setText(re, text1, -1, &status);
1776 result = uregex_find(re, 0, &status);
1777 TEST_ASSERT(result==TRUE);
1778
1779 /* Capture Group 0 with shallow clone API. Should succeed. */
1780 status = U_ZERO_ERROR;
1781 actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);
1782 TEST_ASSERT_SUCCESS(status);
1783
1784 TEST_ASSERT(utext_getNativeIndex(actual) == 6); /* index of "abc " within "noise abc ..." */
1785 TEST_ASSERT(groupLen == 16); /* length of "abc interior def" */
1786 utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1787
1788 TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE);
1789 utext_close(actual);
1790
1791 /* Capture group #1. Should succeed. */
1792 status = U_ZERO_ERROR;
1793
1794 actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);
1795 TEST_ASSERT_SUCCESS(status);
1796 TEST_ASSERT(9 == utext_getNativeIndex(actual)); /* index of " interior " within "noise abc interior def ... " */
1797 /* (within the string text1) */
1798 TEST_ASSERT(10 == groupLen); /* length of " interior " */
1799 utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1800 TEST_ASSERT_STRING(" interior ", groupBuf, TRUE);
1801
1802 utext_close(actual);
1803
1804 /* Capture group out of range. Error. */
1805 status = U_ZERO_ERROR;
1806 actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);
1807 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1808 utext_close(actual);
1809
1810 uregex_close(re);
1811 }
1812
1813 /*
1814 * replaceFirst()
1815 */
1816 {
1817 UChar text1[80];
1818 UChar text2[80];
1819 UText replText = UTEXT_INITIALIZER;
1820 UText *result;
1821 const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1822 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1823 const char str_u00411U00000042a[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
1824 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
1825 const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1826 const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1827 status = U_ZERO_ERROR;
1828 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
1829 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1830 utext_openUTF8(&replText, str_1x, -1, &status);
1831
1832 re = uregex_openC("x(.*?)x", 0, NULL, &status);
1833 TEST_ASSERT_SUCCESS(status);
1834
1835 /* Normal case, with match */
1836 uregex_setText(re, text1, -1, &status);
1837 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1838 TEST_ASSERT_SUCCESS(status);
1839 TEST_ASSERT_UTEXT(str_Replxxx, result);
1840 utext_close(result);
1841
1842 /* No match. Text should copy to output with no changes. */
1843 uregex_setText(re, text2, -1, &status);
1844 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1845 TEST_ASSERT_SUCCESS(status);
1846 TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1847 utext_close(result);
1848
1849 /* Unicode escapes */
1850 uregex_setText(re, text1, -1, &status);
1851 utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1852 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1853 TEST_ASSERT_SUCCESS(status);
1854 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1855 utext_close(result);
1856
1857 uregex_close(re);
1858 utext_close(&replText);
1859 }
1860
1861
1862 /*
1863 * replaceAll()
1864 */
1865 {
1866 UChar text1[80];
1867 UChar text2[80];
1868 UText replText = UTEXT_INITIALIZER;
1869 UText *result;
1870 const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1871 const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1872 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1873 status = U_ZERO_ERROR;
1874 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
1875 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1876 utext_openUTF8(&replText, str_1, -1, &status);
1877
1878 re = uregex_openC("x(.*?)x", 0, NULL, &status);
1879 TEST_ASSERT_SUCCESS(status);
1880
1881 /* Normal case, with match */
1882 uregex_setText(re, text1, -1, &status);
1883 result = uregex_replaceAllUText(re, &replText, NULL, &status);
1884 TEST_ASSERT_SUCCESS(status);
1885 TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1886 utext_close(result);
1887
1888 /* No match. Text should copy to output with no changes. */
1889 uregex_setText(re, text2, -1, &status);
1890 result = uregex_replaceAllUText(re, &replText, NULL, &status);
1891 TEST_ASSERT_SUCCESS(status);
1892 TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1893 utext_close(result);
1894
1895 uregex_close(re);
1896 utext_close(&replText);
1897 }
1898
1899
1900 /*
1901 * appendReplacement()
1902 */
1903 {
1904 UChar text[100];
1905 UChar repl[100];
1906 UChar buf[100];
1907 UChar *bufPtr;
1908 int32_t bufCap;
1909
1910 status = U_ZERO_ERROR;
1911 re = uregex_openC(".*", 0, 0, &status);
1912 TEST_ASSERT_SUCCESS(status);
1913
1914 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text));
1915 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1916 uregex_setText(re, text, -1, &status);
1917
1918 /* match covers whole target string */
1919 uregex_find(re, 0, &status);
1920 TEST_ASSERT_SUCCESS(status);
1921 bufPtr = buf;
1922 bufCap = UPRV_LENGTHOF(buf);
1923 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1924 TEST_ASSERT_SUCCESS(status);
1925 TEST_ASSERT_STRING("some other", buf, TRUE);
1926
1927 /* Match has \u \U escapes */
1928 uregex_find(re, 0, &status);
1929 TEST_ASSERT_SUCCESS(status);
1930 bufPtr = buf;
1931 bufCap = UPRV_LENGTHOF(buf);
1932 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1933 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1934 TEST_ASSERT_SUCCESS(status);
1935 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1936
1937 uregex_close(re);
1938 }
1939
1940
1941 /*
1942 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1943 */
1944
1945 /*
1946 * splitUText()
1947 */
1948 {
1949 UChar textToSplit[80];
1950 UChar text2[80];
1951 UText *fields[10];
1952 int32_t numFields;
1953 int32_t i;
1954
1955 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit));
1956 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1957
1958 status = U_ZERO_ERROR;
1959 re = uregex_openC(":", 0, NULL, &status);
1960
1961
1962 /* Simple split */
1963
1964 uregex_setText(re, textToSplit, -1, &status);
1965 TEST_ASSERT_SUCCESS(status);
1966
1967 /* The TEST_ASSERT_SUCCESS call above should change too... */
1968 if (U_SUCCESS(status)) {
1969 memset(fields, 0, sizeof(fields));
1970 numFields = uregex_splitUText(re, fields, 10, &status);
1971 TEST_ASSERT_SUCCESS(status);
1972
1973 /* The TEST_ASSERT_SUCCESS call above should change too... */
1974 if(U_SUCCESS(status)) {
1975 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1976 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */
1977 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' third' */
1978 TEST_ASSERT(numFields == 3);
1979 TEST_ASSERT_UTEXT(str_first, fields[0]);
1980 TEST_ASSERT_UTEXT(str_second, fields[1]);
1981 TEST_ASSERT_UTEXT(str_third, fields[2]);
1982 TEST_ASSERT(fields[3] == NULL);
1983 }
1984 for(i = 0; i < numFields; i++) {
1985 utext_close(fields[i]);
1986 }
1987 }
1988
1989 uregex_close(re);
1990
1991
1992 /* Split with too few output strings available */
1993 status = U_ZERO_ERROR;
1994 re = uregex_openC(":", 0, NULL, &status);
1995 uregex_setText(re, textToSplit, -1, &status);
1996 TEST_ASSERT_SUCCESS(status);
1997
1998 /* The TEST_ASSERT_SUCCESS call above should change too... */
1999 if(U_SUCCESS(status)) {
2000 fields[0] = NULL;
2001 fields[1] = NULL;
2002 fields[2] = &patternText;
2003 numFields = uregex_splitUText(re, fields, 2, &status);
2004 TEST_ASSERT_SUCCESS(status);
2005
2006 /* The TEST_ASSERT_SUCCESS call above should change too... */
2007 if(U_SUCCESS(status)) {
2008 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2009 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second: third */
2010 TEST_ASSERT(numFields == 2);
2011 TEST_ASSERT_UTEXT(str_first, fields[0]);
2012 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
2013 TEST_ASSERT(fields[2] == &patternText);
2014 }
2015 for(i = 0; i < numFields; i++) {
2016 utext_close(fields[i]);
2017 }
2018 }
2019
2020 uregex_close(re);
2021 }
2022
2023 /* splitUText(), part 2. Patterns with capture groups. The capture group text
2024 * comes out as additional fields. */
2025 {
2026 UChar textToSplit[80];
2027 UText *fields[10];
2028 int32_t numFields;
2029 int32_t i;
2030
2031 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit));
2032
2033 status = U_ZERO_ERROR;
2034 re = uregex_openC("<(.*?)>", 0, NULL, &status);
2035
2036 uregex_setText(re, textToSplit, -1, &status);
2037 TEST_ASSERT_SUCCESS(status);
2038
2039 /* The TEST_ASSERT_SUCCESS call above should change too... */
2040 if(U_SUCCESS(status)) {
2041 memset(fields, 0, sizeof(fields));
2042 numFields = uregex_splitUText(re, fields, 10, &status);
2043 TEST_ASSERT_SUCCESS(status);
2044
2045 /* The TEST_ASSERT_SUCCESS call above should change too... */
2046 if(U_SUCCESS(status)) {
2047 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2048 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2049 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2050 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2051 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2052
2053 TEST_ASSERT(numFields == 5);
2054 TEST_ASSERT_UTEXT(str_first, fields[0]);
2055 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2056 TEST_ASSERT_UTEXT(str_second, fields[2]);
2057 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2058 TEST_ASSERT_UTEXT(str_third, fields[4]);
2059 TEST_ASSERT(fields[5] == NULL);
2060 }
2061 for(i = 0; i < numFields; i++) {
2062 utext_close(fields[i]);
2063 }
2064 }
2065
2066 /* Split with too few output strings available (2) */
2067 status = U_ZERO_ERROR;
2068 fields[0] = NULL;
2069 fields[1] = NULL;
2070 fields[2] = &patternText;
2071 numFields = uregex_splitUText(re, fields, 2, &status);
2072 TEST_ASSERT_SUCCESS(status);
2073
2074 /* The TEST_ASSERT_SUCCESS call above should change too... */
2075 if(U_SUCCESS(status)) {
2076 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2077 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2078 TEST_ASSERT(numFields == 2);
2079 TEST_ASSERT_UTEXT(str_first, fields[0]);
2080 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2081 TEST_ASSERT(fields[2] == &patternText);
2082 }
2083 for(i = 0; i < numFields; i++) {
2084 utext_close(fields[i]);
2085 }
2086
2087
2088 /* Split with too few output strings available (3) */
2089 status = U_ZERO_ERROR;
2090 fields[0] = NULL;
2091 fields[1] = NULL;
2092 fields[2] = NULL;
2093 fields[3] = &patternText;
2094 numFields = uregex_splitUText(re, fields, 3, &status);
2095 TEST_ASSERT_SUCCESS(status);
2096
2097 /* The TEST_ASSERT_SUCCESS call above should change too... */
2098 if(U_SUCCESS(status)) {
2099 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2100 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2101 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2102 TEST_ASSERT(numFields == 3);
2103 TEST_ASSERT_UTEXT(str_first, fields[0]);
2104 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2105 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2106 TEST_ASSERT(fields[3] == &patternText);
2107 }
2108 for(i = 0; i < numFields; i++) {
2109 utext_close(fields[i]);
2110 }
2111
2112 /* Split with just enough output strings available (5) */
2113 status = U_ZERO_ERROR;
2114 fields[0] = NULL;
2115 fields[1] = NULL;
2116 fields[2] = NULL;
2117 fields[3] = NULL;
2118 fields[4] = NULL;
2119 fields[5] = &patternText;
2120 numFields = uregex_splitUText(re, fields, 5, &status);
2121 TEST_ASSERT_SUCCESS(status);
2122
2123 /* The TEST_ASSERT_SUCCESS call above should change too... */
2124 if(U_SUCCESS(status)) {
2125 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2126 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2127 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2128 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2129 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2130
2131 TEST_ASSERT(numFields == 5);
2132 TEST_ASSERT_UTEXT(str_first, fields[0]);
2133 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2134 TEST_ASSERT_UTEXT(str_second, fields[2]);
2135 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2136 TEST_ASSERT_UTEXT(str_third, fields[4]);
2137 TEST_ASSERT(fields[5] == &patternText);
2138 }
2139 for(i = 0; i < numFields; i++) {
2140 utext_close(fields[i]);
2141 }
2142
2143 /* Split, end of text is a field delimiter. */
2144 status = U_ZERO_ERROR;
2145 uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status);
2146 TEST_ASSERT_SUCCESS(status);
2147
2148 /* The TEST_ASSERT_SUCCESS call above should change too... */
2149 if(U_SUCCESS(status)) {
2150 memset(fields, 0, sizeof(fields));
2151 fields[9] = &patternText;
2152 numFields = uregex_splitUText(re, fields, 9, &status);
2153 TEST_ASSERT_SUCCESS(status);
2154
2155 /* The TEST_ASSERT_SUCCESS call above should change too... */
2156 if(U_SUCCESS(status)) {
2157 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2158 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2159 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2160 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2161 const char str_empty[] = { 0x00 };
2162
2163 TEST_ASSERT(numFields == 5);
2164 TEST_ASSERT_UTEXT(str_first, fields[0]);
2165 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2166 TEST_ASSERT_UTEXT(str_second, fields[2]);
2167 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2168 TEST_ASSERT_UTEXT(str_empty, fields[4]);
2169 TEST_ASSERT(fields[5] == NULL);
2170 TEST_ASSERT(fields[8] == NULL);
2171 TEST_ASSERT(fields[9] == &patternText);
2172 }
2173 for(i = 0; i < numFields; i++) {
2174 utext_close(fields[i]);
2175 }
2176 }
2177
2178 uregex_close(re);
2179 }
2180 utext_close(&patternText);
2181 }
2182
2183
TestRefreshInput(void)2184 static void TestRefreshInput(void) {
2185 /*
2186 * RefreshInput changes out the input of a URegularExpression without
2187 * changing anything else in the match state. Used with Java JNI,
2188 * when Java moves the underlying string storage. This test
2189 * runs a find() loop, moving the text after the first match.
2190 * The right number of matches should still be found.
2191 */
2192 UChar testStr[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */
2193 UChar movedStr[] = { 0, 0, 0, 0, 0, 0};
2194 UErrorCode status = U_ZERO_ERROR;
2195 URegularExpression *re;
2196 UText ut1 = UTEXT_INITIALIZER;
2197 UText ut2 = UTEXT_INITIALIZER;
2198
2199 re = uregex_openC("[ABC]", 0, 0, &status);
2200 TEST_ASSERT_SUCCESS(status);
2201
2202 utext_openUChars(&ut1, testStr, -1, &status);
2203 TEST_ASSERT_SUCCESS(status);
2204 uregex_setUText(re, &ut1, &status);
2205 TEST_ASSERT_SUCCESS(status);
2206
2207 /* Find the first match "A" in the original string */
2208 TEST_ASSERT(uregex_findNext(re, &status));
2209 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
2210
2211 /* Move the string, kill the original string. */
2212 u_strcpy(movedStr, testStr);
2213 u_memset(testStr, 0, u_strlen(testStr));
2214 utext_openUChars(&ut2, movedStr, -1, &status);
2215 TEST_ASSERT_SUCCESS(status);
2216 uregex_refreshUText(re, &ut2, &status);
2217 TEST_ASSERT_SUCCESS(status);
2218
2219 /* Find the following two matches, now working in the moved string. */
2220 TEST_ASSERT(uregex_findNext(re, &status));
2221 TEST_ASSERT(uregex_start(re, 0, &status) == 2);
2222 TEST_ASSERT(uregex_findNext(re, &status));
2223 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
2224 TEST_ASSERT(FALSE == uregex_findNext(re, &status));
2225
2226 uregex_close(re);
2227 }
2228
2229
TestBug8421(void)2230 static void TestBug8421(void) {
2231 /* Bug 8421: setTimeLimit on a regular expresssion before setting text to be matched
2232 * was failing.
2233 */
2234 URegularExpression *re;
2235 UErrorCode status = U_ZERO_ERROR;
2236 int32_t limit = -1;
2237
2238 re = uregex_openC("abc", 0, 0, &status);
2239 TEST_ASSERT_SUCCESS(status);
2240
2241 limit = uregex_getTimeLimit(re, &status);
2242 TEST_ASSERT_SUCCESS(status);
2243 TEST_ASSERT(limit == 0);
2244
2245 uregex_setTimeLimit(re, 100, &status);
2246 TEST_ASSERT_SUCCESS(status);
2247 limit = uregex_getTimeLimit(re, &status);
2248 TEST_ASSERT_SUCCESS(status);
2249 TEST_ASSERT(limit == 100);
2250
2251 uregex_close(re);
2252 }
2253
FindCallback(const void * context,int64_t matchIndex)2254 static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {
2255 return FALSE;
2256 }
2257
MatchCallback(const void * context,int32_t steps)2258 static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {
2259 return FALSE;
2260 }
2261
TestBug10815()2262 static void TestBug10815() {
2263 /* Bug 10815: uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
2264 * when the callback function specified by uregex_setMatchCallback() returns FALSE
2265 */
2266 URegularExpression *re;
2267 UErrorCode status = U_ZERO_ERROR;
2268 UChar text[100];
2269
2270
2271 // findNext() with a find progress callback function.
2272
2273 re = uregex_openC(".z", 0, 0, &status);
2274 TEST_ASSERT_SUCCESS(status);
2275
2276 u_uastrncpy(text, "Hello, World.", UPRV_LENGTHOF(text));
2277 uregex_setText(re, text, -1, &status);
2278 TEST_ASSERT_SUCCESS(status);
2279
2280 uregex_setFindProgressCallback(re, FindCallback, NULL, &status);
2281 TEST_ASSERT_SUCCESS(status);
2282
2283 uregex_findNext(re, &status);
2284 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2285
2286 uregex_close(re);
2287
2288 // findNext() with a match progress callback function.
2289
2290 status = U_ZERO_ERROR;
2291 re = uregex_openC("((xxx)*)*y", 0, 0, &status);
2292 TEST_ASSERT_SUCCESS(status);
2293
2294 // Pattern + this text gives an exponential time match. Without the callback to stop the match,
2295 // it will appear to be stuck in a (near) infinite loop.
2296 u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", UPRV_LENGTHOF(text));
2297 uregex_setText(re, text, -1, &status);
2298 TEST_ASSERT_SUCCESS(status);
2299
2300 uregex_setMatchCallback(re, MatchCallback, NULL, &status);
2301 TEST_ASSERT_SUCCESS(status);
2302
2303 uregex_findNext(re, &status);
2304 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2305
2306 uregex_close(re);
2307 }
2308
2309
2310 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
2311