• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /********************************************************************
2  * COPYRIGHT:
3  * Copyright (c) 2004-2015, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ********************************************************************/
6 /********************************************************************************
7 *
8 * File reapits.c
9 *
10 *********************************************************************************/
11 /*C API TEST FOR Regular Expressions */
12 /**
13 *   This is an API test for ICU regular expressions in C.  It doesn't test very many cases, and doesn't
14 *   try to test the full functionality.  It just calls each function and verifies that it
15 *   works on a basic level.
16 *
17 *   More complete testing of regular expression functionality is done with the C++ tests.
18 **/
19 
20 #include "unicode/utypes.h"
21 
22 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
23 
24 #include <stdlib.h>
25 #include <string.h>
26 #include "unicode/uloc.h"
27 #include "unicode/uregex.h"
28 #include "unicode/ustring.h"
29 #include "unicode/utext.h"
30 #include "cintltst.h"
31 #include "cmemory.h"
32 
/*
 * TEST_ASSERT_SUCCESS(status)
 *     Logs a data error, tagged with the invoking file and line, when the
 *     ICU error code in "status" indicates failure.
 *     Expands to a brace-enclosed block, not a do/while statement.
 */
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", \
                     __FILE__, __LINE__, u_errorName(status)); \
    } \
}
35 
/*
 * TEST_ASSERT(expr)
 *     Logs a test failure, tagged with the invoking file and line and the
 *     text of the expression, when "expr" compares equal to FALSE.
 *     Expands to a brace-enclosed block; at least one caller in this file
 *     invokes it without a trailing semicolon, so the form must stay a block.
 */
#define TEST_ASSERT(expr) { \
    if ((expr) == FALSE) { \
        log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", \
                __FILE__, __LINE__, #expr); \
    } \
}
38 
/*
 *   TEST_SETUP and TEST_TEARDOWN
 *         macros to handle the boilerplate around setting up regex test cases.
 *         parameters to setup:
 *              pattern:     The regex pattern, a (char *) null terminated C string.
 *              testString:  The string data, also a (char *) C string.
 *              flags:       Regex flags to set when compiling the pattern
 *
 *         Put arbitrary test code between SETUP and TEARDOWN.
 *         "re" is the compiled, ready-to-go regular expression.
 *
 *         The invoking function must have variables named "status" and "re"
 *         in scope; both are assigned here.  TEST_SETUP opens a scope and an
 *         if-block that are closed only by TEST_TEARDOWN, so the two macros
 *         must always be used as a pair.
 *
 *         The test code between the macros runs only when "status" is still
 *         a success code after setup.  If the UChar copy of testString cannot
 *         be allocated, status is set to U_MEMORY_ALLOCATION_ERROR (and
 *         reported) instead of copying through a NULL pointer.
 */
#define TEST_SETUP(pattern, testString, flags) {  \
    UChar   *srcString = NULL;  \
    status = U_ZERO_ERROR; \
    re = uregex_openC(pattern, flags, NULL, &status);  \
    TEST_ASSERT_SUCCESS(status);   \
    srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
    if (srcString == NULL) { \
        status = U_MEMORY_ALLOCATION_ERROR; \
    } else { \
        u_uastrncpy(srcString, testString, (int32_t)strlen(testString)+1); \
        uregex_setText(re, srcString, -1, &status); \
    } \
    TEST_ASSERT_SUCCESS(status);  \
    if (U_SUCCESS(status)) {
60 
/*
 * TEST_TEARDOWN
 *     Counterpart of TEST_SETUP.  Closes the if-block opened by TEST_SETUP,
 *     reports any failure left in "status", closes "re", frees the UChar
 *     copy of the test string, and closes the scope opened by TEST_SETUP.
 */
#define TEST_TEARDOWN \
    } \
    TEST_ASSERT_SUCCESS(status); \
    uregex_close(re); \
    free(srcString); \
    }
67 
68 
69 /**
70  * @param expected utf-8 array of bytes to be expected
71  */
test_assert_string(const char * expected,const UChar * actual,UBool nulTerm,const char * file,int line)72 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
73      char     buf_inside_macro[120];
74      int32_t  len = (int32_t)strlen(expected);
75      UBool    success;
76      if (nulTerm) {
77          u_austrncpy(buf_inside_macro, (actual), len+1);
78          buf_inside_macro[len+2] = 0;
79          success = (strcmp((expected), buf_inside_macro) == 0);
80      } else {
81          u_austrncpy(buf_inside_macro, (actual), len);
82          buf_inside_macro[len+1] = 0;
83          success = (strncmp((expected), buf_inside_macro, len) == 0);
84      }
85      if (success == FALSE) {
86          log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
87              file, line, (expected), buf_inside_macro);
88      }
89 }
90 
/* Forwards to test_assert_string(), supplying the invoking file and line. */
#define TEST_ASSERT_STRING(expected, actual, nulTerm) \
    test_assert_string((expected), (actual), (nulTerm), __FILE__, __LINE__)
92 
93 
equals_utf8_utext(const char * utf8,UText * utext)94 static UBool equals_utf8_utext(const char *utf8, UText *utext) {
95     int32_t u8i = 0;
96     UChar32 u8c = 0;
97     UChar32 utc = 0;
98     UBool   stringsEqual = TRUE;
99     utext_setNativeIndex(utext, 0);
100     for (;;) {
101         U8_NEXT_UNSAFE(utf8, u8i, u8c);
102         utc = utext_next32(utext);
103         if (u8c == 0 && utc == U_SENTINEL) {
104             break;
105         }
106         if (u8c != utc || u8c == 0) {
107             stringsEqual = FALSE;
108             break;
109         }
110     }
111     return stringsEqual;
112 }
113 
114 
/*
 * Check that the contents of a UText equal an expected UTF-8 string; on a
 * mismatch, log the expected string followed by a dump of the actual text.
 *
 * In the dump, code points strictly between 0x20 and 0x7e are written as
 * characters and all others as hex values.
 *
 * file and line identify the call site in the failure message.
 */
static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
    UChar32 cp;

    utext_setNativeIndex(actual, 0);
    if (equals_utf8_utext(expected, actual)) {
        return;
    }

    log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
    for (cp = utext_next32From(actual, 0); cp != U_SENTINEL; cp = UTEXT_NEXT32(actual)) {
        if (cp <= 0x20 || cp >= 0x7e) {
            log_err("%#x", cp);
        } else {
            log_err("%c", cp);
        }
    }
    log_err("\"\n");
}
132 
/*
 * TEST_ASSERT_UTEXT(const char *expected, UText *actual)
 *     Forwards to test_assert_utext(), supplying the invoking file and line.
 *     Note:  expected is a UTF-8 encoded string, _not_ the system code page.
 */
#define TEST_ASSERT_UTEXT(expected, actual) \
    test_assert_utext((expected), (actual), __FILE__, __LINE__)
138 
testUTextEqual(UText * uta,UText * utb)139 static UBool testUTextEqual(UText *uta, UText *utb) {
140     UChar32 ca = 0;
141     UChar32 cb = 0;
142     utext_setNativeIndex(uta, 0);
143     utext_setNativeIndex(utb, 0);
144     do {
145         ca = utext_next32(uta);
146         cb = utext_next32(utb);
147         if (ca != cb) {
148             break;
149         }
150     } while (ca != U_SENTINEL);
151     return ca == cb;
152 }
153 
154 
155 
156 
157 static void TestRegexCAPI(void);
158 static void TestBug4315(void);
159 static void TestUTextAPI(void);
160 static void TestRefreshInput(void);
161 static void TestBug8421(void);
162 static void TestBug10815(void);
163 
164 void addURegexTest(TestNode** root);
165 
/*
 * Register every regular expression C API test in this file with the test
 * tree at "root", each under a "regex/" path.
 */
void addURegexTest(TestNode **root) {
    addTest(root, TestRegexCAPI,    "regex/TestRegexCAPI");
    addTest(root, TestBug4315,      "regex/TestBug4315");
    addTest(root, TestUTextAPI,     "regex/TestUTextAPI");
    addTest(root, TestRefreshInput, "regex/TestRefreshInput");
    addTest(root, TestBug8421,      "regex/TestBug8421");
    addTest(root, TestBug10815,     "regex/TestBug10815");
}
175 
176 /*
177  * Call back function and context struct used for testing
178  *    regular expression user callbacks.  This test is mostly the same as
179  *   the corresponding C++ test in intltest.
180  */
/* State shared between a test and TestCallbackFn through the callback's context pointer. */
typedef struct callBackContext {
    int32_t maxCalls;   /* TestCallbackFn returns FALSE once numCalls reaches this value */
    int32_t numCalls;   /* number of times TestCallbackFn has been invoked */
    int32_t lastSteps;  /* "steps" argument seen on the previous invocation */
} callBackContext;
186 
187 static UBool U_EXPORT2 U_CALLCONV
TestCallbackFn(const void * context,int32_t steps)188 TestCallbackFn(const void *context, int32_t steps) {
189   callBackContext  *info = (callBackContext *)context;
190   if (info->lastSteps+1 != steps) {
191       log_err("incorrect steps in callback.  Expected %d, got %d\n", info->lastSteps+1, steps);
192   }
193   info->lastSteps = steps;
194   info->numCalls++;
195   return (info->numCalls < info->maxCalls);
196 }
197 
198 /*
199  *   Regular Expression C API Tests
200  */
TestRegexCAPI(void)201 static void TestRegexCAPI(void) {
202     UErrorCode           status = U_ZERO_ERROR;
203     URegularExpression  *re;
204     UChar                pat[200];
205     UChar               *minus1;
206 
207     memset(&minus1, -1, sizeof(minus1));
208 
209     /* Mimimalist open/close */
210     u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
211     re = uregex_open(pat, -1, 0, 0, &status);
212     if (U_FAILURE(status)) {
213          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
214          return;
215     }
216     uregex_close(re);
217 
218     /* Open with all flag values set */
219     status = U_ZERO_ERROR;
220     re = uregex_open(pat, -1,
221         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
222         0, &status);
223     TEST_ASSERT_SUCCESS(status);
224     uregex_close(re);
225 
226     /* Open with an invalid flag */
227     status = U_ZERO_ERROR;
228     re = uregex_open(pat, -1, 0x40000000, 0, &status);
229     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
230     uregex_close(re);
231 
232     /* Open with an unimplemented flag */
233     status = U_ZERO_ERROR;
234     re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
235     TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
236     uregex_close(re);
237 
238     /* openC with an invalid parameter */
239     status = U_ZERO_ERROR;
240     re = uregex_openC(NULL,
241         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
242     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
243 
244     /* openC with an invalid parameter */
245     status = U_USELESS_COLLATOR_ERROR;
246     re = uregex_openC(NULL,
247         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
248     TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
249 
250     /* openC   open from a C string */
251     {
252         const UChar   *p;
253         int32_t  len;
254         status = U_ZERO_ERROR;
255         re = uregex_openC("abc*", 0, 0, &status);
256         TEST_ASSERT_SUCCESS(status);
257         p = uregex_pattern(re, &len, &status);
258         TEST_ASSERT_SUCCESS(status);
259 
260         /* The TEST_ASSERT_SUCCESS above should change too... */
261         if(U_SUCCESS(status)) {
262             u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
263             TEST_ASSERT(u_strcmp(pat, p) == 0);
264             TEST_ASSERT(len==(int32_t)strlen("abc*"));
265         }
266 
267         uregex_close(re);
268 
269         /*  TODO:  Open with ParseError parameter */
270     }
271 
272     /*
273      *  clone
274      */
275     {
276         URegularExpression *clone1;
277         URegularExpression *clone2;
278         URegularExpression *clone3;
279         UChar  testString1[30];
280         UChar  testString2[30];
281         UBool  result;
282 
283 
284         status = U_ZERO_ERROR;
285         re = uregex_openC("abc*", 0, 0, &status);
286         TEST_ASSERT_SUCCESS(status);
287         clone1 = uregex_clone(re, &status);
288         TEST_ASSERT_SUCCESS(status);
289         TEST_ASSERT(clone1 != NULL);
290 
291         status = U_ZERO_ERROR;
292         clone2 = uregex_clone(re, &status);
293         TEST_ASSERT_SUCCESS(status);
294         TEST_ASSERT(clone2 != NULL);
295         uregex_close(re);
296 
297         status = U_ZERO_ERROR;
298         clone3 = uregex_clone(clone2, &status);
299         TEST_ASSERT_SUCCESS(status);
300         TEST_ASSERT(clone3 != NULL);
301 
302         u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
303         u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
304 
305         status = U_ZERO_ERROR;
306         uregex_setText(clone1, testString1, -1, &status);
307         TEST_ASSERT_SUCCESS(status);
308         result = uregex_lookingAt(clone1, 0, &status);
309         TEST_ASSERT_SUCCESS(status);
310         TEST_ASSERT(result==TRUE);
311 
312         status = U_ZERO_ERROR;
313         uregex_setText(clone2, testString2, -1, &status);
314         TEST_ASSERT_SUCCESS(status);
315         result = uregex_lookingAt(clone2, 0, &status);
316         TEST_ASSERT_SUCCESS(status);
317         TEST_ASSERT(result==FALSE);
318         result = uregex_find(clone2, 0, &status);
319         TEST_ASSERT_SUCCESS(status);
320         TEST_ASSERT(result==TRUE);
321 
322         uregex_close(clone1);
323         uregex_close(clone2);
324         uregex_close(clone3);
325 
326     }
327 
328     /*
329      *  pattern()
330     */
331     {
332         const UChar  *resultPat;
333         int32_t       resultLen;
334         u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat));
335         status = U_ZERO_ERROR;
336         re = uregex_open(pat, -1, 0, NULL, &status);
337         resultPat = uregex_pattern(re, &resultLen, &status);
338         TEST_ASSERT_SUCCESS(status);
339 
340         /* The TEST_ASSERT_SUCCESS above should change too... */
341         if (U_SUCCESS(status)) {
342             TEST_ASSERT(resultLen == -1);
343             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
344         }
345 
346         uregex_close(re);
347 
348         status = U_ZERO_ERROR;
349         re = uregex_open(pat, 3, 0, NULL, &status);
350         resultPat = uregex_pattern(re, &resultLen, &status);
351         TEST_ASSERT_SUCCESS(status);
352         TEST_ASSERT_SUCCESS(status);
353 
354         /* The TEST_ASSERT_SUCCESS above should change too... */
355         if (U_SUCCESS(status)) {
356             TEST_ASSERT(resultLen == 3);
357             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
358             TEST_ASSERT(u_strlen(resultPat) == 3);
359         }
360 
361         uregex_close(re);
362     }
363 
364     /*
365      *  flags()
366      */
367     {
368         int32_t  t;
369 
370         status = U_ZERO_ERROR;
371         re = uregex_open(pat, -1, 0, NULL, &status);
372         t  = uregex_flags(re, &status);
373         TEST_ASSERT_SUCCESS(status);
374         TEST_ASSERT(t == 0);
375         uregex_close(re);
376 
377         status = U_ZERO_ERROR;
378         re = uregex_open(pat, -1, 0, NULL, &status);
379         t  = uregex_flags(re, &status);
380         TEST_ASSERT_SUCCESS(status);
381         TEST_ASSERT(t == 0);
382         uregex_close(re);
383 
384         status = U_ZERO_ERROR;
385         re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
386         t  = uregex_flags(re, &status);
387         TEST_ASSERT_SUCCESS(status);
388         TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
389         uregex_close(re);
390     }
391 
392     /*
393      *  setText() and lookingAt()
394      */
395     {
396         UChar  text1[50];
397         UChar  text2[50];
398         UBool  result;
399 
400         u_uastrncpy(text1, "abcccd",  UPRV_LENGTHOF(text1));
401         u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
402         status = U_ZERO_ERROR;
403         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
404         re = uregex_open(pat, -1, 0, NULL, &status);
405         TEST_ASSERT_SUCCESS(status);
406 
407         /* Operation before doing a setText should fail... */
408         status = U_ZERO_ERROR;
409         uregex_lookingAt(re, 0, &status);
410         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
411 
412         status = U_ZERO_ERROR;
413         uregex_setText(re, text1, -1, &status);
414         result = uregex_lookingAt(re, 0, &status);
415         TEST_ASSERT(result == TRUE);
416         TEST_ASSERT_SUCCESS(status);
417 
418         status = U_ZERO_ERROR;
419         uregex_setText(re, text2, -1, &status);
420         result = uregex_lookingAt(re, 0, &status);
421         TEST_ASSERT(result == FALSE);
422         TEST_ASSERT_SUCCESS(status);
423 
424         status = U_ZERO_ERROR;
425         uregex_setText(re, text1, -1, &status);
426         result = uregex_lookingAt(re, 0, &status);
427         TEST_ASSERT(result == TRUE);
428         TEST_ASSERT_SUCCESS(status);
429 
430         status = U_ZERO_ERROR;
431         uregex_setText(re, text1, 5, &status);
432         result = uregex_lookingAt(re, 0, &status);
433         TEST_ASSERT(result == FALSE);
434         TEST_ASSERT_SUCCESS(status);
435 
436         status = U_ZERO_ERROR;
437         uregex_setText(re, text1, 6, &status);
438         result = uregex_lookingAt(re, 0, &status);
439         TEST_ASSERT(result == TRUE);
440         TEST_ASSERT_SUCCESS(status);
441 
442         uregex_close(re);
443     }
444 
445 
446     /*
447      *  getText()
448      */
449     {
450         UChar    text1[50];
451         UChar    text2[50];
452         const UChar   *result;
453         int32_t  textLength;
454 
455         u_uastrncpy(text1, "abcccd",  UPRV_LENGTHOF(text1));
456         u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
457         status = U_ZERO_ERROR;
458         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
459         re = uregex_open(pat, -1, 0, NULL, &status);
460 
461         uregex_setText(re, text1, -1, &status);
462         result = uregex_getText(re, &textLength, &status);
463         TEST_ASSERT(result == text1);
464         TEST_ASSERT(textLength == -1);
465         TEST_ASSERT_SUCCESS(status);
466 
467         status = U_ZERO_ERROR;
468         uregex_setText(re, text2, 7, &status);
469         result = uregex_getText(re, &textLength, &status);
470         TEST_ASSERT(result == text2);
471         TEST_ASSERT(textLength == 7);
472         TEST_ASSERT_SUCCESS(status);
473 
474         status = U_ZERO_ERROR;
475         uregex_setText(re, text2, 4, &status);
476         result = uregex_getText(re, &textLength, &status);
477         TEST_ASSERT(result == text2);
478         TEST_ASSERT(textLength == 4);
479         TEST_ASSERT_SUCCESS(status);
480         uregex_close(re);
481     }
482 
483     /*
484      *  matches()
485      */
486     {
487         UChar   text1[50];
488         UBool   result;
489         int     len;
490         UChar   nullString[] = {0,0,0};
491 
492         u_uastrncpy(text1, "abcccde",  UPRV_LENGTHOF(text1));
493         status = U_ZERO_ERROR;
494         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
495         re = uregex_open(pat, -1, 0, NULL, &status);
496 
497         uregex_setText(re, text1, -1, &status);
498         result = uregex_matches(re, 0, &status);
499         TEST_ASSERT(result == FALSE);
500         TEST_ASSERT_SUCCESS(status);
501 
502         status = U_ZERO_ERROR;
503         uregex_setText(re, text1, 6, &status);
504         result = uregex_matches(re, 0, &status);
505         TEST_ASSERT(result == TRUE);
506         TEST_ASSERT_SUCCESS(status);
507 
508         status = U_ZERO_ERROR;
509         uregex_setText(re, text1, 6, &status);
510         result = uregex_matches(re, 1, &status);
511         TEST_ASSERT(result == FALSE);
512         TEST_ASSERT_SUCCESS(status);
513         uregex_close(re);
514 
515         status = U_ZERO_ERROR;
516         re = uregex_openC(".?", 0, NULL, &status);
517         uregex_setText(re, text1, -1, &status);
518         len = u_strlen(text1);
519         result = uregex_matches(re, len, &status);
520         TEST_ASSERT(result == TRUE);
521         TEST_ASSERT_SUCCESS(status);
522 
523         status = U_ZERO_ERROR;
524         uregex_setText(re, nullString, -1, &status);
525         TEST_ASSERT_SUCCESS(status);
526         result = uregex_matches(re, 0, &status);
527         TEST_ASSERT(result == TRUE);
528         TEST_ASSERT_SUCCESS(status);
529         uregex_close(re);
530     }
531 
532 
533     /*
534      *  lookingAt()    Used in setText test.
535      */
536 
537 
538     /*
539      *  find(), findNext, start, end, reset
540      */
541     {
542         UChar    text1[50];
543         UBool    result;
544         u_uastrncpy(text1, "012rx5rx890rxrx...",  UPRV_LENGTHOF(text1));
545         status = U_ZERO_ERROR;
546         re = uregex_openC("rx", 0, NULL, &status);
547 
548         uregex_setText(re, text1, -1, &status);
549         result = uregex_find(re, 0, &status);
550         TEST_ASSERT(result == TRUE);
551         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
552         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
553         TEST_ASSERT_SUCCESS(status);
554 
555         result = uregex_find(re, 9, &status);
556         TEST_ASSERT(result == TRUE);
557         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
558         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
559         TEST_ASSERT_SUCCESS(status);
560 
561         result = uregex_find(re, 14, &status);
562         TEST_ASSERT(result == FALSE);
563         TEST_ASSERT_SUCCESS(status);
564 
565         status = U_ZERO_ERROR;
566         uregex_reset(re, 0, &status);
567 
568         result = uregex_findNext(re, &status);
569         TEST_ASSERT(result == TRUE);
570         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
571         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
572         TEST_ASSERT_SUCCESS(status);
573 
574         result = uregex_findNext(re, &status);
575         TEST_ASSERT(result == TRUE);
576         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
577         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
578         TEST_ASSERT_SUCCESS(status);
579 
580         status = U_ZERO_ERROR;
581         uregex_reset(re, 12, &status);
582 
583         result = uregex_findNext(re, &status);
584         TEST_ASSERT(result == TRUE);
585         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
586         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
587         TEST_ASSERT_SUCCESS(status);
588 
589         result = uregex_findNext(re, &status);
590         TEST_ASSERT(result == FALSE);
591         TEST_ASSERT_SUCCESS(status);
592 
593         uregex_close(re);
594     }
595 
596     /*
597      *  groupCount
598      */
599     {
600         int32_t result;
601 
602         status = U_ZERO_ERROR;
603         re = uregex_openC("abc", 0, NULL, &status);
604         result = uregex_groupCount(re, &status);
605         TEST_ASSERT_SUCCESS(status);
606         TEST_ASSERT(result == 0);
607         uregex_close(re);
608 
609         status = U_ZERO_ERROR;
610         re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
611         result = uregex_groupCount(re, &status);
612         TEST_ASSERT_SUCCESS(status);
613         TEST_ASSERT(result == 3);
614         uregex_close(re);
615 
616     }
617 
618 
619     /*
620      *  group()
621      */
622     {
623         UChar    text1[80];
624         UChar    buf[80];
625         UBool    result;
626         int32_t  resultSz;
627         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  UPRV_LENGTHOF(text1));
628 
629         status = U_ZERO_ERROR;
630         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
631         TEST_ASSERT_SUCCESS(status);
632 
633 
634         uregex_setText(re, text1, -1, &status);
635         result = uregex_find(re, 0, &status);
636         TEST_ASSERT(result==TRUE);
637 
638         /*  Capture Group 0, the full match.  Should succeed.  */
639         status = U_ZERO_ERROR;
640         resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status);
641         TEST_ASSERT_SUCCESS(status);
642         TEST_ASSERT_STRING("abc interior def", buf, TRUE);
643         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
644 
645         /*  Capture group #1.  Should succeed. */
646         status = U_ZERO_ERROR;
647         resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status);
648         TEST_ASSERT_SUCCESS(status);
649         TEST_ASSERT_STRING(" interior ", buf, TRUE);
650         TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
651 
652         /*  Capture group out of range.  Error. */
653         status = U_ZERO_ERROR;
654         uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status);
655         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
656 
657         /* NULL buffer, pure pre-flight */
658         status = U_ZERO_ERROR;
659         resultSz = uregex_group(re, 0, NULL, 0, &status);
660         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
661         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
662 
663         /* Too small buffer, truncated string */
664         status = U_ZERO_ERROR;
665         memset(buf, -1, sizeof(buf));
666         resultSz = uregex_group(re, 0, buf, 5, &status);
667         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
668         TEST_ASSERT_STRING("abc i", buf, FALSE);
669         TEST_ASSERT(buf[5] == (UChar)0xffff);
670         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
671 
672         /* Output string just fits buffer, no NUL term. */
673         status = U_ZERO_ERROR;
674         resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
675         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
676         TEST_ASSERT_STRING("abc interior def", buf, FALSE);
677         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
678         TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
679 
680         uregex_close(re);
681 
682     }
683 
684     /*
685      *  Regions
686      */
687 
688 
689         /* SetRegion(), getRegion() do something  */
690         TEST_SETUP(".*", "0123456789ABCDEF", 0)
691         UChar resultString[40];
692         TEST_ASSERT(uregex_regionStart(re, &status) == 0);
693         TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
694         uregex_setRegion(re, 3, 6, &status);
695         TEST_ASSERT(uregex_regionStart(re, &status) == 3);
696         TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
697         TEST_ASSERT(uregex_findNext(re, &status));
698         TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3)
699         TEST_ASSERT_STRING("345", resultString, TRUE);
700         TEST_TEARDOWN;
701 
702         /* find(start=-1) uses regions   */
703         TEST_SETUP(".*", "0123456789ABCDEF", 0);
704         uregex_setRegion(re, 4, 6, &status);
705         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
706         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
707         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
708         TEST_TEARDOWN;
709 
710         /* find (start >=0) does not use regions   */
711         TEST_SETUP(".*", "0123456789ABCDEF", 0);
712         uregex_setRegion(re, 4, 6, &status);
713         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
714         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
715         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
716         TEST_TEARDOWN;
717 
718         /* findNext() obeys regions    */
719         TEST_SETUP(".", "0123456789ABCDEF", 0);
720         uregex_setRegion(re, 4, 6, &status);
721         TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
722         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
723         TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
724         TEST_ASSERT(uregex_start(re, 0, &status) == 5);
725         TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
726         TEST_TEARDOWN;
727 
728         /* matches(start=-1) uses regions                                           */
729         /*    Also, verify that non-greedy *? succeeds in finding the full match.   */
730         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
731         uregex_setRegion(re, 4, 6, &status);
732         TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
733         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
734         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
735         TEST_TEARDOWN;
736 
737         /* matches (start >=0) does not use regions       */
738         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
739         uregex_setRegion(re, 4, 6, &status);
740         TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
741         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
742         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
743         TEST_TEARDOWN;
744 
745         /* lookingAt(start=-1) uses regions                                         */
746         /*    Also, verify that non-greedy *? finds the first (shortest) match.     */
747         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
748         uregex_setRegion(re, 4, 6, &status);
749         TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
750         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
751         TEST_ASSERT(uregex_end(re, 0, &status) == 4);
752         TEST_TEARDOWN;
753 
754         /* lookingAt (start >=0) does not use regions  */
755         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
756         uregex_setRegion(re, 4, 6, &status);
757         TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
758         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
759         TEST_ASSERT(uregex_end(re, 0, &status) == 0);
760         TEST_TEARDOWN;
761 
762         /* hitEnd()       */
763         TEST_SETUP("[a-f]*", "abcdefghij", 0);
764         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
765         TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
766         TEST_TEARDOWN;
767 
768         TEST_SETUP("[a-f]*", "abcdef", 0);
769         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
770         TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
771         TEST_TEARDOWN;
772 
773         /* requireEnd   */
774         TEST_SETUP("abcd", "abcd", 0);
775         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
776         TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
777         TEST_TEARDOWN;
778 
779         TEST_SETUP("abcd$", "abcd", 0);
780         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
781         TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
782         TEST_TEARDOWN;
783 
784         /* anchoringBounds        */
785         TEST_SETUP("abc$", "abcdef", 0);
786         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
787         uregex_useAnchoringBounds(re, FALSE, &status);
788         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
789 
790         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
791         uregex_useAnchoringBounds(re, TRUE, &status);
792         uregex_setRegion(re, 0, 3, &status);
793         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
794         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
795         TEST_TEARDOWN;
796 
797         /* Transparent Bounds      */
798         TEST_SETUP("abc(?=def)", "abcdef", 0);
799         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
800         uregex_useTransparentBounds(re, TRUE, &status);
801         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
802 
803         uregex_useTransparentBounds(re, FALSE, &status);
804         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* No Region */
805         uregex_setRegion(re, 0, 3, &status);
806         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);   /* with region, opaque bounds */
807         uregex_useTransparentBounds(re, TRUE, &status);
808         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* with region, transparent bounds */
809         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
810         TEST_TEARDOWN;
811 
812 
813     /*
814      *  replaceFirst()
815      */
816     {
817         UChar    text1[80];
818         UChar    text2[80];
819         UChar    replText[80];
820         UChar    buf[80];
821         int32_t  resultSz;
822         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
823         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
824         u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
825 
826         status = U_ZERO_ERROR;
827         re = uregex_openC("x(.*?)x", 0, NULL, &status);
828         TEST_ASSERT_SUCCESS(status);
829 
830         /*  Normal case, with match */
831         uregex_setText(re, text1, -1, &status);
832         resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
833         TEST_ASSERT_SUCCESS(status);
834         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
835         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
836 
837         /* No match.  Text should copy to output with no changes.  */
838         status = U_ZERO_ERROR;
839         uregex_setText(re, text2, -1, &status);
840         resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
841         TEST_ASSERT_SUCCESS(status);
842         TEST_ASSERT_STRING("No match here.", buf, TRUE);
843         TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
844 
845         /*  Match, output just fills buffer, no termination warning. */
846         status = U_ZERO_ERROR;
847         uregex_setText(re, text1, -1, &status);
848         memset(buf, -1, sizeof(buf));
849         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
850         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
851         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
852         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
853         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
854 
855         /* Do the replaceFirst again, without first resetting anything.
856          *  Should give the same results.
857          */
858         status = U_ZERO_ERROR;
859         memset(buf, -1, sizeof(buf));
860         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
861         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
862         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
863         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
864         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
865 
866         /* NULL buffer, zero buffer length */
867         status = U_ZERO_ERROR;
868         resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
869         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
870         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
871 
872         /* Buffer too small by one */
873         status = U_ZERO_ERROR;
874         memset(buf, -1, sizeof(buf));
875         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
876         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
877         TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
878         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
879         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
880 
881         uregex_close(re);
882     }
883 
884 
885     /*
886      *  replaceAll()
887      */
888     {
889         UChar    text1[80];          /*  "Replace xaax x1x x...x." */
890         UChar    text2[80];          /*  "No match Here"           */
891         UChar    replText[80];       /*  "<$1>"                    */
892         UChar    replText2[80];      /*  "<<$1>>"                  */
893         const char * pattern = "x(.*?)x";
894         const char * expectedResult = "Replace <aa> <1> <...>.";
895         const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
896         UChar    buf[80];
897         int32_t  resultSize;
898         int32_t  expectedResultSize;
899         int32_t  expectedResultSize2;
900         int32_t  i;
901 
902         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
903         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
904         u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
905         u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2));
906         expectedResultSize = strlen(expectedResult);
907         expectedResultSize2 = strlen(expectedResult2);
908 
909         status = U_ZERO_ERROR;
910         re = uregex_openC(pattern, 0, NULL, &status);
911         TEST_ASSERT_SUCCESS(status);
912 
913         /*  Normal case, with match */
914         uregex_setText(re, text1, -1, &status);
915         resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
916         TEST_ASSERT_SUCCESS(status);
917         TEST_ASSERT_STRING(expectedResult, buf, TRUE);
918         TEST_ASSERT(resultSize == expectedResultSize);
919 
920         /* No match.  Text should copy to output with no changes.  */
921         status = U_ZERO_ERROR;
922         uregex_setText(re, text2, -1, &status);
923         resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
924         TEST_ASSERT_SUCCESS(status);
925         TEST_ASSERT_STRING("No match here.", buf, TRUE);
926         TEST_ASSERT(resultSize == u_strlen(text2));
927 
928         /*  Match, output just fills buffer, no termination warning. */
929         status = U_ZERO_ERROR;
930         uregex_setText(re, text1, -1, &status);
931         memset(buf, -1, sizeof(buf));
932         resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
933         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
934         TEST_ASSERT_STRING(expectedResult, buf, FALSE);
935         TEST_ASSERT(resultSize == expectedResultSize);
936         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
937 
938         /* Do the replaceAll again, without first resetting anything.
939          *  Should give the same results.
940          */
941         status = U_ZERO_ERROR;
942         memset(buf, -1, sizeof(buf));
943         resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
944         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
945         TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
946         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
947         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
948 
949         /* NULL buffer, zero buffer length */
950         status = U_ZERO_ERROR;
951         resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
952         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
953         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
954 
955         /* Buffer too small.  Try every size, which will tickle edge cases
956          * in uregex_appendReplacement (used by replaceAll)   */
957         for (i=0; i<expectedResultSize; i++) {
958             char  expected[80];
959             status = U_ZERO_ERROR;
960             memset(buf, -1, sizeof(buf));
961             resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
962             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
963             strcpy(expected, expectedResult);
964             expected[i] = 0;
965             TEST_ASSERT_STRING(expected, buf, FALSE);
966             TEST_ASSERT(resultSize == expectedResultSize);
967             TEST_ASSERT(buf[i] == (UChar)0xffff);
968         }
969 
970         /* Buffer too small.  Same as previous test, except this time the replacement
971          * text is longer than the match capture group, making the length of the complete
972          * replacement longer than the original string.
973          */
974         for (i=0; i<expectedResultSize2; i++) {
975             char  expected[80];
976             status = U_ZERO_ERROR;
977             memset(buf, -1, sizeof(buf));
978             resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
979             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
980             strcpy(expected, expectedResult2);
981             expected[i] = 0;
982             TEST_ASSERT_STRING(expected, buf, FALSE);
983             TEST_ASSERT(resultSize == expectedResultSize2);
984             TEST_ASSERT(buf[i] == (UChar)0xffff);
985         }
986 
987 
988         uregex_close(re);
989     }
990 
991 
992     /*
993      *  appendReplacement()
994      */
995     {
996         UChar    text[100];
997         UChar    repl[100];
998         UChar    buf[100];
999         UChar   *bufPtr;
1000         int32_t  bufCap;
1001 
1002 
1003         status = U_ZERO_ERROR;
1004         re = uregex_openC(".*", 0, 0, &status);
1005         TEST_ASSERT_SUCCESS(status);
1006 
1007         u_uastrncpy(text, "whatever",  UPRV_LENGTHOF(text));
1008         u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1009         uregex_setText(re, text, -1, &status);
1010 
1011         /* match covers whole target string */
1012         uregex_find(re, 0, &status);
1013         TEST_ASSERT_SUCCESS(status);
1014         bufPtr = buf;
1015         bufCap = UPRV_LENGTHOF(buf);
1016         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1017         TEST_ASSERT_SUCCESS(status);
1018         TEST_ASSERT_STRING("some other", buf, TRUE);
1019 
1020         /* Match has \u \U escapes */
1021         uregex_find(re, 0, &status);
1022         TEST_ASSERT_SUCCESS(status);
1023         bufPtr = buf;
1024         bufCap = UPRV_LENGTHOF(buf);
1025         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1026         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1027         TEST_ASSERT_SUCCESS(status);
1028         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1029 
1030         /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1031         status = U_ZERO_ERROR;
1032         uregex_find(re, 0, &status);
1033         TEST_ASSERT_SUCCESS(status);
1034         bufPtr = buf;
1035         status = U_BUFFER_OVERFLOW_ERROR;
1036         uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
1037         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1038 
1039         uregex_close(re);
1040     }
1041 
1042 
1043     /*
1044      *  appendTail().   Checked in ReplaceFirst(), replaceAll().
1045      */
1046 
1047     /*
1048      *  split()
1049      */
1050     {
1051         UChar    textToSplit[80];
1052         UChar    text2[80];
1053         UChar    buf[200];
1054         UChar    *fields[10];
1055         int32_t  numFields;
1056         int32_t  requiredCapacity;
1057         int32_t  spaceNeeded;
1058         int32_t  sz;
1059 
1060         u_uastrncpy(textToSplit, "first : second:  third",  UPRV_LENGTHOF(textToSplit));
1061         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1062 
1063         status = U_ZERO_ERROR;
1064         re = uregex_openC(":", 0, NULL, &status);
1065 
1066 
1067         /*  Simple split */
1068 
1069         uregex_setText(re, textToSplit, -1, &status);
1070         TEST_ASSERT_SUCCESS(status);
1071 
1072         /* The TEST_ASSERT_SUCCESS call above should change too... */
1073         if (U_SUCCESS(status)) {
1074             memset(fields, -1, sizeof(fields));
1075             numFields =
1076                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1077             TEST_ASSERT_SUCCESS(status);
1078 
1079             /* The TEST_ASSERT_SUCCESS call above should change too... */
1080             if(U_SUCCESS(status)) {
1081                 TEST_ASSERT(numFields == 3);
1082                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1083                 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1084                 TEST_ASSERT_STRING("  third", fields[2], TRUE);
1085                 TEST_ASSERT(fields[3] == NULL);
1086 
1087                 spaceNeeded = u_strlen(textToSplit) -
1088                             (numFields - 1)  +  /* Field delimiters do not appear in output */
1089                             numFields;          /* Each field gets a NUL terminator */
1090 
1091                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1092             }
1093         }
1094 
1095         uregex_close(re);
1096 
1097 
1098         /*  Split with too few output strings available */
1099         status = U_ZERO_ERROR;
1100         re = uregex_openC(":", 0, NULL, &status);
1101         uregex_setText(re, textToSplit, -1, &status);
1102         TEST_ASSERT_SUCCESS(status);
1103 
1104         /* The TEST_ASSERT_SUCCESS call above should change too... */
1105         if(U_SUCCESS(status)) {
1106             memset(fields, -1, sizeof(fields));
1107             numFields =
1108                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1109             TEST_ASSERT_SUCCESS(status);
1110 
1111             /* The TEST_ASSERT_SUCCESS call above should change too... */
1112             if(U_SUCCESS(status)) {
1113                 TEST_ASSERT(numFields == 2);
1114                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1115                 TEST_ASSERT_STRING(" second:  third", fields[1], TRUE);
1116                 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1117 
1118                 spaceNeeded = u_strlen(textToSplit) -
1119                             (numFields - 1)  +  /* Field delimiters do not appear in output */
1120                             numFields;          /* Each field gets a NUL terminator */
1121 
1122                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1123 
1124                 /* Split with a range of output buffer sizes.  */
1125                 spaceNeeded = u_strlen(textToSplit) -
1126                     (numFields - 1)  +  /* Field delimiters do not appear in output */
1127                     numFields;          /* Each field gets a NUL terminator */
1128 
1129                 for (sz=0; sz < spaceNeeded+1; sz++) {
1130                     memset(fields, -1, sizeof(fields));
1131                     status = U_ZERO_ERROR;
1132                     numFields =
1133                         uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1134                     if (sz >= spaceNeeded) {
1135                         TEST_ASSERT_SUCCESS(status);
1136                         TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1137                         TEST_ASSERT_STRING(" second", fields[1], TRUE);
1138                         TEST_ASSERT_STRING("  third", fields[2], TRUE);
1139                     } else {
1140                         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1141                     }
1142                     TEST_ASSERT(numFields == 3);
1143                     TEST_ASSERT(fields[3] == NULL);
1144                     TEST_ASSERT(spaceNeeded == requiredCapacity);
1145                 }
1146             }
1147         }
1148 
1149         uregex_close(re);
1150     }
1151 
1152 
1153 
1154 
1155     /* Split(), part 2.  Patterns with capture groups.  The capture group text
1156      *                   comes out as additional fields.  */
1157     {
1158         UChar    textToSplit[80];
1159         UChar    buf[200];
1160         UChar    *fields[10];
1161         int32_t  numFields;
1162         int32_t  requiredCapacity;
1163         int32_t  spaceNeeded;
1164         int32_t  sz;
1165 
1166         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  UPRV_LENGTHOF(textToSplit));
1167 
1168         status = U_ZERO_ERROR;
1169         re = uregex_openC("<(.*?)>", 0, NULL, &status);
1170 
1171         uregex_setText(re, textToSplit, -1, &status);
1172         TEST_ASSERT_SUCCESS(status);
1173 
1174         /* The TEST_ASSERT_SUCCESS call above should change too... */
1175         if(U_SUCCESS(status)) {
1176             memset(fields, -1, sizeof(fields));
1177             numFields =
1178                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1179             TEST_ASSERT_SUCCESS(status);
1180 
1181             /* The TEST_ASSERT_SUCCESS call above should change too... */
1182             if(U_SUCCESS(status)) {
1183                 TEST_ASSERT(numFields == 5);
1184                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1185                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1186                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1187                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1188                 TEST_ASSERT_STRING("  third", fields[4], TRUE);
1189                 TEST_ASSERT(fields[5] == NULL);
1190                 spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1191                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1192             }
1193         }
1194 
1195         /*  Split with too few output strings available (2) */
1196         status = U_ZERO_ERROR;
1197         memset(fields, -1, sizeof(fields));
1198         numFields =
1199             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1200         TEST_ASSERT_SUCCESS(status);
1201 
1202         /* The TEST_ASSERT_SUCCESS call above should change too... */
1203         if(U_SUCCESS(status)) {
1204             TEST_ASSERT(numFields == 2);
1205             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1206             TEST_ASSERT_STRING(" second<tag-b>  third", fields[1], TRUE);
1207             TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1208 
1209             spaceNeeded = strlen("first . second<tag-b>  third.");  /* "." at NUL positions */
1210             TEST_ASSERT(spaceNeeded == requiredCapacity);
1211         }
1212 
1213         /*  Split with too few output strings available (3) */
1214         status = U_ZERO_ERROR;
1215         memset(fields, -1, sizeof(fields));
1216         numFields =
1217             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status);
1218         TEST_ASSERT_SUCCESS(status);
1219 
1220         /* The TEST_ASSERT_SUCCESS call above should change too... */
1221         if(U_SUCCESS(status)) {
1222             TEST_ASSERT(numFields == 3);
1223             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1224             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1225             TEST_ASSERT_STRING(" second<tag-b>  third", fields[2], TRUE);
1226             TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1227 
1228             spaceNeeded = strlen("first .tag-a. second<tag-b>  third.");  /* "." at NUL positions */
1229             TEST_ASSERT(spaceNeeded == requiredCapacity);
1230         }
1231 
1232         /*  Split with just enough output strings available (5) */
1233         status = U_ZERO_ERROR;
1234         memset(fields, -1, sizeof(fields));
1235         numFields =
1236             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status);
1237         TEST_ASSERT_SUCCESS(status);
1238 
1239         /* The TEST_ASSERT_SUCCESS call above should change too... */
1240         if(U_SUCCESS(status)) {
1241             TEST_ASSERT(numFields == 5);
1242             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1243             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1244             TEST_ASSERT_STRING(" second", fields[2], TRUE);
1245             TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1246             TEST_ASSERT_STRING("  third", fields[4], TRUE);
1247             TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
1248 
1249             spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1250             TEST_ASSERT(spaceNeeded == requiredCapacity);
1251         }
1252 
1253         /* Split, end of text is a field delimiter.   */
1254         status = U_ZERO_ERROR;
1255         sz = strlen("first <tag-a> second<tag-b>");
1256         uregex_setText(re, textToSplit, sz, &status);
1257         TEST_ASSERT_SUCCESS(status);
1258 
1259         /* The TEST_ASSERT_SUCCESS call above should change too... */
1260         if(U_SUCCESS(status)) {
1261             memset(fields, -1, sizeof(fields));
1262             numFields =
1263                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status);
1264             TEST_ASSERT_SUCCESS(status);
1265 
1266             /* The TEST_ASSERT_SUCCESS call above should change too... */
1267             if(U_SUCCESS(status)) {
1268                 TEST_ASSERT(numFields == 5);
1269                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1270                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1271                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1272                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1273                 TEST_ASSERT_STRING("",        fields[4], TRUE);
1274                 TEST_ASSERT(fields[5] == NULL);
1275                 TEST_ASSERT(fields[8] == NULL);
1276                 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
1277                 spaceNeeded = strlen("first .tag-a. second.tag-b..");  /* "." at NUL positions */
1278                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1279             }
1280         }
1281 
1282         uregex_close(re);
1283     }
1284 
1285     /*
1286      * set/getTimeLimit
1287      */
1288      TEST_SETUP("abc$", "abcdef", 0);
1289      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1290      uregex_setTimeLimit(re, 1000, &status);
1291      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1292      TEST_ASSERT_SUCCESS(status);
1293      uregex_setTimeLimit(re, -1, &status);
1294      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1295      status = U_ZERO_ERROR;
1296      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1297      TEST_TEARDOWN;
1298 
1299      /*
1300       * set/get Stack Limit
1301       */
1302      TEST_SETUP("abc$", "abcdef", 0);
1303      TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1304      uregex_setStackLimit(re, 40000, &status);
1305      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1306      TEST_ASSERT_SUCCESS(status);
1307      uregex_setStackLimit(re, -1, &status);
1308      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1309      status = U_ZERO_ERROR;
1310      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1311      TEST_TEARDOWN;
1312 
1313 
1314      /*
1315       * Get/Set callback functions
1316       *     This test is copied from intltest regex/Callbacks
1317       *     The pattern and test data will run long enough to cause the callback
1318       *       to be invoked.  The nested '+' operators give exponential time
1319       *       behavior with increasing string length.
1320       */
1321      TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
1322      callBackContext cbInfo = {4, 0, 0};
1323      const void     *pContext   = &cbInfo;
1324      URegexMatchCallback    *returnedFn = &TestCallbackFn;
1325 
1326      /*  Getting the callback fn when it hasn't been set must return NULL  */
1327      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1328      TEST_ASSERT_SUCCESS(status);
1329      TEST_ASSERT(returnedFn == NULL);
1330      TEST_ASSERT(pContext == NULL);
1331 
1332      /* Set the callback and do a match.                                  */
1333      /* The callback function should record that it has been called.      */
1334      uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1335      TEST_ASSERT_SUCCESS(status);
1336      TEST_ASSERT(cbInfo.numCalls == 0);
1337      TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
1338      TEST_ASSERT_SUCCESS(status);
1339      TEST_ASSERT(cbInfo.numCalls > 0);
1340 
1341      /* Getting the callback should return the values that were set above.  */
1342      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1343      TEST_ASSERT(returnedFn == &TestCallbackFn);
1344      TEST_ASSERT(pContext == &cbInfo);
1345 
1346      TEST_TEARDOWN;
1347 }
1348 
1349 
1350 
TestBug4315(void)1351 static void TestBug4315(void) {
1352     UErrorCode      theICUError = U_ZERO_ERROR;
1353     URegularExpression *theRegEx;
1354     UChar           *textBuff;
1355     const char      *thePattern;
1356     UChar            theString[100];
1357     UChar           *destFields[24];
1358     int32_t         neededLength1;
1359     int32_t         neededLength2;
1360 
1361     int32_t         wordCount = 0;
1362     int32_t         destFieldsSize = 24;
1363 
1364     thePattern  = "ck ";
1365     u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1366 
1367     /* open a regex */
1368     theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1369     TEST_ASSERT_SUCCESS(theICUError);
1370 
1371     /* set the input string */
1372     uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1373     TEST_ASSERT_SUCCESS(theICUError);
1374 
1375     /* split */
1376     /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1377      *  error occurs! */
1378     wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1379         destFieldsSize, &theICUError);
1380 
1381     TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1382     TEST_ASSERT(wordCount==3);
1383 
1384     if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1385     {
1386         theICUError = U_ZERO_ERROR;
1387         textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1388         wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1389             destFields, destFieldsSize, &theICUError);
1390         TEST_ASSERT(wordCount==3);
1391         TEST_ASSERT_SUCCESS(theICUError);
1392         TEST_ASSERT(neededLength1 == neededLength2);
1393         TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
1394         TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
1395         TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
1396         TEST_ASSERT(destFields[3] == NULL);
1397         free(textBuff);
1398     }
1399     uregex_close(theRegEx);
1400 }
1401 
1402 /* Based on TestRegexCAPI() */
TestUTextAPI(void)1403 static void TestUTextAPI(void) {
1404     UErrorCode           status = U_ZERO_ERROR;
1405     URegularExpression  *re;
1406     UText                patternText = UTEXT_INITIALIZER;
1407     UChar                pat[200];
1408     const char           patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1409 
1410     /* Minimalist open/close */
1411     utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1412     re = uregex_openUText(&patternText, 0, 0, &status);
1413     if (U_FAILURE(status)) {
1414          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1415          utext_close(&patternText);
1416          return;
1417     }
1418     uregex_close(re);
1419 
1420     /* Open with all flag values set */
1421     status = U_ZERO_ERROR;
1422     re = uregex_openUText(&patternText,
1423         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1424         0, &status);
1425     TEST_ASSERT_SUCCESS(status);
1426     uregex_close(re);
1427 
1428     /* Open with an invalid flag */
1429     status = U_ZERO_ERROR;
1430     re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1431     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1432     uregex_close(re);
1433 
1434     /* open with an invalid parameter */
1435     status = U_ZERO_ERROR;
1436     re = uregex_openUText(NULL,
1437         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1438     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1439 
1440     /*
1441      *  clone
1442      */
1443     {
1444         URegularExpression *clone1;
1445         URegularExpression *clone2;
1446         URegularExpression *clone3;
1447         UChar  testString1[30];
1448         UChar  testString2[30];
1449         UBool  result;
1450 
1451 
1452         status = U_ZERO_ERROR;
1453         re = uregex_openUText(&patternText, 0, 0, &status);
1454         TEST_ASSERT_SUCCESS(status);
1455         clone1 = uregex_clone(re, &status);
1456         TEST_ASSERT_SUCCESS(status);
1457         TEST_ASSERT(clone1 != NULL);
1458 
1459         status = U_ZERO_ERROR;
1460         clone2 = uregex_clone(re, &status);
1461         TEST_ASSERT_SUCCESS(status);
1462         TEST_ASSERT(clone2 != NULL);
1463         uregex_close(re);
1464 
1465         status = U_ZERO_ERROR;
1466         clone3 = uregex_clone(clone2, &status);
1467         TEST_ASSERT_SUCCESS(status);
1468         TEST_ASSERT(clone3 != NULL);
1469 
1470         u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
1471         u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
1472 
1473         status = U_ZERO_ERROR;
1474         uregex_setText(clone1, testString1, -1, &status);
1475         TEST_ASSERT_SUCCESS(status);
1476         result = uregex_lookingAt(clone1, 0, &status);
1477         TEST_ASSERT_SUCCESS(status);
1478         TEST_ASSERT(result==TRUE);
1479 
1480         status = U_ZERO_ERROR;
1481         uregex_setText(clone2, testString2, -1, &status);
1482         TEST_ASSERT_SUCCESS(status);
1483         result = uregex_lookingAt(clone2, 0, &status);
1484         TEST_ASSERT_SUCCESS(status);
1485         TEST_ASSERT(result==FALSE);
1486         result = uregex_find(clone2, 0, &status);
1487         TEST_ASSERT_SUCCESS(status);
1488         TEST_ASSERT(result==TRUE);
1489 
1490         uregex_close(clone1);
1491         uregex_close(clone2);
1492         uregex_close(clone3);
1493 
1494     }
1495 
1496     /*
1497      *  pattern() and patternText()
1498      */
1499     {
1500         const UChar  *resultPat;
1501         int32_t       resultLen;
1502         UText        *resultText;
1503         const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1504         const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1505         u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */
1506         status = U_ZERO_ERROR;
1507 
1508         utext_openUTF8(&patternText, str_hello, -1, &status);
1509         re = uregex_open(pat, -1, 0, NULL, &status);
1510         resultPat = uregex_pattern(re, &resultLen, &status);
1511         TEST_ASSERT_SUCCESS(status);
1512 
1513         /* The TEST_ASSERT_SUCCESS above should change too... */
1514         if (U_SUCCESS(status)) {
1515             TEST_ASSERT(resultLen == -1);
1516             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1517         }
1518 
1519         resultText = uregex_patternUText(re, &status);
1520         TEST_ASSERT_SUCCESS(status);
1521         TEST_ASSERT_UTEXT(str_hello, resultText);
1522 
1523         uregex_close(re);
1524 
1525         status = U_ZERO_ERROR;
1526         re = uregex_open(pat, 3, 0, NULL, &status);
1527         resultPat = uregex_pattern(re, &resultLen, &status);
1528         TEST_ASSERT_SUCCESS(status);
1529 
1530         /* The TEST_ASSERT_SUCCESS above should change too... */
1531         if (U_SUCCESS(status)) {
1532             TEST_ASSERT(resultLen == 3);
1533             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1534             TEST_ASSERT(u_strlen(resultPat) == 3);
1535         }
1536 
1537         resultText = uregex_patternUText(re, &status);
1538         TEST_ASSERT_SUCCESS(status);
1539         TEST_ASSERT_UTEXT(str_hel, resultText);
1540 
1541         uregex_close(re);
1542     }
1543 
1544     /*
1545      *  setUText() and lookingAt()
1546      */
1547     {
1548         UText  text1 = UTEXT_INITIALIZER;
1549         UText  text2 = UTEXT_INITIALIZER;
1550         UBool  result;
1551         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1552         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1553         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1554         status = U_ZERO_ERROR;
1555         utext_openUTF8(&text1, str_abcccd, -1, &status);
1556         utext_openUTF8(&text2, str_abcccxd, -1, &status);
1557 
1558         utext_openUTF8(&patternText, str_abcd, -1, &status);
1559         re = uregex_openUText(&patternText, 0, NULL, &status);
1560         TEST_ASSERT_SUCCESS(status);
1561 
1562         /* Operation before doing a setText should fail... */
1563         status = U_ZERO_ERROR;
1564         uregex_lookingAt(re, 0, &status);
1565         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1566 
1567         status = U_ZERO_ERROR;
1568         uregex_setUText(re, &text1, &status);
1569         result = uregex_lookingAt(re, 0, &status);
1570         TEST_ASSERT(result == TRUE);
1571         TEST_ASSERT_SUCCESS(status);
1572 
1573         status = U_ZERO_ERROR;
1574         uregex_setUText(re, &text2, &status);
1575         result = uregex_lookingAt(re, 0, &status);
1576         TEST_ASSERT(result == FALSE);
1577         TEST_ASSERT_SUCCESS(status);
1578 
1579         status = U_ZERO_ERROR;
1580         uregex_setUText(re, &text1, &status);
1581         result = uregex_lookingAt(re, 0, &status);
1582         TEST_ASSERT(result == TRUE);
1583         TEST_ASSERT_SUCCESS(status);
1584 
1585         uregex_close(re);
1586         utext_close(&text1);
1587         utext_close(&text2);
1588     }
1589 
1590 
1591     /*
1592      *  getText() and getUText()
1593      */
1594     {
1595         UText  text1 = UTEXT_INITIALIZER;
1596         UText  text2 = UTEXT_INITIALIZER;
1597         UChar  text2Chars[20];
1598         UText  *resultText;
1599         const UChar   *result;
1600         int32_t  textLength;
1601         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1602         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1603         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1604 
1605 
1606         status = U_ZERO_ERROR;
1607         utext_openUTF8(&text1, str_abcccd, -1, &status);
1608         u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars));
1609         utext_openUChars(&text2, text2Chars, -1, &status);
1610 
1611         utext_openUTF8(&patternText, str_abcd, -1, &status);
1612         re = uregex_openUText(&patternText, 0, NULL, &status);
1613 
1614         /* First set a UText */
1615         uregex_setUText(re, &text1, &status);
1616         resultText = uregex_getUText(re, NULL, &status);
1617         TEST_ASSERT_SUCCESS(status);
1618         TEST_ASSERT(resultText != &text1);
1619         utext_setNativeIndex(resultText, 0);
1620         utext_setNativeIndex(&text1, 0);
1621         TEST_ASSERT(testUTextEqual(resultText, &text1));
1622         utext_close(resultText);
1623 
1624         result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
1625         (void)result;    /* Suppress set but not used warning. */
1626         TEST_ASSERT(textLength == -1 || textLength == 6);
1627         resultText = uregex_getUText(re, NULL, &status);
1628         TEST_ASSERT_SUCCESS(status);
1629         TEST_ASSERT(resultText != &text1);
1630         utext_setNativeIndex(resultText, 0);
1631         utext_setNativeIndex(&text1, 0);
1632         TEST_ASSERT(testUTextEqual(resultText, &text1));
1633         utext_close(resultText);
1634 
1635         /* Then set a UChar * */
1636         uregex_setText(re, text2Chars, 7, &status);
1637         resultText = uregex_getUText(re, NULL, &status);
1638         TEST_ASSERT_SUCCESS(status);
1639         utext_setNativeIndex(resultText, 0);
1640         utext_setNativeIndex(&text2, 0);
1641         TEST_ASSERT(testUTextEqual(resultText, &text2));
1642         utext_close(resultText);
1643         result = uregex_getText(re, &textLength, &status);
1644         TEST_ASSERT(textLength == 7);
1645 
1646         uregex_close(re);
1647         utext_close(&text1);
1648         utext_close(&text2);
1649     }
1650 
1651     /*
1652      *  matches()
1653      */
1654     {
1655         UText   text1 = UTEXT_INITIALIZER;
1656         UBool   result;
1657         UText   nullText = UTEXT_INITIALIZER;
1658         const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1659         const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1660 
1661         status = U_ZERO_ERROR;
1662         utext_openUTF8(&text1, str_abcccde, -1, &status);
1663         utext_openUTF8(&patternText, str_abcd, -1, &status);
1664         re = uregex_openUText(&patternText, 0, NULL, &status);
1665 
1666         uregex_setUText(re, &text1, &status);
1667         result = uregex_matches(re, 0, &status);
1668         TEST_ASSERT(result == FALSE);
1669         TEST_ASSERT_SUCCESS(status);
1670         uregex_close(re);
1671 
1672         status = U_ZERO_ERROR;
1673         re = uregex_openC(".?", 0, NULL, &status);
1674         uregex_setUText(re, &text1, &status);
1675         result = uregex_matches(re, 7, &status);
1676         TEST_ASSERT(result == TRUE);
1677         TEST_ASSERT_SUCCESS(status);
1678 
1679         status = U_ZERO_ERROR;
1680         utext_openUTF8(&nullText, "", -1, &status);
1681         uregex_setUText(re, &nullText, &status);
1682         TEST_ASSERT_SUCCESS(status);
1683         result = uregex_matches(re, 0, &status);
1684         TEST_ASSERT(result == TRUE);
1685         TEST_ASSERT_SUCCESS(status);
1686 
1687         uregex_close(re);
1688         utext_close(&text1);
1689         utext_close(&nullText);
1690     }
1691 
1692 
1693     /*
1694      *  lookingAt()    Used in setText test.
1695      */
1696 
1697 
1698     /*
1699      *  find(), findNext, start, end, reset
1700      */
1701     {
1702         UChar    text1[50];
1703         UBool    result;
1704         u_uastrncpy(text1, "012rx5rx890rxrx...",  UPRV_LENGTHOF(text1));
1705         status = U_ZERO_ERROR;
1706         re = uregex_openC("rx", 0, NULL, &status);
1707 
1708         uregex_setText(re, text1, -1, &status);
1709         result = uregex_find(re, 0, &status);
1710         TEST_ASSERT(result == TRUE);
1711         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1712         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1713         TEST_ASSERT_SUCCESS(status);
1714 
1715         result = uregex_find(re, 9, &status);
1716         TEST_ASSERT(result == TRUE);
1717         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1718         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1719         TEST_ASSERT_SUCCESS(status);
1720 
1721         result = uregex_find(re, 14, &status);
1722         TEST_ASSERT(result == FALSE);
1723         TEST_ASSERT_SUCCESS(status);
1724 
1725         status = U_ZERO_ERROR;
1726         uregex_reset(re, 0, &status);
1727 
1728         result = uregex_findNext(re, &status);
1729         TEST_ASSERT(result == TRUE);
1730         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1731         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1732         TEST_ASSERT_SUCCESS(status);
1733 
1734         result = uregex_findNext(re, &status);
1735         TEST_ASSERT(result == TRUE);
1736         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1737         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1738         TEST_ASSERT_SUCCESS(status);
1739 
1740         status = U_ZERO_ERROR;
1741         uregex_reset(re, 12, &status);
1742 
1743         result = uregex_findNext(re, &status);
1744         TEST_ASSERT(result == TRUE);
1745         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1746         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1747         TEST_ASSERT_SUCCESS(status);
1748 
1749         result = uregex_findNext(re, &status);
1750         TEST_ASSERT(result == FALSE);
1751         TEST_ASSERT_SUCCESS(status);
1752 
1753         uregex_close(re);
1754     }
1755 
1756     /*
1757      *  groupUText()
1758      */
1759     {
1760         UChar    text1[80];
1761         UText   *actual;
1762         UBool    result;
1763         int64_t  groupLen = 0;
1764         UChar    groupBuf[20];
1765 
1766         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  UPRV_LENGTHOF(text1));
1767 
1768         status = U_ZERO_ERROR;
1769         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1770         TEST_ASSERT_SUCCESS(status);
1771 
1772         uregex_setText(re, text1, -1, &status);
1773         result = uregex_find(re, 0, &status);
1774         TEST_ASSERT(result==TRUE);
1775 
1776         /*  Capture Group 0 with shallow clone API.  Should succeed.  */
1777         status = U_ZERO_ERROR;
1778         actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);
1779         TEST_ASSERT_SUCCESS(status);
1780 
1781         TEST_ASSERT(utext_getNativeIndex(actual) == 6);  /* index of "abc " within "noise abc ..." */
1782         TEST_ASSERT(groupLen == 16);   /* length of "abc interior def"  */
1783         utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1784 
1785         TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE);
1786         utext_close(actual);
1787 
1788         /*  Capture group #1.  Should succeed. */
1789         status = U_ZERO_ERROR;
1790 
1791         actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);
1792         TEST_ASSERT_SUCCESS(status);
1793         TEST_ASSERT(9 == utext_getNativeIndex(actual));    /* index of " interior " within "noise abc interior def ... " */
1794                                                            /*    (within the string text1)           */
1795         TEST_ASSERT(10 == groupLen);                       /* length of " interior " */
1796         utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1797         TEST_ASSERT_STRING(" interior ", groupBuf, TRUE);
1798 
1799         utext_close(actual);
1800 
1801         /*  Capture group out of range.  Error. */
1802         status = U_ZERO_ERROR;
1803         actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);
1804         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1805         utext_close(actual);
1806 
1807         uregex_close(re);
1808     }
1809 
1810     /*
1811      *  replaceFirst()
1812      */
1813     {
1814         UChar    text1[80];
1815         UChar    text2[80];
1816         UText    replText = UTEXT_INITIALIZER;
1817         UText   *result;
1818         const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1819         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1820         const char str_u00411U00000042a[] =  { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
1821                0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
1822         const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1823         const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1824         status = U_ZERO_ERROR;
1825         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
1826         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1827         utext_openUTF8(&replText, str_1x, -1, &status);
1828 
1829         re = uregex_openC("x(.*?)x", 0, NULL, &status);
1830         TEST_ASSERT_SUCCESS(status);
1831 
1832         /*  Normal case, with match */
1833         uregex_setText(re, text1, -1, &status);
1834         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1835         TEST_ASSERT_SUCCESS(status);
1836         TEST_ASSERT_UTEXT(str_Replxxx, result);
1837         utext_close(result);
1838 
1839         /* No match.  Text should copy to output with no changes.  */
1840         uregex_setText(re, text2, -1, &status);
1841         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1842         TEST_ASSERT_SUCCESS(status);
1843         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1844         utext_close(result);
1845 
1846         /* Unicode escapes */
1847         uregex_setText(re, text1, -1, &status);
1848         utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1849         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1850         TEST_ASSERT_SUCCESS(status);
1851         TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1852         utext_close(result);
1853 
1854         uregex_close(re);
1855         utext_close(&replText);
1856     }
1857 
1858 
1859     /*
1860      *  replaceAll()
1861      */
1862     {
1863         UChar    text1[80];
1864         UChar    text2[80];
1865         UText    replText = UTEXT_INITIALIZER;
1866         UText   *result;
1867         const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1868         const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1869         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1870         status = U_ZERO_ERROR;
1871         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
1872         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1873         utext_openUTF8(&replText, str_1, -1, &status);
1874 
1875         re = uregex_openC("x(.*?)x", 0, NULL, &status);
1876         TEST_ASSERT_SUCCESS(status);
1877 
1878         /*  Normal case, with match */
1879         uregex_setText(re, text1, -1, &status);
1880         result = uregex_replaceAllUText(re, &replText, NULL, &status);
1881         TEST_ASSERT_SUCCESS(status);
1882         TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1883         utext_close(result);
1884 
1885         /* No match.  Text should copy to output with no changes.  */
1886         uregex_setText(re, text2, -1, &status);
1887         result = uregex_replaceAllUText(re, &replText, NULL, &status);
1888         TEST_ASSERT_SUCCESS(status);
1889         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1890         utext_close(result);
1891 
1892         uregex_close(re);
1893         utext_close(&replText);
1894     }
1895 
1896 
1897     /*
1898      *  appendReplacement()
1899      */
1900     {
1901         UChar    text[100];
1902         UChar    repl[100];
1903         UChar    buf[100];
1904         UChar   *bufPtr;
1905         int32_t  bufCap;
1906 
1907         status = U_ZERO_ERROR;
1908         re = uregex_openC(".*", 0, 0, &status);
1909         TEST_ASSERT_SUCCESS(status);
1910 
1911         u_uastrncpy(text, "whatever",  UPRV_LENGTHOF(text));
1912         u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1913         uregex_setText(re, text, -1, &status);
1914 
1915         /* match covers whole target string */
1916         uregex_find(re, 0, &status);
1917         TEST_ASSERT_SUCCESS(status);
1918         bufPtr = buf;
1919         bufCap = UPRV_LENGTHOF(buf);
1920         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1921         TEST_ASSERT_SUCCESS(status);
1922         TEST_ASSERT_STRING("some other", buf, TRUE);
1923 
1924         /* Match has \u \U escapes */
1925         uregex_find(re, 0, &status);
1926         TEST_ASSERT_SUCCESS(status);
1927         bufPtr = buf;
1928         bufCap = UPRV_LENGTHOF(buf);
1929         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1930         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1931         TEST_ASSERT_SUCCESS(status);
1932         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1933 
1934         uregex_close(re);
1935     }
1936 
1937 
1938     /*
1939      *  appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1940      */
1941 
1942     /*
1943      *  splitUText()
1944      */
1945     {
1946         UChar    textToSplit[80];
1947         UChar    text2[80];
1948         UText    *fields[10];
1949         int32_t  numFields;
1950         int32_t i;
1951 
1952         u_uastrncpy(textToSplit, "first : second:  third",  UPRV_LENGTHOF(textToSplit));
1953         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1954 
1955         status = U_ZERO_ERROR;
1956         re = uregex_openC(":", 0, NULL, &status);
1957 
1958 
1959         /*  Simple split */
1960 
1961         uregex_setText(re, textToSplit, -1, &status);
1962         TEST_ASSERT_SUCCESS(status);
1963 
1964         /* The TEST_ASSERT_SUCCESS call above should change too... */
1965         if (U_SUCCESS(status)) {
1966             memset(fields, 0, sizeof(fields));
1967             numFields = uregex_splitUText(re, fields, 10, &status);
1968             TEST_ASSERT_SUCCESS(status);
1969 
1970             /* The TEST_ASSERT_SUCCESS call above should change too... */
1971             if(U_SUCCESS(status)) {
1972               const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1973               const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* '  second' */
1974               const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* '  third' */
1975                 TEST_ASSERT(numFields == 3);
1976                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
1977                 TEST_ASSERT_UTEXT(str_second, fields[1]);
1978                 TEST_ASSERT_UTEXT(str_third, fields[2]);
1979                 TEST_ASSERT(fields[3] == NULL);
1980             }
1981             for(i = 0; i < numFields; i++) {
1982                 utext_close(fields[i]);
1983             }
1984         }
1985 
1986         uregex_close(re);
1987 
1988 
1989         /*  Split with too few output strings available */
1990         status = U_ZERO_ERROR;
1991         re = uregex_openC(":", 0, NULL, &status);
1992         uregex_setText(re, textToSplit, -1, &status);
1993         TEST_ASSERT_SUCCESS(status);
1994 
1995         /* The TEST_ASSERT_SUCCESS call above should change too... */
1996         if(U_SUCCESS(status)) {
1997             fields[0] = NULL;
1998             fields[1] = NULL;
1999             fields[2] = &patternText;
2000             numFields = uregex_splitUText(re, fields, 2, &status);
2001             TEST_ASSERT_SUCCESS(status);
2002 
2003             /* The TEST_ASSERT_SUCCESS call above should change too... */
2004             if(U_SUCCESS(status)) {
2005                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2006                 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second:  third */
2007                 TEST_ASSERT(numFields == 2);
2008                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2009                 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
2010                 TEST_ASSERT(fields[2] == &patternText);
2011             }
2012             for(i = 0; i < numFields; i++) {
2013                 utext_close(fields[i]);
2014             }
2015         }
2016 
2017         uregex_close(re);
2018     }
2019 
2020     /* splitUText(), part 2.  Patterns with capture groups.  The capture group text
2021      *                   comes out as additional fields.  */
2022     {
2023         UChar    textToSplit[80];
2024         UText    *fields[10];
2025         int32_t  numFields;
2026         int32_t i;
2027 
2028         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  UPRV_LENGTHOF(textToSplit));
2029 
2030         status = U_ZERO_ERROR;
2031         re = uregex_openC("<(.*?)>", 0, NULL, &status);
2032 
2033         uregex_setText(re, textToSplit, -1, &status);
2034         TEST_ASSERT_SUCCESS(status);
2035 
2036         /* The TEST_ASSERT_SUCCESS call above should change too... */
2037         if(U_SUCCESS(status)) {
2038             memset(fields, 0, sizeof(fields));
2039             numFields = uregex_splitUText(re, fields, 10, &status);
2040             TEST_ASSERT_SUCCESS(status);
2041 
2042             /* The TEST_ASSERT_SUCCESS call above should change too... */
2043             if(U_SUCCESS(status)) {
2044                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2045                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2046                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2047                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2048                 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2049 
2050                 TEST_ASSERT(numFields == 5);
2051                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2052                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2053                 TEST_ASSERT_UTEXT(str_second, fields[2]);
2054                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2055                 TEST_ASSERT_UTEXT(str_third, fields[4]);
2056                 TEST_ASSERT(fields[5] == NULL);
2057             }
2058             for(i = 0; i < numFields; i++) {
2059                 utext_close(fields[i]);
2060             }
2061         }
2062 
2063         /*  Split with too few output strings available (2) */
2064         status = U_ZERO_ERROR;
2065         fields[0] = NULL;
2066         fields[1] = NULL;
2067         fields[2] = &patternText;
2068         numFields = uregex_splitUText(re, fields, 2, &status);
2069         TEST_ASSERT_SUCCESS(status);
2070 
2071         /* The TEST_ASSERT_SUCCESS call above should change too... */
2072         if(U_SUCCESS(status)) {
2073             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2074             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2075             TEST_ASSERT(numFields == 2);
2076             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2077             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2078             TEST_ASSERT(fields[2] == &patternText);
2079         }
2080         for(i = 0; i < numFields; i++) {
2081             utext_close(fields[i]);
2082         }
2083 
2084 
2085         /*  Split with too few output strings available (3) */
2086         status = U_ZERO_ERROR;
2087         fields[0] = NULL;
2088         fields[1] = NULL;
2089         fields[2] = NULL;
2090         fields[3] = &patternText;
2091         numFields = uregex_splitUText(re, fields, 3, &status);
2092         TEST_ASSERT_SUCCESS(status);
2093 
2094         /* The TEST_ASSERT_SUCCESS call above should change too... */
2095         if(U_SUCCESS(status)) {
2096             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2097             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2098             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2099             TEST_ASSERT(numFields == 3);
2100             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2101             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2102             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2103             TEST_ASSERT(fields[3] == &patternText);
2104         }
2105         for(i = 0; i < numFields; i++) {
2106             utext_close(fields[i]);
2107         }
2108 
2109         /*  Split with just enough output strings available (5) */
2110         status = U_ZERO_ERROR;
2111         fields[0] = NULL;
2112         fields[1] = NULL;
2113         fields[2] = NULL;
2114         fields[3] = NULL;
2115         fields[4] = NULL;
2116         fields[5] = &patternText;
2117         numFields = uregex_splitUText(re, fields, 5, &status);
2118         TEST_ASSERT_SUCCESS(status);
2119 
2120         /* The TEST_ASSERT_SUCCESS call above should change too... */
2121         if(U_SUCCESS(status)) {
2122             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2123             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2124             const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2125             const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2126             const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2127 
2128             TEST_ASSERT(numFields == 5);
2129             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2130             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2131             TEST_ASSERT_UTEXT(str_second, fields[2]);
2132             TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2133             TEST_ASSERT_UTEXT(str_third, fields[4]);
2134             TEST_ASSERT(fields[5] == &patternText);
2135         }
2136         for(i = 0; i < numFields; i++) {
2137             utext_close(fields[i]);
2138         }
2139 
2140         /* Split, end of text is a field delimiter.   */
2141         status = U_ZERO_ERROR;
2142         uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status);
2143         TEST_ASSERT_SUCCESS(status);
2144 
2145         /* The TEST_ASSERT_SUCCESS call above should change too... */
2146         if(U_SUCCESS(status)) {
2147             memset(fields, 0, sizeof(fields));
2148             fields[9] = &patternText;
2149             numFields = uregex_splitUText(re, fields, 9, &status);
2150             TEST_ASSERT_SUCCESS(status);
2151 
2152             /* The TEST_ASSERT_SUCCESS call above should change too... */
2153             if(U_SUCCESS(status)) {
2154                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2155                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2156                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2157                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2158                 const char str_empty[] = { 0x00 };
2159 
2160                 TEST_ASSERT(numFields == 5);
2161                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2162                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2163                 TEST_ASSERT_UTEXT(str_second, fields[2]);
2164                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2165                 TEST_ASSERT_UTEXT(str_empty,  fields[4]);
2166                 TEST_ASSERT(fields[5] == NULL);
2167                 TEST_ASSERT(fields[8] == NULL);
2168                 TEST_ASSERT(fields[9] == &patternText);
2169             }
2170             for(i = 0; i < numFields; i++) {
2171                 utext_close(fields[i]);
2172             }
2173         }
2174 
2175         uregex_close(re);
2176     }
2177     utext_close(&patternText);
2178 }
2179 
2180 
TestRefreshInput(void)2181 static void TestRefreshInput(void) {
2182     /*
2183      *  RefreshInput changes out the input of a URegularExpression without
2184      *    changing anything else in the match state.  Used with Java JNI,
2185      *    when Java moves the underlying string storage.   This test
2186      *    runs a find() loop, moving the text after the first match.
2187      *    The right number of matches should still be found.
2188      */
2189     UChar testStr[]  = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0};  /* = "A B C"  */
2190     UChar movedStr[] = {   0,    0,    0,    0,    0,   0};
2191     UErrorCode status = U_ZERO_ERROR;
2192     URegularExpression *re;
2193     UText ut1 = UTEXT_INITIALIZER;
2194     UText ut2 = UTEXT_INITIALIZER;
2195 
2196     re = uregex_openC("[ABC]", 0, 0, &status);
2197     TEST_ASSERT_SUCCESS(status);
2198 
2199     utext_openUChars(&ut1, testStr, -1, &status);
2200     TEST_ASSERT_SUCCESS(status);
2201     uregex_setUText(re, &ut1, &status);
2202     TEST_ASSERT_SUCCESS(status);
2203 
2204     /* Find the first match "A" in the original string */
2205     TEST_ASSERT(uregex_findNext(re, &status));
2206     TEST_ASSERT(uregex_start(re, 0, &status) == 0);
2207 
2208     /* Move the string, kill the original string.  */
2209     u_strcpy(movedStr, testStr);
2210     u_memset(testStr, 0, u_strlen(testStr));
2211     utext_openUChars(&ut2, movedStr, -1, &status);
2212     TEST_ASSERT_SUCCESS(status);
2213     uregex_refreshUText(re, &ut2, &status);
2214     TEST_ASSERT_SUCCESS(status);
2215 
2216     /* Find the following two matches, now working in the moved string. */
2217     TEST_ASSERT(uregex_findNext(re, &status));
2218     TEST_ASSERT(uregex_start(re, 0, &status) == 2);
2219     TEST_ASSERT(uregex_findNext(re, &status));
2220     TEST_ASSERT(uregex_start(re, 0, &status) == 4);
2221     TEST_ASSERT(FALSE == uregex_findNext(re, &status));
2222 
2223     uregex_close(re);
2224 }
2225 
2226 
TestBug8421(void)2227 static void TestBug8421(void) {
2228     /* Bug 8421:  setTimeLimit on a regular expresssion before setting text to be matched
2229      *             was failing.
2230      */
2231     URegularExpression *re;
2232     UErrorCode status = U_ZERO_ERROR;
2233     int32_t  limit = -1;
2234 
2235     re = uregex_openC("abc", 0, 0, &status);
2236     TEST_ASSERT_SUCCESS(status);
2237 
2238     limit = uregex_getTimeLimit(re, &status);
2239     TEST_ASSERT_SUCCESS(status);
2240     TEST_ASSERT(limit == 0);
2241 
2242     uregex_setTimeLimit(re, 100, &status);
2243     TEST_ASSERT_SUCCESS(status);
2244     limit = uregex_getTimeLimit(re, &status);
2245     TEST_ASSERT_SUCCESS(status);
2246     TEST_ASSERT(limit == 100);
2247 
2248     uregex_close(re);
2249 }
2250 
FindCallback(const void * context,int64_t matchIndex)2251 static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {
2252     return FALSE;
2253 }
2254 
MatchCallback(const void * context,int32_t steps)2255 static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {
2256     return FALSE;
2257 }
2258 
TestBug10815()2259 static void TestBug10815() {
2260   /* Bug 10815:   uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
2261    *              when the callback function specified by uregex_setMatchCallback() returns FALSE
2262    */
2263     URegularExpression *re;
2264     UErrorCode status = U_ZERO_ERROR;
2265     UChar    text[100];
2266 
2267 
2268     // findNext() with a find progress callback function.
2269 
2270     re = uregex_openC(".z", 0, 0, &status);
2271     TEST_ASSERT_SUCCESS(status);
2272 
2273     u_uastrncpy(text, "Hello, World.",  UPRV_LENGTHOF(text));
2274     uregex_setText(re, text, -1, &status);
2275     TEST_ASSERT_SUCCESS(status);
2276 
2277     uregex_setFindProgressCallback(re, FindCallback, NULL, &status);
2278     TEST_ASSERT_SUCCESS(status);
2279 
2280     uregex_findNext(re, &status);
2281     TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2282 
2283     uregex_close(re);
2284 
2285     // findNext() with a match progress callback function.
2286 
2287     status = U_ZERO_ERROR;
2288     re = uregex_openC("((xxx)*)*y", 0, 0, &status);
2289     TEST_ASSERT_SUCCESS(status);
2290 
2291     // Pattern + this text gives an exponential time match. Without the callback to stop the match,
2292     // it will appear to be stuck in a (near) infinite loop.
2293     u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",  UPRV_LENGTHOF(text));
2294     uregex_setText(re, text, -1, &status);
2295     TEST_ASSERT_SUCCESS(status);
2296 
2297     uregex_setMatchCallback(re, MatchCallback, NULL, &status);
2298     TEST_ASSERT_SUCCESS(status);
2299 
2300     uregex_findNext(re, &status);
2301     TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2302 
2303     uregex_close(re);
2304 }
2305 
2306 
2307 #endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS */
2308