1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /********************************************************************************
9 *
10 * File CNORMTST.C
11 *
12 * Modification History:
13 *        Name                     Description
14 *     Madhu Katragadda            Ported for C API
15 *     synwee                      added test for quick check
16 *     synwee                      added test for checkFCD
17 *********************************************************************************/
18 /*tests for u_normalization*/
19 #include "unicode/utypes.h"
20 #include "unicode/unorm.h"
21 #include "unicode/utf16.h"
22 #include "cintltst.h"
23 #include "cmemory.h"
24 
25 #if !UCONFIG_NO_NORMALIZATION
26 
27 #include <stdlib.h>
28 #include <time.h>
29 #include "unicode/uchar.h"
30 #include "unicode/ustring.h"
31 #include "unicode/unorm.h"
32 #include "cnormtst.h"
33 
/*
 * Forward declarations of file-local test functions, so that they can be
 * referenced (for example when registering them) before they are defined.
 */
static void TestAPI(void);
static void TestNormCoverage(void);
static void TestConcatenate(void);
static void TestNextPrevious(void);
static void TestIsNormalized(void);
static void TestFCNFKCClosure(void);
static void TestQuickCheckPerCP(void);
static void TestComposition(void);
static void TestFCD(void);
static void TestGetDecomposition(void);
static void TestGetRawDecomposition(void);
static void TestAppendRestoreMiddle(void);
static void TestGetEasyToUseInstance(void);
68 
/*
 * Canonical normalization test vectors.
 *
 * Every row is { input, expected decomposed form, expected composed form }.
 * The strings are handed to CharsToUChars() before use, so "\\uXXXX" is a
 * literal backslash sequence in the C string (presumably unescaped by that
 * helper), not a C universal character name.
 *
 * The closing all-empty row is counted by UPRV_LENGTHOF() like any other row
 * and is therefore also run as an empty-string case.
 */
static const char* const canonTests[][3] = {
    /* plain ASCII, and a precomposed letter at the start of a word */
    { "cat", "cat", "cat" },
    { "\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" },

    /* D-dot_above (precomposed) */
    { "\\u1e0a", "D\\u0307", "\\u1e0a" },
    /* D + dot_above */
    { "D\\u0307", "D\\u0307", "\\u1e0a" },

    /* D-dot_below + dot_above */
    { "\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" },
    /* D-dot_above + dot_below */
    { "\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" },
    /* D + dot_above + dot_below */
    { "D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" },

    /* D-cedilla + dot_above + dot_below */
    { "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307", "\\u1e10\\u0323\\u0307" },
    /* D + dot_above + ogonek + dot_below */
    { "D\\u0307\\u0328\\u0323", "D\\u0328\\u0323\\u0307", "\\u1e0c\\u0328\\u0307" },

    /* E-macron-grave (precomposed) */
    { "\\u1E14", "E\\u0304\\u0300", "\\u1E14" },
    /* E-macron + grave */
    { "\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" },
    /* E-grave + macron */
    { "\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" },

    /* angstrom sign */
    { "\\u212b", "A\\u030a", "\\u00c5" },
    /* A-ring */
    { "\\u00c5", "A\\u030a", "\\u00c5" },

    /* A-diaeresis followed by "ffin", spelled out and with the U+FB03 ligature */
    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    { "\\u00C4\\uFB03n", "A\\u0308\\uFB03n", "\\u00C4\\uFB03n" },

    /* "IV" as two letters and as the single character U+2163 */
    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry \\u2163", "Henry \\u2163" },

    /* ga (Katakana) */
    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },
    /* ka + ten */
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" },
    /* hw_ka + hw_ten */
    { "\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" },
    /* ka + hw_ten */
    { "\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" },
    /* hw_ka + ten */
    { "\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" },

    /* A + grave (above) + U+0316 (below): the marks are reordered */
    { "A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" },

    /* empty string */
    { "", "", "" }
};
105 
/*
 * Compatibility normalization test vectors.
 *
 * Every row is { input, expected decomposed form, expected composed form },
 * written with literal "\\uXXXX" sequences like the canonical table.
 * The closing all-empty row is counted by UPRV_LENGTHOF() and so is also run
 * as an empty-string case.
 */
static const char* const compatTests[][3] = {
    { "cat", "cat", "cat" },

    /* Alef-Lamed vs. Alef, Lamed */
    { "\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" },

    { "\\u00C4ffin", "A\\u0308ffin", "\\u00C4ffin" },
    /* ffi ligature -> f + f + i */
    { "\\u00C4\\uFB03n", "A\\u0308ffin", "\\u00C4ffin" },

    { "Henry IV", "Henry IV", "Henry IV" },
    { "Henry \\u2163", "Henry IV", "Henry IV" },

    /* ga (Katakana) */
    { "\\u30AC", "\\u30AB\\u3099", "\\u30AC" },
    /* ka + ten */
    { "\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" },

    /* hw_ka + ten */
    { "\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" },

    /* These two are broken in Unicode 2.1.2 but fixed in 2.1.5 and later */
    /* hw_ka + hw_ten */
    { "\\uFF76\\uFF9E", "\\u30AB\\u3099", "\\u30AC" },
    /* ka + hw_ten */
    { "\\u30AB\\uFF9E", "\\u30AB\\u3099", "\\u30AC" },

    /* empty string */
    { "", "", "" }
};
128 
/*
 * FCD test vectors: { input, expected FCD output, unused }.
 * The third column is NULL; TestNormCases() only reads column 1 for modes
 * other than NFC/NFKC.
 * Added for testing the below-U+0300 prefix of a NUL-terminated string.
 */
static const char* const fcdTests[][3] = {
    /* D-caron + cedilla */
    { "\\u010e\\u0327", "D\\u0327\\u030c", NULL },
    /* D-caron */
    { "\\u010e", "\\u010e", NULL }
};
134 
135 void addNormTest(TestNode** root);
136 
/*
 * Registers every normalization test of this file with the test tree.
 * Each test is added under the path "tsnorm/cnormtst/<function name>".
 */
void addNormTest(TestNode** root)
{
/* Builds the path by appending the stringified function name to the common prefix. */
#define ADD_NORM_TEST(testFn) addTest(root, &testFn, "tsnorm/cnormtst/" #testFn)

    ADD_NORM_TEST(TestAPI);
    ADD_NORM_TEST(TestDecomp);
    ADD_NORM_TEST(TestCompatDecomp);
    ADD_NORM_TEST(TestCanonDecompCompose);
    ADD_NORM_TEST(TestCompatDecompCompose);
    ADD_NORM_TEST(TestFCD);
    ADD_NORM_TEST(TestNull);
    ADD_NORM_TEST(TestQuickCheck);
    ADD_NORM_TEST(TestQuickCheckPerCP);
    ADD_NORM_TEST(TestIsNormalized);
    ADD_NORM_TEST(TestCheckFCD);
    ADD_NORM_TEST(TestNormCoverage);
    ADD_NORM_TEST(TestConcatenate);
    ADD_NORM_TEST(TestNextPrevious);
    ADD_NORM_TEST(TestFCNFKCClosure);
    ADD_NORM_TEST(TestComposition);
    ADD_NORM_TEST(TestGetDecomposition);
    ADD_NORM_TEST(TestGetRawDecomposition);
    ADD_NORM_TEST(TestAppendRestoreMiddle);
    ADD_NORM_TEST(TestGetEasyToUseInstance);

#undef ADD_NORM_TEST
}
160 
/*
 * Printable names for log messages, looked up as modeStrings[mode] with a
 * UNormalizationMode value as the index.
 */
static const char* const modeStrings[]={
    [0] = "?",
    [1] = "UNORM_NONE",
    [2] = "UNORM_NFD",
    [3] = "UNORM_NFKD",
    [4] = "UNORM_NFC",
    [5] = "UNORM_NFKC",
    [6] = "UNORM_FCD",
    [7] = "UNORM_MODE_COUNT"
};
171 
/**
 * Runs unorm_normalize() in one mode over a table of test rows.
 *
 * For every row the function
 *  - preflights twice, once with the explicit source length and once with
 *    length -1 (NUL-terminated), and requires both to report the same length;
 *  - normalizes into a fixed 16-UChar buffer, again once per length style,
 *    and checks the returned length and the resulting text.
 *
 * @param mode          normalization mode under test; also indexes modeStrings
 * @param cases         rows of { input, decomposed, composed } strings
 * @param lengthOfCases number of rows in cases
 */
static void TestNormCases(UNormalizationMode mode,
                          const char* const cases[][3], int32_t lengthOfCases) {
    /* Composing modes are compared with column 2, all other modes with column 1. */
    const int32_t expIndex = (mode==UNORM_NFC || mode==UNORM_NFKC) ? 2 : 1;
    UChar result[16];
    int32_t x;

    log_verbose("Testing unorm_normalize(%s)\n", modeStrings[mode]);

    for(x=0; x<lengthOfCases; ++x) {
        UErrorCode status = U_ZERO_ERROR;
        UErrorCode status2 = U_ZERO_ERROR;
        const char *expected = cases[x][expIndex];
        UChar *source = CharsToUChars(cases[x][0]);
        int32_t neededLen, length2;

        /* Preflight with both ways of passing the source length. */
        neededLen = unorm_normalize(source, u_strlen(source), mode, 0, NULL, 0, &status);
        length2 = unorm_normalize(source, -1, mode, 0, NULL, 0, &status2);
        if(neededLen != length2) {
            log_err("ERROR in unorm_normalize(%s)[%d]: "
                    "preflight length/srcLength %d!=%d preflight length/NUL\n",
                    modeStrings[mode], (int)x, (int)neededLen, (int)length2);
        }

        /* Only the overflow code is cleared; any other code is carried forward. */
        if(status == U_BUFFER_OVERFLOW_ERROR) {
            status = U_ZERO_ERROR;
        }

        /* Normalize for real, explicit source length. */
        length2 = unorm_normalize(source, u_strlen(source), mode, 0,
                                  result, UPRV_LENGTHOF(result), &status);
        if(!U_FAILURE(status) && neededLen == length2) {
            assertEqual(result, expected, x);
        } else {
            log_data_err("ERROR in unorm_normalize(%s/srcLength) at %s:  %s - (Are you missing data?)\n",
                         modeStrings[mode], austrdup(source), myErrorName(status));
        }

        /* Normalize for real, NUL-terminated source. */
        length2 = unorm_normalize(source, -1, mode, 0,
                                  result, UPRV_LENGTHOF(result), &status);
        if(!U_FAILURE(status) && neededLen == length2) {
            assertEqual(result, expected, x);
        } else {
            log_data_err("ERROR in unorm_normalize(%s/NUL) at %s:  %s - (Are you missing data?)\n",
                         modeStrings[mode], austrdup(source), myErrorName(status));
        }

        free(source);
    }
}
211 
TestDecomp()212 void TestDecomp() {
213     TestNormCases(UNORM_NFD, canonTests, UPRV_LENGTHOF(canonTests));
214 }
215 
TestCompatDecomp()216 void TestCompatDecomp() {
217     TestNormCases(UNORM_NFKD, compatTests, UPRV_LENGTHOF(compatTests));
218 }
219 
TestCanonDecompCompose()220 void TestCanonDecompCompose() {
221     TestNormCases(UNORM_NFC, canonTests, UPRV_LENGTHOF(canonTests));
222 }
223 
TestCompatDecompCompose()224 void TestCompatDecompCompose() {
225     TestNormCases(UNORM_NFKC, compatTests, UPRV_LENGTHOF(compatTests));
226 }
227 
TestFCD()228 void TestFCD() {
229     TestNormCases(UNORM_FCD, fcdTests, UPRV_LENGTHOF(fcdTests));
230 }
231 
/**
 * Logs an error if result differs from the expected string.
 *
 * @param result   NUL-terminated UTF-16 string produced by the test
 * @param expected expected text as a char string; converted with CharsToUChars()
 * @param index    row number of the test case, used only in the message
 */
static void assertEqual(const UChar* result, const char* expected, int32_t index)
{
    UChar *expectedUni = CharsToUChars(expected);
    const int32_t difference = u_strcmp(result, expectedUni);

    /* The converted copy is only needed for the comparison itself. */
    free(expectedUni);

    if(difference == 0) {
        return;
    }
    log_err("ERROR in decomposition at index = %d. EXPECTED: %s , GOT: %s\n", index, expected,
        austrdup(result) );
}
241 
/**
 * Normalizes src with an explicit length and compares the output with exp,
 * code unit by code unit. The callers in this file pass strings that contain
 * an embedded U+0000.
 *
 * @param src    source text (not necessarily NUL-terminated)
 * @param srcLen number of UChars in src
 * @param exp    expected normalized text
 * @param expLen number of UChars in exp
 * @param mode   normalization mode to apply
 * @param name   printable mode name for log messages
 */
static void TestNull_check(UChar *src, int32_t srcLen,
                    UChar *exp, int32_t expLen,
                    UNormalizationMode mode,
                    const char *name)
{
    UErrorCode status = U_ZERO_ERROR;
    int32_t len, compareLen, i;

    UChar   result[50];

    /* Fill the buffer with a sentinel (U+FFFD) before the call. */
    for(i=0;i<UPRV_LENGTHOF(result);i++) {
        result[i] = 0xFFFD;
    }

    len = unorm_normalize(src, srcLen, mode, 0, result, UPRV_LENGTHOF(result), &status);

    if(U_FAILURE(status)) {
        log_data_err("unorm_normalize(%s) with 0x0000 failed: %s - (Are you missing data?)\n", name, u_errorName(status));
        /*
         * After a failure len is not a valid index bound for either buffer
         * (on overflow it can exceed the capacity of result), so stop here.
         */
        return;
    }
    if(len != expLen) {
        log_err("unorm_normalize(%s) with 0x0000 failed: Expected len %d, got %d\n", name, expLen, len);
    }

    /*
     * Compare only the part both strings have, so that a too-long output
     * never reads past the end of exp[].
     */
    compareLen = (len < expLen) ? len : expLen;
    for(i=0;i<compareLen;i++) {
        if(exp[i] != result[i]) {
            log_err("unorm_normalize(%s): @%d, expected \\u%04X got \\u%04X\n",
                    name,
                    i,
                    exp[i],
                    result[i]);
            return;
        }
        log_verbose("     %d: \\u%04X\n", i, result[i]);
    }

    /* Report success only when the length matched as well. */
    if(len == expLen) {
        log_verbose("unorm_normalize(%s) with 0x0000: OK\n", name);
    }
}
284 
TestNull()285 void TestNull()
286 {
287 
288     UChar   source_comp[] = { 0x0061, 0x0000, 0x0044, 0x0307 };
289     int32_t source_comp_len = 4;
290     UChar   expect_comp[] = { 0x0061, 0x0000, 0x1e0a };
291     int32_t expect_comp_len = 3;
292 
293     UChar   source_dcmp[] = { 0x1e0A, 0x0000, 0x0929 };
294     int32_t source_dcmp_len = 3;
295     UChar   expect_dcmp[] = { 0x0044, 0x0307, 0x0000, 0x0928, 0x093C };
296     int32_t expect_dcmp_len = 5;
297 
298     TestNull_check(source_comp,
299                    source_comp_len,
300                    expect_comp,
301                    expect_comp_len,
302                    UNORM_NFC,
303                    "UNORM_NFC");
304 
305     TestNull_check(source_dcmp,
306                    source_dcmp_len,
307                    expect_dcmp,
308                    expect_dcmp_len,
309                    UNORM_NFD,
310                    "UNORM_NFD");
311 
312     TestNull_check(source_comp,
313                    source_comp_len,
314                    expect_comp,
315                    expect_comp_len,
316                    UNORM_NFKC,
317                    "UNORM_NFKC");
318 
319 
320 }
321 
TestQuickCheckResultNO()322 static void TestQuickCheckResultNO()
323 {
324   const UChar CPNFD[] = {0x00C5, 0x0407, 0x1E00, 0x1F57, 0x220C,
325                          0x30AE, 0xAC00, 0xD7A3, 0xFB36, 0xFB4E};
326   const UChar CPNFC[] = {0x0340, 0x0F93, 0x1F77, 0x1FBB, 0x1FEB,
327                           0x2000, 0x232A, 0xF900, 0xFA1E, 0xFB4E};
328   const UChar CPNFKD[] = {0x00A0, 0x02E4, 0x1FDB, 0x24EA, 0x32FE,
329                            0xAC00, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
330   const UChar CPNFKC[] = {0x00A0, 0x017F, 0x2000, 0x24EA, 0x32FE,
331                            0x33FE, 0xFB4E, 0xFA10, 0xFF3F, 0xFA2D};
332 
333 
334   const int SIZE = 10;
335 
336   int count = 0;
337   UErrorCode error = U_ZERO_ERROR;
338 
339   for (; count < SIZE; count ++)
340   {
341     if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
342                                                               UNORM_NO)
343     {
344       log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
345       return;
346     }
347     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
348                                                               UNORM_NO)
349     {
350       log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
351       return;
352     }
353     if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
354                                                               UNORM_NO)
355     {
356       log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
357       return;
358     }
359     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
360                                                               UNORM_NO)
361     {
362       log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
363       return;
364     }
365   }
366 }
367 
368 
TestQuickCheckResultYES()369 static void TestQuickCheckResultYES()
370 {
371   const UChar CPNFD[] = {0x00C6, 0x017F, 0x0F74, 0x1000, 0x1E9A,
372                          0x2261, 0x3075, 0x4000, 0x5000, 0xF000};
373   const UChar CPNFC[] = {0x0400, 0x0540, 0x0901, 0x1000, 0x1500,
374                          0x1E9A, 0x3000, 0x4000, 0x5000, 0xF000};
375   const UChar CPNFKD[] = {0x00AB, 0x02A0, 0x1000, 0x1027, 0x2FFB,
376                           0x3FFF, 0x4FFF, 0xA000, 0xF000, 0xFA27};
377   const UChar CPNFKC[] = {0x00B0, 0x0100, 0x0200, 0x0A02, 0x1000,
378                           0x2010, 0x3030, 0x4000, 0xA000, 0xFA0E};
379 
380   const int SIZE = 10;
381   int count = 0;
382   UErrorCode error = U_ZERO_ERROR;
383 
384   UChar cp = 0;
385   while (cp < 0xA0)
386   {
387     if (unorm_quickCheck(&cp, 1, UNORM_NFD, &error) != UNORM_YES)
388     {
389       log_data_err("ERROR in NFD quick check at U+%04x - (Are you missing data?)\n", cp);
390       return;
391     }
392     if (unorm_quickCheck(&cp, 1, UNORM_NFC, &error) !=
393                                                              UNORM_YES)
394     {
395       log_err("ERROR in NFC quick check at U+%04x\n", cp);
396       return;
397     }
398     if (unorm_quickCheck(&cp, 1, UNORM_NFKD, &error) != UNORM_YES)
399     {
400       log_data_err("ERROR in NFKD quick check at U+%04x\n", cp);
401       return;
402     }
403     if (unorm_quickCheck(&cp, 1, UNORM_NFKC, &error) !=
404                                                              UNORM_YES)
405     {
406       log_err("ERROR in NFKC quick check at U+%04x\n", cp);
407       return;
408     }
409     cp ++;
410   }
411 
412   for (; count < SIZE; count ++)
413   {
414     if (unorm_quickCheck(&(CPNFD[count]), 1, UNORM_NFD, &error) !=
415                                                              UNORM_YES)
416     {
417       log_err("ERROR in NFD quick check at U+%04x\n", CPNFD[count]);
418       return;
419     }
420     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error)
421                                                           != UNORM_YES)
422     {
423       log_err("ERROR in NFC quick check at U+%04x\n", CPNFC[count]);
424       return;
425     }
426     if (unorm_quickCheck(&(CPNFKD[count]), 1, UNORM_NFKD, &error) !=
427                                                              UNORM_YES)
428     {
429       log_err("ERROR in NFKD quick check at U+%04x\n", CPNFKD[count]);
430       return;
431     }
432     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
433                                                              UNORM_YES)
434     {
435       log_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
436       return;
437     }
438   }
439 }
440 
TestQuickCheckResultMAYBE()441 static void TestQuickCheckResultMAYBE()
442 {
443   const UChar CPNFC[] = {0x0306, 0x0654, 0x0BBE, 0x102E, 0x1161,
444                          0x116A, 0x1173, 0x1175, 0x3099, 0x309A};
445   const UChar CPNFKC[] = {0x0300, 0x0654, 0x0655, 0x09D7, 0x0B3E,
446                           0x0DCF, 0xDDF, 0x102E, 0x11A8, 0x3099};
447 
448 
449   const int SIZE = 10;
450 
451   int count = 0;
452   UErrorCode error = U_ZERO_ERROR;
453 
454   /* NFD and NFKD does not have any MAYBE codepoints */
455   for (; count < SIZE; count ++)
456   {
457     if (unorm_quickCheck(&(CPNFC[count]), 1, UNORM_NFC, &error) !=
458                                                            UNORM_MAYBE)
459     {
460       log_data_err("ERROR in NFC quick check at U+%04x - (Are you missing data?)\n", CPNFC[count]);
461       return;
462     }
463     if (unorm_quickCheck(&(CPNFKC[count]), 1, UNORM_NFKC, &error) !=
464                                                            UNORM_MAYBE)
465     {
466       log_data_err("ERROR in NFKC quick check at U+%04x\n", CPNFKC[count]);
467       return;
468     }
469   }
470 }
471 
TestQuickCheckStringResult()472 static void TestQuickCheckStringResult()
473 {
474   int count;
475   UChar *d = NULL;
476   UChar *c = NULL;
477   UErrorCode error = U_ZERO_ERROR;
478 
479   for (count = 0; count < UPRV_LENGTHOF(canonTests); count ++)
480   {
481     d = CharsToUChars(canonTests[count][1]);
482     c = CharsToUChars(canonTests[count][2]);
483     if (unorm_quickCheck(d, u_strlen(d), UNORM_NFD, &error) !=
484                                                             UNORM_YES)
485     {
486       log_data_err("ERROR in NFD quick check for string at count %d - (Are you missing data?)\n", count);
487       return;
488     }
489 
490     if (unorm_quickCheck(c, u_strlen(c), UNORM_NFC, &error) ==
491                                                             UNORM_NO)
492     {
493       log_err("ERROR in NFC quick check for string at count %d\n", count);
494       return;
495     }
496 
497     free(d);
498     free(c);
499   }
500 
501   for (count = 0; count < UPRV_LENGTHOF(compatTests); count ++)
502   {
503     d = CharsToUChars(compatTests[count][1]);
504     c = CharsToUChars(compatTests[count][2]);
505     if (unorm_quickCheck(d, u_strlen(d), UNORM_NFKD, &error) !=
506                                                             UNORM_YES)
507     {
508       log_data_err("ERROR in NFKD quick check for string at count %d\n", count);
509       return;
510     }
511 
512     if (unorm_quickCheck(c, u_strlen(c), UNORM_NFKC, &error) !=
513                                                             UNORM_YES)
514     {
515       log_err("ERROR in NFKC quick check for string at count %d\n", count);
516       return;
517     }
518 
519     free(d);
520     free(c);
521   }
522 }
523 
/*
 * Entry point for the quick-check tests: runs the per-code-point checks for
 * the NO, YES and MAYBE answers, then the whole-string checks.
 */
void TestQuickCheck()
{
  /* single code points, grouped by expected answer */
  TestQuickCheckResultNO();
  TestQuickCheckResultYES();
  TestQuickCheckResultMAYBE();

  /* complete strings from the normalization test tables */
  TestQuickCheckStringResult();
}
531 
532 /*
533  * The intltest/NormalizerConformanceTest tests a lot of strings that _are_
534  * normalized, and some that are not.
535  * Here we pick some specific cases and test the C API.
536  */
TestIsNormalized(void)537 static void TestIsNormalized(void) {
538     static const UChar notNFC[][8]={            /* strings that are not in NFC */
539         { 0x62, 0x61, 0x300, 0x63, 0 },         /* 0061 0300 compose */
540         { 0xfb1d, 0 },                          /* excluded from composition */
541         { 0x0627, 0x0653, 0 },                  /* 0627 0653 compose */
542         { 0x3071, 0x306f, 0x309a, 0x3073, 0 }   /* 306F 309A compose */
543     };
544     static const UChar notNFKC[][8]={           /* strings that are not in NFKC */
545         { 0x1100, 0x1161, 0 },                  /* Jamo compose */
546         { 0x1100, 0x314f, 0 },                  /* compatibility Jamo compose */
547         { 0x03b1, 0x1f00, 0x0345, 0x03b3, 0 }   /* 1F00 0345 compose */
548     };
549 
550     int32_t i;
551     UErrorCode errorCode;
552 
553     /* API test */
554 
555     /* normal case with length>=0 (length -1 used for special cases below) */
556     errorCode=U_ZERO_ERROR;
557     if(!unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
558         log_data_err("error: !isNormalized(<U+0300>, NFC) (%s) - (Are you missing data?)\n", u_errorName(errorCode));
559     }
560 
561     /* incoming U_FAILURE */
562     errorCode=U_TRUNCATED_CHAR_FOUND;
563     (void)unorm_isNormalized(notNFC[0]+2, 1, UNORM_NFC, &errorCode);
564     if(errorCode!=U_TRUNCATED_CHAR_FOUND) {
565         log_err("error: isNormalized(U_TRUNCATED_CHAR_FOUND) changed the error code to %s\n", u_errorName(errorCode));
566     }
567 
568     /* NULL source */
569     errorCode=U_ZERO_ERROR;
570     (void)unorm_isNormalized(NULL, 1, UNORM_NFC, &errorCode);
571     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
572         log_data_err("error: isNormalized(NULL) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
573     }
574 
575     /* bad length */
576     errorCode=U_ZERO_ERROR;
577     (void)unorm_isNormalized(notNFC[0]+2, -2, UNORM_NFC, &errorCode);
578     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
579         log_data_err("error: isNormalized([-2]) did not set U_ILLEGAL_ARGUMENT_ERROR but %s - (Are you missing data?)\n", u_errorName(errorCode));
580     }
581 
582     /* specific cases */
583     for(i=0; i<UPRV_LENGTHOF(notNFC); ++i) {
584         errorCode=U_ZERO_ERROR;
585         if(unorm_isNormalized(notNFC[i], -1, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
586             log_data_err("error: isNormalized(notNFC[%d], NFC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
587         }
588         errorCode=U_ZERO_ERROR;
589         if(unorm_isNormalized(notNFC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
590             log_data_err("error: isNormalized(notNFC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
591         }
592     }
593     for(i=0; i<UPRV_LENGTHOF(notNFKC); ++i) {
594         errorCode=U_ZERO_ERROR;
595         if(unorm_isNormalized(notNFKC[i], -1, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
596             log_data_err("error: isNormalized(notNFKC[%d], NFKC) is wrong (%s) - (Are you missing data?)\n", i, u_errorName(errorCode));
597         }
598     }
599 }
600 
TestCheckFCD()601 void TestCheckFCD()
602 {
603   UErrorCode status = U_ZERO_ERROR;
604   static const UChar FAST_[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
605                          0x0A};
606   static const UChar FALSE_[] = {0x0001, 0x0002, 0x02EA, 0x03EB, 0x0300, 0x0301,
607                           0x02B9, 0x0314, 0x0315, 0x0316};
608   static const UChar TRUE_[] = {0x0030, 0x0040, 0x0440, 0x056D, 0x064F, 0x06E7,
609                          0x0050, 0x0730, 0x09EE, 0x1E10};
610 
611   static const UChar datastr[][5] =
612   { {0x0061, 0x030A, 0x1E05, 0x0302, 0},
613     {0x0061, 0x030A, 0x00E2, 0x0323, 0},
614     {0x0061, 0x0323, 0x00E2, 0x0323, 0},
615     {0x0061, 0x0323, 0x1E05, 0x0302, 0} };
616   static const UBool result[] = {UNORM_YES, UNORM_NO, UNORM_NO, UNORM_YES};
617 
618   static const UChar datachar[] = {0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
619                             0x6a,
620                             0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
621                             0xea,
622                             0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
623                             0x0307, 0x0308, 0x0309, 0x030a,
624                             0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326,
625                             0x0327, 0x0328, 0x0329, 0x032a,
626                             0x1e00, 0x1e01, 0x1e02, 0x1e03, 0x1e04, 0x1e05, 0x1e06,
627                             0x1e07, 0x1e08, 0x1e09, 0x1e0a};
628 
629   int count = 0;
630 
631   if (unorm_quickCheck(FAST_, 10, UNORM_FCD, &status) != UNORM_YES)
632     log_data_err("unorm_quickCheck(FCD) failed: expected value for fast unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
633   if (unorm_quickCheck(FALSE_, 10, UNORM_FCD, &status) != UNORM_NO)
634     log_err("unorm_quickCheck(FCD) failed: expected value for error unorm_quickCheck is UNORM_NO\n");
635   if (unorm_quickCheck(TRUE_, 10, UNORM_FCD, &status) != UNORM_YES)
636     log_data_err("unorm_quickCheck(FCD) failed: expected value for correct unorm_quickCheck is UNORM_YES - (Are you missing data?)\n");
637 
638   if (U_FAILURE(status))
639     log_data_err("unorm_quickCheck(FCD) failed: %s - (Are you missing data?)\n", u_errorName(status));
640 
641   while (count < 4)
642   {
643     UBool fcdresult = unorm_quickCheck(datastr[count], 4, UNORM_FCD, &status);
644     if (U_FAILURE(status)) {
645       log_data_err("unorm_quickCheck(FCD) failed: exception occured at data set %d - (Are you missing data?)\n", count);
646       break;
647     }
648     else {
649       if (result[count] != fcdresult) {
650         log_err("unorm_quickCheck(FCD) failed: Data set %d expected value %d\n", count,
651                  result[count]);
652       }
653     }
654     count ++;
655   }
656 
657   /* random checks of long strings */
658   status = U_ZERO_ERROR;
659   srand((unsigned)time( NULL ));
660 
661   for (count = 0; count < 50; count ++)
662   {
663     int size = 0;
664     UBool testresult = UNORM_YES;
665     UChar data[20];
666     UChar norm[100];
667     UChar nfd[100];
668     int normsize = 0;
669     int nfdsize = 0;
670 
671     while (size != 19) {
672       data[size] = datachar[rand() % UPRV_LENGTHOF(datachar)];
673       log_verbose("0x%x", data[size]);
674       normsize += unorm_normalize(data + size, 1, UNORM_NFD, 0,
675                                   norm + normsize, 100 - normsize, &status);
676       if (U_FAILURE(status)) {
677         log_data_err("unorm_quickCheck(FCD) failed: exception occured at data generation - (Are you missing data?)\n");
678         break;
679       }
680       size ++;
681     }
682     log_verbose("\n");
683 
684     nfdsize = unorm_normalize(data, size, UNORM_NFD, 0,
685                               nfd, 100, &status);
686     if (U_FAILURE(status)) {
687       log_data_err("unorm_quickCheck(FCD) failed: exception occured at normalized data generation - (Are you missing data?)\n");
688     }
689 
690     if (nfdsize != normsize || u_memcmp(nfd, norm, nfdsize) != 0) {
691       testresult = UNORM_NO;
692     }
693     if (testresult == UNORM_YES) {
694       log_verbose("result UNORM_YES\n");
695     }
696     else {
697       log_verbose("result UNORM_NO\n");
698     }
699 
700     if (unorm_quickCheck(data, size, UNORM_FCD, &status) != testresult || U_FAILURE(status)) {
701       log_data_err("unorm_quickCheck(FCD) failed: expected %d for random data - (Are you missing data?)\n", testresult);
702     }
703   }
704 }
705 
706 static void
TestAPI()707 TestAPI() {
708     static const UChar in[]={ 0x68, 0xe4 };
709     UChar out[20]={ 0xffff, 0xffff, 0xffff, 0xffff };
710     UErrorCode errorCode;
711     int32_t length;
712 
713     /* try preflighting */
714     errorCode=U_ZERO_ERROR;
715     length=unorm_normalize(in, 2, UNORM_NFD, 0, NULL, 0, &errorCode);
716     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
717         log_data_err("unorm_normalize(pure preflighting NFD)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
718         return;
719     }
720 
721     errorCode=U_ZERO_ERROR;
722     length=unorm_normalize(in, 2, UNORM_NFD, 0, out, 3, &errorCode);
723     if(U_FAILURE(errorCode)) {
724         log_err("unorm_normalize(NFD)=%ld failed with %s\n", length, u_errorName(errorCode));
725         return;
726     }
727     if(length!=3 || out[2]!=0x308 || out[3]!=0xffff) {
728         log_err("unorm_normalize(NFD ma<umlaut>)=%ld failed with out[]=U+%04x U+%04x U+%04x U+%04x\n", length, out[0], out[1], out[2], out[3]);
729         return;
730     }
731     length=unorm_normalize(NULL, 0, UNORM_NFC, 0, NULL, 0, &errorCode);
732     if(U_FAILURE(errorCode)) {
733         log_err("unorm_normalize(src NULL[0], NFC, dest NULL[0])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
734         return;
735     }
736     length=unorm_normalize(NULL, 0, UNORM_NFC, 0, out, 20, &errorCode);
737     if(U_FAILURE(errorCode)) {
738         log_err("unorm_normalize(src NULL[0], NFC, dest out[20])=%ld failed with %s\n", (long)length, u_errorName(errorCode));
739         return;
740     }
741 }
742 
743 /* test cases to improve test code coverage */
/* Code point constants used to build the coverage-test input. */
enum {
    /* compatibility Jamo */
    HANGUL_K_KIYEOK      = 0x3131,  /* NFKD->Jamo L U+1100 */
    HANGUL_K_WEO         = 0x315d,  /* NFKD->Jamo V U+116f */
    HANGUL_K_KIYEOK_SIOS = 0x3133,  /* NFKD->Jamo T U+11aa */

    /* conjoining Jamo */
    HANGUL_KIYEOK        = 0x1100,  /* Jamo L U+1100 */
    HANGUL_WEO           = 0x116f,  /* Jamo V U+116f */
    HANGUL_KIYEOK_SIOS   = 0x11aa,  /* Jamo T U+11aa */

    /* precomposed Hangul syllables */
    HANGUL_AC00          = 0xac00,              /* Hangul syllable = Jamo LV U+ac00 */
    HANGUL_SYLLABLE      = 0xac00+14*28+3,      /* Hangul syllable = U+1100 * U+116f * U+11aa */

    /* musical symbols */
    MUSICAL_VOID_NOTEHEAD = 0x1d157,
    MUSICAL_HALF_NOTE     = 0x1d15e,    /* NFC/NFD->Notehead+Stem */
    MUSICAL_STEM          = 0x1d165,    /* cc=216 */
    MUSICAL_STACCATO      = 0x1d17c     /* cc=220 */
};
761 
762 static void
TestNormCoverage()763 TestNormCoverage() {
764     UChar input[1000], expect[1000], output[1000];
765     UErrorCode errorCode;
766     int32_t i, length, inLength, expectLength, hangulPrefixLength, preflightLength;
767 
768     /* create a long and nasty string with NFKC-unsafe characters */
769     inLength=0;
770 
771     /* 3 Jamos L/V/T, all 8 combinations normal/compatibility */
772     input[inLength++]=HANGUL_KIYEOK;
773     input[inLength++]=HANGUL_WEO;
774     input[inLength++]=HANGUL_KIYEOK_SIOS;
775 
776     input[inLength++]=HANGUL_KIYEOK;
777     input[inLength++]=HANGUL_WEO;
778     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
779 
780     input[inLength++]=HANGUL_KIYEOK;
781     input[inLength++]=HANGUL_K_WEO;
782     input[inLength++]=HANGUL_KIYEOK_SIOS;
783 
784     input[inLength++]=HANGUL_KIYEOK;
785     input[inLength++]=HANGUL_K_WEO;
786     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
787 
788     input[inLength++]=HANGUL_K_KIYEOK;
789     input[inLength++]=HANGUL_WEO;
790     input[inLength++]=HANGUL_KIYEOK_SIOS;
791 
792     input[inLength++]=HANGUL_K_KIYEOK;
793     input[inLength++]=HANGUL_WEO;
794     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
795 
796     input[inLength++]=HANGUL_K_KIYEOK;
797     input[inLength++]=HANGUL_K_WEO;
798     input[inLength++]=HANGUL_KIYEOK_SIOS;
799 
800     input[inLength++]=HANGUL_K_KIYEOK;
801     input[inLength++]=HANGUL_K_WEO;
802     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
803 
804     /* Hangul LV with normal/compatibility Jamo T */
805     input[inLength++]=HANGUL_AC00;
806     input[inLength++]=HANGUL_KIYEOK_SIOS;
807 
808     input[inLength++]=HANGUL_AC00;
809     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
810 
811     /* compatibility Jamo L, V */
812     input[inLength++]=HANGUL_K_KIYEOK;
813     input[inLength++]=HANGUL_K_WEO;
814 
815     hangulPrefixLength=inLength;
816 
817     input[inLength++]=U16_LEAD(MUSICAL_HALF_NOTE);
818     input[inLength++]=U16_TRAIL(MUSICAL_HALF_NOTE);
819     for(i=0; i<200; ++i) {
820         input[inLength++]=U16_LEAD(MUSICAL_STACCATO);
821         input[inLength++]=U16_TRAIL(MUSICAL_STACCATO);
822         input[inLength++]=U16_LEAD(MUSICAL_STEM);
823         input[inLength++]=U16_TRAIL(MUSICAL_STEM);
824     }
825 
826     /* (compatibility) Jamo L, T do not compose */
827     input[inLength++]=HANGUL_K_KIYEOK;
828     input[inLength++]=HANGUL_K_KIYEOK_SIOS;
829 
830     /* quick checks */
831     errorCode=U_ZERO_ERROR;
832     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFD, &errorCode) || U_FAILURE(errorCode)) {
833         log_data_err("error unorm_quickCheck(long input, UNORM_NFD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
834     }
835     errorCode=U_ZERO_ERROR;
836     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKD, &errorCode) || U_FAILURE(errorCode)) {
837         log_data_err("error unorm_quickCheck(long input, UNORM_NFKD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
838     }
839     errorCode=U_ZERO_ERROR;
840     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFC, &errorCode) || U_FAILURE(errorCode)) {
841         log_data_err("error unorm_quickCheck(long input, UNORM_NFC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
842     }
843     errorCode=U_ZERO_ERROR;
844     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_NFKC, &errorCode) || U_FAILURE(errorCode)) {
845         log_data_err("error unorm_quickCheck(long input, UNORM_NFKC)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
846     }
847     errorCode=U_ZERO_ERROR;
848     if(UNORM_NO!=unorm_quickCheck(input, inLength, UNORM_FCD, &errorCode) || U_FAILURE(errorCode)) {
849         log_data_err("error unorm_quickCheck(long input, UNORM_FCD)!=NO (%s) - (Are you missing data?)\n", u_errorName(errorCode));
850     }
851 
852     /* NFKC */
853     expectLength=0;
854     expect[expectLength++]=HANGUL_SYLLABLE;
855 
856     expect[expectLength++]=HANGUL_SYLLABLE;
857 
858     expect[expectLength++]=HANGUL_SYLLABLE;
859 
860     expect[expectLength++]=HANGUL_SYLLABLE;
861 
862     expect[expectLength++]=HANGUL_SYLLABLE;
863 
864     expect[expectLength++]=HANGUL_SYLLABLE;
865 
866     expect[expectLength++]=HANGUL_SYLLABLE;
867 
868     expect[expectLength++]=HANGUL_SYLLABLE;
869 
870     expect[expectLength++]=HANGUL_AC00+3;
871 
872     expect[expectLength++]=HANGUL_AC00+3;
873 
874     expect[expectLength++]=HANGUL_AC00+14*28;
875 
876     expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
877     expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
878     expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
879     expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
880     for(i=0; i<200; ++i) {
881         expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
882         expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
883     }
884     for(i=0; i<200; ++i) {
885         expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
886         expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
887     }
888 
889     expect[expectLength++]=HANGUL_KIYEOK;
890     expect[expectLength++]=HANGUL_KIYEOK_SIOS;
891 
892     /* try destination overflow first */
893     errorCode=U_ZERO_ERROR;
894     preflightLength=unorm_normalize(input, inLength,
895                            UNORM_NFKC, 0,
896                            output, 100, /* too short */
897                            &errorCode);
898     if(errorCode!=U_BUFFER_OVERFLOW_ERROR) {
899         log_data_err("error unorm_normalize(long input, output too short, UNORM_NFKC) did not overflow but %s - (Are you missing data?)\n", u_errorName(errorCode));
900     }
901 
902     /* real NFKC */
903     errorCode=U_ZERO_ERROR;
904     length=unorm_normalize(input, inLength,
905                            UNORM_NFKC, 0,
906                            output, UPRV_LENGTHOF(output),
907                            &errorCode);
908     if(U_FAILURE(errorCode)) {
909         log_data_err("error unorm_normalize(long input, UNORM_NFKC) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
910     } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
911         log_err("error unorm_normalize(long input, UNORM_NFKC) produced wrong result\n");
912         for(i=0; i<length; ++i) {
913             if(output[i]!=expect[i]) {
914                 log_err("    NFKC[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
915                 break;
916             }
917         }
918     }
919     if(length!=preflightLength) {
920         log_err("error unorm_normalize(long input, UNORM_NFKC)==%ld but preflightLength==%ld\n", length, preflightLength);
921     }
922 
923     /* FCD */
924     u_memcpy(expect, input, hangulPrefixLength);
925     expectLength=hangulPrefixLength;
926 
927     expect[expectLength++]=U16_LEAD(MUSICAL_VOID_NOTEHEAD);
928     expect[expectLength++]=U16_TRAIL(MUSICAL_VOID_NOTEHEAD);
929     expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
930     expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
931     for(i=0; i<200; ++i) {
932         expect[expectLength++]=U16_LEAD(MUSICAL_STEM);
933         expect[expectLength++]=U16_TRAIL(MUSICAL_STEM);
934     }
935     for(i=0; i<200; ++i) {
936         expect[expectLength++]=U16_LEAD(MUSICAL_STACCATO);
937         expect[expectLength++]=U16_TRAIL(MUSICAL_STACCATO);
938     }
939 
940     expect[expectLength++]=HANGUL_K_KIYEOK;
941     expect[expectLength++]=HANGUL_K_KIYEOK_SIOS;
942 
943     errorCode=U_ZERO_ERROR;
944     length=unorm_normalize(input, inLength,
945                            UNORM_FCD, 0,
946                            output, UPRV_LENGTHOF(output),
947                            &errorCode);
948     if(U_FAILURE(errorCode)) {
949         log_data_err("error unorm_normalize(long input, UNORM_FCD) failed with %s - (Are you missing data?)\n", u_errorName(errorCode));
950     } else if(length!=expectLength || u_memcmp(output, expect, length)!=0) {
951         log_err("error unorm_normalize(long input, UNORM_FCD) produced wrong result\n");
952         for(i=0; i<length; ++i) {
953             if(output[i]!=expect[i]) {
954                 log_err("    FCD[%d]==U+%04lx expected U+%04lx\n", i, output[i], expect[i]);
955                 break;
956             }
957         }
958     }
959 }
960 
961 /* API test for unorm_concatenate() - for real test strings see intltest/tstnorm.cpp */
962 static void
TestConcatenate(void)963 TestConcatenate(void) {
964     /* "re + 'sume'" */
965     static const UChar
966     left[]={
967         0x72, 0x65, 0
968     },
969     right[]={
970         0x301, 0x73, 0x75, 0x6d, 0xe9, 0
971     },
972     expect[]={
973         0x72, 0xe9, 0x73, 0x75, 0x6d, 0xe9, 0
974     };
975 
976     UChar buffer[100];
977     UErrorCode errorCode;
978     int32_t length;
979 
980     /* left with length, right NUL-terminated */
981     errorCode=U_ZERO_ERROR;
982     length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
983     if(U_FAILURE(errorCode) || length!=6 || 0!=u_memcmp(buffer, expect, length)) {
984         log_data_err("error: unorm_concatenate()=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
985     }
986 
987     /* preflighting */
988     errorCode=U_ZERO_ERROR;
989     length=unorm_concatenate(left, 2, right, -1, NULL, 0, UNORM_NFC, 0, &errorCode);
990     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6) {
991         log_data_err("error: unorm_concatenate(preflighting)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
992     }
993 
994     buffer[2]=0x5555;
995     errorCode=U_ZERO_ERROR;
996     length=unorm_concatenate(left, 2, right, -1, buffer, 1, UNORM_NFC, 0, &errorCode);
997     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=6 || buffer[2]!=0x5555) {
998         log_data_err("error: unorm_concatenate(preflighting 2)=%ld (expect 6) failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
999     }
1000 
1001     /* enter with U_FAILURE */
1002     buffer[2]=0xaaaa;
1003     errorCode=U_UNEXPECTED_TOKEN;
1004     length=unorm_concatenate(left, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1005     if(errorCode!=U_UNEXPECTED_TOKEN || buffer[2]!=0xaaaa) {
1006         log_err("error: unorm_concatenate(failure)=%ld failed with %s\n", length, u_errorName(errorCode));
1007     }
1008 
1009     /* illegal arguments */
1010     buffer[2]=0xaaaa;
1011     errorCode=U_ZERO_ERROR;
1012     length=unorm_concatenate(NULL, 2, right, -1, buffer, 100, UNORM_NFC, 0, &errorCode);
1013     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || buffer[2]!=0xaaaa) {
1014         log_data_err("error: unorm_concatenate(left=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1015     }
1016 
1017     errorCode=U_ZERO_ERROR;
1018     length=unorm_concatenate(left, 2, right, -1, NULL, 100, UNORM_NFC, 0, &errorCode);
1019     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1020         log_data_err("error: unorm_concatenate(buffer=NULL)=%ld failed with %s - (Are you missing data?)\n", length, u_errorName(errorCode));
1021     }
1022 }
1023 
/*
 * Separator code unit (0x2b) placed between the expected output pieces in the
 * expectation arrays that _testIter() scans.
 */
enum {
    _PLUS=0x2b
};
1027 
/*
 * Mode names for log messages, indexed by UNormalizationMode value
 * (used as _modeString[mode] in _testIter()).
 * NOTE(review): "0" looks like a placeholder for the unused enum value 0 -
 * confirm against the UNormalizationMode definition in unorm.h.
 */
static const char *const _modeString[UNORM_MODE_COUNT]={
    "0", "NONE", "NFD", "NFKD", "NFC", "NFKC", "FCD"
};
1031 
1032 static void
_testIter(const UChar * src,int32_t srcLength,UCharIterator * iter,UNormalizationMode mode,UBool forward,const UChar * out,int32_t outLength,const int32_t * srcIndexes,int32_t srcIndexesLength)1033 _testIter(const UChar *src, int32_t srcLength,
1034           UCharIterator *iter, UNormalizationMode mode, UBool forward,
1035           const UChar *out, int32_t outLength,
1036           const int32_t *srcIndexes, int32_t srcIndexesLength) {
1037     UChar buffer[4];
1038     const UChar *expect, *outLimit, *in;
1039     int32_t length, i, expectLength, expectIndex, prevIndex, index, inLength;
1040     UErrorCode errorCode;
1041     UBool neededToNormalize, expectNeeded;
1042 
1043     errorCode=U_ZERO_ERROR;
1044     outLimit=out+outLength;
1045     if(forward) {
1046         expect=out;
1047         i=index=0;
1048     } else {
1049         expect=outLimit;
1050         i=srcIndexesLength-2;
1051         index=srcLength;
1052     }
1053 
1054     for(;;) {
1055         prevIndex=index;
1056         if(forward) {
1057             if(!iter->hasNext(iter)) {
1058                 return;
1059             }
1060             length=unorm_next(iter,
1061                               buffer, UPRV_LENGTHOF(buffer),
1062                               mode, 0,
1063                               (UBool)(out!=NULL), &neededToNormalize,
1064                               &errorCode);
1065             expectIndex=srcIndexes[i+1];
1066             in=src+prevIndex;
1067             inLength=expectIndex-prevIndex;
1068 
1069             if(out!=NULL) {
1070                 /* get output piece from between plus signs */
1071                 expectLength=0;
1072                 while((expect+expectLength)!=outLimit && expect[expectLength]!=_PLUS) {
1073                     ++expectLength;
1074                 }
1075                 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1076             } else {
1077                 expect=in;
1078                 expectLength=inLength;
1079                 expectNeeded=FALSE;
1080             }
1081         } else {
1082             if(!iter->hasPrevious(iter)) {
1083                 return;
1084             }
1085             length=unorm_previous(iter,
1086                                   buffer, UPRV_LENGTHOF(buffer),
1087                                   mode, 0,
1088                                   (UBool)(out!=NULL), &neededToNormalize,
1089                                   &errorCode);
1090             expectIndex=srcIndexes[i];
1091             in=src+expectIndex;
1092             inLength=prevIndex-expectIndex;
1093 
1094             if(out!=NULL) {
1095                 /* get output piece from between plus signs */
1096                 expectLength=0;
1097                 while(expect!=out && expect[-1]!=_PLUS) {
1098                     ++expectLength;
1099                     --expect;
1100                 }
1101                 expectNeeded=(UBool)(0!=u_memcmp(buffer, in, inLength));
1102             } else {
1103                 expect=in;
1104                 expectLength=inLength;
1105                 expectNeeded=FALSE;
1106             }
1107         }
1108         index=iter->getIndex(iter, UITER_CURRENT);
1109 
1110         if(U_FAILURE(errorCode)) {
1111             log_data_err("error unorm iteration (next/previous %d %s)[%d]: %s - (Are you missing data?)\n",
1112                     forward, _modeString[mode], i, u_errorName(errorCode));
1113             return;
1114         }
1115         if(expectIndex!=index) {
1116             log_err("error unorm iteration (next/previous %d %s): index[%d] wrong, got %d expected %d\n",
1117                     forward, _modeString[mode], i, index, expectIndex);
1118             return;
1119         }
1120         if(expectLength!=length) {
1121             log_err("error unorm iteration (next/previous %d %s): length[%d] wrong, got %d expected %d\n",
1122                     forward, _modeString[mode], i, length, expectLength);
1123             return;
1124         }
1125         if(0!=u_memcmp(expect, buffer, length)) {
1126             log_err("error unorm iteration (next/previous %d %s): output string[%d] wrong\n",
1127                     forward, _modeString[mode], i);
1128             return;
1129         }
1130         if(neededToNormalize!=expectNeeded) {
1131         }
1132 
1133         if(forward) {
1134             expect+=expectLength+1; /* go after the + */
1135             ++i;
1136         } else {
1137             --expect; /* go before the + */
1138             --i;
1139         }
1140     }
1141 }
1142 
1143 static void
TestNextPrevious()1144 TestNextPrevious() {
1145     static const UChar
1146     src[]={ /* input string */
1147         0xa0, 0xe4, 0x63, 0x302, 0x327, 0xac00, 0x3133
1148     },
1149     nfd[]={ /* + separates expected output pieces */
1150         0xa0, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x3133
1151     },
1152     nfkd[]={
1153         0x20, _PLUS, 0x61, 0x308, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0x1100, 0x1161, _PLUS, 0x11aa
1154     },
1155     nfc[]={
1156         0xa0, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1157     },
1158     nfkc[]={
1159         0x20, _PLUS, 0xe4, _PLUS, 0xe7, 0x302, _PLUS, 0xac03
1160     },
1161     fcd[]={
1162         0xa0, _PLUS, 0xe4, _PLUS, 0x63, 0x327, 0x302, _PLUS, 0xac00, _PLUS, 0x3133
1163     };
1164 
1165     /* expected iterator indexes in the source string for each iteration piece */
1166     static const int32_t
1167     nfdIndexes[]={
1168         0, 1, 2, 5, 6, 7
1169     },
1170     nfkdIndexes[]={
1171         0, 1, 2, 5, 6, 7
1172     },
1173     nfcIndexes[]={
1174         0, 1, 2, 5, 6, 7
1175     },
1176     nfkcIndexes[]={
1177         0, 1, 2, 5, 7
1178     },
1179     fcdIndexes[]={
1180         0, 1, 2, 5, 6, 7
1181     };
1182 
1183     UCharIterator iter;
1184 
1185     UChar buffer[4];
1186     int32_t length;
1187 
1188     UBool neededToNormalize;
1189     UErrorCode errorCode;
1190 
1191     uiter_setString(&iter, src, UPRV_LENGTHOF(src));
1192 
1193     /* test iteration with doNormalize */
1194     iter.index=0;
1195     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, TRUE, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4);
1196     iter.index=0;
1197     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, TRUE, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4);
1198     iter.index=0;
1199     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, TRUE, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4);
1200     iter.index=0;
1201     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, TRUE, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4);
1202     iter.index=0;
1203     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, TRUE, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4);
1204 
1205     iter.index=iter.length;
1206     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, FALSE, nfd, UPRV_LENGTHOF(nfd), nfdIndexes, sizeof(nfdIndexes)/4);
1207     iter.index=iter.length;
1208     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, FALSE, nfkd, UPRV_LENGTHOF(nfkd), nfkdIndexes, sizeof(nfkdIndexes)/4);
1209     iter.index=iter.length;
1210     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, FALSE, nfc, UPRV_LENGTHOF(nfc), nfcIndexes, sizeof(nfcIndexes)/4);
1211     iter.index=iter.length;
1212     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, FALSE, nfkc, UPRV_LENGTHOF(nfkc), nfkcIndexes, sizeof(nfkcIndexes)/4);
1213     iter.index=iter.length;
1214     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, FALSE, fcd, UPRV_LENGTHOF(fcd), fcdIndexes, sizeof(fcdIndexes)/4);
1215 
1216     /* test iteration without doNormalize */
1217     iter.index=0;
1218     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, TRUE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1219     iter.index=0;
1220     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, TRUE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1221     iter.index=0;
1222     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, TRUE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1223     iter.index=0;
1224     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, TRUE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1225     iter.index=0;
1226     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, TRUE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1227 
1228     iter.index=iter.length;
1229     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFD, FALSE, NULL, 0, nfdIndexes, sizeof(nfdIndexes)/4);
1230     iter.index=iter.length;
1231     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKD, FALSE, NULL, 0, nfkdIndexes, sizeof(nfkdIndexes)/4);
1232     iter.index=iter.length;
1233     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFC, FALSE, NULL, 0, nfcIndexes, sizeof(nfcIndexes)/4);
1234     iter.index=iter.length;
1235     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_NFKC, FALSE, NULL, 0, nfkcIndexes, sizeof(nfkcIndexes)/4);
1236     iter.index=iter.length;
1237     _testIter(src, UPRV_LENGTHOF(src), &iter, UNORM_FCD, FALSE, NULL, 0, fcdIndexes, sizeof(fcdIndexes)/4);
1238 
1239     /* try without neededToNormalize */
1240     errorCode=U_ZERO_ERROR;
1241     buffer[0]=5;
1242     iter.index=1;
1243     length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1244                       UNORM_NFD, 0, TRUE, NULL,
1245                       &errorCode);
1246     if(U_FAILURE(errorCode) || length!=2 || buffer[0]!=nfd[2] || buffer[1]!=nfd[3]) {
1247         log_data_err("error unorm_next(without needed) %s - (Are you missing data?)\n", u_errorName(errorCode));
1248         return;
1249     }
1250 
1251     /* preflight */
1252     neededToNormalize=9;
1253     iter.index=1;
1254     length=unorm_next(&iter, NULL, 0,
1255                       UNORM_NFD, 0, TRUE, &neededToNormalize,
1256                       &errorCode);
1257     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2) {
1258         log_err("error unorm_next(pure preflighting) %s\n", u_errorName(errorCode));
1259         return;
1260     }
1261 
1262     errorCode=U_ZERO_ERROR;
1263     buffer[0]=buffer[1]=5;
1264     neededToNormalize=9;
1265     iter.index=1;
1266     length=unorm_next(&iter, buffer, 1,
1267                       UNORM_NFD, 0, TRUE, &neededToNormalize,
1268                       &errorCode);
1269     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || neededToNormalize!=FALSE || length!=2 || buffer[1]!=5) {
1270         log_err("error unorm_next(preflighting) %s\n", u_errorName(errorCode));
1271         return;
1272     }
1273 
1274     /* no iterator */
1275     errorCode=U_ZERO_ERROR;
1276     buffer[0]=buffer[1]=5;
1277     neededToNormalize=9;
1278     iter.index=1;
1279     length=unorm_next(NULL, buffer, UPRV_LENGTHOF(buffer),
1280                       UNORM_NFD, 0, TRUE, &neededToNormalize,
1281                       &errorCode);
1282     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1283         log_err("error unorm_next(no iterator) %s\n", u_errorName(errorCode));
1284         return;
1285     }
1286 
1287     /* illegal mode */
1288     buffer[0]=buffer[1]=5;
1289     neededToNormalize=9;
1290     iter.index=1;
1291     length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1292                       (UNormalizationMode)0, 0, TRUE, &neededToNormalize,
1293                       &errorCode);
1294     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1295         log_err("error unorm_next(illegal mode) %s\n", u_errorName(errorCode));
1296         return;
1297     }
1298 
1299     /* error coming in */
1300     errorCode=U_MISPLACED_QUANTIFIER;
1301     buffer[0]=5;
1302     iter.index=1;
1303     length=unorm_next(&iter, buffer, UPRV_LENGTHOF(buffer),
1304                       UNORM_NFD, 0, TRUE, NULL,
1305                       &errorCode);
1306     if(errorCode!=U_MISPLACED_QUANTIFIER) {
1307         log_err("error unorm_next(U_MISPLACED_QUANTIFIER) %s\n", u_errorName(errorCode));
1308         return;
1309     }
1310 }
1311 
1312 static void
TestFCNFKCClosure(void)1313 TestFCNFKCClosure(void) {
1314     static const struct {
1315         UChar32 c;
1316         const UChar s[6];
1317     } tests[]={
1318         { 0x00C4, { 0 } },
1319         { 0x00E4, { 0 } },
1320         { 0x037A, { 0x0020, 0x03B9, 0 } },
1321         { 0x03D2, { 0x03C5, 0 } },
1322         { 0x20A8, { 0x0072, 0x0073, 0 } },
1323         { 0x210B, { 0x0068, 0 } },
1324         { 0x210C, { 0x0068, 0 } },
1325         { 0x2121, { 0x0074, 0x0065, 0x006C, 0 } },
1326         { 0x2122, { 0x0074, 0x006D, 0 } },
1327         { 0x2128, { 0x007A, 0 } },
1328         { 0x1D5DB, { 0x0068, 0 } },
1329         { 0x1D5ED, { 0x007A, 0 } },
1330         { 0x0061, { 0 } }
1331     };
1332 
1333     UChar buffer[8];
1334     UErrorCode errorCode;
1335     int32_t i, length;
1336 
1337     for(i=0; i<UPRV_LENGTHOF(tests); ++i) {
1338         errorCode=U_ZERO_ERROR;
1339         length=u_getFC_NFKC_Closure(tests[i].c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1340         if(U_FAILURE(errorCode) || length!=u_strlen(buffer) || 0!=u_strcmp(tests[i].s, buffer)) {
1341             log_data_err("u_getFC_NFKC_Closure(U+%04lx) is wrong (%s) - (Are you missing data?)\n", tests[i].c, u_errorName(errorCode));
1342         }
1343     }
1344 
1345     /* error handling */
1346     errorCode=U_ZERO_ERROR;
1347     length=u_getFC_NFKC_Closure(0x5c, NULL, UPRV_LENGTHOF(buffer), &errorCode);
1348     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1349         log_err("u_getFC_NFKC_Closure(dest=NULL) is wrong (%s)\n", u_errorName(errorCode));
1350     }
1351 
1352     length=u_getFC_NFKC_Closure(0x5c, buffer, UPRV_LENGTHOF(buffer), &errorCode);
1353     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1354         log_err("u_getFC_NFKC_Closure(U_FAILURE) is wrong (%s)\n", u_errorName(errorCode));
1355     }
1356 }
1357 
1358 static void
TestQuickCheckPerCP()1359 TestQuickCheckPerCP() {
1360     UErrorCode errorCode;
1361     UChar32 c, lead, trail;
1362     UChar s[U16_MAX_LENGTH], nfd[16];
1363     int32_t length, lccc1, lccc2, tccc1, tccc2;
1364     int32_t qc1, qc2;
1365 
1366     if(
1367         u_getIntPropertyMaxValue(UCHAR_NFD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1368         u_getIntPropertyMaxValue(UCHAR_NFKD_QUICK_CHECK)!=(int32_t)UNORM_YES ||
1369         u_getIntPropertyMaxValue(UCHAR_NFC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1370         u_getIntPropertyMaxValue(UCHAR_NFKC_QUICK_CHECK)!=(int32_t)UNORM_MAYBE ||
1371         u_getIntPropertyMaxValue(UCHAR_LEAD_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS) ||
1372         u_getIntPropertyMaxValue(UCHAR_TRAIL_CANONICAL_COMBINING_CLASS)!=u_getIntPropertyMaxValue(UCHAR_CANONICAL_COMBINING_CLASS)
1373     ) {
1374         log_err("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS\n");
1375     }
1376 
1377     /*
1378      * compare the quick check property values for some code points
1379      * to the quick check results for checking same-code point strings
1380      */
1381     errorCode=U_ZERO_ERROR;
1382     c=0;
1383     while(c<0x110000) {
1384         length=0;
1385         U16_APPEND_UNSAFE(s, length, c);
1386 
1387         qc1=u_getIntPropertyValue(c, UCHAR_NFC_QUICK_CHECK);
1388         qc2=unorm_quickCheck(s, length, UNORM_NFC, &errorCode);
1389         if(qc1!=qc2) {
1390             log_data_err("u_getIntPropertyValue(NFC)=%d != %d=unorm_quickCheck(NFC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1391         }
1392 
1393         qc1=u_getIntPropertyValue(c, UCHAR_NFD_QUICK_CHECK);
1394         qc2=unorm_quickCheck(s, length, UNORM_NFD, &errorCode);
1395         if(qc1!=qc2) {
1396             log_data_err("u_getIntPropertyValue(NFD)=%d != %d=unorm_quickCheck(NFD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1397         }
1398 
1399         qc1=u_getIntPropertyValue(c, UCHAR_NFKC_QUICK_CHECK);
1400         qc2=unorm_quickCheck(s, length, UNORM_NFKC, &errorCode);
1401         if(qc1!=qc2) {
1402             log_data_err("u_getIntPropertyValue(NFKC)=%d != %d=unorm_quickCheck(NFKC) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1403         }
1404 
1405         qc1=u_getIntPropertyValue(c, UCHAR_NFKD_QUICK_CHECK);
1406         qc2=unorm_quickCheck(s, length, UNORM_NFKD, &errorCode);
1407         if(qc1!=qc2) {
1408             log_data_err("u_getIntPropertyValue(NFKD)=%d != %d=unorm_quickCheck(NFKD) for U+%04x - (Are you missing data?)\n", qc1, qc2, c);
1409         }
1410 
1411         length=unorm_normalize(s, length, UNORM_NFD, 0, nfd, UPRV_LENGTHOF(nfd), &errorCode);
1412         if (U_FAILURE(errorCode)) {
1413             log_data_err("%s:%d errorCode=%s\n", __FILE__, __LINE__, u_errorName(errorCode));
1414             break;
1415         }
1416         /* length-length == 0 is used to get around a compiler warning. */
1417         U16_GET(nfd, 0, length-length, length, lead);
1418         U16_GET(nfd, 0, length-1, length, trail);
1419 
1420         lccc1=u_getIntPropertyValue(c, UCHAR_LEAD_CANONICAL_COMBINING_CLASS);
1421         lccc2=u_getCombiningClass(lead);
1422         tccc1=u_getIntPropertyValue(c, UCHAR_TRAIL_CANONICAL_COMBINING_CLASS);
1423         tccc2=u_getCombiningClass(trail);
1424 
1425         if(lccc1!=lccc2) {
1426             log_data_err("u_getIntPropertyValue(lccc)=%d != %d=u_getCombiningClass(lead) for U+%04x\n",
1427                     lccc1, lccc2, c);
1428         }
1429         if(tccc1!=tccc2) {
1430             log_data_err("u_getIntPropertyValue(tccc)=%d != %d=u_getCombiningClass(trail) for U+%04x\n",
1431                     tccc1, tccc2, c);
1432         }
1433 
1434         /* skip some code points */
1435         c=(20*c)/19+1;
1436     }
1437 }
1438 
1439 static void
TestComposition(void)1440 TestComposition(void) {
1441     static const struct {
1442         UNormalizationMode mode;
1443         uint32_t options;
1444         UChar input[12];
1445         UChar expect[12];
1446     } cases[]={
1447         /*
1448          * special cases for UAX #15 bug
1449          * see Unicode Corrigendum #5: Normalization Idempotency
1450          * at http://unicode.org/versions/corrigendum5.html
1451          * (was Public Review Issue #29)
1452          */
1453         { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327 },         { 0x1100, 0x0300, 0x1161, 0x0327 } },
1454         { UNORM_NFC, 0, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 }, { 0x1100, 0x0300, 0x1161, 0x0327, 0x11a8 } },
1455         { UNORM_NFC, 0, { 0xac00, 0x0300, 0x0327, 0x11a8 },         { 0xac00, 0x0327, 0x0300, 0x11a8 } },
1456         { UNORM_NFC, 0, { 0x0b47, 0x0300, 0x0b3e },                 { 0x0b47, 0x0300, 0x0b3e } },
1457 
1458         /* TODO: add test cases for UNORM_FCC here (j2151) */
1459     };
1460 
1461     UChar output[16];
1462     UErrorCode errorCode;
1463     int32_t i, length;
1464 
1465     for(i=0; i<UPRV_LENGTHOF(cases); ++i) {
1466         errorCode=U_ZERO_ERROR;
1467         length=unorm_normalize(
1468                     cases[i].input, -1,
1469                     cases[i].mode, cases[i].options,
1470                     output, UPRV_LENGTHOF(output),
1471                     &errorCode);
1472         if( U_FAILURE(errorCode) ||
1473             length!=u_strlen(cases[i].expect) ||
1474             0!=u_memcmp(output, cases[i].expect, length)
1475         ) {
1476             log_data_err("unexpected result for case %d - (Are you missing data?)\n", i);
1477         }
1478     }
1479 }
1480 
1481 static void
TestGetDecomposition()1482 TestGetDecomposition() {
1483     UChar decomp[32];
1484     int32_t length;
1485 
1486     UErrorCode errorCode=U_ZERO_ERROR;
1487     const UNormalizer2 *n2=unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE_CONTIGUOUS, &errorCode);
1488     if(U_FAILURE(errorCode)) {
1489         log_err_status(errorCode, "unorm2_getInstance(nfc/FCC) failed: %s\n", u_errorName(errorCode));
1490         return;
1491     }
1492 
1493     length=unorm2_getDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1494     if(U_FAILURE(errorCode) || length>=0) {
1495         log_err("unorm2_getDecomposition(fcc, space) failed\n");
1496     }
1497     errorCode=U_ZERO_ERROR;
1498     length=unorm2_getDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1499     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1500         log_err("unorm2_getDecomposition(fcc, a-umlaut) failed\n");
1501     }
1502     errorCode=U_ZERO_ERROR;
1503     length=unorm2_getDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1504     if(U_FAILURE(errorCode) || length!=3 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0x11a8 || decomp[3]!=0) {
1505         log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) failed\n");
1506     }
1507     errorCode=U_ZERO_ERROR;
1508     length=unorm2_getDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1509     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
1510         log_err("unorm2_getDecomposition(fcc, Hangul syllable U+AC01) overflow failed\n");
1511     }
1512     errorCode=U_ZERO_ERROR;
1513     length=unorm2_getDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1514     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1515         log_err("unorm2_getDecomposition(fcc, capacity<0) failed\n");
1516     }
1517     errorCode=U_ZERO_ERROR;
1518     length=unorm2_getDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1519     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1520         log_err("unorm2_getDecomposition(fcc, decomposition=NULL) failed\n");
1521     }
1522 }
1523 
1524 static void
TestGetRawDecomposition()1525 TestGetRawDecomposition() {
1526     UChar decomp[32];
1527     int32_t length;
1528 
1529     UErrorCode errorCode=U_ZERO_ERROR;
1530     const UNormalizer2 *n2=unorm2_getNFKCInstance(&errorCode);
1531     if(U_FAILURE(errorCode)) {
1532         log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1533         return;
1534     }
1535     /*
1536      * Raw decompositions from NFKC data are the Unicode Decomposition_Mapping values,
1537      * without recursive decomposition.
1538      */
1539 
1540     length=unorm2_getRawDecomposition(n2, 0x20, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1541     if(U_FAILURE(errorCode) || length>=0) {
1542         log_err("unorm2_getDecomposition(nfkc, space) failed\n");
1543     }
1544     errorCode=U_ZERO_ERROR;
1545     length=unorm2_getRawDecomposition(n2, 0xe4, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1546     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x61 || decomp[1]!=0x308 || decomp[2]!=0) {
1547         log_err("unorm2_getDecomposition(nfkc, a-umlaut) failed\n");
1548     }
1549     /* U+1E08 LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
1550     errorCode=U_ZERO_ERROR;
1551     length=unorm2_getRawDecomposition(n2, 0x1e08, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1552     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xc7 || decomp[1]!=0x301 || decomp[2]!=0) {
1553         log_err("unorm2_getDecomposition(nfkc, c-cedilla-acute) failed\n");
1554     }
1555     /* U+212B ANGSTROM SIGN */
1556     errorCode=U_ZERO_ERROR;
1557     length=unorm2_getRawDecomposition(n2, 0x212b, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1558     if(U_FAILURE(errorCode) || length!=1 || decomp[0]!=0xc5 || decomp[1]!=0) {
1559         log_err("unorm2_getDecomposition(nfkc, angstrom sign) failed\n");
1560     }
1561     errorCode=U_ZERO_ERROR;
1562     length=unorm2_getRawDecomposition(n2, 0xac00, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1563     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0x1100 || decomp[1]!=0x1161 || decomp[2]!=0) {
1564         log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC00) failed\n");
1565     }
1566     /* A Hangul LVT syllable has a raw decomposition of an LV syllable + T. */
1567     errorCode=U_ZERO_ERROR;
1568     length=unorm2_getRawDecomposition(n2, 0xac01, decomp, UPRV_LENGTHOF(decomp), &errorCode);
1569     if(U_FAILURE(errorCode) || length!=2 || decomp[0]!=0xac00 || decomp[1]!=0x11a8 || decomp[2]!=0) {
1570         log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) failed\n");
1571     }
1572     errorCode=U_ZERO_ERROR;
1573     length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 0, &errorCode);
1574     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=2) {
1575         log_err("unorm2_getDecomposition(nfkc, Hangul syllable U+AC01) overflow failed\n");
1576     }
1577     errorCode=U_ZERO_ERROR;
1578     length=unorm2_getRawDecomposition(n2, 0xac01, decomp, -1, &errorCode);
1579     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1580         log_err("unorm2_getDecomposition(nfkc, capacity<0) failed\n");
1581     }
1582     errorCode=U_ZERO_ERROR;
1583     length=unorm2_getRawDecomposition(n2, 0xac01, NULL, 4, &errorCode);
1584     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1585         log_err("unorm2_getDecomposition(nfkc, decomposition=NULL) failed\n");
1586     }
1587 }
1588 
1589 static void
TestAppendRestoreMiddle()1590 TestAppendRestoreMiddle() {
1591     UChar a[20]={ 0x61, 0x62, 0x63, 0x41, 0x327, 0 };  /* last chars are 'A' and 'cedilla' NFC */
1592     static const UChar b[]={ 0x30A, 0x64, 0x65, 0x66, 0 };  /* first char is 'ring above' NFC */
1593     /* NFC: C5 is 'A with ring above' */
1594     static const UChar expected[]={ 0x61, 0x62, 0x63, 0xC5, 0x327, 0x64, 0x65, 0x66 };
1595     int32_t length;
1596     UErrorCode errorCode=U_ZERO_ERROR;
1597     const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1598     if(U_FAILURE(errorCode)) {
1599         log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1600         return;
1601     }
1602     /*
1603      * Use length=-1 to fool the estimate of the ReorderingBuffer capacity.
1604      * Use a capacity of 6 or 7 so that the middle sequence <41 327 30A>
1605      * still fits into a[] but the full result still overflows this capacity.
1606      * (Let it modify the destination buffer before reallocating internally.)
1607      */
1608     length=unorm2_append(n2, a, -1, 6, b, -1, &errorCode);
1609     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=UPRV_LENGTHOF(expected)) {
1610         log_err("unorm2_append(preflight) returned wrong length of %d\n", (int)length);
1611         return;
1612     }
1613     /* Verify that the middle is unchanged or restored. (ICU ticket #7848) */
1614     if(a[0]!=0x61 || a[1]!=0x62 || a[2]!=0x63 || a[3]!=0x41 || a[4]!=0x327 || a[5]!=0) {
1615         log_err("unorm2_append(overflow) modified the first string\n");
1616         return;
1617     }
1618     errorCode=U_ZERO_ERROR;
1619     length=unorm2_append(n2, a, -1, UPRV_LENGTHOF(a), b, -1, &errorCode);
1620     if(U_FAILURE(errorCode) || length!=UPRV_LENGTHOF(expected) || 0!=u_memcmp(a, expected, length)) {
1621         log_err("unorm2_append(real) failed - %s, length %d\n", u_errorName(errorCode), (int)length);
1622         return;
1623     }
1624 }
1625 
1626 static void
TestGetEasyToUseInstance()1627 TestGetEasyToUseInstance() {
1628     static const UChar in[]={
1629         0xA0,  /* -> <noBreak> 0020 */
1630         0xC7, 0x301  /* = 1E08 = 0043 0327 0301 */
1631     };
1632     UChar out[32];
1633     int32_t length;
1634 
1635     UErrorCode errorCode=U_ZERO_ERROR;
1636     const UNormalizer2 *n2=unorm2_getNFCInstance(&errorCode);
1637     if(U_FAILURE(errorCode)) {
1638         log_err_status(errorCode, "unorm2_getNFCInstance() failed: %s\n", u_errorName(errorCode));
1639         return;
1640     }
1641     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1642     if(U_FAILURE(errorCode) || length!=2 || out[0]!=0xa0 || out[1]!=0x1e08) {
1643         log_err("unorm2_getNFCInstance() did not return an NFC instance (normalized length=%d; %s)\n",
1644                 (int)length, u_errorName(errorCode));
1645     }
1646 
1647     errorCode=U_ZERO_ERROR;
1648     n2=unorm2_getNFDInstance(&errorCode);
1649     if(U_FAILURE(errorCode)) {
1650         log_err_status(errorCode, "unorm2_getNFDInstance() failed: %s\n", u_errorName(errorCode));
1651         return;
1652     }
1653     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1654     if(U_FAILURE(errorCode) || length!=4 || out[0]!=0xa0 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1655         log_err("unorm2_getNFDInstance() did not return an NFD instance (normalized length=%d; %s)\n",
1656                 (int)length, u_errorName(errorCode));
1657     }
1658 
1659     errorCode=U_ZERO_ERROR;
1660     n2=unorm2_getNFKCInstance(&errorCode);
1661     if(U_FAILURE(errorCode)) {
1662         log_err_status(errorCode, "unorm2_getNFKCInstance() failed: %s\n", u_errorName(errorCode));
1663         return;
1664     }
1665     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1666     if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e08) {
1667         log_err("unorm2_getNFKCInstance() did not return an NFKC instance (normalized length=%d; %s)\n",
1668                 (int)length, u_errorName(errorCode));
1669     }
1670 
1671     errorCode=U_ZERO_ERROR;
1672     n2=unorm2_getNFKDInstance(&errorCode);
1673     if(U_FAILURE(errorCode)) {
1674         log_err_status(errorCode, "unorm2_getNFKDInstance() failed: %s\n", u_errorName(errorCode));
1675         return;
1676     }
1677     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1678     if(U_FAILURE(errorCode) || length!=4 || out[0]!=0x20 || out[1]!=0x43 || out[2]!=0x327 || out[3]!=0x301) {
1679         log_err("unorm2_getNFKDInstance() did not return an NFKD instance (normalized length=%d; %s)\n",
1680                 (int)length, u_errorName(errorCode));
1681     }
1682 
1683     errorCode=U_ZERO_ERROR;
1684     n2=unorm2_getNFKCCasefoldInstance(&errorCode);
1685     if(U_FAILURE(errorCode)) {
1686         log_err_status(errorCode, "unorm2_getNFKCCasefoldInstance() failed: %s\n", u_errorName(errorCode));
1687         return;
1688     }
1689     length=unorm2_normalize(n2, in, UPRV_LENGTHOF(in), out, UPRV_LENGTHOF(out), &errorCode);
1690     if(U_FAILURE(errorCode) || length!=2 || out[0]!=0x20 || out[1]!=0x1e09) {
1691         log_err("unorm2_getNFKCCasefoldInstance() did not return an NFKC_Casefold instance (normalized length=%d; %s)\n",
1692                 (int)length, u_errorName(errorCode));
1693     }
1694 }
1695 
1696 #endif /* #if !UCONFIG_NO_NORMALIZATION */
1697