1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /********************************************************************************
9 *
10 * File CFRTST.C
11 *
12 * Modification History:
13 *        Name                     Description
14 *     Madhu Katragadda            Ported for C API
15 *********************************************************************************/
16 /**
17  * CollationFrenchTest is a third level test class.  This tests the locale
18  * specific primary, secondary and tertiary rules.  For example, the ignorable
19  * character '-' in string "black-bird".  The en_US locale uses the default
20  * collation rules as its sorting sequence.
21  */
22 
23 #include <stdlib.h>
24 
25 #include "unicode/utypes.h"
26 
27 #if !UCONFIG_NO_COLLATION
28 
29 #include "unicode/ucol.h"
30 #include "unicode/uloc.h"
31 #include "cintltst.h"
32 #include "ccolltst.h"
33 #include "callcoll.h"
34 #include "cfrtst.h"
35 #include "cmemory.h"
36 #include "unicode/ustring.h"
37 #include "string.h"
38 
/* Collator handle shared by TestTertiary, TestSecondary and TestExtra; each of
 * those tests opens it with ucol_open("fr_CA", ...) and closes it before returning. */
static  UCollator *myCollation;
/* Left-hand strings for TestTertiary: row i is compared against
 * testTargetCases[i] and the expected outcome is results[i].
 * Every row ends with a 0x0000 terminator. */
const static UChar testSourceCases[][MAX_TOKEN_LEN] =
{
    {0x0061/*'a'*/, 0x0062/*'b'*/, 0x0063/*'c'*/, 0x0000},
    {0x0043/*'C'*/, 0x004f/*'O'*/, 0x0054/*'T'*/, 0x0045/*'E'*/, 0x0000},
    {0x0063/*'c'*/, 0x006f/*'o'*/, 0x002d/*'-'*/, 0x006f/*'o'*/, 0x0070/*'p'*/, 0x0000},
    {0x0070/*'p'*/, 0x00EA, 0x0063/*'c'*/, 0x0068/*'h'*/, 0x0065/*'e'*/, 0x0000},
    {0x0070/*'p'*/, 0x00EA, 0x0063/*'c'*/, 0x0068/*'h'*/, 0x0065/*'e'*/, 0x0072/*'r'*/, 0x0000},
    {0x0070/*'p'*/, 0x00E9, 0x0063/*'c'*/, 0x0068/*'h'*/, 0x0065/*'e'*/, 0x0072/*'r'*/, 0x0000},
    {0x0070/*'p'*/, 0x00E9, 0x0063/*'c'*/, 0x0068/*'h'*/, 0x0065/*'e'*/, 0x0072/*'r'*/, 0x0000},
    {0x0048/*'H'*/, 0x0065/*'e'*/, 0x006c/*'l'*/, 0x006c/*'l'*/, 0x006f/*'o'*/, 0x0000},
    {0x01f1, 0x0000},
    {0xfb00, 0x0000},
    {0x01fa, 0x0000},
    {0x0101, 0x0000}
};
55 
/* Right-hand strings for TestTertiary: row i is the comparison target for
 * testSourceCases[i]. Every row ends with a 0x0000 terminator. */
const static UChar testTargetCases[][MAX_TOKEN_LEN] =
{
    {0x0041/*'A'*/, 0x0042/*'B'*/, 0x0043/*'C'*/, 0x0000},
    {0x0063/*'c'*/, 0x00f4, 0x0074/*'t'*/, 0x0065/*'e'*/, 0x0000},
    {0x0043/*'C'*/, 0x004f/*'O'*/, 0x004f/*'O'*/, 0x0050/*'P'*/, 0x0000},
    {0x0070/*'p'*/, 0x00E9, 0x0063/*'c'*/, 0x0068/*'h'*/, 0x00E9, 0x0000},
    {0x0070/*'p'*/,  0x00E9, 0x0063/*'c'*/, 0x0068/*'h'*/, 0x00E9, 0x0000},
    {0x0070/*'p'*/, 0x00EA, 0x0063/*'c'*/, 0x0068/*'h'*/, 0x0065/*'e'*/, 0x0000},
    {0x0070/*'p'*/, 0x00EA, 0x0063/*'c'*/, 0x0068/*'h'*/, 0x0065/*'e'*/, 0x0072/*'r'*/, 0x0000},
    {0x0068/*'h'*/, 0x0065/*'e'*/, 0x006c/*'l'*/, 0x006c/*'l'*/, 0x004f/*'O'*/, 0x0000},
    {0x01ee, 0x0000},
    {0x25ca, 0x0000},
    {0x00e0, 0x0000},
    {0x01df, 0x0000}
};
71 
/* Expected outcome of comparing testSourceCases[i] with testTargetCases[i]
 * in TestTertiary. The commented-out values are kept from the original
 * source; their history is not visible in this file. */
const static UCollationResult results[] =
{
    UCOL_LESS,
    UCOL_LESS,
    UCOL_LESS, /*UCOL_GREATER,*/
    UCOL_LESS,
    UCOL_GREATER,
    UCOL_GREATER,
    UCOL_LESS,
    UCOL_GREATER,
    UCOL_LESS, /*UCOL_GREATER,*/
    UCOL_GREATER,
    UCOL_LESS,
    UCOL_LESS
};
87 
/* 0x0300 is the combining grave accent, 0x0301 is the combining acute accent. */
/* The order of elements in this array must be different from the order in CollationEnglishTest. */
/* TestSecondary compares every pair of rows and expects row i to sort before
 * row j whenever i < j, so the rows must stay in the expected ascending order. */
const static UChar testAcute[][MAX_TOKEN_LEN] =
{
/*00*/    {0x0065/*'e'*/, 0x0065/*'e'*/,  0x0000},
/*01*/    {0x0065/*'e'*/, 0x0301, 0x0065/*'e'*/,  0x0000},
/*02*/    {0x0065/*'e'*/, 0x0300, 0x0301, 0x0065/*'e'*/,  0x0000},
/*03*/    {0x0065/*'e'*/, 0x0300, 0x0065/*'e'*/,  0x0000},
/*04*/    {0x0065/*'e'*/, 0x0301, 0x0300, 0x0065/*'e'*/,  0x0000},
/*05*/    {0x0065/*'e'*/, 0x0065/*'e'*/, 0x0301, 0x0000},
/*06*/    {0x0065/*'e'*/, 0x0301, 0x0065/*'e'*/, 0x0301, 0x0000},
/*07*/    {0x0065/*'e'*/, 0x0300, 0x0301, 0x0065/*'e'*/, 0x0301, 0x0000},
/*08*/    {0x0065/*'e'*/, 0x0300, 0x0065/*'e'*/, 0x0301, 0x0000},
/*09*/    {0x0065/*'e'*/, 0x0301, 0x0300, 0x0065/*'e'*/, 0x0301, 0x0000},
/*0a*/    {0x0065/*'e'*/, 0x0065/*'e'*/, 0x0300, 0x0301, 0x0000},
/*0b*/    {0x0065/*'e'*/, 0x0301, 0x0065/*'e'*/, 0x0300, 0x0301, 0x0000},
/*0c*/    {0x0065/*'e'*/, 0x0300, 0x0301, 0x0065/*'e'*/, 0x0300, 0x0301, 0x0000},
/*0d*/    {0x0065/*'e'*/, 0x0300, 0x0065/*'e'*/, 0x0300, 0x0301, 0x0000},
/*0e*/    {0x0065/*'e'*/, 0x0301, 0x0300, 0x0065/*'e'*/, 0x0300, 0x0301, 0x0000},
/*0f*/    {0x0065/*'e'*/, 0x0065/*'e'*/, 0x0300, 0x0000},
/*10*/    {0x0065/*'e'*/, 0x0301, 0x0065/*'e'*/, 0x0300, 0x0000},
/*11*/    {0x0065/*'e'*/, 0x0300, 0x0301, 0x0065/*'e'*/, 0x0300, 0x0000},
/*12*/    {0x0065/*'e'*/, 0x0300, 0x0065/*'e'*/, 0x0300, 0x0000},
/*13*/    {0x0065/*'e'*/, 0x0301, 0x0300, 0x0065/*'e'*/, 0x0300, 0x0000},
/*14*/    {0x0065/*'e'*/, 0x0065/*'e'*/, 0x0301, 0x0300, 0x0000},
/*15*/    {0x0065/*'e'*/, 0x0301, 0x0065/*'e'*/, 0x0301, 0x0300, 0x0000},
/*16*/    {0x0065/*'e'*/, 0x0300, 0x0301, 0x0065/*'e'*/, 0x0301, 0x0300, 0x0000},
/*17*/    {0x0065/*'e'*/, 0x0300, 0x0065/*'e'*/, 0x0301, 0x0300, 0x0000},
/*18*/    {0x0065/*'e'*/, 0x0301, 0x0300, 0x0065/*'e'*/, 0x0301, 0x0300, 0x0000}
};
118 
/* Strings for TestExtra, which expects each row to compare as UCOL_LESS
 * against every row that follows it; the rows are therefore listed in the
 * expected ascending order. */
const static UChar testBugs[][MAX_TOKEN_LEN] =
{
    {0x0061/*'a'*/, 0x000},
    {0x0041/*'A'*/, 0x000},
    {0x0065/*'e'*/, 0x000},
    {0x0045/*'E'*/, 0x000},
    {0x00e9, 0x000},
    {0x00e8, 0x000},
    {0x00ea, 0x000},
    {0x00eb, 0x000},
    {0x0065/*'e'*/, 0x0061/*'a'*/, 0x000},
    {0x0078/*'x'*/, 0x000}
};
132 
133 
/* Forward declaration so addFrenchCollTest can register the test before its
 * definition. The other test functions are not declared in this file
 * (presumably in cfrtst.h -- confirm). */
static void TestGetSortKey(void);
135 
136 
/**
 * Registers the French collation tests of this file under the
 * "tscoll/cfrtst/" path of the test tree.
 *
 * @param root test tree the four tests are added to
 */
void addFrenchCollTest(TestNode** root)
{
    /* A function name already designates its address, so no '&' is needed. */
    addTest(root, TestSecondary,  "tscoll/cfrtst/TestSecondary");
    addTest(root, TestTertiary,   "tscoll/cfrtst/TestTertiary");
    addTest(root, TestExtra,      "tscoll/cfrtst/TestExtra");
    addTest(root, TestGetSortKey, "tscoll/cfrtst/TestGetSortKey");
}
144 
145 
TestTertiary()146 static void TestTertiary( )
147 {
148 
149     int32_t i;
150     UErrorCode status = U_ZERO_ERROR;
151     myCollation = ucol_open("fr_CA", &status);
152     if(U_FAILURE(status) || !myCollation){
153         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
154         return;
155     }
156 
157     ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
158     if(U_FAILURE(status)){
159         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
160         return;
161     }
162     log_verbose("Testing fr_CA Collation with Tertiary strength\n");
163     ucol_setStrength(myCollation, UCOL_QUATERNARY);
164     for (i = 0; i < 12 ; i++)
165     {
166         doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
167     }
168     ucol_close(myCollation);
169 }
170 
TestSecondary()171 static void TestSecondary()
172 {
173     int32_t i,j, testAcuteSize;
174     UCollationResult expected=UCOL_EQUAL;
175     UErrorCode status = U_ZERO_ERROR;
176     myCollation = ucol_open("fr_CA", &status);
177     if(U_FAILURE(status)){
178         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
179         return;
180     }
181     ucol_setAttribute(myCollation, UCOL_STRENGTH, UCOL_SECONDARY, &status);
182     if(U_FAILURE(status)){
183         log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
184         return;
185     }
186     log_verbose("Testing fr_CA Collation with Secondary strength\n");
187     /*test acute and grave ordering (compare to french collation)*/
188     testAcuteSize = UPRV_LENGTHOF(testAcute);
189     for (i = 0; i < testAcuteSize; i++)
190     {
191         for (j = 0; j < testAcuteSize; j++)
192         {
193             if (i <  j) expected = UCOL_LESS;
194             if (i == j) expected = UCOL_EQUAL;
195             if (i >  j) expected = UCOL_GREATER;
196             doTest(myCollation, testAcute[i], testAcute[j], expected );
197         }
198     }
199     ucol_close(myCollation);
200 }
201 
TestExtra()202 static void TestExtra()
203 {
204     int32_t i, j;
205     UErrorCode status = U_ZERO_ERROR;
206     myCollation = ucol_open("fr_CA", &status);
207     if(U_FAILURE(status)){
208         log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
209         return;
210     }
211     log_verbose("Testing fr_CA Collation extra with secondary strength\n");
212     ucol_setStrength(myCollation, UCOL_TERTIARY);
213     for (i = 0; i < 9 ; i++)
214     {
215         for (j = i + 1; j < 10; j += 1)
216         {
217             doTest(myCollation, testBugs[i], testBugs[j], UCOL_LESS);
218         }
219     }
220     ucol_close(myCollation);
221 }
222 
TestGetSortKey()223 static void TestGetSortKey() {
224     /* This is meant to test a buffer reallocation crash while using
225     French secondary sorting with a large buffer.
226     The fact that Japanese characters are used is irrelevant. */
227     static const UChar pucUTF16[] = {
228         0x3049,0x30b9,0x3088,0xfffd,0xfffd,0x308f,0xfffd,0x3042,
229         0xfffd,0xfffd,0x305e,0xfffd,0x30b6,0x30bb,0x305b,0x30b1,
230         0x3050,0x30af,0x304e,0x30bd,0xfffd,0x30c6,0xfffd,0xfffd,
231         0x30e1,0xfffd,0xfffd,0x30d9,0xfffd,0x3092,0x3075,0x304a,
232         0x3074,0x3070,0x30f5,0x30c4,0x306e,0x30df,0x3053,0xfffd,
233         0x30a6,0x30b6,0x30e0,0xfffd,0x30bc,0x30ef,0x3087,0x30cc,
234         0x305f,0x30de,0xfffd,0x3090,0x3063,0x30dc,0x30b6,0x30b9,
235         0x30d2,0x3072,0x3061,0xfffd,0xfffd,0xfffd,0x307b,0x3092,
236         0x30a5,0x30a9,0x30b1,0x30e7,0xfffd,0xfffd,0xfffd,0xfffd,
237         0xfffd,0x305e,0xfffd,0x30c7,0x30ae,0x305b,0x308b,0x30c0,
238         0x30f5,0xfffd,0xfffd,0xfffd,0x307d,0x304e,0xfffd,0xfffd,
239         0x30c0,0x30c8,0x306f,0x307a,0x30dd,0x30e4,0x3084,0xfffd,
240         0x308c,0x30f1,0xfffd,0x30c6,0xfffd,0x307a,0xfffd,0x3052,
241         0x3056,0x305d,0x30b7,0xfffd,0x305b,0x30b0,0x30b9,0xfffd,
242         0x30b2,0x306d,0x3044,0xfffd,0x3073,0xfffd,0x30be,0x30cf,
243         0x3080,0xfffd,0x30a8,0x30f5,0x30a5,0x30c7,0x307c,0xfffd,
244         0x30d1,0x305f,0x30b2,0xfffd,0x3053,0x30ca,0xfffd,0x30dd,
245         0x3058,0x30c0,0x305d,0x30e1,0xfffd,0x30bb,0x305f,0x30d1,
246         0x30f2,0x3058,0x3086,0x30ce,0x30db,0x30cb,0x30e9,0xfffd,
247         0x308c,0xfffd,0xfffd,0x30af,0x30c4,0x3076,0x304c,0x30f5,
248         0x30e8,0x308c,0xfffd,0x30e2,0x3073,0x30a3,0x304e,0x30ea,
249         0xfffd,0x304f,0xfffd,0x306c,0x3044,0xfffd,0xfffd,0x30c9,
250         0xfffd,0x30f5,0xfffd,0xfffd,0xfffd,0x30eb,0x30a8,0xfffd,
251         0x306d,0x307d,0x30d8,0x3069,0xfffd,0xfffd,0x3086,0x30a9,
252         0xfffd,0x3076,0x30e9,0x30cc,0x3074,0x30e0,0xfffd,0xfffd,
253         0xfffd,0x30f0,0x3086,0x30ac,0x3076,0x3068,0x30c7,0xfffd,
254         0x30b7,0x30d2,0x3048,0x308e,0x30e8,0x30d9,0x30ce,0x30d0,
255         0x308b,0x30ee,0x30e6,0x3079,0x30f3,0x30af,0xfffd,0x3079,
256         0xfffd,0xfffd,0x30ca,0x30bf,0xfffd,0x30b5,0xfffd,0xfffd,
257         0x3093,0xfffd,0x30ba,0xfffd,0x3076,0x3047,0x304a,0xfffd,
258         0xfffd,0x3086,0xfffd,0x3081,0xfffd,0x30f6,0x3066,0xfffd,
259         0xfffd,0x30b6,0x30ef,0x30e2,0x30bf,0xfffd,0x3053,0x304a,
260         0xfffd,0xfffd,0x304a,0x30e8,0xfffd,0x30e2,0xfffd,0xfffd,
261         0x305c,0x3081,0x30c6,0xfffd,0x3091,0x3046,0x306a,0x3059,
262         0xfffd,0xfffd,0x30dd,0x30d1,0x308a,0x30ee,0xfffd,0xfffd,
263         0x308a,0x3042,0x30da,0xfffd,0x3064,0x30ef,0x305c,0x306b,
264         0xfffd,0x30ca,0x3085,0x3067,0x30ea,0x30c2,0x30c8,0xfffd,
265         0x30f5,0xfffd,0xfffd,0xfffd,0x30ca,0xfffd,0x3050,0x30f1,
266         0x3050,0x3053,0x3072,0xfffd,0xfffd,0xfffd,0x3074,0xfffd,
267         0x304b,0x30dd,0x306d,0xfffd,0x3049,0x30a1,0x30cc,0x30de,
268         0x30ae,0x307b,0x308a,0xfffd,0x3065,0xfffd,0xfffd,0x30c0,
269         0xfffd,0x3048,0x30dc,0x304f,0x3085,0x3059,0x304b,0x30d3,
270         0x30eb,0x30a4,0x3073,0xfffd,0x30ba,0x308f,0x30a7,0x30c3,
271         0x3074,0x30cf,0x306c,0x3053,0x30c0,0xfffd,0x3066,0xfffd,
272         0x308f,0xfffd,0x30b5,0xfffd,0x3092,0x30c4,0xfffd,0x30d6,
273         0x3056,0x30ad,0x30d2,0x30ba,0xfffd,0x30e6,0x304c,0x3088,
274         0x30b6,0x3048,0x3077,0x30d1,0xfffd,0x3050,0xfffd,0x3042,
275         0xfffd,0xfffd,0x308f,0xfffd,0x30c1,0xfffd,0x3074,0x3061,
276         0x3056,0x30e5,0xfffd,0xfffd,0x3057,0xfffd,0xfffd,0xfffd,
277         0xfffd,0x30bd,0x30b3,0x30ee,0xfffd,0x30f2,0x3084,0x3050,
278         0xfffd,0x30e7,0xfffd,0xfffd,0x3060,0x3049,0x30f2,0x30ad,
279         0x30bf,0x30f1,0x30a2,0xfffd,0x30af,0xfffd,0x3060,0x30a1,
280         0x30e9,0x30c3,0xfffd,0x3072,0x3093,0x3070,0xfffd,0x308f,
281         0x3060,0xfffd,0x3067,0x306f,0x3082,0x308b,0x3051,0xfffd,
282         0x3058,0xfffd,0xfffd,0x30a8,0x3051,0x3054,0x30ad,0x30f0,
283         0x3053,0xfffd,0x30e1,0x30d7,0x308d,0x307f,0x30be,0x30b0,
284         0xfffd,0x30db,0xfffd,0x30d1,0xfffd,0x3054,0x30a5,0xfffd,
285         0x306a,0xfffd,0x305c,0xfffd,0x3052,0x3088,0xfffd,0x306e,
286         0xfffd,0x30a9,0x30a1,0x30b4,0x3083,0x30bd,0xfffd,0xfffd,
287         0x306a,0x3070,0x30cd,0xfffd,0x3072,0x30ed,0x30c6,0x30be,
288         0x30c4,0x305e,0x30b3,0x30e1,0x308a,0xfffd,0x305b,0xfffd,
289         0x3042,0x3088,0xfffd,0x304c,0xfffd,0x3089,0x3071,0xfffd,
290         0xfffd,0x30c6,0x3062,0x3079,0xfffd,0x304b,0x304a,0xfffd,
291         0x30ad,0x3045,0x3045,0x3087,0xfffd,0x306a,0x308b,0x0000,
292         0x30bd,0x3065,0x30b8,0x3086,0x30d3,0x3076,0xfffd,0xfffd,
293         0x308f,0x3053,0x307c,0x3053,0x3084,0x30ae,0x30c4,0x3045,
294         0x30a8,0x30d0,0x30e1,0x308c,0x30e6,0x30b7,0xfffd,0xfffd,
295         0xfffd,0x3046,0x305f,0xfffd,0x3086,0x30ab,0xfffd,0xfffd,
296         0x30c8,0xfffd,0x30a1,0x3052,0x3059,0xfffd,0x30a4,0xfffd,
297         0xfffd,0x308c,0x3085,0x30ab,0x30b5,0x3091,0x30bf,0x30e3,
298         0xfffd,0xfffd,0x3087,0xfffd,0x30f6,0x3051,0x30bd,0x3092,
299         0x3063,0xfffd,0x30a9,0x3063,0x306e,0xfffd,0xfffd,0xfffd,
300         0x306c,0xfffd,0x307e,0x30ad,0x3077,0x30c2,0x30e9,0x30d5,
301         0xfffd,0xfffd,0x30c6,0x305c,0xfffd,0xfffd,0x3089,0xfffd,
302         0x3048,0x30cb,0x308c,0xfffd,0xfffd,0x3044,0xfffd,0x3080,
303         0x3063,0x3079,0xfffd,0x308a,0x30cb,0x3042,0x3057,0xfffd,
304         0x307c,0x30c1,0x30a8,0x30cf,0xfffd,0x3083,0xfffd,0xfffd,
305         0x306c,0xfffd,0x305e,0x3092,0xfffd,0x30dc,0x30b0,0x3081,
306         0x30e3,0x30f0,0x304e,0x30cc,0x308e,0x30c4,0x30ad
307     };
308 
309     UErrorCode status = U_ZERO_ERROR;
310     UCollator *pCollator;
311     int32_t lenActualSortKey;
312     uint8_t pucSortKey[4096];
313     static const int32_t LENSORTKEY = (int32_t)sizeof(pucSortKey);
314 
315     ucol_prepareShortStringOpen("LFR_AN_CX_EX_FO_HX_NX_S3", 0, NULL, &status);
316 
317     pCollator = ucol_openFromShortString("LFR_AN_CX_EX_FO_HX_NX_S3", 0, NULL, &status);
318 
319     if (U_FAILURE(status)) {
320         log_data_err("error opening collator -> %s. (Are you missing data?)\n", u_errorName(status));
321         return;
322     }
323 
324     lenActualSortKey = ucol_getSortKey(pCollator,
325         (const UChar *)pucUTF16,
326         UPRV_LENGTHOF(pucUTF16),
327         pucSortKey,
328         LENSORTKEY);
329 
330     if (lenActualSortKey > LENSORTKEY) {
331         log_err("sort key too big for original buffer. Got: %d Expected: %d\n", lenActualSortKey, LENSORTKEY);
332         return;
333     }
334     /* If the test didn't crash, then the test succeeded. */
335     ucol_close(pCollator);
336 }
337 
338 #endif /* #if !UCONFIG_NO_COLLATION */
339