1 /********************************************************************
2  * COPYRIGHT:
3  * Copyright (c) 1997-2014, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ********************************************************************/
6 
7 #include "unicode/utypes.h"
8 
9 #if !UCONFIG_NO_COLLATION
10 
11 #include "unicode/coll.h"
12 #include "unicode/tblcoll.h"
13 #include "unicode/unistr.h"
14 #include "unicode/sortkey.h"
15 #include "g7coll.h"
16 #include "sfwdchit.h"
17 #include "cmemory.h"
18 
19 static const UChar testCases[][G7CollationTest::MAX_TOKEN_LEN] = {
20     {  0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
21         0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0073 /*'s'*/, 0x0000},                    /* 9 */
22     { 0x0050 /*'P'*/, 0x0061 /*'a'*/, 0x0074/*'t'*/, 0x0000},                                                    /* 1 */
23     { 0x0070 /*'p'*/, 0x00E9, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x00E9, 0x0000},                                    /* 2 */
24     { 0x0070 /*'p'*/, 0x00EA, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0000},                           /* 3 */
25     { 0x0070 /*'p'*/, 0x00E9, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0072 /*'r'*/, 0x0000},            /* 4 */
26     { 0x0070 /*'p'*/, 0x00EA, 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0065 /*'e'*/, 0x0072 /*'r'*/, 0x0000},            /* 5 */
27     { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x0064 /*'d'*/, 0x0000},                                                    /* 6 */
28     { 0x0054 /*'T'*/, 0x00F6, 0x006e /*'n'*/, 0x0065 /*'e'*/, 0x0000},                                            /* 7 */
29     { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x0066 /*'f'*/, 0x0075 /*'u'*/, 0x0000},                                   /* 8 */
30     { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
31       0x0062  /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0000},                                    /* 12 */
32     { 0x0054 /*'T'*/, 0x006f /*'o'*/, 0x006e /*'n'*/, 0x0000},                                                    /* 10 */
33     { 0x0050  /*'P'*/, 0x0041 /*'A'*/, 0x0054 /*'T'*/, 0x0000},                                                    /* 11 */
34     { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
35         0x002d /*'-'*/,  0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0000},                /* 13 */
36     { 0x0062 /*'b'*/, 0x006c /*'l'*/, 0x0061 /*'a'*/, 0x0062 /*'c'*/, 0x006b /*'k'*/,
37         0x002d /*'-'*/,  0x0062 /*'b'*/, 0x0069 /*'i'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0073/*'s'*/, 0x0000},  /* 0 */
38     {0x0070 /*'p'*/, 0x0061 /*'a'*/, 0x0074 /*'t'*/, 0x0000},                                                    /* 14 */
39     /* Additional tests */
40     { 0x0063 /*'c'*/, 0x007a /*'z'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x0000 },                                 /* 15 */
41     { 0x0063 /*'c'*/, 0x0068 /*'h'*/, 0x0075 /*'u'*/, 0x0072 /*'r'*/, 0x006f /*'o'*/, 0x0000 },                  /* 16 */
42     { 0x0063 /*'c'*/, 0x0061 /*'a'*/, 0x0074 /*'t'*/, 0x000 },                                                    /* 17 */
43     { 0x0064 /*'d'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x006e /*'n'*/, 0x0000 },                                 /* 18 */
44     { 0x003f /*'?'*/, 0x0000 },                                                                                /* 19 */
45     { 0x0071 /*'q'*/, 0x0075 /*'u'*/, 0x0069 /*'i'*/, 0x0063 /*'c'*/, 0x006b /*'k'*/, 0x0000 },                  /* 20 */
46     { 0x0023 /*'#'*/, 0x0000 },                                                                                /* 21 */
47     { 0x0026 /*'&'*/, 0x0000 },                                                                                /* 22 */
48     {  0x0061 /*'a'*/, 0x002d /*'-'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0076 /*'v'*/, 0x0061 /*'a'*/,
49                 0x0072/*'r'*/, 0x006b/*'k'*/, 0x0000},                                                        /* 24 */
50     { 0x0061 /*'a'*/, 0x0061 /*'a'*/, 0x0072 /*'r'*/, 0x0064 /*'d'*/, 0x0076 /*'v'*/, 0x0061 /*'a'*/,
51                 0x0072/*'r'*/, 0x006b/*'k'*/, 0x0000},                                                        /* 23 */
52     { 0x0061 /*'a'*/, 0x0062 /*'b'*/, 0x0062 /*'b'*/, 0x006f /*'o'*/, 0x0074 /*'t'*/, 0x0000},                   /* 25 */
53     { 0x0063 /*'c'*/, 0x006f /*'o'*/, 0x002d /*'-'*/, 0x0070 /*'p'*/, 0x0000},                                 /* 27 */
54     { 0x0063 /*'c'*/, 0x006f  /*'o'*/, 0x0070 /*'p'*/, 0x0000},                                                /* 28 */
55     { 0x0063 /*'c'*/, 0x006f /*'o'*/, 0x006f /*'o'*/, 0x0070 /*'p'*/, 0x0000},                                 /* 26 */
56     { 0x007a /*'z'*/, 0x0065  /*'e'*/, 0x0062 /*'b'*/, 0x0072 /*'r'*/, 0x0061 /*'a'*/, 0x0000}                    /* 29 */
57 };
58 
59 static const int32_t results[G7CollationTest::TESTLOCALES][G7CollationTest::TOTALTESTSET] = {
60     { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_US */
61     { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_GB */
62     { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* en_CA */
63     { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* fr_FR */
64     { 12, 13, 9, 0, 14, 1, 11, 3, 2, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* fr_CA */
65     { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* de_DE */
66     { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* it_IT */
67     { 12, 13, 9, 0, 14, 1, 11, 2, 3, 4, 5, 6, 8, 10, 7, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 }, /* ja_JP */
68     /* new table collation with rules "& Z < p, P"  loop to FIXEDTESTSET */
69     { 12, 13, 9, 0, 6, 8, 10, 7, 14, 1, 11, 2, 3, 4, 5, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31 },
70     /* new table collation with rules "& C < ch , cH, Ch, CH " loop to TOTALTESTSET */
71     { 19, 22, 21, 23, 24, 25, 12, 13, 9, 0, 17, 26, 28, 27, 15, 16, 18, 14, 1, 11, 2, 3, 4, 5, 20, 6, 8, 10, 7, 29 },
72     /* new table collation with rules "& Question-mark ; ? & Hash-mark ; # & Ampersand ; '&'  " loop to TOTALTESTSET */
73     { 23, 24, 25, 22, 12, 13, 9, 0, 17, 16, 26, 28, 27, 15, 18, 21, 14, 1, 11, 2, 3, 4, 5, 19, 20, 6, 8, 10, 7, 29 },
74     /* analogous to Japanese rules " & aa ; a- & ee ; e- & ii ; i- & oo ; o- & uu ; u- " */  /* loop to TOTALTESTSET */
75     { 19, 22, 21, 24, 23, 25, 12, 13, 9, 0, 17, 16, 28, 26, 27, 15, 18, 14, 1, 11, 2, 3, 4, 5, 20, 6, 8, 10, 7, 29 }
76 };
77 
~G7CollationTest()78 G7CollationTest::~G7CollationTest() {}
79 
TestG7Locales()80 void G7CollationTest::TestG7Locales(/* char* par */)
81 {
82     int32_t i;
83     const Locale locales[8] = {
84         Locale("en", "US", ""),
85         Locale("en", "GB", ""),
86         Locale("en", "CA", ""),
87         Locale("fr", "FR", ""),
88         Locale("fr", "CA", ""),
89         Locale("de", "DE", ""),
90         Locale("it", "IT", ""),
91         Locale("ja", "JP", "")
92     };
93 
94     for (i = 0; i < UPRV_LENGTHOF(locales); i++)
95     {
96         UnicodeString dispName;
97         UErrorCode status = U_ZERO_ERROR;
98 
99         const Locale &locale = locales[i];
100         LocalPointer<Collator> myCollation(Collator::createInstance(locale, status));
101         if(U_FAILURE(status)) {
102           errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status));
103           return;
104         }
105         myCollation->setStrength(Collator::QUATERNARY);
106         myCollation->setAttribute(UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, status);
107         if (U_FAILURE(status)) {
108             errln("Locale %s creation failed - %s", locale.getName(), u_errorName(status));
109             continue;
110         }
111 
112         const UnicodeString &rules = ((RuleBasedCollator*)myCollation.getAlias())->getRules();
113         if (rules.isEmpty() &&
114                 (locale == Locale::getCanadaFrench() || locale == Locale::getJapanese())) {
115             dataerrln("%s Collator missing rule string", locale.getName());
116             if (logKnownIssue("10671", "TestG7Locales does not test ignore-punctuation")) {
117                 continue;
118             }
119         } else {
120             status = U_ZERO_ERROR;
121             RuleBasedCollator *tblColl1 = new RuleBasedCollator(rules, status);
122             if (U_FAILURE(status)) {
123                 errln("Recreate %s collation failed - %s", locale.getName(), u_errorName(status));
124                 continue;
125             }
126             myCollation.adoptInstead(tblColl1);
127         }
128 
129         UnicodeString msg;
130 
131         msg += "Locale ";
132         msg += locales[i].getDisplayName(dispName);
133         msg += "tests start :";
134         logln(msg);
135 
136         int32_t j, n;
137         for (j = 0; j < FIXEDTESTSET; j++)
138         {
139             for (n = j+1; n < FIXEDTESTSET; n++)
140             {
141                 doTest(myCollation.getAlias(), testCases[results[i][j]], testCases[results[i][n]], Collator::LESS);
142             }
143         }
144     }
145 }
146 
TestDemo1()147 void G7CollationTest::TestDemo1(/* char* par */)
148 {
149     logln("Demo Test 1 : Create a new table collation with rules \"& Z < p, P\"");
150     UErrorCode status = U_ZERO_ERROR;
151     Collator *col = Collator::createInstance("en_US", status);
152     if(U_FAILURE(status)) {
153       delete col;
154       errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status));
155       return;
156     }
157     const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules();
158     UnicodeString newRules(" & Z < p, P");
159     newRules.insert(0, baseRules);
160     RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status);
161 
162     if (U_FAILURE(status))
163     {
164         errln( "Demo Test 1 Table Collation object creation failed.");
165         return;
166     }
167 
168     int32_t j, n;
169     for (j = 0; j < FIXEDTESTSET; j++)
170     {
171         for (n = j+1; n < FIXEDTESTSET; n++)
172         {
173             doTest(myCollation, testCases[results[8][j]], testCases[results[8][n]], Collator::LESS);
174         }
175     }
176 
177     delete myCollation;
178     delete col;
179 }
180 
TestDemo2()181 void G7CollationTest::TestDemo2(/* char* par */)
182 {
183     logln("Demo Test 2 : Create a new table collation with rules \"& C < ch , cH, Ch, CH\"");
184     UErrorCode status = U_ZERO_ERROR;
185     Collator *col = Collator::createInstance("en_US", status);
186     if(U_FAILURE(status)) {
187       delete col;
188       errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status));
189       return;
190     }
191     const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules();
192     UnicodeString newRules("& C < ch , cH, Ch, CH");
193     newRules.insert(0, baseRules);
194     RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status);
195 
196     if (U_FAILURE(status))
197     {
198         errln("Demo Test 2 Table Collation object creation failed.");
199         return;
200     }
201 
202     int32_t j, n;
203     for (j = 0; j < TOTALTESTSET; j++)
204     {
205         for (n = j+1; n < TOTALTESTSET; n++)
206         {
207             doTest(myCollation, testCases[results[9][j]], testCases[results[9][n]], Collator::LESS);
208         }
209     }
210 
211     delete myCollation;
212     delete col;
213 }
214 
TestDemo3()215 void G7CollationTest::TestDemo3(/* char* par */)
216 {
217     logln("Demo Test 3 : Create a new table collation with rules \"& Question'-'mark ; '?' & Hash'-'mark ; '#' & Ampersand ; '&'\"");
218     UErrorCode status = U_ZERO_ERROR;
219     Collator *col = Collator::createInstance("en_US", status);
220     if(U_FAILURE(status)) {
221       errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status));
222       delete col;
223       return;
224     }
225     const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules();
226     UnicodeString newRules = "& Question'-'mark ; '?' & Hash'-'mark ; '#' & Ampersand ; '&'";
227     newRules.insert(0, baseRules);
228     RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status);
229 
230     if (U_FAILURE(status))
231     {
232         errln("Demo Test 3 Table Collation object creation failed.");
233         return;
234     }
235 
236     int32_t j, n;
237     for (j = 0; j < TOTALTESTSET; j++)
238     {
239         for (n = j+1; n < TOTALTESTSET; n++)
240         {
241             doTest(myCollation, testCases[results[10][j]], testCases[results[10][n]], Collator::LESS);
242         }
243     }
244 
245     delete myCollation;
246     delete col;
247 }
248 
TestDemo4()249 void G7CollationTest::TestDemo4(/* char* par */)
250 {
251     logln("Demo Test 4 : Create a new table collation with rules \" & aa ; a'-' & ee ; e'-' & ii ; i'-' & oo ; o'-' & uu ; u'-' \"");
252     UErrorCode status = U_ZERO_ERROR;
253     Collator *col = Collator::createInstance("en_US", status);
254     if(U_FAILURE(status)) {
255       delete col;
256       errcheckln(status, "Couldn't instantiate collator. Error: %s", u_errorName(status));
257       return;
258     }
259 
260     const UnicodeString baseRules = ((RuleBasedCollator*)col)->getRules();
261     UnicodeString newRules = " & aa ; a'-' & ee ; e'-' & ii ; i'-' & oo ; o'-' & uu ; u'-' ";
262     newRules.insert(0, baseRules);
263     RuleBasedCollator *myCollation = new RuleBasedCollator(newRules, status);
264 
265     int32_t j, n;
266     for (j = 0; j < TOTALTESTSET; j++)
267     {
268         for (n = j+1; n < TOTALTESTSET; n++)
269         {
270             doTest(myCollation, testCases[results[11][j]], testCases[results[11][n]], Collator::LESS);
271         }
272     }
273 
274     delete myCollation;
275     delete col;
276 }
277 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)278 void G7CollationTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
279 {
280     if (exec) logln("TestSuite G7CollationTest: ");
281     switch (index) {
282         case 0: name = "TestG7Locales"; if (exec)   TestG7Locales(/* par */); break;
283         case 1: name = "TestDemo1"; if (exec)   TestDemo1(/* par */); break;
284         case 2: name = "TestDemo2"; if (exec)   TestDemo2(/* par */); break;
285         case 3: name = "TestDemo3"; if (exec)   TestDemo3(/* par */); break;
286         case 4: name = "TestDemo4"; if (exec)   TestDemo4(/* par */); break;
287         default: name = ""; break;
288     }
289 }
290 
291 #endif /* #if !UCONFIG_NO_COLLATION */
292