1 /********************************************************************
2  * Copyright (c) 2002-2014, International Business Machines Corporation and
3  * others. All Rights Reserved.
4  ********************************************************************/
5 
/**
 * UCAConformanceTest performs the conformance tests defined in the data
 * files. ICU ships with stub data files because the complete test files are
 * too large. To run the complete test, download the full test files.
 */
11 
12 package com.ibm.icu.dev.test.collator;
13 
14 import java.io.BufferedReader;
15 import java.io.IOException;
16 
17 import com.ibm.icu.dev.test.TestFmwk;
18 import com.ibm.icu.dev.test.TestUtil;
19 import com.ibm.icu.lang.UCharacter;
20 import com.ibm.icu.text.Collator;
21 import com.ibm.icu.text.RawCollationKey;
22 import com.ibm.icu.text.RuleBasedCollator;
23 import com.ibm.icu.text.UTF16;
24 import com.ibm.icu.util.ULocale;
25 import com.ibm.icu.util.VersionInfo;
26 
27 public class UCAConformanceTest extends TestFmwk {
28 
29     /**
30      * @param args
31      */
main(String[] args)32     public static void main(String[] args) {
33         new UCAConformanceTest().run(args);
34     }
35 
UCAConformanceTest()36     public UCAConformanceTest() {
37     }
38     @Override
init()39     protected void init() throws Exception{
40         UCA = (RuleBasedCollator)Collator.getInstance(ULocale.ROOT);
41 
42         comparer = new UTF16.StringComparator(true, false, UTF16.StringComparator.FOLD_CASE_DEFAULT);
43     }
44     private RuleBasedCollator UCA;
45     private RuleBasedCollator rbUCA;
46     private UTF16.StringComparator comparer;
47     private boolean isAtLeastUCA62 =
48         UCharacter.getUnicodeVersion().compareTo(VersionInfo.UNICODE_6_2) >= 0;
49 
TestTableNonIgnorable()50     public void TestTableNonIgnorable() {
51         setCollNonIgnorable(UCA);
52         openTestFile("NON_IGNORABLE");
53         conformanceTest(UCA);
54     }
55 
TestTableShifted()56     public void TestTableShifted() {
57         setCollShifted(UCA);
58         openTestFile("SHIFTED");
59         conformanceTest(UCA);
60     }
61 
TestRulesNonIgnorable()62     public void TestRulesNonIgnorable() {
63         if(logKnownIssue("cldrbug:6745", "UCARules.txt has problems")) { return; }
64         initRbUCA();
65         if(rbUCA == null) { return; }
66 
67         setCollNonIgnorable(rbUCA);
68         openTestFile("NON_IGNORABLE");
69         conformanceTest(rbUCA);
70     }
71 
TestRulesShifted()72     public void TestRulesShifted() {
73         logln("This test is currently disabled, as it is impossible to "+
74         "wholly represent fractional UCA using tailoring rules.");
75         return;
76         /*
77         initRbUCA();
78         if(rbUCA == null) { return; }
79 
80         setCollShifted(rbUCA);
81         openTestFile("SHIFTED");
82         testConformance(rbUCA);
83         */
84     }
85     BufferedReader in;
openTestFile(String type)86     private void openTestFile(String type)
87     {
88         String collationTest = "CollationTest_";
89         String ext = ".txt";
90         try {
91             in = TestUtil.getDataReader(collationTest+type+"_SHORT"+ext);
92         } catch (Exception e) {
93             try {
94                 in = TestUtil.getDataReader(collationTest+type+ext);
95             } catch (Exception e1) {
96                 try {
97                     in = TestUtil.getDataReader(collationTest+type+"_STUB"+ext);
98                     logln( "INFO: Working with the stub file.\n"+
99                             "If you need the full conformance test, please\n"+
100                             "download the appropriate data files from:\n"+
101                             "http://unicode.org/cldr/trac/browser/trunk/common/uca");
102                 } catch (Exception e11) {
103                     errln("ERROR: Could not find any of the test files");
104                 }
105             }
106         }
107     }
108 
setCollNonIgnorable(RuleBasedCollator coll)109     private void setCollNonIgnorable(RuleBasedCollator coll)
110     {
111         if(coll != null) {
112             coll.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
113             coll.setLowerCaseFirst(false);
114             coll.setCaseLevel(false);
115             coll.setStrength(isAtLeastUCA62 ? Collator.IDENTICAL : Collator.TERTIARY);
116             coll.setAlternateHandlingShifted(false);
117         }
118     }
119 
setCollShifted(RuleBasedCollator coll)120     private void setCollShifted(RuleBasedCollator coll)
121     {
122         if(coll != null) {
123             coll.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
124             coll.setLowerCaseFirst(false);
125             coll.setCaseLevel(false);
126             coll.setStrength(isAtLeastUCA62 ? Collator.IDENTICAL : Collator.QUATERNARY);
127             coll.setAlternateHandlingShifted(true);
128         }
129     }
130 
131 
132 
initRbUCA()133     private void initRbUCA()
134     {
135         if(rbUCA == null) {
136             String ucarules = UCA.getRules(true);
137             try {
138                 rbUCA = new RuleBasedCollator(ucarules);
139             } catch(Exception e) {
140                 errln("Failure creating UCA rule-based collator: " + e);
141             }
142         }
143     }
144 
parseString(String line)145     private String parseString(String line) {
146         int i = 0, value;
147         StringBuilder result = new StringBuilder(), buffer = new StringBuilder();
148 
149         for(;;) {
150             while(i < line.length() && Character.isWhitespace(line.charAt(i))) {
151                 i++;
152             }
153             while(i < line.length() && Character.isLetterOrDigit(line.charAt(i))) {
154                 buffer.append(line.charAt(i));
155                 i++;
156             }
157             if(buffer.length() == 0) {
158                 // We hit something that was not whitespace/letter/digit.
159                 // Should be ';' or end of string.
160                 return result.toString();
161             }
162             /* read one code point */
163             value = Integer.parseInt(buffer.toString(), 16);
164             buffer.setLength(0);
165             result.appendCodePoint(value);
166         }
167 
168     }
169 
170     private static final int IS_SHIFTED = 1;
171     private static final int FROM_RULES = 2;
172 
skipLineBecauseOfBug(String s, int flags)173     private static boolean skipLineBecauseOfBug(String s, int flags) {
174         // Add temporary exceptions here if there are ICU bugs, until we can fix them.
175         // For examples see the ICU 52 version of this file.
176         return false;
177     }
178 
normalizeResult(int result)179     private static int normalizeResult(int result) {
180         return result < 0 ? -1 : result == 0 ? 0 : 1;
181     }
182 
conformanceTest(RuleBasedCollator coll)183     private void conformanceTest(RuleBasedCollator coll) {
184         if(in == null || coll == null) {
185             return;
186         }
187         int skipFlags = 0;
188         if(coll.isAlternateHandlingShifted()) {
189             skipFlags |= IS_SHIFTED;
190         }
191         if(coll == rbUCA) {
192             skipFlags |= FROM_RULES;
193         }
194 
195         logln("-prop:ucaconfnosortkeys=1 turns off getSortKey() in UCAConformanceTest");
196         boolean withSortKeys = getProperty("ucaconfnosortkeys") == null;
197 
198         int lineNo = 0;
199 
200         String line = null, oldLine = null, buffer = null, oldB = null;
201         RawCollationKey sk1 = new RawCollationKey(), sk2 = new RawCollationKey();
202         RawCollationKey oldSk = null, newSk = sk1;
203 
204         try {
205             while ((line = in.readLine()) != null) {
206                 lineNo++;
207                 if(line.length() == 0 || line.charAt(0) == '#') {
208                     continue;
209                 }
210                 buffer = parseString(line);
211 
212                 if(skipLineBecauseOfBug(buffer, skipFlags)) {
213                     logln("Skipping line " + lineNo + " because of a known bug");
214                     continue;
215                 }
216 
217                 if(withSortKeys) {
218                     coll.getRawCollationKey(buffer, newSk);
219                 }
220                 if(oldSk != null) {
221                     boolean ok = true;
222                     int skres = withSortKeys ? oldSk.compareTo(newSk) : 0;
223                     int cmpres = coll.compare(oldB, buffer);
224                     int cmpres2 = coll.compare(buffer, oldB);
225 
226                     if(cmpres != -cmpres2) {
227                         errln(String.format(
228                                 "Compare result not symmetrical on line %i: " +
229                                 "previous vs. current (%d) / current vs. previous (%d)",
230                                 lineNo, cmpres, cmpres2));
231                         ok = false;
232                     }
233 
234                     // TODO: Compare with normalization turned off if the input passes the FCD test.
235 
236                     if(withSortKeys && cmpres != normalizeResult(skres)) {
237                         errln("Difference between coll.compare (" + cmpres + ") and sortkey compare (" + skres + ") on line " + lineNo);
238                         ok = false;
239                     }
240 
241                     int res = cmpres;
242                     if(res == 0 && !isAtLeastUCA62) {
243                         // Up to UCA 6.1, the collation test files use a custom tie-breaker,
244                         // comparing the raw input strings.
245                         res = comparer.compare(oldB, buffer);
246                         // Starting with UCA 6.2, the collation test files use the standard UCA tie-breaker,
247                         // comparing the NFD versions of the input strings,
248                         // which we do via setting strength=identical.
249                     }
250                     if(res > 0) {
251                         errln("Line " + lineNo + " is not greater or equal than previous line");
252                         ok = false;
253                     }
254 
255                     if(!ok) {
256                         errln("  Previous data line " + oldLine);
257                         errln("  Current data line  " + line);
258                         if(withSortKeys) {
259                             errln("  Previous key: " + CollationTest.prettify(oldSk));
260                             errln("  Current key:  " + CollationTest.prettify(newSk));
261                         }
262                     }
263                 }
264 
265                 oldSk = newSk;
266                 oldB = buffer;
267                 oldLine = line;
268                 if(oldSk == sk1) {
269                     newSk = sk2;
270                 } else {
271                     newSk = sk1;
272                 }
273             }
274         } catch (Exception e) {
275             errln("Unexpected exception "+e);
276         } finally {
277             try {
278                 in.close();
279             } catch (IOException ignored) {
280             }
281             in = null;
282         }
283     }
284 }
285