/********************************************************************
 * Copyright (c) 2002-2014, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/

/**
 * UCAConformanceTest performs conformance tests defined in the data
 * files. ICU ships with stub data files, as the whole test is too
 * long. To do the whole test, download the test files.
 */

package com.ibm.icu.dev.test.collator;

import java.io.BufferedReader;
import java.io.IOException;

import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.dev.test.TestUtil;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RawCollationKey;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.VersionInfo;

/**
 * Runs the collation conformance data files (CollationTest_*.txt) against the
 * root collator and, where enabled, against a collator rebuilt from the root
 * collator's rules. Each data line must sort greater than or equal to the
 * line before it.
 */
public class UCAConformanceTest extends TestFmwk {

    /**
     * Command-line entry point; hands the arguments to the test framework.
     *
     * @param args test framework arguments
     */
    public static void main(String[] args) {
        new UCAConformanceTest().run(args);
    }

    public UCAConformanceTest() {
    }

    /**
     * Creates the root collator and the raw-string comparator used as the
     * tie-breaker for data files older than UCA 6.2.
     */
    @Override
    protected void init() throws Exception {
        UCA = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);

        comparer = new UTF16.StringComparator(true, false, UTF16.StringComparator.FOLD_CASE_DEFAULT);
    }

    /** Root collator, created in {@link #init()}. */
    private RuleBasedCollator UCA;
    /** Collator built from the root collator's rules; created lazily by {@link #initRbUCA()}. */
    private RuleBasedCollator rbUCA;
    /** Compares the raw input strings; only consulted when {@link #isAtLeastUCA62} is false. */
    private UTF16.StringComparator comparer;
    /** True when the runtime Unicode version is 6.2 or later. */
    private final boolean isAtLeastUCA62 =
            UCharacter.getUnicodeVersion().compareTo(VersionInfo.UNICODE_6_2) >= 0;

    /** Runs the NON_IGNORABLE data file against the root collator. */
    public void TestTableNonIgnorable() {
        setCollNonIgnorable(UCA);
        openTestFile("NON_IGNORABLE");
        conformanceTest(UCA);
    }

    /** Runs the SHIFTED data file against the root collator. */
    public void TestTableShifted() {
        setCollShifted(UCA);
        openTestFile("SHIFTED");
        conformanceTest(UCA);
    }

    /**
     * Runs the NON_IGNORABLE data file against the rule-built collator.
     * Skipped while the known issue cldrbug:6745 is logged, or when the
     * rule-built collator could not be created.
     */
    public void TestRulesNonIgnorable() {
        if (logKnownIssue("cldrbug:6745", "UCARules.txt has problems")) {
            return;
        }
        initRbUCA();
        if (rbUCA == null) {
            return;
        }

        setCollNonIgnorable(rbUCA);
        openTestFile("NON_IGNORABLE");
        conformanceTest(rbUCA);
    }

    /**
     * Disabled test: only logs the reason why it does not run.
     */
    public void TestRulesShifted() {
        logln("This test is currently disabled, as it is impossible to "+
              "wholly represent fractional UCA using tailoring rules.");
        /*
         * Intended body if the test is ever enabled:
         *
         * initRbUCA();
         * if(rbUCA == null) { return; }
         *
         * setCollShifted(rbUCA);
         * openTestFile("SHIFTED");
         * conformanceTest(rbUCA);
         */
    }

    /** Reader over the currently open data file; null when no file is open. */
    BufferedReader in;

    /**
     * Opens the data file for the given test type into {@link #in}, trying
     * the "_SHORT" variant first, then the full file, then the "_STUB"
     * variant. Reports an error if none of them can be opened.
     *
     * @param type the file-name infix, e.g. "NON_IGNORABLE" or "SHIFTED"
     */
    private void openTestFile(String type) {
        String collationTest = "CollationTest_";
        String ext = ".txt";
        try {
            in = TestUtil.getDataReader(collationTest+type+"_SHORT"+ext);
        } catch (Exception e) {
            try {
                in = TestUtil.getDataReader(collationTest+type+ext);
            } catch (Exception e1) {
                try {
                    in = TestUtil.getDataReader(collationTest+type+"_STUB"+ext);
                    logln( "INFO: Working with the stub file.\n"+
                           "If you need the full conformance test, please\n"+
                           "download the appropriate data files from:\n"+
                           "http://unicode.org/cldr/trac/browser/trunk/common/uca");
                } catch (Exception e11) {
                    errln("ERROR: Could not find any of the test files");
                }
            }
        }
    }

    /**
     * Closes {@link #in} if it is open and resets it to null. Failures on
     * close are ignored: the reader is only read from, and the test outcome
     * has already been reported by the time it is closed.
     */
    private void closeTestFile() {
        if (in == null) {
            return;
        }
        try {
            in.close();
        } catch (IOException ignored) {
            // Best effort; nothing useful can be done with a close failure.
        }
        in = null;
    }

    /**
     * Configures a collator for the NON_IGNORABLE data file: canonical
     * decomposition, no lower-case-first, no case level, alternate handling
     * not shifted, and IDENTICAL strength from UCA 6.2 on (TERTIARY before).
     *
     * @param coll the collator to configure; ignored when null
     */
    private void setCollNonIgnorable(RuleBasedCollator coll) {
        if (coll != null) {
            coll.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
            coll.setLowerCaseFirst(false);
            coll.setCaseLevel(false);
            coll.setStrength(isAtLeastUCA62 ? Collator.IDENTICAL : Collator.TERTIARY);
            coll.setAlternateHandlingShifted(false);
        }
    }

    /**
     * Configures a collator for the SHIFTED data file: canonical
     * decomposition, no lower-case-first, no case level, alternate handling
     * shifted, and IDENTICAL strength from UCA 6.2 on (QUATERNARY before).
     *
     * @param coll the collator to configure; ignored when null
     */
    private void setCollShifted(RuleBasedCollator coll) {
        if (coll != null) {
            coll.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
            coll.setLowerCaseFirst(false);
            coll.setCaseLevel(false);
            coll.setStrength(isAtLeastUCA62 ? Collator.IDENTICAL : Collator.QUATERNARY);
            coll.setAlternateHandlingShifted(true);
        }
    }

    /**
     * Lazily builds {@link #rbUCA} from the rules returned by the root
     * collator. On failure an error is reported and {@link #rbUCA} stays null.
     */
    private void initRbUCA() {
        if (rbUCA == null) {
            String ucarules = UCA.getRules(true);
            try {
                rbUCA = new RuleBasedCollator(ucarules);
            } catch (Exception e) {
                errln("Failure creating UCA rule-based collator: " + e);
            }
        }
    }

    /**
     * Converts the leading run of whitespace-separated hexadecimal code
     * points on a data line into a string. Parsing stops at the first
     * character that is neither whitespace nor a letter or digit (expected
     * to be ';') or at the end of the line.
     *
     * @param line one data line of the test file
     * @return the string made of the parsed code points; empty if none
     * @throws NumberFormatException if a letter/digit token is not valid hex
     */
    private String parseString(String line) {
        int i = 0, value;
        StringBuilder result = new StringBuilder(), buffer = new StringBuilder();

        for (;;) {
            while (i < line.length() && Character.isWhitespace(line.charAt(i))) {
                i++;
            }
            while (i < line.length() && Character.isLetterOrDigit(line.charAt(i))) {
                buffer.append(line.charAt(i));
                i++;
            }
            if (buffer.length() == 0) {
                // We hit something that was not whitespace/letter/digit.
                // Should be ';' or end of string.
                return result.toString();
            }
            /* read one code point */
            value = Integer.parseInt(buffer.toString(), 16);
            buffer.setLength(0);
            result.appendCodePoint(value);
        }
    }

    /** Skip flag: the collator under test uses shifted alternate handling. */
    private static final int IS_SHIFTED = 1;
    /** Skip flag: the collator under test is the rule-built one. */
    private static final int FROM_RULES = 2;

    /**
     * Hook for temporarily skipping data lines that hit known ICU bugs.
     * Currently skips nothing.
     *
     * @param s the parsed test string
     * @param flags combination of {@link #IS_SHIFTED} and {@link #FROM_RULES}
     * @return always false
     */
    private static boolean skipLineBecauseOfBug(String s, int flags) {
        // Add temporary exceptions here if there are ICU bugs, until we can fix them.
        // For examples see the ICU 52 version of this file.
        return false;
    }

    /**
     * Maps a comparison result to -1, 0 or 1 according to its sign.
     */
    private static int normalizeResult(int result) {
        return result < 0 ? -1 : result == 0 ? 0 : 1;
    }

    /**
     * Reads the open data file line by line and checks that each string
     * compares greater than or equal to the previous one, that compare() is
     * symmetrical, and (unless the "ucaconfnosortkeys" property is set) that
     * sort-key comparison agrees with compare(). The data file is always
     * closed before this method returns.
     *
     * @param coll the collator under test; nothing is tested when null
     */
    private void conformanceTest(RuleBasedCollator coll) {
        if (in == null) {
            return;
        }
        if (coll == null) {
            // A file was opened but there is nothing to test: do not leak the reader.
            closeTestFile();
            return;
        }
        int skipFlags = 0;
        if (coll.isAlternateHandlingShifted()) {
            skipFlags |= IS_SHIFTED;
        }
        if (coll == rbUCA) {
            skipFlags |= FROM_RULES;
        }

        logln("-prop:ucaconfnosortkeys=1 turns off getSortKey() in UCAConformanceTest");
        boolean withSortKeys = getProperty("ucaconfnosortkeys") == null;

        int lineNo = 0;

        String line = null, oldLine = null, buffer = null, oldB = null;
        // Two key buffers are alternated so the previous key survives while the next is computed.
        RawCollationKey sk1 = new RawCollationKey(), sk2 = new RawCollationKey();
        RawCollationKey oldSk = null, newSk = sk1;

        try {
            while ((line = in.readLine()) != null) {
                lineNo++;
                if (line.length() == 0 || line.charAt(0) == '#') {
                    continue;
                }
                buffer = parseString(line);

                if (skipLineBecauseOfBug(buffer, skipFlags)) {
                    logln("Skipping line " + lineNo + " because of a known bug");
                    continue;
                }

                if (withSortKeys) {
                    coll.getRawCollationKey(buffer, newSk);
                }
                if (oldSk != null) {
                    boolean ok = true;
                    int skres = withSortKeys ? oldSk.compareTo(newSk) : 0;
                    int cmpres = coll.compare(oldB, buffer);
                    int cmpres2 = coll.compare(buffer, oldB);

                    if (cmpres != -cmpres2) {
                        // %d, not %i: java.util.Formatter has no 'i' conversion and would
                        // throw, hiding this failure behind "Unexpected exception".
                        errln(String.format(
                                "Compare result not symmetrical on line %d: " +
                                "previous vs. current (%d) / current vs. previous (%d)",
                                lineNo, cmpres, cmpres2));
                        ok = false;
                    }

                    // TODO: Compare with normalization turned off if the input passes the FCD test.

                    if (withSortKeys && cmpres != normalizeResult(skres)) {
                        errln("Difference between coll.compare (" + cmpres + ") and sortkey compare (" + skres + ") on line " + lineNo);
                        ok = false;
                    }

                    int res = cmpres;
                    if (res == 0 && !isAtLeastUCA62) {
                        // Up to UCA 6.1, the collation test files use a custom tie-breaker,
                        // comparing the raw input strings.
                        res = comparer.compare(oldB, buffer);
                        // Starting with UCA 6.2, the collation test files use the standard UCA tie-breaker,
                        // comparing the NFD versions of the input strings,
                        // which we do via setting strength=identical.
                    }
                    if (res > 0) {
                        errln("Line " + lineNo + " is not greater or equal than previous line");
                        ok = false;
                    }

                    if (!ok) {
                        errln(" Previous data line " + oldLine);
                        errln(" Current data line " + line);
                        if (withSortKeys) {
                            errln(" Previous key: " + CollationTest.prettify(oldSk));
                            errln(" Current key: " + CollationTest.prettify(newSk));
                        }
                    }
                }

                oldSk = newSk;
                oldB = buffer;
                oldLine = line;
                if (oldSk == sk1) {
                    newSk = sk2;
                } else {
                    newSk = sk1;
                }
            }
        } catch (Exception e) {
            errln("Unexpected exception "+e);
        } finally {
            closeTestFile();
        }
    }
}