package org.unicode.cldr.unittest;

import java.util.Set;

import org.unicode.cldr.test.DisplayAndInputProcessor;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.ExemplarType;
import org.unicode.cldr.util.Factory;

import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.lang.CharSequences;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSetIterator;

/**
 * Tests for {@link DisplayAndInputProcessor} (DAIP): round-tripping of values
 * through {@code processForDisplay} / {@code processInput}, and a set of
 * locale- and path-specific input normalizations.
 */
public class TestDisplayAndInputProcessor extends TestFmwk {

    /** Code point looked for in the main exemplar set by {@link #usesModifierApostrophe}. */
    private static final char MODIFIER_LETTER_APOSTROPHE = '\u02BC';

    CLDRConfig info = CLDRConfig.getInstance();

    public static void main(String[] args) {
        new TestDisplayAndInputProcessor().run(args);
    }

    /** Runs the display/input round-trip check over English and a few other locales. */
    public void TestAll() {
        showCldrFile(info.getEnglish());
        showCldrFile(info.getCLDRFile("ar", true));
        showCldrFile(info.getCLDRFile("ja", true));
        showCldrFile(info.getCLDRFile("hi", true));
        showCldrFile(info.getCLDRFile("wae", true));
    }

    /**
     * For every element of a fixed character set, builds a one-element
     * UnicodeSet pattern, sends it through processForDisplay and back through
     * processInput on the numbers exemplar path, and checks that the result
     * parses to the same set.
     */
    public void TestAExemplars() {
        UnicodeSet test = new UnicodeSet();
        DisplayAndInputProcessor daip = new DisplayAndInputProcessor(info.getEnglish(), true);
        Exception[] internalException = new Exception[1];
        // The path does not depend on the element under test, so compute it once.
        final String path = CLDRFile.getExemplarPath(ExemplarType.numbers);

        for (String s : new UnicodeSet("[!-#%-\\]_a-~¡§ª-¬±-³ µ-·¹-þ؉٠-٬۰-۹०-९০-৯੦-੯ ૦-૯୦-୯௦-௯౦-౯೦-೯൦-൯༠-༩ ၀-၉\\\\’‰−〇一七三九二五八六四]")) {
            test.clear().add(s);
            String value = test.toPattern(false);

            String display = daip.processForDisplay(path, value);
            internalException[0] = null;
            String input = daip.processInput(path, display, internalException);

            try {
                UnicodeSet roundTrip = new UnicodeSet(input);
                if (!assertEquals(test.toString() + "=>" + display, test, roundTrip)) {
                    input = daip.processInput(path, display, internalException); // for debugging
                }
            } catch (Exception e) {
                // Report the cause as well, so a parse failure can be diagnosed from the log.
                errln(test.toString() + "=>" + display + ": Failed to parse " + input
                    + " (" + e + ")");
            }
        }
    }

    /** Checks processInput on an exemplar set using the "twq" locale file. */
    public void TestTasawaq() {
        DisplayAndInputProcessor daip = new DisplayAndInputProcessor(
            info.getCLDRFile("twq", true));
        // time for data driven test
        final String input = "[Z \u017E ]";
        final String expect = "[z \u017E]"; // lower case
        String value = daip.processInput(
            "//ldml/characters/exemplarCharacters", input, null);
        if (!value.equals(expect)) {
            errln("Tasawaq incorrectly normalized with output: '" + value
                + "', expected '" + expect + "'");
        }
    }

    /** Checks that a Malayalam language name is normalized to the expected chillu code points. */
    public void TestMalayalam() {
        DisplayAndInputProcessor daip = new DisplayAndInputProcessor(
            info.getCLDRFile("ml", false));
        // Input written with escapes so that the zero-width joiners (U+200D) are visible.
        // NOTE(review): the joiner positions are inferred from the expected chillu
        // output below (consonant + virama + ZWJ for each chillu) - confirm against
        // the checked-in literal.
        String value = daip.processInput(
            "//ldml/localeDisplayNames/languages/language[@type=\"alg\"]",
            "\u0D05\u0D32\u0D4D\u200D\u0D17\u0D4B\u0D23\u0D4D\u200D\u0D15\u0D4D\u0D2F\u0D28\u0D4D\u200D \u0D2D\u0D3E\u0D37",
            null);
        if (!value
            .equals("\u0D05\u0D7D\u0D17\u0D4B\u0D7A\u0D15\u0D4D\u0D2F\u0D7B \u0D2D\u0D3E\u0D37")) {
            errln("Malayalam incorrectly normalized with output: " + value);
        }
    }

    /** Checks that U+015F / U+0163 in Romanian input come back as U+0219 / U+021B. */
    public void TestRomanian() {
        DisplayAndInputProcessor daip = new DisplayAndInputProcessor(
            info.getCLDRFile("ro", false));
        String value = daip.processInput(
            "//ldml/localeDisplayNames/types/type[@type=\"hant\"][@key=\"numbers\"]",
            "Numerale chineze\u015Fti tradi\u0163ionale", null);
        if (!value.equals("Numerale chineze\u0219ti tradi\u021Bionale")) {
            errln("Romanian incorrectly normalized: " + value);
        }
    }

    /**
     * Checks that the Zawgyi detector and Zawgyi->Unicode converter perform
     * correctly: Zawgyi input must be converted to the expected Unicode form,
     * and input that is already Unicode must come back unchanged.
     */
    public void TestMyanmarZawgyi() {
        DisplayAndInputProcessor daip = new DisplayAndInputProcessor(
            info.getCLDRFile("my", false));
        String z_mi = "ေမာင္းရီ (နယူးဇီလန္ကၽြန္းရွိ ပင္ရင္းတိုင္းရင္းသားလူမ်ိဳး)";
        String u_mi = "မောင်းရီ (နယူးဇီလန်ကျွန်းရှိ ပင်ရင်းတိုင်းရင်းသားလူမျိုး)";

        // Check that z_mi is detected as Zawgyi, and converted to u_mi.
        // Check that the converted version is detected as Unicode.
        String converted_z_mi = daip.processInput("", z_mi, null);
        if (!converted_z_mi.equals(u_mi)) {
            errln("Myanmar Zawgyi value incorrectly normalized: \n " + z_mi
                + " to \n" + ">" + converted_z_mi + "<, expected\n" + ">"
                + u_mi + "<");
        }
        String converted_u_mi = daip.processInput("", u_mi, null);
        if (!converted_u_mi.equals(u_mi)) {
            errln("Myanmar Unicode value incorrectly changed:\n" + u_mi
                + " to\n" + converted_u_mi);
        }
        // TODO(ccorn): test other strings with the converter.
        String mixed_latn_zawgyi = "ABCDE " + z_mi + "XYZ";
        String mixed_latn_unicode = "ABCDE " + u_mi + "XYZ";
        String converted_mixed = daip.processInput("", mixed_latn_zawgyi, null);
        if (!converted_mixed.equals(mixed_latn_unicode)) {
            errln("Myanmar mixed value incorrectly normalized:"
                + converted_mixed.length() + "\n" + mixed_latn_zawgyi
                + " to " + mixed_latn_unicode.length() + "\n"
                + converted_mixed + ", expected\n" + mixed_latn_unicode);
        }

        // Test 1039 conversion - simple cases.
        String z1039 = "\u1031\u1019\u102c\u1004\u1039\u1038\u101b\u102e\u0020\u0028\u1014"
            + "\u101A\u1030\u1038\u1007\u102E\u101C\u1014\u1039\u1000\u107D\u103C\u1014\u1039\u1038\u101B\u103D\u102D";
        String u103a = "\u1019\u1031\u102c\u1004\u103a\u1038\u101b\u102e\u0020\u0028\u1014"
            + "\u101A\u1030\u1038\u1007\u102E\u101C\u1014\u103A\u1000\u103B\u103D\u1014\u103A\u1038\u101B\u103E\u102D";
        String converted_1039 = daip.processInput("", z1039, null);
        if (!converted_1039.equals(u103a)) {
            errln("Myanmar #1039 (Unicode) was changed: \n" + z1039 + " to \n"
                + converted_1039 + ", expected \n" + u103a);
        }

        String z0 = "\u1000\u1005\u102C\u1038\u101E\u1019\u102C\u1038"; // Test #0
        String converted_0 = daip.processInput("", z0, null);
        if (!converted_0.equals(z0)) {
            errln("Myanmar #0 (Unicode) was changed: " + z0 + " to "
                + converted_0);
        }

        String z5 = "\u1021\u101E\u1004\u1039\u1038\u1019\u103D"; // Test #5
        String u5 = "\u1021\u101E\u1004\u103A\u1038\u1019\u103E";
        String converted_5 = daip.processInput("", z5, null);
        if (!converted_5.equals(u5)) {
            errln("Myanmar #5 incorrectly normalized: " + z5 + " to "
                + converted_5);
        }

        String z_zero = "\u1031\u1040\u1037";
        String u_zero = "\u101d\u1031\u1037";
        String converted_zero = daip.processInput("", z_zero, null);
        if (!converted_zero.equals(u_zero)) {
            errln("Myanmar with diacritics and zero incorrectly normalized:\n"
                + z_zero + " to\n" + converted_zero + '\n' + u_zero);
        }
        // Check that multiple digits are not converted.
        z_zero = "\u1041\u1040\u1037";
        u_zero = "\u1041\u1040\u1037";
        converted_zero = daip.processInput("", z_zero, null);
        if (!converted_zero.equals(u_zero)) {
            errln("Myanmar with two zeros incorrectly normalized:\n" + z_zero
                + " to\n" + converted_zero + '\n' + u_zero);
        }

        // More checks that Unicode is not converted.
        String is_unicode = "\u1019\u101B\u103E\u102D\u101E\u1031\u102C";
        String check_is_unicode = daip.processInput("", is_unicode, null);
        if (!check_is_unicode.equals(is_unicode)) {
            errln("Myanmar should not have converted:\n" + is_unicode + " to\n"
                + check_is_unicode);
        }
        is_unicode = "\u1001\u103B\u103c";
        check_is_unicode = daip.processInput("", is_unicode, null);
        if (!check_is_unicode.equals(is_unicode)) {
            errln("Myanmar should not have converted:\n" + is_unicode + " to\n"
                + check_is_unicode);
        }
        is_unicode = "\u1001\u103E\u103A";
        check_is_unicode = daip.processInput("", is_unicode, null);
        if (!check_is_unicode.equals(is_unicode)) {
            errln("Myanmar should not have converted:\n" + is_unicode + " to\n"
                + check_is_unicode);
        }
    }

    /** Checks quoting of a literal period in compact ("long") decimal format patterns. */
    public void TestCompactNumberFormats() {
        DisplayAndInputProcessor daip = new DisplayAndInputProcessor(
            info.getEnglish(), false);
        // NOTE(review): the path literal ends with a space inside the quotes; it is
        // preserved exactly as found - confirm that it is intentional.
        String xpath = "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000\"] ";
        String value = daip.processInput(xpath, "0.00K.", null);
        assertEquals("Period not correctly quoted", "0K'.'", value);
        value = daip.processInput(xpath, "00.0K'.'", null);
        assertEquals("Quotes should not be double-quoted", "00K'.'", value);
        value = daip.processForDisplay(xpath, "0.0 K'.'");
        assertEquals("There should be no quotes left", "0.0 K.", value);
    }

    /** Checks that a standard decimal pattern with extra grouping is canonicalized. */
    public void TestPatternCanonicalization() {
        DisplayAndInputProcessor daip = new DisplayAndInputProcessor(
            info.getEnglish(), false);
        String xpath = "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength/decimalFormat[@type=\"standard\"]/pattern[@type=\"standard\"]";
        String value = daip.processInput(xpath, "#,###,##0.###", null);
        assertEquals("Format not correctly canonicalized", "#,##0.###", value);
    }

    /** Checks that a breaking space in a currency pattern is replaced by a no-break space. */
    public void TestCurrencyFormatSpaces() {
        DisplayAndInputProcessor daip = new DisplayAndInputProcessor(
            info.getEnglish(), false);
        String xpath = "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"standard\"]";
        // Input uses an ordinary (breaking) space, U+0020.
        String value = daip.processInput(xpath, "¤ #,##0.00", null);
        // Expected value uses a no-break space (U+00A0), written as an escape so the
        // difference from the input stays visible and survives editors.
        assertEquals("Breaking space not replaced", "¤\u00A0#,##0.00", value);
    }

    /**
     * Reports whether the main exemplar set of the given file contains
     * MODIFIER LETTER APOSTROPHE, either as a single code point or inside one
     * of the set's strings.
     *
     * @param testFile file whose winning value for
     *            {@code //ldml/characters/exemplarCharacters} is inspected
     * @return true if the character occurs in the main exemplar set
     */
    private Boolean usesModifierApostrophe(CLDRFile testFile) {
        String exemplarSet = testFile
            .getWinningValue("//ldml/characters/exemplarCharacters");
        UnicodeSet mainExemplarSet = new UnicodeSet(exemplarSet);
        UnicodeSetIterator usi = new UnicodeSetIterator(mainExemplarSet);
        while (usi.next()) {
            if (usi.codepoint == MODIFIER_LETTER_APOSTROPHE
                || (usi.codepoint == UnicodeSetIterator.IS_STRING
                    && usi.getString().indexOf(MODIFIER_LETTER_APOSTROPHE) >= 0)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks, for every available language, that membership in
     * {@code DisplayAndInputProcessor.LANGUAGES_USING_MODIFIER_APOSTROPHE}
     * agrees with whether the language's main exemplars actually contain the
     * modifier apostrophe.
     */
    public void TestModifierApostropheLocales() {
        Factory f = info.getFullCldrFactory();
        Set<String> allLanguages = f.getAvailableLanguages();
        for (String thisLanguage : allLanguages) {
            CLDRFile thisLanguageFile = f.make(thisLanguage, true);
            try {
                boolean inExemplars = usesModifierApostrophe(thisLanguageFile);
                boolean onList = DisplayAndInputProcessor.LANGUAGES_USING_MODIFIER_APOSTROPHE
                    .contains(thisLanguage);
                if (inExemplars && !onList) {
                    errln("Language : "
                        + thisLanguage
                        + " uses MODIFIER_LETTER_APOSTROPHE, but is not on the list in DAIP.LANGUAGES_USING_MODIFIER_APOSTROPHE");
                } else if (!inExemplars && onList) {
                    errln("Language : "
                        + thisLanguage
                        + " is on the list in DAIP.LANGUAGES_USING_MODIFIER_APOSTROPHE, but the main exemplars don't use this character.");
                }
            } catch (Throwable t) {
                // Deliberately broad: a failure in one locale is reported and the
                // remaining locales are still checked.
                t.printStackTrace();
                errln("Error in " + thisLanguage + " - " + t.getMessage());
            }
        }
    }

    /** Checks that two ASCII apostrophes in a narrow unit pattern become U+2033. */
    public void TestQuoteNormalization() {
        DisplayAndInputProcessor daip = new DisplayAndInputProcessor(
            info.getEnglish(), false);
        String xpath = "//ldml/units/unitLength[@type=\"narrow\"]/unitPattern[@count=\"one\"]";
        String value = daip.processInput(xpath, "{0}''", null); // two ASCII apostrophes
        assertEquals("Quotes not normalized", "{0}\u2033", value); // U+2033 DOUBLE PRIME
    }

    /**
     * Sends every value in the file through processForDisplay and then
     * processInput. A value that does not round-trip (per {@link #diff}) is
     * reported as an error; a value that round-trips but was altered on the
     * way, or that produced an internal exception, is only logged.
     *
     * @param cldrFile file whose paths and values are checked
     */
    private void showCldrFile(final CLDRFile cldrFile) {
        DisplayAndInputProcessor daip = new DisplayAndInputProcessor(cldrFile, true);
        Exception[] internalException = new Exception[1];
        for (String path : cldrFile) {
            String value = cldrFile.getStringValue(path);
            String display = daip.processForDisplay(path, value);
            internalException[0] = null;
            String input = daip.processInput(path, display, internalException);
            String diff = diff(value, input, path);
            if (diff != null) {
                errln(cldrFile.getLocaleID() + "\tNo roundtrip in DAIP:"
                    + "\n\t  value<" + value
                    + ">\n\tdisplay<" + display
                    + ">\n\t  input<" + input
                    + ">\n\t   diff<" + diff
                    + (internalException[0] != null
                        ? ">\n\texcep<" + internalException[0] : "")
                    + ">\n\tpath<" + path + ">");
                daip.processInput(path, value, internalException); // for debugging
            } else if (!CharSequences.equals(value, display)
                || !CharSequences.equals(value, input)
                || internalException[0] != null) {
                logln("DAIP Changes"
                    + "\n\tvalue<" + value
                    + ">\n\tdisplay<" + display
                    + ">\n\tinput<" + input
                    + ">\n\tdiff<" + diff
                    + (internalException[0] != null
                        ? ">\n\texcep<" + internalException[0] : "")
                    + ">\n\tpath<" + path + ">");
            }
        }
    }

    /**
     * Compares an original value with its round-tripped form.
     *
     * @param value the original value
     * @param input the value after processForDisplay followed by processInput
     * @param path the path of the value; exemplar and parseLenient paths are
     *            compared as UnicodeSets rather than as strings
     * @return null when the two are considered equivalent; for set-valued
     *         paths, the pattern of the symmetric difference; otherwise "?"
     */
    private String diff(String value, String input, String path) {
        if (value.equals(input)) {
            return null;
        }
        if (path.contains("/exemplarCharacters") || path.contains("/parseLenient")) {
            try {
                UnicodeSet s1 = new UnicodeSet(value);
                UnicodeSet s2 = new UnicodeSet(input);
                if (!s1.equals(s2)) {
                    UnicodeSet temp = new UnicodeSet(s1).removeAll(s2);
                    UnicodeSet temp2 = new UnicodeSet(s2).removeAll(s1);
                    temp.addAll(temp2);
                    return temp.toPattern(true);
                }
                return null;
            } catch (Exception ignored) {
                // One of the values is not a parsable UnicodeSet pattern; fall
                // through to the plain string comparison below.
            }
        }
        // Square brackets turned into parentheses are not counted as a difference.
        // NOTE(review): the second replace pair was a character-for-character repeat
        // of the first and therefore a no-op; the fullwidth forms (U+FF3B/U+FF3D to
        // U+FF08/U+FF09) are presumably what was intended - confirm against the
        // bracket handling in DisplayAndInputProcessor.
        String value2 = value.replace('[', '(').replace(']', ')')
            .replace('\uFF3B', '\uFF08').replace('\uFF3D', '\uFF09');
        if (value2.equals(input)) {
            return null;
        }
        return "?";
    }
}