1 package org.unicode.cldr.unittest;
2 
3 import java.util.Set;
4 
5 import org.unicode.cldr.test.DisplayAndInputProcessor;
6 import org.unicode.cldr.util.CLDRConfig;
7 import org.unicode.cldr.util.CLDRFile;
8 import org.unicode.cldr.util.CLDRFile.ExemplarType;
9 import org.unicode.cldr.util.Factory;
10 
11 import com.ibm.icu.dev.test.TestFmwk;
12 import com.ibm.icu.lang.CharSequences;
13 import com.ibm.icu.text.UnicodeSet;
14 import com.ibm.icu.text.UnicodeSetIterator;
15 
16 public class TestDisplayAndInputProcessor extends TestFmwk {
17 
    // Shared CLDR configuration; the tests below obtain their CLDRFile instances and Factory from it.
    CLDRConfig info = CLDRConfig.getInstance();
19 
main(String[] args)20     public static void main(String[] args) {
21         new TestDisplayAndInputProcessor().run(args);
22     }
23 
TestAll()24     public void TestAll() {
25         showCldrFile(info.getEnglish());
26         showCldrFile(info.getCLDRFile("ar", true));
27         showCldrFile(info.getCLDRFile("ja", true));
28         showCldrFile(info.getCLDRFile("hi", true));
29         showCldrFile(info.getCLDRFile("wae", true));
30     }
31 
TestAExemplars()32     public void TestAExemplars() {
33         UnicodeSet test = new UnicodeSet();
34         DisplayAndInputProcessor daip = new DisplayAndInputProcessor(info.getEnglish(), true);
35         Exception[] internalException = new Exception[1];
36 
37         for (String s : new UnicodeSet("[!-#%-\\]_a-~¡§ª-¬±-³ µ-·¹-þ؉٠-٬۰-۹०-९০-৯੦-੯ ૦-૯୦-୯௦-௯౦-౯೦-೯൦-൯༠-༩ ၀-၉\\‎\\‏’‰−〇一七三九二五八六四]")) {
38             test.clear().add(s);
39             String value = test.toPattern(false);
40             String path = CLDRFile.getExemplarPath(ExemplarType.numbers);
41 
42             String display = daip.processForDisplay(path, value);
43             internalException[0] = null;
44             String input = daip.processInput(path, display, internalException);
45 
46             try {
47                 UnicodeSet roundTrip = new UnicodeSet(input);
48                 if (!assertEquals(test.toString() + "=>" + display, test, roundTrip)) {
49                     input = daip.processInput(path, display, internalException); // for debugging
50                 }
51             } catch (Exception e) {
52                 errln(test.toString() + "=>" + display + ": Failed to parse " + input);
53             }
54         }
55     }
56 
TestTasawaq()57     public void TestTasawaq() {
58         DisplayAndInputProcessor daip = new DisplayAndInputProcessor(info
59             .getCLDRFile("twq", true));
60         // time for data driven test
61         final String input = "[Z \u017E ]";
62         final String expect = "[z \u017E]"; // lower case
63         String value = daip.processInput(
64             "//ldml/characters/exemplarCharacters", input, null);
65         if (!value.equals(expect)) {
66             errln("Tasawaq incorrectly normalized with output: '" + value
67                 + "', expected '" + expect + "'");
68         }
69     }
70 
TestMalayalam()71     public void TestMalayalam() {
72         DisplayAndInputProcessor daip = new DisplayAndInputProcessor(info
73             .getCLDRFile("ml", false));
74         String value = daip.processInput(
75             "//ldml/localeDisplayNames/languages/language[@type=\"alg\"]",
76             "അല്‍ഗോണ്‍ക്യന്‍ ഭാഷ", null);
77         if (!value
78             .equals("\u0D05\u0D7D\u0D17\u0D4B\u0D7A\u0D15\u0D4D\u0D2F\u0D7B \u0D2D\u0D3E\u0D37")) {
79             errln("Malayalam incorrectly normalized with output: " + value);
80         }
81     }
82 
TestRomanian()83     public void TestRomanian() {
84         DisplayAndInputProcessor daip = new DisplayAndInputProcessor(info
85             .getCLDRFile("ro", false));
86         String value = daip
87             .processInput(
88                 "//ldml/localeDisplayNames/types/type[@type=\"hant\"][@key=\"numbers\"]",
89                 "Numerale chineze\u015Fti tradi\u0163ionale", null);
90         if (!value.equals("Numerale chineze\u0219ti tradi\u021Bionale")) {
91             errln("Romanian incorrectly normalized: " + value);
92         }
93     }
94 
    /**
     * Exercises processInput on Myanmar text for the "my" file, using an empty path.
     *
     * Each section feeds one string to processInput and reports an error through errln
     * when the result differs from the expected string. Some inputs are expected to be
     * rewritten (the Zawgyi-encoded samples) and some are expected to come back
     * unchanged (the samples that are already Unicode).
     */
    public void TestMyanmarZawgyi() {
        // Check that the Zawgyi detector and Zawgyi->Unicode converter perform
        // correctly.
        DisplayAndInputProcessor daip = new DisplayAndInputProcessor(info
            .getCLDRFile("my", false));
        // The same phrase twice: z_mi is the Zawgyi form, u_mi the expected Unicode form.
        String z_mi = "ေမာင္းရီ (နယူးဇီလန္ကၽြန္းရွိ ပင္ရင္းတိုင္းရင္းသားလူမ်ိဳး)";
        String u_mi = "မောင်းရီ (နယူးဇီလန်ကျွန်းရှိ ပင်ရင်းတိုင်းရင်းသားလူမျိုး)";

        // Check that z_mi is detected as Zawgyi, and converted to u_mi.
        // Check that the converted version is detected as Unicode.
        String converted_z_mi = daip.processInput("", z_mi, null);
        if (!converted_z_mi.equals(u_mi)) {
            errln("Myanmar Zawgyi value incorrectly normalized: \n " + z_mi
                + " to \n" + ">" + converted_z_mi + "<, expected\n" + ">"
                + u_mi + "<");
        }
        // The Unicode form must pass through unchanged.
        String converted_u_mi = daip.processInput("", u_mi, null);
        if (!converted_u_mi.equals(u_mi)) {
            errln("Myanmar Unicode value incorrectly changed:\n" + u_mi
                + " to\n" + converted_u_mi);
        }
        // TODO(ccorn): test other strings with the converter.
        // Same phrase wrapped in Latin text on both sides.
        String mixed_latn_zawgyi = "ABCDE " + z_mi + "XYZ";
        String mixed_latn_unicode = "ABCDE " + u_mi + "XYZ";
        String converted_mixed = daip.processInput("", mixed_latn_zawgyi, null);
        if (!converted_mixed.equals(mixed_latn_unicode)) {
            // The message also prints the lengths of the actual and the expected string.
            errln("Myanmar mixed value incorrectly normalized:"
                + converted_mixed.length() + "\n" + mixed_latn_zawgyi
                + " to " + mixed_latn_unicode.length() + "\n"
                + converted_mixed + ", expected\n" + mixed_latn_unicode);
        }

        // Test 1039 conversion - simple cases.
        String z1039 = "\u1031\u1019\u102c\u1004\u1039\u1038\u101b\u102e\u0020\u0028\u1014"
            + "\u101A\u1030\u1038\u1007\u102E\u101C\u1014\u1039\u1000\u107D\u103C\u1014\u1039\u1038\u101B\u103D\u102D";
        String u103a = "\u1019\u1031\u102c\u1004\u103a\u1038\u101b\u102e\u0020\u0028\u1014"
            + "\u101A\u1030\u1038\u1007\u102E\u101C\u1014\u103A\u1000\u103B\u103D\u1014\u103A\u1038\u101B\u103E\u102D";
        String converted_1039 = daip.processInput("", z1039, null);
        if (!converted_1039.equals(u103a)) {
            errln("Myanmar #1039 (Unicode) was changed: \n" + z1039 + " to \n"
                + converted_1039 + ", expected \n" + u103a);
        }

        // Test #0: this input is expected to come back unchanged.
        String z0 = "\u1000\u1005\u102C\u1038\u101E\u1019\u102C\u1038";
        String converted_0 = daip.processInput("", z0, null);
        if (!converted_0.equals(z0)) {
            errln("Myanmar #0 (Unicode) was changed: " + z0 + " to "
                + converted_0);
        }

        String z5 = "\u1021\u101E\u1004\u1039\u1038\u1019\u103D"; // Test #5
        String u5 = "\u1021\u101E\u1004\u103A\u1038\u1019\u103E";
        String converted_5 = daip.processInput("", z5, null);
        if (!converted_5.equals(u5)) {
            errln("Myanmar #5 incorrectly normalized: " + z5 + " to "
                + converted_5);
        }

        // A single digit zero (U+1040) next to diacritics is expected to become U+101D,
        // with the order of the first two characters swapped.
        String z_zero = "\u1031\u1040\u1037";
        String u_zero = "\u101d\u1031\u1037";
        String converted_zero = daip.processInput("", z_zero, null);
        if (!converted_zero.equals(u_zero)) {
            errln("Myanmar with diacritics and zero incorrectly normalized:\n"
                + z_zero + " to\n" + converted_zero + '\n' + u_zero);
        }
        // Check that multiple digits are not converted.
        z_zero = "\u1041\u1040\u1037";
        u_zero = "\u1041\u1040\u1037";
        converted_zero = daip.processInput("", z_zero, null);
        if (!converted_zero.equals(u_zero)) {
            errln("Myanmar with two zeros incorrectly normalized:\n" + z_zero
                + " to\n" + converted_zero + '\n' + u_zero);
        }

        // More checks that Unicode is not converted.
        String is_unicode = "\u1019\u101B\u103E\u102D\u101E\u1031\u102C";
        String check_is_unicode = daip.processInput("", is_unicode, null);
        if (!check_is_unicode.equals(is_unicode)) {
            errln("Myanmar should not have converted:\n" + is_unicode + " to\n"
                + check_is_unicode);
        }
        is_unicode = "\u1001\u103B\u103c";
        check_is_unicode = daip.processInput("", is_unicode, null);
        if (!check_is_unicode.equals(is_unicode)) {
            errln("Myanmar should not have converted:\n" + is_unicode + " to\n"
                + check_is_unicode);
        }
        is_unicode = "\u1001\u103E\u103A";
        check_is_unicode = daip.processInput("", is_unicode, null);
        if (!check_is_unicode.equals(is_unicode)) {
            errln("Myanmar should not have converted:\n" + is_unicode + " to\n"
                + check_is_unicode);
        }
    }
190 
TestCompactNumberFormats()191     public void TestCompactNumberFormats() {
192         DisplayAndInputProcessor daip = new DisplayAndInputProcessor(
193             info.getEnglish(), false);
194         String xpath = "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000\"] ";
195         String value = daip.processInput(xpath, "0.00K.", null);
196         assertEquals("Period not correctly quoted", "0K'.'", value);
197         value = daip.processInput(xpath, "00.0K'.'", null);
198         assertEquals("Quotes should not be double-quoted", "00K'.'", value);
199         value = daip.processForDisplay(xpath, "0.0 K'.'");
200         assertEquals("There should be no quotes left", "0.0 K.", value);
201     }
202 
TestPatternCanonicalization()203     public void TestPatternCanonicalization() {
204         DisplayAndInputProcessor daip = new DisplayAndInputProcessor(
205             info.getEnglish(), false);
206         String xpath = "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength/decimalFormat[@type=\"standard\"]/pattern[@type=\"standard\"]";
207         String value = daip.processInput(xpath, "#,###,##0.###", null);
208         assertEquals("Format not correctly canonicalized", "#,##0.###", value);
209     }
210 
TestCurrencyFormatSpaces()211     public void TestCurrencyFormatSpaces() {
212         DisplayAndInputProcessor daip = new DisplayAndInputProcessor(
213             info.getEnglish(), false);
214         String xpath = "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"standard\"]";
215         String value = daip.processInput(xpath, "¤ #,##0.00", null); // breaking
216         // space
217         assertEquals("Breaking space not replaced", "¤ #,##0.00", value); // non-breaking
218         // space
219     }
220 
usesModifierApostrophe(CLDRFile testFile)221     private Boolean usesModifierApostrophe(CLDRFile testFile) {
222         char MODIFIER_LETTER_APOSTROPHE = '\u02BC';
223         String exemplarSet = testFile
224             .getWinningValue("//ldml/characters/exemplarCharacters");
225         UnicodeSet mainExemplarSet = new UnicodeSet(exemplarSet);
226         UnicodeSetIterator usi = new UnicodeSetIterator(mainExemplarSet);
227         while (usi.next()) {
228             if (usi.codepoint == MODIFIER_LETTER_APOSTROPHE
229                 || (usi.codepoint == UnicodeSetIterator.IS_STRING && usi
230                     .getString().indexOf(MODIFIER_LETTER_APOSTROPHE) >= 0)) {
231                 return true;
232             }
233         }
234         return false;
235     }
236 
TestModifierApostropheLocales()237     public void TestModifierApostropheLocales() {
238         Factory f = info.getFullCldrFactory();
239         Set<String> allLanguages = f.getAvailableLanguages();
240         for (String thisLanguage : allLanguages) {
241             CLDRFile thisLanguageFile = f.make(thisLanguage, true);
242             try {
243                 if (usesModifierApostrophe(thisLanguageFile)) {
244                     if (!DisplayAndInputProcessor.LANGUAGES_USING_MODIFIER_APOSTROPHE
245                         .contains(thisLanguage)) {
246                         errln("Language : "
247                             + thisLanguage
248                             + " uses MODIFIER_LETTER_APOSROPHE, but is not on the list in DAIP.LANGUAGES_USING_MODIFIER_APOSTROPHE");
249                     }
250                 } else {
251                     if (DisplayAndInputProcessor.LANGUAGES_USING_MODIFIER_APOSTROPHE
252                         .contains(thisLanguage)) {
253                         errln("Language : "
254                             + thisLanguage
255                             + "is on the list in DAIP.LANGUAGES_USING_MODIFIER_APOSTROPHE, but the main exemplars don't use this character.");
256                     }
257                 }
258             } catch(Throwable t) {
259                 t.printStackTrace();
260                 errln("Error in " + thisLanguage + " - " + t.getMessage());
261             }
262         }
263     }
264 
TestQuoteNormalization()265     public void TestQuoteNormalization() {
266         DisplayAndInputProcessor daip = new DisplayAndInputProcessor(
267             info.getEnglish(), false);
268         String xpath = "//ldml/units/unitLength[@type=\"narrow\"]/unitPattern[@count=\"one\"]";
269         String value = daip.processInput(xpath, "{0}''", null); // breaking
270         // space
271         assertEquals("Quotes not normalized", "{0}″", value); // non-breaking
272         // space
273     }
274 
showCldrFile(final CLDRFile cldrFile)275     private void showCldrFile(final CLDRFile cldrFile) {
276         DisplayAndInputProcessor daip = new DisplayAndInputProcessor(cldrFile,
277             true);
278         Exception[] internalException = new Exception[1];
279         for (String path : cldrFile) {
280             String value = cldrFile.getStringValue(path);
281             if (value.equals("[\\- , . % ‰ + 0-9]")) {
282                 int debug = 0;
283             }
284             String display = daip.processForDisplay(path, value);
285             internalException[0] = null;
286             String input = daip.processInput(path, display, internalException);
287             String diff = diff(value, input, path);
288             if (diff != null) {
289                 errln(cldrFile.getLocaleID() + "\tNo roundtrip in DAIP:"
290                     + "\n\t  value<"
291                     + value
292                     + ">\n\tdisplay<"
293                     + display
294                     + ">\n\t  input<"
295                     + input
296                     + ">\n\t   diff<"
297                     + diff
298                     + (internalException[0] != null ? ">\n\texcep<"
299                         + internalException[0] : "")
300                     + ">\n\tpath<"
301                     + path + ">");
302                 daip.processInput(path, value, internalException); // for
303                 // debugging
304             } else if (!CharSequences.equals(value, display)
305                 || !CharSequences.equals(value, input)
306                 || internalException[0] != null) {
307                 logln("DAIP Changes"
308                     + "\n\tvalue<"
309                     + value
310                     + ">\n\tdisplay<"
311                     + display
312                     + ">\n\tinput<"
313                     + input
314                     + ">\n\tdiff<"
315                     + diff
316                     + (internalException[0] != null ? ">\n\texcep<"
317                         + internalException[0] : "")
318                     + ">\n\tpath<"
319                     + path + ">");
320             }
321         }
322     }
323 
diff(String value, String input, String path)324     private String diff(String value, String input, String path) {
325         if (value.equals(input)) {
326             return null;
327         }
328         if (path.contains("/exemplarCharacters") || path.contains("/parseLenient")) {
329             try {
330                 UnicodeSet s1 = new UnicodeSet(value);
331                 UnicodeSet s2 = new UnicodeSet(input);
332                 if (!s1.equals(s2)) {
333                     UnicodeSet temp = new UnicodeSet(s1).removeAll(s2);
334                     UnicodeSet temp2 = new UnicodeSet(s2).removeAll(s1);
335                     temp.addAll(temp2);
336                     return temp.toPattern(true);
337                 }
338                 return null;
339             } catch (Exception e) {
340                 // TODO: handle exception
341             }
342         }
343         String value2 = value.replace('[', '(').replace(']', ')')
344             .replace('[', '(').replace(']', ')');
345         if (value2.equals(input)) {
346             return null;
347         }
348         return "?";
349     }
350 }
351