1 /**
2  *
3  */
4 package org.unicode.cldr.util;
5 
6 import java.io.BufferedReader;
7 import java.io.File;
8 import java.io.FilenameFilter;
9 import java.io.IOException;
10 import java.io.Writer;
11 import java.util.ArrayList;
12 import java.util.Arrays;
13 import java.util.Collection;
14 import java.util.Collections;
15 import java.util.Enumeration;
16 import java.util.HashSet;
17 import java.util.Iterator;
18 import java.util.LinkedHashMap;
19 import java.util.LinkedHashSet;
20 import java.util.LinkedList;
21 import java.util.List;
22 import java.util.Locale;
23 import java.util.Map;
24 import java.util.Set;
25 import java.util.TreeMap;
26 import java.util.TreeSet;
27 import java.util.regex.Matcher;
28 import java.util.regex.Pattern;
29 
30 import org.unicode.cldr.draft.FileUtilities;
31 import org.unicode.cldr.test.TestTransforms;
32 import org.unicode.cldr.tool.LikelySubtags;
33 
34 import com.google.common.collect.BiMap;
35 import com.google.common.collect.HashBiMap;
36 import com.ibm.icu.impl.Relation;
37 import com.ibm.icu.lang.UScript;
38 import com.ibm.icu.text.Transliterator;
39 import com.ibm.icu.text.UnicodeFilter;
40 import com.ibm.icu.util.ICUUncheckedIOException;
41 
42 public class CLDRTransforms {
43 
    /** Default directory of the CLDR transform XML files: {@code CLDRPaths.COMMON_DIRECTORY + "transforms/"}. */
    public static final String TRANSFORM_DIR = (CLDRPaths.COMMON_DIRECTORY + "transforms/");

    /** The one shared instance, returned by {@link #getInstance()}. */
    static final CLDRTransforms SINGLETON = new CLDRTransforms();
47 
getInstance()48     public static CLDRTransforms getInstance() {
49         return SINGLETON;
50     }
51 
getShowProgress()52     public Appendable getShowProgress() {
53         return showProgress;
54     }
55 
setShowProgress(Appendable showProgress)56     public CLDRTransforms setShowProgress(Appendable showProgress) {
57         this.showProgress = showProgress;
58         return this;
59     }
60 
    /** IDs of the transliterators this object has created from rules and registered with ICU. */
    final Set<String> overridden = new HashSet<String>();
    /** Computes the order in which transforms are processed so that dependencies come first. */
    final DependencyOrder dependencyOrder = new DependencyOrder();
63 
64     static public class RegexFindFilenameFilter implements FilenameFilter {
65         Matcher matcher;
66 
RegexFindFilenameFilter(Matcher filter)67         public RegexFindFilenameFilter(Matcher filter) {
68             matcher = filter;
69         }
70 
71         @Override
accept(File dir, String name)72         public boolean accept(File dir, String name) {
73             return matcher.reset(name).find();
74         }
75     };
76 
77     /**
78      *
79      * @param dir
80      *            TODO
81      * @param namesMatchingRegex
82      *            TODO
83      * @param showProgress
84      *            null if no progress needed
85      * @param skipDashTIds TODO
86      * @return
87      */
88 
registerCldrTransforms(String dir, String namesMatchingRegex, Appendable showProgress, boolean keepDashTIds)89     public static void registerCldrTransforms(String dir, String namesMatchingRegex, Appendable showProgress, boolean keepDashTIds) {
90         CLDRTransforms r = getInstance();
91         if (dir == null) {
92             dir = TRANSFORM_DIR;
93         }
94         // reorder to preload some
95         r.showProgress = showProgress;
96         List<String> files;
97         Set<String> ordered;
98 
99         if (namesMatchingRegex == null) {
100             files = getAvailableIds();
101             ordered = r.dependencyOrder.getOrderedItems(files, null, true);
102         } else {
103             Matcher filter = PatternCache.get(namesMatchingRegex).matcher("");
104             r.deregisterIcuTransliterators(filter);
105             files = Arrays.asList(new File(TRANSFORM_DIR).list(new RegexFindFilenameFilter(filter)));
106             ordered = r.dependencyOrder.getOrderedItems(files, filter, true);
107         }
108 
109         // System.out.println(ordered);
110         for (String cldrFileName : ordered) {
111             r.registerTransliteratorsFromXML(dir, cldrFileName, files, keepDashTIds);
112         }
113         Transliterator.registerAny(); // do this last!
114 
115     }
116 
getAvailableIds()117     public static List<String> getAvailableIds() {
118         return Arrays.asList(new File(TRANSFORM_DIR).list());
119     }
120 
getOverriddenTransliterators()121     public Set<String> getOverriddenTransliterators() {
122         return Collections.unmodifiableSet(overridden);
123     }
124 
    // Transliterator obtained for the ID "[:Mn:]any-hex/java".
    // NOTE(review): presumably used to show nonspacing marks as Java hex escapes; it is not
    // referenced in the part of the file reviewed here - confirm it is still needed.
    static Transliterator fixup = Transliterator.getInstance("[:Mn:]any-hex/java");
126 
127     class DependencyOrder {
128         // String[] doFirst = {"Latin-ConjoiningJamo"};
129         // the following are file names, not IDs, so the dependencies have to go both directions
130         // List<String> extras = new ArrayList<String>();
131 
132         Relation<Matcher, String> dependsOn = Relation.of(new LinkedHashMap<Matcher, Set<String>>(), LinkedHashSet.class);
133         {
134             addDependency("Latin-(Jamo|Hangul)(/.*)?", "Latin-ConjoiningJamo", "ConjoiningJamo-Latin");
135             addDependency("(Jamo|Hangul)-Latin(/.*)?", "Latin-ConjoiningJamo", "ConjoiningJamo-Latin");
136             addDependency("Latin-Han(/.*)", "Han-Spacedhan");
137             addDependency(".*(Hiragana|Katakana|Han|han).*", "Fullwidth-Halfwidth", "Halfwidth-Fullwidth");
138             addDependency(".*(Hiragana).*", "Latin-Katakana", "Katakana-Latin");
139 
140             addInterIndicDependency("Arabic");
141             addInterIndicDependency("Bengali");
142             addInterIndicDependency("Devanagari");
143             addInterIndicDependency("Gujarati");
144             addInterIndicDependency("Gurmukhi");
145             addInterIndicDependency("Kannada");
146             addInterIndicDependency("Malayalam");
147             addInterIndicDependency("Oriya");
148             addInterIndicDependency("Tamil");
149             addInterIndicDependency("Telugu");
150             addInterIndicDependency("ur");
151 
152             addDependency(".*Digit.*", "NumericPinyin-Pinyin", "Pinyin-NumericPinyin");
153             addDependency("Latin-NumericPinyin(/.*)?", "Tone-Digit", "Digit-Tone");
154             addDependency("NumericPinyin-Latin(/.*)?", "Tone-Digit", "Digit-Tone");
155             addDependency("am-ar", "am-am_FONIPA", "und_FONIPA-ar");
156             addDependency("am-chr", "am-am_FONIPA", "und_FONIPA-chr");
157             addDependency("am-fa", "am-am_FONIPA", "und_FONIPA-fa");
158             addDependency("ch-am", "ch-ch_FONIPA", "am-am_FONIPA");
159             addDependency("ch-ar", "ch-ch_FONIPA", "und_FONIPA-ar");
160             addDependency("ch-chr", "ch-ch_FONIPA", "und_FONIPA-chr");
161             addDependency("ch-fa", "ch-ch_FONIPA", "und_FONIPA-fa");
162             addDependency("cs-am", "cs-cs_FONIPA", "am-am_FONIPA");
163             addDependency("cs-ar", "cs-cs_FONIPA", "und_FONIPA-ar");
164             addDependency("cs-chr", "cs-cs_FONIPA", "und_FONIPA-chr");
165             addDependency("cs-fa", "cs-cs_FONIPA", "und_FONIPA-fa");
166             addDependency("cs-ja", "cs-cs_FONIPA", "cs_FONIPA-ja");
167             addDependency("cs_FONIPA-ko", "Latin-Hangul");
168             addDependency("cs-ko", "cs-cs_FONIPA", "cs_FONIPA-ko");
169             addDependency("de-ASCII", "Any-ASCII");
170             addDependency("eo-am", "eo-eo_FONIPA", "am-am_FONIPA");
171             addDependency("eo-ar", "eo-eo_FONIPA", "und_FONIPA-ar");
172             addDependency("eo-chr", "eo-eo_FONIPA", "und_FONIPA-chr");
173             addDependency("eo-fa", "eo-eo_FONIPA", "und_FONIPA-fa");
174             addDependency("es-am", "es-es_FONIPA", "am-am_FONIPA");
175             addDependency("es-ar", "es-es_FONIPA", "und_FONIPA-ar");
176             addDependency("es-chr", "es-es_FONIPA", "und_FONIPA-chr");
177             addDependency("es-fa", "es-es_FONIPA", "und_FONIPA-fa");
178             addDependency("es_419-am", "es-es_FONIPA", "es_FONIPA-es_419_FONIPA", "am-am_FONIPA");
179             addDependency("es_419-ar", "es-es_FONIPA", "es_FONIPA-es_419_FONIPA", "und_FONIPA-ar");
180             addDependency("es_419-chr", "es-es_FONIPA", "es_FONIPA-es_419_FONIPA", "und_FONIPA-chr");
181             addDependency("es_419-fa", "es-es_FONIPA", "es_FONIPA-es_419_FONIPA", "und_FONIPA-fa");
182             addDependency("es_419-ja", "es-es_FONIPA", "es_FONIPA-es_419_FONIPA", "es_FONIPA-ja");
183             addDependency("es-am", "es-es_FONIPA", "es_FONIPA-am");
184             addDependency("es-ja", "es-es_FONIPA", "es_FONIPA-ja");
185             addDependency("es-zh", "es-es_FONIPA", "es_FONIPA-zh");
186 
187             addDependency("Han-Latin-Names", "Han-Latin");
188 
189             addDependency("hy-am", "hy-hy_FONIPA", "am-am_FONIPA");
190             addDependency("hy-ar", "hy-hy_FONIPA", "und_FONIPA-ar");
191             addDependency("hy-chr", "hy-hy_FONIPA", "und_FONIPA-chr");
192             addDependency("hy-fa", "hy-hy_FONIPA", "und_FONIPA-fa");
193             addDependency("hy_AREVMDA-am", "hy_AREVMDA-hy_AREVMDA_FONIPA", "am-am_FONIPA");
194             addDependency("hy_AREVMDA-ar", "hy_AREVMDA-hy_AREVMDA_FONIPA", "und_FONIPA-ar");
195             addDependency("hy_AREVMDA-chr", "hy_AREVMDA-hy_AREVMDA_FONIPA", "und_FONIPA-chr");
196             addDependency("hy_AREVMDA-fa", "hy_AREVMDA-hy_AREVMDA_FONIPA", "und_FONIPA-fa");
197             addDependency("ia-am", "ia-ia_FONIPA", "am-am_FONIPA");
198             addDependency("ia-ar", "ia-ia_FONIPA", "und_FONIPA-ar");
199             addDependency("ia-chr", "ia-ia_FONIPA", "und_FONIPA-chr");
200             addDependency("ia-fa", "ia-ia_FONIPA", "und_FONIPA-fa");
201             addDependency("kk-am", "kk-kk_FONIPA", "am-am_FONIPA");
202             addDependency("kk-ar", "kk-kk_FONIPA", "und_FONIPA-ar");
203             addDependency("kk-chr", "kk-kk_FONIPA", "und_FONIPA-chr");
204             addDependency("kk-fa", "kk-kk_FONIPA", "und_FONIPA-fa");
205             addDependency("ky-am", "ky-ky_FONIPA", "am-am_FONIPA");
206             addDependency("ky-ar", "ky-ky_FONIPA", "und_FONIPA-ar");
207             addDependency("ky-chr", "ky-ky_FONIPA", "und_FONIPA-chr");
208             addDependency("ky-fa", "ky-ky_FONIPA", "und_FONIPA-fa");
209             addDependency("my-am", "my-my_FONIPA", "am-am_FONIPA");
210             addDependency("my-ar", "my-my_FONIPA", "und_FONIPA-ar");
211             addDependency("my-chr", "my-my_FONIPA", "und_FONIPA-chr");
212             addDependency("my-fa", "my-my_FONIPA", "und_FONIPA-fa");
213             addDependency("pl-am", "pl-pl_FONIPA", "am-am_FONIPA");
214             addDependency("pl-ar", "pl-pl_FONIPA", "und_FONIPA-ar");
215             addDependency("pl-chr", "pl-pl_FONIPA", "und_FONIPA-chr");
216             addDependency("pl-fa", "pl-pl_FONIPA", "und_FONIPA-fa");
217             addDependency("pl-ja", "pl-pl_FONIPA", "pl_FONIPA-ja");
218             addDependency("rm_SURSILV-am", "rm_SURSILV-rm_FONIPA_SURSILV", "am-am_FONIPA");
219             addDependency("rm_SURSILV-ar", "rm_SURSILV-rm_FONIPA_SURSILV", "und_FONIPA-ar");
220             addDependency("rm_SURSILV-chr", "rm_SURSILV-rm_FONIPA_SURSILV", "und_FONIPA-chr");
221             addDependency("rm_SURSILV-fa", "rm_SURSILV-rm_FONIPA_SURSILV", "und_FONIPA-fa");
222             addDependency("ro-am", "ro-ro_FONIPA", "am-am_FONIPA");
223             addDependency("ro-ar", "ro-ro_FONIPA", "und_FONIPA-ar");
224             addDependency("ro-chr", "ro-ro_FONIPA", "und_FONIPA-chr");
225             addDependency("ro-fa", "ro-ro_FONIPA", "und_FONIPA-fa");
226             addDependency("ro-ja", "ro-ro_FONIPA", "ro_FONIPA-ja");
227             addDependency("sat-am", "sat_Olck-sat_FONIPA", "am-am_FONIPA");
228             addDependency("sat-ar", "sat_Olck-sat_FONIPA", "und_FONIPA-ar");
229             addDependency("sat-chr", "sat_Olck-sat_FONIPA", "und_FONIPA-chr");
230             addDependency("sat-fa", "sat_Olck-sat_FONIPA", "und_FONIPA-fa");
231             addDependency("si-am", "si-si_FONIPA", "am-am_FONIPA");
232             addDependency("si-ar", "si-si_FONIPA", "und_FONIPA-ar");
233             addDependency("si-chr", "si-si_FONIPA", "und_FONIPA-chr");
234             addDependency("si-fa", "si-si_FONIPA", "und_FONIPA-fa");
235             addDependency("sk-am", "sk-sk_FONIPA", "am-am_FONIPA");
236             addDependency("sk-ar", "sk-sk_FONIPA", "und_FONIPA-ar");
237             addDependency("sk-chr", "sk-sk_FONIPA", "und_FONIPA-chr");
238             addDependency("sk-fa", "sk-sk_FONIPA", "und_FONIPA-fa");
239             addDependency("sk-ja", "sk-sk_FONIPA", "sk_FONIPA-ja");
240             addDependency("tlh-am", "tlh-tlh_FONIPA", "am-am_FONIPA");
241             addDependency("tlh-ar", "tlh-tlh_FONIPA", "und_FONIPA-ar");
242             addDependency("tlh-chr", "tlh-tlh_FONIPA", "und_FONIPA-chr");
243             addDependency("tlh-fa", "tlh-tlh_FONIPA", "und_FONIPA-fa");
244             addDependency("xh-am", "xh-xh_FONIPA", "am-am_FONIPA");
245             addDependency("xh-ar", "xh-xh_FONIPA", "und_FONIPA-ar");
246             addDependency("xh-chr", "xh-xh_FONIPA", "und_FONIPA-chr");
247             addDependency("xh-fa", "xh-xh_FONIPA", "und_FONIPA-fa");
248             addDependency("zu-am", "zu-zu_FONIPA", "am-am_FONIPA");
249             addDependency("zu-ar", "zu-zu_FONIPA", "und_FONIPA-ar");
250             addDependency("zu-chr", "zu-zu_FONIPA", "und_FONIPA-chr");
251             addDependency("zu-fa", "zu-zu_FONIPA", "und_FONIPA-fa");
252             addDependency("Latin-Bopomofo", "Latin-NumericPinyin");
253 
254             // addExtras("cs-ja", "cs-ja", "es-am", "es-ja", "es-zh", "Han-Latin/Names");
255             // Pinyin-NumericPinyin.xml
256         }
257 
addInterIndicDependency(String script)258         private void addInterIndicDependency(String script) {
259             addPivotDependency(script, "InterIndic");
260             if (!script.equals("Arabic")) {
261                 addDependency(script + "-Arabic",
262                     script + "-InterIndic", "InterIndic-Arabic");
263             }
264         }
265 
addPivotDependency(String script, String pivot)266         private void addPivotDependency(String script, String pivot) {
267             addDependency(script + "-.*", "Bengali" + "-" + pivot, pivot + "-" + "Bengali");
268             addDependency(".*-" + "Bengali" + "(/.*)?", pivot + "-" + "Bengali", pivot + "-" + "Bengali");
269         }
270 
271         // private void addExtras(String... strings) {
272         // for (String item : strings) {
273         // extras.add(item);
274         // }
275         // }
276 
addDependency(String pattern, String... whatItDependsOn)277         private void addDependency(String pattern, String... whatItDependsOn) {
278             dependsOn.putAll(PatternCache.get(pattern).matcher(""), Arrays.asList(whatItDependsOn));
279         }
280 
getOrderedItems(Collection<String> rawInput, Matcher filter, boolean hasXmlSuffix)281         public Set<String> getOrderedItems(Collection<String> rawInput, Matcher filter, boolean hasXmlSuffix) {
282             Set<String> input = new LinkedHashSet<String>(rawInput);
283             // input.addAll(extras);
284 
285             Set<String> ordered = new LinkedHashSet<String>();
286 
287             // for (String other : doFirst) {
288             // ordered.add(hasXmlSuffix ? other + ".xml" : other);
289             // }
290 
291             for (String cldrFileName : input) {
292                 if (hasXmlSuffix && !cldrFileName.endsWith(".xml")) {
293                     continue;
294                 }
295 
296                 if (filter != null && !filter.reset(cldrFileName).find()) {
297                     append("Skipping " + cldrFileName + "\n");
298                     continue;
299                 }
300                 // add dependencies first
301                 addDependenciesRecursively(cldrFileName, ordered, hasXmlSuffix);
302             }
303             append("Adding: " + ordered + "\n");
304             return ordered;
305         }
306 
addDependenciesRecursively(String cldrFileName, Set<String> ordered, boolean hasXmlSuffix)307         private void addDependenciesRecursively(String cldrFileName, Set<String> ordered, boolean hasXmlSuffix) {
308             String item = hasXmlSuffix && cldrFileName.endsWith(".xml") ? cldrFileName.substring(0,
309                 cldrFileName.length() - 4) : cldrFileName;
310             for (Matcher m : dependsOn.keySet()) {
311                 if (m.reset(item).matches()) {
312                     for (String other : dependsOn.getAll(m)) {
313                         final String toAdd = hasXmlSuffix ? other + ".xml" : other;
314                         if (other.equals(item) || ordered.contains(toAdd)) {
315                             continue;
316                         }
317                         addDependenciesRecursively(toAdd, ordered, hasXmlSuffix);
318                         append("Dependency: Adding: " + toAdd + " before " + item + "\n");
319                     }
320                 }
321             }
322             ordered.add(item);
323         }
324 
325     }
326 
getInstance(String id)327     public Transliterator getInstance(String id) {
328         if (!overridden.contains(id)) {
329             throw new IllegalArgumentException("No overriden transform for " + id);
330         }
331         return Transliterator.getInstance(id);
332     }
333 
    /**
     * Splits a transform ID of the form {@code source-target} or {@code source-target/variant}:
     * group 1 is the source, group 2 the target, group 4 the variant (null when absent).
     */
    public static Pattern TRANSFORM_ID_PATTERN = PatternCache.get("(.+)-([^/]+)(/(.*))?");
335 
getReverseInstance(String id)336     public Transliterator getReverseInstance(String id) {
337         Matcher matcher = TRANSFORM_ID_PATTERN.matcher(id);
338         if (!matcher.matches()) {
339             throw new IllegalArgumentException("**No transform for " + id);
340         }
341         return getInstance(matcher.group(2) + "-" + matcher.group(1)
342             + (matcher.group(4) == null ? "" : "/" + matcher.group(4)));
343     }
344 
    /** Two-way mapping from a transform's display ID to its full ID string; one entry is put per XML file registered. */
    private BiMap<String,String> displayNameToId = HashBiMap.create();
346 
getDisplayNameToId()347     public BiMap<String, String> getDisplayNameToId() {
348         return displayNameToId;
349     }
350 
addDisplayNameToId(Map<String, String> ids2, ParsedTransformID directionInfo)351     private void addDisplayNameToId(Map<String, String> ids2, ParsedTransformID directionInfo) {
352         displayNameToId.put(directionInfo.getDisplayId(), directionInfo.toString());
353     }
354 
registerTransliteratorsFromXML(String dir, String cldrFileName, List<String> cantSkip, boolean keepDashTIds)355     public void registerTransliteratorsFromXML(String dir, String cldrFileName, List<String> cantSkip, boolean keepDashTIds) {
356         ParsedTransformID directionInfo = new ParsedTransformID();
357         String ruleString;
358         final String cldrFileName2 = cldrFileName + ".xml";
359         try {
360             ruleString = getIcuRulesFromXmlFile(dir, cldrFileName2, directionInfo);
361         } catch (RuntimeException e) {
362             if (!cantSkip.contains(cldrFileName2)) {
363                 return;
364             }
365             throw e;
366         }
367 
368         String id = directionInfo.getId();
369         addDisplayNameToId(displayNameToId, directionInfo);
370 
371         if (directionInfo.getDirection() == Direction.both || directionInfo.getDirection() == Direction.forward) {
372             internalRegister(id, ruleString, Transliterator.FORWARD);
373             for (String alias : directionInfo.getAliases()) {
374                 if (!keepDashTIds && alias.contains("-t-")) {
375                     continue;
376                 }
377                 Transliterator.registerAlias(alias, id);
378             }
379         }
380         if (directionInfo.getDirection() == Direction.both || directionInfo.getDirection() == Direction.backward) {
381             internalRegister(id, ruleString, Transliterator.REVERSE);
382             for (String alias : directionInfo.getBackwardAliases()) {
383                 if (!keepDashTIds && alias.contains("-t-")) {
384                     continue;
385                 }
386                 Transliterator.registerAlias(alias, directionInfo.getBackwardId());
387             }
388         }
389     }
390 
391     /**
392      * Return Icu rules, and the direction info
393      *
394      * @param dir
395      *            TODO
396      * @param cldrFileName
397      * @param directionInfo
398      * @return
399      */
getIcuRulesFromXmlFile(String dir, String cldrFileName, ParsedTransformID directionInfo)400     public static String getIcuRulesFromXmlFile(String dir, String cldrFileName, ParsedTransformID directionInfo) {
401         final MyHandler myHandler = new MyHandler(cldrFileName, directionInfo);
402         XMLFileReader xfr = new XMLFileReader().setHandler(myHandler);
403         xfr.read(dir + cldrFileName, XMLFileReader.CONTENT_HANDLER | XMLFileReader.ERROR_HANDLER, true);
404         return myHandler.getRules();
405     }
406 
internalRegister(String id, String ruleString, int direction)407     private void internalRegister(String id, String ruleString, int direction) {
408         if (direction == Transliterator.REVERSE) {
409             id = ParsedTransformID.reverse(id);
410         }
411         internalRegisterNoReverseId(id, ruleString, direction);
412     }
413 
internalRegisterNoReverseId(String id, String ruleString, int direction)414     private void internalRegisterNoReverseId(String id, String ruleString, int direction) {
415         try {
416             Transliterator t = Transliterator.createFromRules(id, ruleString, direction);
417             overridden.add(id);
418             Transliterator oldTranslit = null;
419             if (showProgress != null) {
420                 try {
421                     oldTranslit = Transliterator.getInstance(id);
422                 } catch (Exception e) {
423                 }
424             }
425             Transliterator.unregister(id);
426             Transliterator.registerInstance(t);
427             // if (false) { // for paranoid testing
428             // Transliterator t1 = Transliterator.createFromRules(id, ruleString, direction);
429             // String r1 = t1.toRules(false);
430             // Transliterator t2 = Transliterator.getInstance(id);
431             // String r2 = t2.toRules(false);
432             // if (!r1.equals(r2)) {
433             // throw (IllegalArgumentException) new IllegalArgumentException("Rules unequal" + ruleString + "$$$\n$$$" +
434             // r2);
435             // }
436             // }
437             // verifyNullFilter("halfwidth-fullwidth");
438             if (showProgress != null) {
439                 append("Registered new Transliterator: " + id
440                     + (oldTranslit == null ? "" : "\told:\t" + oldTranslit.getID())
441                     + '\n');
442                 if (id.startsWith("el-")) {
443                     TestTransforms.showTransliterator("", t, 999);
444                     Transliterator t2 = Transliterator.getInstance(id);
445                     TestTransforms.showTransliterator("", t2, 999);
446                 }
447             }
448         } catch (RuntimeException e) {
449             if (showProgress != null) {
450                 e.printStackTrace();
451                 append("Couldn't register new Transliterator: " + id + "\t" + e.getMessage() + '\n');
452             } else {
453                 throw (IllegalArgumentException) new IllegalArgumentException("Couldn't register new Transliterator: "
454                     + id).initCause(e);
455             }
456         }
457     }
458 
    /** Sink for progress messages; null means no progress output. */
    Appendable showProgress;
460 
append(String string)461     private void append(String string) {
462         try {
463             if (showProgress == null) {
464                 return;
465             }
466             showProgress.append(string);
467             if (showProgress instanceof Writer) {
468                 ((Writer) showProgress).flush();
469             }
470         } catch (IOException e) {
471             throw new ICUUncheckedIOException(e);
472         }
473     }
474 
appendln(String s)475     private void appendln(String s) {
476         append(s + "\n");
477     }
478 
479     // ===================================
480 
481     @SuppressWarnings("deprecation")
registerFromIcuFormatFiles(String directory)482     public void registerFromIcuFormatFiles(String directory) throws IOException {
483 
484         deregisterIcuTransliterators((Matcher) null);
485 
486         Matcher getId = PatternCache.get("\\s*(\\S*)\\s*\\{\\s*").matcher("");
487         Matcher getSource = PatternCache.get("\\s*(\\S*)\\s*\\{\\s*\\\"(.*)\\\".*").matcher("");
488         Matcher translitID = PatternCache.get("([^-]+)-([^/]+)+(?:[/](.+))?").matcher("");
489 
490         Map<String, String> fixedIDs = new TreeMap<String, String>();
491         Set<String> oddIDs = new TreeSet<String>();
492 
493         File dir = new File(directory);
494         // get the list of files to take, and their directions
495         BufferedReader input = FileUtilities.openUTF8Reader(directory, "root.txt");
496         String id = null;
497         String filename = null;
498         Map<String, String> aliasMap = new LinkedHashMap<String, String>();
499 
500         // deregisterIcuTransliterators();
501 
502         // do first, since others depend on theseregisterFromIcuFile
503         /**
504          * Special aliases.
505          * Tone-Digit {
506          * alias {"Pinyin-NumericPinyin"}
507          * }
508          * Digit-Tone {
509          * alias {"NumericPinyin-Pinyin"}
510          * }
511          */
512         // registerFromIcuFile("Latin-ConjoiningJamo", directory, null);
513         // registerFromIcuFile("Pinyin-NumericPinyin", directory, null);
514         // Transliterator.registerAlias("Tone-Digit", "Pinyin-NumericPinyin");
515         // Transliterator.registerAlias("Digit-Tone", "NumericPinyin-Pinyin");
516         // registerFromIcuFile("Fullwidth-Halfwidth", directory, null);
517         // registerFromIcuFile("Hiragana-Katakana", directory, null);
518         // registerFromIcuFile("Latin-Katakana", directory, null);
519         // registerFromIcuFile("Hiragana-Latin", directory, null);
520 
521         while (true) {
522             String line = input.readLine();
523             if (line == null) break;
524             line = line.trim();
525             if (line.startsWith("\uFEFF")) {
526                 line = line.substring(1);
527             }
528             if (line.startsWith("TransliteratorNamePattern")) break; // done
529             // if (line.indexOf("Ethiopic") >= 0) {
530             // appendln("Skipping Ethiopic");
531             // continue;
532             // }
533             if (getId.reset(line).matches()) {
534                 String temp = getId.group(1);
535                 if (!temp.equals("file") && !temp.equals("internal")) id = temp;
536                 continue;
537             }
538             if (getSource.reset(line).matches()) {
539                 String operation = getSource.group(1);
540                 String source = getSource.group(2);
541                 if (operation.equals("alias")) {
542                     aliasMap.put(id, source);
543                     checkIdFix(id, fixedIDs, oddIDs, translitID);
544                     id = null;
545                 } else if (operation.equals("resource:process(transliterator)")) {
546                     filename = source;
547                 } else if (operation.equals("direction")) {
548                     try {
549                         if (id == null || filename == null) {
550                             // appendln("skipping: " + line);
551                             continue;
552                         }
553                         if (filename.indexOf("InterIndic") >= 0 && filename.indexOf("Latin") >= 0) {
554                             // append("**" + id);
555                         }
556                         checkIdFix(id, fixedIDs, oddIDs, translitID);
557 
558                         final int direction = source.equals("FORWARD") ? Transliterator.FORWARD
559                             : Transliterator.REVERSE;
560                         registerFromIcuFile(id, directory, filename, direction);
561 
562                         verifyNullFilter("halfwidth-fullwidth");
563 
564                         id = null;
565                         filename = null;
566                     } catch (RuntimeException e) {
567                         throw (RuntimeException) new IllegalArgumentException("Failed with " + filename + ", " + source)
568                             .initCause(e);
569                     }
570                 } else {
571                     append(dir + "root.txt unhandled line:" + line);
572                 }
573                 continue;
574             }
575             String trimmed = line.trim();
576             if (trimmed.equals("")) continue;
577             if (trimmed.equals("}")) continue;
578             if (trimmed.startsWith("//")) continue;
579             throw new IllegalArgumentException("Unhandled:" + line);
580         }
581 
582         final Set<String> rawIds = idToRules.keySet();
583         Set<String> ordered = dependencyOrder.getOrderedItems(rawIds, null, false);
584         ordered.retainAll(rawIds); // since we are in ID space, kick out anything that isn't
585 
586         for (String id2 : ordered) {
587             RuleDirection stuff = idToRules.get(id2);
588             internalRegisterNoReverseId(id2, stuff.ruleString, stuff.direction);
589             verifyNullFilter("halfwidth-fullwidth"); // TESTING
590         }
591 
592         for (Iterator<String> it = aliasMap.keySet().iterator(); it.hasNext();) {
593             id = it.next();
594             String source = aliasMap.get(id);
595             Transliterator.unregister(id);
596             Transliterator t = Transliterator.createFromRules(id, "::" + source + ";", Transliterator.FORWARD);
597             Transliterator.registerInstance(t);
598             // verifyNullFilter("halfwidth-fullwidth");
599             appendln("Registered new Transliterator Alias: " + id);
600 
601         }
602         appendln("Fixed IDs");
603         for (Iterator<String> it = fixedIDs.keySet().iterator(); it.hasNext();) {
604             String id2 = it.next();
605             appendln("\t" + id2 + "\t" + fixedIDs.get(id2));
606         }
607         appendln("Odd IDs");
608         for (Iterator<String> it = oddIDs.iterator(); it.hasNext();) {
609             String id2 = it.next();
610             appendln("\t" + id2);
611         }
612         Transliterator.registerAny(); // do this last!
613     }
614 
    /** Rules loaded by registerFromIcuFile, keyed by transliterator ID and sorted by that ID; entries are never removed in the code visible here. */
    Map<String, RuleDirection> idToRules = new TreeMap<String, RuleDirection>();
616 
617     private class RuleDirection {
618         String ruleString;
619         int direction;
620 
RuleDirection(String ruleString, int direction)621         public RuleDirection(String ruleString, int direction) {
622             super();
623             this.ruleString = ruleString;
624             this.direction = direction;
625         }
626     }
627 
registerFromIcuFile(String id, String directory, String filename, int direction)628     private void registerFromIcuFile(String id, String directory, String filename, int direction) {
629         if (filename == null) {
630             filename = id.replace("-", "_").replace("/", "_") + ".txt";
631         }
632         String ruleString = CldrUtility.getText(directory, filename);
633         idToRules.put(id, new RuleDirection(ruleString, direction));
634     }
635 
636     // private void registerFromIcuFile(String id, String dir, String filename) {
637     // registerFromIcuFile(id, dir, filename, Transliterator.FORWARD);
638     // registerFromIcuFile(id, dir, filename, Transliterator.REVERSE);
639     // }
640 
checkIdFix(String id, Map<String, String> fixedIDs, Set<String> oddIDs, Matcher translitID)641     public void checkIdFix(String id, Map<String, String> fixedIDs, Set<String> oddIDs, Matcher translitID) {
642         if (fixedIDs.containsKey(id)) return;
643         if (!translitID.reset(id).matches()) {
644             appendln("Can't fix: " + id);
645             fixedIDs.put(id, "?" + id);
646             return;
647         }
648         String source1 = translitID.group(1);
649         String target1 = translitID.group(2);
650         String variant = translitID.group(3);
651         String source = fixID(source1);
652         String target = fixID(target1);
653         if (!source1.equals(source)) {
654             fixedIDs.put(source1, source);
655         }
656         if (!target1.equals(target)) {
657             fixedIDs.put(target1, target);
658         }
659         if (variant != null) {
660             oddIDs.add("variant: " + variant);
661         }
662     }
663 
fixID(String source)664     static String fixID(String source) {
665         return source; // for now
666     }
667 
deregisterIcuTransliterators(Matcher filter)668     public void deregisterIcuTransliterators(Matcher filter) {
669         // Remove all of the current registrations
670         // first load into array, so we don't get sync problems.
671         List<String> rawAvailable = new ArrayList<String>();
672         for (Enumeration<String> en = Transliterator.getAvailableIDs(); en.hasMoreElements();) {
673             final String id = en.nextElement();
674             if (filter != null && !filter.reset(id).matches()) {
675                 continue;
676             }
677             rawAvailable.add(id);
678         }
679 
680         // deregisterIcuTransliterators(rawAvailable);
681 
682         Set<String> available = dependencyOrder.getOrderedItems(rawAvailable, filter, false);
683         List<String> reversed = new LinkedList<String>();
684         for (String item : available) {
685             reversed.add(0, item);
686         }
687         // available.retainAll(rawAvailable); // remove the items we won't touch anyway
688         // rawAvailable.removeAll(available); // now the ones whose order doesn't matter
689         // deregisterIcuTransliterators(rawAvailable);
690         deregisterIcuTransliterators(reversed);
691 
692         for (Enumeration<String> en = Transliterator.getAvailableIDs(); en.hasMoreElements();) {
693             String oldId = en.nextElement();
694             append("Retaining: " + oldId + "\n");
695         }
696     }
697 
deregisterIcuTransliterators(Collection<String> available)698     public void deregisterIcuTransliterators(Collection<String> available) {
699         for (String oldId : available) {
700             Transliterator t;
701             try {
702                 t = Transliterator.getInstance(oldId);
703             } catch (IllegalArgumentException e) {
704                 if (e.getMessage().startsWith("Illegal ID")) {
705                     continue;
706                 }
707                 append("Failure with: " + oldId);
708                 t = Transliterator.getInstance(oldId);
709                 throw e;
710             } catch (RuntimeException e) {
711                 append("Failure with: " + oldId);
712                 t = Transliterator.getInstance(oldId);
713                 throw e;
714             }
715             String className = t.getClass().getName();
716             if (className.endsWith(".CompoundTransliterator")
717                 || className.endsWith(".RuleBasedTransliterator")
718                 || className.endsWith(".AnyTransliterator")) {
719                 appendln("REMOVING: " + oldId);
720                 Transliterator.unregister(oldId);
721             } else {
722                 appendln("Retaining: " + oldId + "\t\t" + className);
723             }
724         }
725     }
726 
/**
 * Direction of a transform. The constant names are lower case because they
 * are looked up with {@code valueOf} from lower-cased attribute text.
 */
public enum Direction {
    backward,
    both,
    forward
}
730 
/**
 * Visibility of a transform. The constant names are lower case because they
 * are looked up with {@code valueOf} from attribute text.
 */
public enum Visibility {
    external,
    internal
}
734 
735     public static class ParsedTransformID {
736         public String source = "Any";
737         public String target = "Any";
738         public String variant;
739         protected String[] aliases = {};
740         protected String[] backwardAliases = {};
741         protected Direction direction = null;
742         protected Visibility visibility;
743 
getId()744         public String getId() {
745             return getSource() + "-" + getTarget() + (getVariant() == null ? "" : "/" + getVariant());
746         }
747 
getDisplayId()748         public String getDisplayId() {
749             return getDisplaySource() + "-" + getDisplayTarget() + (getVariant() == null ? "" : "/" + getDisplayVariant());
750         }
751 
getDisplayVariant()752         private String getDisplayVariant() {
753             return getVariant();
754         }
755 
getDisplayTarget()756         private String getDisplayTarget() {
757             return getDisplaySourceOrTarget(getTarget());
758         }
759 
getDisplaySource()760         private String getDisplaySource() {
761             return getDisplaySourceOrTarget(getSource());
762         }
763 
getDisplaySourceOrTarget(String sourceOrTarget)764         private String getDisplaySourceOrTarget(String sourceOrTarget) {
765             int uscript = UScript.getCodeFromName(sourceOrTarget);
766             if (uscript >= 0) {
767                 return UScript.getName(uscript);
768             }
769             if (sourceOrTarget.contains("FONIPA")) {
770                 return "IPA";
771             }
772             if (sourceOrTarget.equals("InterIndic")) {
773                 return "Indic";
774             }
775             try {
776                 String name = CLDRConfig.getInstance().getEnglish().getName(sourceOrTarget);
777                 return name;
778             } catch (Exception e) {
779                 return sourceOrTarget;
780             }
781         }
782 
783         static final LikelySubtags likely = new LikelySubtags();
784 
getScriptCode(String sourceOrTarget)785         public static String getScriptCode(String sourceOrTarget) {
786             int uscript = UScript.getCodeFromName(sourceOrTarget);
787             if (uscript >= 0) {
788                 return UScript.getShortName(uscript);
789             }
790             if (sourceOrTarget.contains("FONIPA")) {
791                 return "Ipa0";
792             }
793             if (sourceOrTarget.equals("InterIndic")) {
794                 return "Ind0";
795             }
796             try {
797                 String max = likely.maximize(sourceOrTarget);
798                 return max == null ? null : new LanguageTagParser().set(max).getScript();
799             } catch (Exception e) {
800                 return null;
801             }
802         }
803 
getBackwardId()804         public String getBackwardId() {
805             return getTarget() + "-" + getSource() + (getVariant() == null ? "" : "/" + getVariant());
806         }
807 
ParsedTransformID()808         public ParsedTransformID() {
809         }
810 
set(String source, String target, String variant, Direction direction)811         public ParsedTransformID set(String source, String target, String variant, Direction direction) {
812             this.source = source;
813             this.target = target;
814             this.variant = variant;
815             this.direction = direction;
816             return this;
817         }
818 
set(String id)819         public ParsedTransformID set(String id) {
820             variant = null;
821             int pos = id.indexOf('-');
822             if (pos < 0) {
823                 source = "Any";
824                 target = id;
825                 return this;
826             }
827             source = id.substring(0, pos);
828             int pos2 = id.indexOf('/', pos);
829             if (pos2 < 0) {
830                 target = id.substring(pos + 1);
831                 return this;
832             }
833             target = id.substring(pos + 1, pos2);
834             variant = id.substring(pos2 + 1);
835             return this;
836         }
837 
reverse()838         public ParsedTransformID reverse() {
839             String temp = source;
840             source = target;
841             target = temp;
842             return this;
843         }
844 
getTargetVariant()845         public String getTargetVariant() {
846             return target + (variant == null ? "" : "/" + variant);
847         }
848 
getSourceVariant()849         public String getSourceVariant() {
850             return source + (variant == null ? "" : "/" + variant);
851         }
852 
setDirection(Direction direction)853         protected void setDirection(Direction direction) {
854             this.direction = direction;
855         }
856 
getDirection()857         public Direction getDirection() {
858             return direction;
859         }
860 
setVariant(String variant)861         public void setVariant(String variant) {
862             this.variant = variant;
863         }
864 
getVariant()865         protected String getVariant() {
866             return variant;
867         }
868 
setTarget(String target)869         public void setTarget(String target) {
870             this.target = target;
871         }
872 
getTarget()873         public String getTarget() {
874             return target;
875         }
876 
setSource(String source)877         public void setSource(String source) {
878             this.source = source;
879         }
880 
getSource()881         public String getSource() {
882             return source;
883         }
884 
toString()885         public String toString() {
886             return source + "-" + getTargetVariant();
887         }
888 
getId(String source, String target, String variant)889         public static String getId(String source, String target, String variant) {
890             String id = source + '-' + target;
891             if (variant != null) id += "/" + variant;
892             return id;
893         }
894 
reverse(String id)895         public static String reverse(String id) {
896             return new ParsedTransformID().set(id).getBackwardId();
897         }
898 
setAliases(String[] aliases)899         public void setAliases(String[] aliases) {
900             this.aliases = aliases;
901         }
902 
getAliases()903         public String[] getAliases() {
904             return aliases;
905         }
906 
setBackwardAliases(String[] backwardAliases)907         public void setBackwardAliases(String[] backwardAliases) {
908             this.backwardAliases = backwardAliases;
909         }
910 
getBackwardAliases()911         public String[] getBackwardAliases() {
912             return backwardAliases;
913         }
914 
setVisibility(String string)915         protected void setVisibility(String string) {
916             visibility = Visibility.valueOf(string);
917         }
918 
getVisibility()919         public Visibility getVisibility() {
920             return visibility;
921         }
922     }
923 
924     /**
925      * Verify that if the transliterator exists, it has a null filter
926      *
927      * @param id
928      */
verifyNullFilter(String id)929     public static void verifyNullFilter(String id) {
930         Transliterator widen;
931         try {
932             widen = Transliterator.getInstance(id);
933         } catch (Exception e) {
934             return;
935         }
936         UnicodeFilter filter = widen.getFilter();
937         if (filter != null) {
938             throw new IllegalArgumentException(id + " has non-empty filter: " + filter);
939         }
940     }
941 
942     public static class MyHandler extends XMLFileReader.SimpleHandler {
943         boolean first = true;
944         ParsedTransformID directionInfo;
945         XPathParts parts = new XPathParts();
946         String cldrFileName;
947         StringBuilder rules = new StringBuilder();
948 
getRules()949         public String getRules() {
950             return rules.toString();
951         }
952 
MyHandler(String cldrFileName, ParsedTransformID directionInfo)953         public MyHandler(String cldrFileName, ParsedTransformID directionInfo) {
954             super();
955             this.cldrFileName = cldrFileName;
956             this.directionInfo = directionInfo;
957         }
958 
handlePathValue(String path, String value)959         public void handlePathValue(String path, String value) {
960             // private boolean handlePath(String cldrFileName, ParsedTransformID directionInfo, boolean first,
961             // StringBuffer rules, XPathParts parts, String path, String value) {
962             if (first) {
963                 if (path.startsWith("//supplementalData/version")) {
964                     return;
965                 } else if (path.startsWith("//supplementalData/generation")) {
966                     return;
967                 }
968                 parts.set(path);
969                 Map<String, String> attributes = parts.findAttributes("transform");
970                 if (attributes == null) {
971                     throw new IllegalArgumentException("Not an XML transform file: " + cldrFileName + "\t" + path);
972                 }
973                 directionInfo.setSource(attributes.get("source"));
974                 directionInfo.setTarget(attributes.get("target"));
975                 directionInfo.setVariant(attributes.get("variant"));
976                 directionInfo.setDirection(Direction.valueOf(attributes.get("direction").toLowerCase(Locale.ENGLISH)));
977 
978                 String alias = attributes.get("alias");
979                 if (alias != null) {
980                     directionInfo.setAliases(alias.trim().split("\\s+"));
981                 }
982 
983                 String backwardAlias = attributes.get("backwardAlias");
984                 if (backwardAlias != null) {
985                     directionInfo.setBackwardAliases(backwardAlias.trim().split("\\s+"));
986                 }
987 
988                 directionInfo.setVisibility(attributes.get("visibility"));
989                 first = false;
990             }
991             if (path.indexOf("/comment") >= 0) {
992                 // skip
993             } else if (path.indexOf("/tRule") >= 0) {
994                 value = fixup.transliterate(value);
995                 rules.append(value).append(CldrUtility.LINE_SEPARATOR);
996             } else {
997                 throw new IllegalArgumentException("Unknown element: " + path + "\t " + value);
998             }
999         }
1000     }
1001 }
1002