1 /*
2  **********************************************************************
3  * Copyright (c) 2002-2013, International Business Machines
4  * Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  * Author: Mark Davis
7  **********************************************************************
8  */
9 package org.unicode.cldr.util;
10 
11 import java.io.BufferedReader;
12 import java.io.File;
13 import java.io.FileReader;
14 import java.io.IOException;
15 import java.io.InputStream;
16 import java.io.InputStreamReader;
17 import java.io.PrintWriter;
18 import java.lang.reflect.Constructor;
19 import java.lang.reflect.Method;
20 import java.nio.charset.Charset;
21 import java.nio.file.Files;
22 import java.nio.file.Path;
23 import java.nio.file.Paths;
24 import java.util.ArrayList;
25 import java.util.Arrays;
26 import java.util.Calendar;
27 import java.util.Collection;
28 import java.util.Collections;
29 import java.util.Comparator;
30 import java.util.Date;
31 import java.util.EnumSet;
32 import java.util.HashMap;
33 import java.util.HashSet;
34 import java.util.Iterator;
35 import java.util.LinkedHashMap;
36 import java.util.LinkedHashSet;
37 import java.util.List;
38 import java.util.Map;
39 import java.util.Map.Entry;
40 import java.util.Objects;
41 import java.util.Set;
42 import java.util.SortedMap;
43 import java.util.SortedSet;
44 import java.util.TreeMap;
45 import java.util.TreeSet;
46 import java.util.concurrent.ConcurrentHashMap;
47 import java.util.regex.Matcher;
48 import java.util.regex.Pattern;
49 
50 import org.unicode.cldr.draft.FileUtilities;
51 
52 import com.google.common.base.Splitter;
53 import com.google.common.collect.ImmutableMap;
54 import com.google.common.collect.ImmutableMultimap;
55 import com.google.common.collect.Multimap;
56 import com.ibm.icu.impl.Utility;
57 import com.ibm.icu.text.DateFormat;
58 import com.ibm.icu.text.SimpleDateFormat;
59 import com.ibm.icu.text.Transform;
60 import com.ibm.icu.text.Transliterator;
61 import com.ibm.icu.text.UTF16;
62 import com.ibm.icu.text.UnicodeSet;
63 import com.ibm.icu.text.UnicodeSetIterator;
64 import com.ibm.icu.util.Freezable;
65 import com.ibm.icu.util.TimeZone;
66 
67 public class CldrUtility {
68 
    /** When true, {@link #getPath(String, String)} prints a warning to stderr for a path that does not exist. */
    public static final boolean DEBUG_MISSING_DIRECTORIES = false;

    /** The UTF-8 charset, looked up once by name. */
    public static final Charset UTF8 = Charset.forName("utf-8");
    public static final boolean BETA = false;

    /** Line separator constant: always a single line feed, independent of the platform. */
    public static final String LINE_SEPARATOR = "\n";
    /** Pattern for a semicolon together with any whitespace around it. */
    public final static Pattern SEMI_SPLIT = PatternCache.get("\\s*;\\s*");

    // NOTE(review): not referenced in the visible part of this file; presumably a debug switch
    // for a handleFile method defined further down -- confirm.
    private static final boolean HANDLEFILE_SHOW_SKIP = false;
    // Constant for "∅∅∅". Indicates that a child locale has no value for a
    // path even though a parent does.
    // Built with new String(char[]), so it is a distinct String instance rather than a compile-time constant.
    public static final String NO_INHERITANCE_MARKER = new String(new char[] { 0x2205, 0x2205, 0x2205 });

    /**
     * Define the constant INHERITANCE_MARKER for "↑↑↑", used by Survey Tool to indicate a "passthru" vote to the parent locale.
     * If CLDRFile ever finds this value in a data field, writing of the field should be suppressed.
     */
    public static final String INHERITANCE_MARKER = new String(new char[] { 0x2191, 0x2191, 0x2191 });

    /** Frozen set containing the ASCII digits 0 through 9. */
    public static final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze();
89 
90     /**
91      * Very simple class, used to replace variables in a string. For example
92      * <p>
93      *
94      * <pre>
95      * static VariableReplacer langTag = new VariableReplacer()
96      * 			.add("$alpha", "[a-zA-Z]")
97      * 			.add("$digit", "[0-9]")
98      * 			.add("$alphanum", "[a-zA-Z0-9]")
99      * 			.add("$x", "[xX]");
100      * 			...
101      * 			String langTagPattern = langTag.replace(...);
102      * </pre>
103      */
104     public static class VariableReplacer {
105         // simple implementation for now
106         private Map<String, String> m = new TreeMap<>(Collections.reverseOrder());
107 
add(String variable, String value)108         public VariableReplacer add(String variable, String value) {
109             m.put(variable, value);
110             return this;
111         }
112 
replace(String source)113         public String replace(String source) {
114             String oldSource;
115             do {
116                 oldSource = source;
117                 for (Iterator<String> it = m.keySet().iterator(); it.hasNext();) {
118                     String variable = it.next();
119                     String value = m.get(variable);
120                     source = replaceAll(source, variable, value);
121                 }
122             } while (!source.equals(oldSource));
123             return source;
124         }
125 
replaceAll(String source, String key, String value)126         public String replaceAll(String source, String key, String value) {
127             while (true) {
128                 int pos = source.indexOf(key);
129                 if (pos < 0) return source;
130                 source = source.substring(0, pos) + value + source.substring(pos + key.length());
131             }
132         }
133     }
134 
    /**
     * Callback that is handed one line of text at a time.
     */
    public interface LineHandler {
        /**
         * Return false if line was skipped
         *
         * @param line
         *            the line to handle
         * @return false if the line was skipped
         * @throws Exception
         *             declared so that implementations may throw any failure from handling the line
         */
        boolean handle(String line) throws Exception;
    }
144 
getPath(String fileOrDir, String filename)145     public static String getPath(String fileOrDir, String filename) {
146         // Required for cases where a system property is read but not default is given.
147         // TODO: Fix callers to not fail silently if properties are missing.
148         if (fileOrDir == null) {
149             return null;
150         }
151         Path path = Paths.get(fileOrDir);
152         if (filename != null) {
153             path = path.resolve(filename);
154         }
155         if (DEBUG_MISSING_DIRECTORIES && !Files.exists(path)) {
156             System.err.println("Warning: directory doesn't exist: " + path);
157         }
158         return PathUtilities.getNormalizedPathString(path) + File.separatorChar;
159     }
160 
    /**
     * Convenience form of {@link #getPath(String, String)} without a filename.
     *
     * @param path
     *            the file or directory path; may be null
     * @return the result of {@code getPath(path, null)}
     */
    static String getPath(String path) {
        return getPath(path, null);
    }
164 
    /**
     * HTML snippet made of two script elements: the first writes a script tag that loads ga.js from
     * google-analytics.com (host prefix chosen by the page's protocol), the second creates a page
     * tracker for the hard-coded account id and records a page view, ignoring any script error.
     */
    public static final String ANALYTICS = "<script>\n"
        + "var gaJsHost = ((\"https:\" == document.location.protocol) ? \"https://ssl.\" : \"http://www.\");\n"
        + "document.write(unescape(\"%3Cscript src='\" + gaJsHost + \"google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E\"));\n"
        + "</script>\n"
        + "<script>\n"
        + "try {\n"
        + "var pageTracker = _gat._getTracker(\"UA-7672775-1\");\n"
        + "pageTracker._trackPageview();\n"
        + "} catch(err) {}</script>";

    // NOTE(review): presumably the language codes every locale is expected to cover at minimum -- confirm against callers.
    // Arrays.asList gives a fixed-size list backed by the array (no add/remove, but set() still works).
    public static final List<String> MINIMUM_LANGUAGES = Arrays.asList(new String[] { "ar", "en", "de", "fr", "hi",
        "it", "es", "pt", "ru", "zh", "ja" }); // plus language itself
    // NOTE(review): presumably the territory codes every locale is expected to cover at minimum -- confirm against callers.
    public static final List<String> MINIMUM_TERRITORIES = Arrays.asList(new String[] { "US", "GB", "DE", "FR", "IT",
        "JP", "CN", "IN", "RU", "BR" });
179 
    /**
     * Strategy that decides how two lines of text relate to each other.
     */
    public interface LineComparer {
        // Result codes of compare(). SKIP_FIRST and SKIP_SECOND are distinct bits, so a caller can test
        // them with a bit mask; LINES_DIFFERENT is negative and LINES_SAME is zero.
        static final int LINES_DIFFERENT = -1, LINES_SAME = 0, SKIP_FIRST = 1, SKIP_SECOND = 2;

        /**
         * Returns LINES_DIFFERENT, LINES_SAME, or if one of the lines is ignorable, SKIP_FIRST or SKIP_SECOND
         *
         * @param line1
         *            line taken from the first source
         * @param line2
         *            line taken from the second source
         * @return one of LINES_DIFFERENT, LINES_SAME, SKIP_FIRST or SKIP_SECOND
         */
        int compare(String line1, String line2);
    }
192 
193     public static class SimpleLineComparator implements LineComparer {
194         public static final int TRIM = 1, SKIP_SPACES = 2, SKIP_EMPTY = 4, SKIP_CVS_TAGS = 8;
195         StringIterator si1 = new StringIterator();
196         StringIterator si2 = new StringIterator();
197         int flags;
198 
SimpleLineComparator(int flags)199         public SimpleLineComparator(int flags) {
200             this.flags = flags;
201         }
202 
203         @Override
compare(String line1, String line2)204         public int compare(String line1, String line2) {
205             // first, see if we want to skip one or the other lines
206             int skipper = 0;
207             if (line1 == null) {
208                 skipper = SKIP_FIRST;
209             } else {
210                 if ((flags & TRIM) != 0) line1 = line1.trim();
211                 if ((flags & SKIP_EMPTY) != 0 && line1.length() == 0) skipper = SKIP_FIRST;
212             }
213             if (line2 == null) {
214                 skipper = SKIP_SECOND;
215             } else {
216                 if ((flags & TRIM) != 0) line2 = line2.trim();
217                 if ((flags & SKIP_EMPTY) != 0 && line2.length() == 0) skipper += SKIP_SECOND;
218             }
219             if (skipper != 0) {
220                 if (skipper == SKIP_FIRST + SKIP_SECOND) return LINES_SAME; // ok, don't skip both
221                 return skipper;
222             }
223 
224             // check for null
225             if (line1 == null) {
226                 if (line2 == null) return LINES_SAME;
227                 return LINES_DIFFERENT;
228             }
229             if (line2 == null) {
230                 return LINES_DIFFERENT;
231             }
232 
233             // now check equality
234             if (line1.equals(line2)) return LINES_SAME;
235 
236             // if not equal, see if we are skipping spaces
237             if ((flags & SKIP_CVS_TAGS) != 0) {
238                 if (line1.indexOf('$') >= 0 && line2.indexOf('$') >= 0) {
239                     line1 = stripTags(line1);
240                     line2 = stripTags(line2);
241                     if (line1.equals(line2)) return LINES_SAME;
242                 } else if (line1.startsWith("<!DOCTYPE ldml SYSTEM \"../../common/dtd/")
243                     && line2.startsWith("<!DOCTYPE ldml SYSTEM \"../../common/dtd/")) {
244                     return LINES_SAME;
245                 }
246             }
247             if ((flags & SKIP_SPACES) != 0 && si1.set(line1).matches(si2.set(line2))) return LINES_SAME;
248             return LINES_DIFFERENT;
249         }
250 
251         // private Matcher dtdMatcher = PatternCache.get(
252         // "\\Q<!DOCTYPE ldml SYSTEM \"http://www.unicode.org/cldr/dtd/\\E.*\\Q/ldml.dtd\">\\E").matcher("");
253 
254         private String[] CVS_TAGS = { "Revision", "Date" };
255 
stripTags(String line)256         private String stripTags(String line) {
257             // $
258             // Revision: 8994 $
259             // $
260             // Date: 2013-07-03 21:31:17 +0200 (Wed, 03 Jul 2013) $
261             int pos = line.indexOf('$');
262             if (pos < 0) return line;
263             pos++;
264             int endpos = line.indexOf('$', pos);
265             if (endpos < 0) return line;
266             for (int i = 0; i < CVS_TAGS.length; ++i) {
267                 if (!line.startsWith(CVS_TAGS[i], pos)) continue;
268                 line = line.substring(0, pos + CVS_TAGS[i].length()) + line.substring(endpos);
269             }
270             return line;
271         }
272 
273     }
274 
275     /**
276      *
277      * @param file1
278      * @param file2
279      * @param failureLines
280      *            on input, String[2], on output, failing lines
281      * @param lineComparer
282      * @return
283      * @throws IOException
284      */
areFileIdentical(String file1, String file2, String[] failureLines, LineComparer lineComparer)285     public static boolean areFileIdentical(String file1, String file2, String[] failureLines,
286         LineComparer lineComparer) throws IOException {
287         try (BufferedReader br1 = new BufferedReader(new FileReader(file1), 32 * 1024);
288             BufferedReader br2 = new BufferedReader(new FileReader(file2), 32 * 1024);) {
289             String line1 = "";
290             String line2 = "";
291             int skip = 0;
292 
293             while (true) {
294                 if ((skip & LineComparer.SKIP_FIRST) == 0) line1 = br1.readLine();
295                 if ((skip & LineComparer.SKIP_SECOND) == 0) line2 = br2.readLine();
296                 if (line1 == null && line2 == null) return true;
297                 if (line1 == null || line2 == null) {
298                     // System.out.println("debug");
299                 }
300                 skip = lineComparer.compare(line1, line2);
301                 if (skip == LineComparer.LINES_DIFFERENT) {
302                     break;
303                 }
304             }
305             failureLines[0] = line1 != null ? line1 : "<end of file>";
306             failureLines[1] = line2 != null ? line2 : "<end of file>";
307             return false;
308         }
309     }
310 
311     /*
312      * static String getLineWithoutFluff(BufferedReader br1, boolean first, int flags) throws IOException {
313      * while (true) {
314      * String line1 = br1.readLine();
315      * if (line1 == null) return line1;
316      * if ((flags & TRIM)!= 0) line1 = line1.trim();
317      * if ((flags & SKIP_EMPTY)!= 0 && line1.length() == 0) continue;
318      * return line1;
319      * }
320      * }
321      */
322 
323     public final static class StringIterator {
324         String string;
325         int position = 0;
326 
next()327         char next() {
328             while (true) {
329                 if (position >= string.length()) return '\uFFFF';
330                 char ch = string.charAt(position++);
331                 if (ch != ' ' && ch != '\t') return ch;
332             }
333         }
334 
reset()335         StringIterator reset() {
336             position = 0;
337             return this;
338         }
339 
set(String string)340         StringIterator set(String string) {
341             this.string = string;
342             position = 0;
343             return this;
344         }
345 
matches(StringIterator other)346         boolean matches(StringIterator other) {
347             while (true) {
348                 char c1 = next();
349                 char c2 = other.next();
350                 if (c1 != c2) return false;
351                 if (c1 == '\uFFFF') return true;
352             }
353         }
354 
355         /**
356          * @return Returns the position.
357          */
getPosition()358         public int getPosition() {
359             return position;
360         }
361     }
362 
    /**
     * Splits {@code source} at each {@code separator} without trimming the pieces.
     *
     * @param source
     *            the text to split
     * @param separator
     *            the separator character
     * @return the pieces, as produced by {@code splitArray(source, separator, false)}
     */
    public static String[] splitArray(String source, char separator) {
        return splitArray(source, separator, false);
    }
366 
splitArray(String source, char separator, boolean trim)367     public static String[] splitArray(String source, char separator, boolean trim) {
368         List<String> piecesList = splitList(source, separator, trim);
369         String[] pieces = new String[piecesList.size()];
370         piecesList.toArray(pieces);
371         return pieces;
372     }
373 
splitCommaSeparated(String line)374     public static String[] splitCommaSeparated(String line) {
375         // items are separated by ','
376         // each item is of the form abc...
377         // or "..." (required if a comma or quote is contained)
378         // " in a field is represented by ""
379         List<String> result = new ArrayList<>();
380         StringBuilder item = new StringBuilder();
381         boolean inQuote = false;
382         for (int i = 0; i < line.length(); ++i) {
383             char ch = line.charAt(i); // don't worry about supplementaries
384             switch (ch) {
385             case '"':
386                 inQuote = !inQuote;
387                 // at start or end, that's enough
388                 // if get a quote when we are not in a quote, and not at start, then add it and return to inQuote
389                 if (inQuote && item.length() != 0) {
390                     item.append('"');
391                     inQuote = true;
392                 }
393                 break;
394             case ',':
395                 if (!inQuote) {
396                     result.add(item.toString());
397                     item.setLength(0);
398                 } else {
399                     item.append(ch);
400                 }
401                 break;
402             default:
403                 item.append(ch);
404                 break;
405             }
406         }
407         result.add(item.toString());
408         return result.toArray(new String[result.size()]);
409     }
410 
    /**
     * Splits {@code source} at each {@code separator} into a new list, without trimming.
     *
     * @param source
     *            the text to split
     * @param separator
     *            the separator character
     * @return the result of {@code splitList(source, separator, false, null)}
     */
    public static List<String> splitList(String source, char separator) {
        return splitList(source, separator, false, null);
    }
414 
    /**
     * Splits {@code source} at each {@code separator} into a new list.
     *
     * @param source
     *            the text to split
     * @param separator
     *            the separator character
     * @param trim
     *            whether each piece is trimmed
     * @return the result of {@code splitList(source, separator, trim, null)}
     */
    public static List<String> splitList(String source, char separator, boolean trim) {
        return splitList(source, separator, trim, null);
    }
418 
    /**
     * Character-separator form of the split: converts the separator to a one-character string and
     * delegates to the String-separator overload.
     *
     * @param source
     *            the text to split
     * @param separator
     *            the separator character
     * @param trim
     *            whether each piece is trimmed
     * @param output
     *            list handed on to the String-separator overload; may be null
     * @return whatever the String-separator overload returns
     */
    public static List<String> splitList(String source, char separator, boolean trim, List<String> output) {
        return splitList(source, Character.toString(separator), trim, output);
    }
422 
    /**
     * Splits {@code source} at each occurrence of {@code separator} into a new list, without trimming.
     *
     * @param source
     *            the text to split
     * @param separator
     *            the separator text
     * @return the result of {@code splitList(source, separator, false, null)}
     */
    public static List<String> splitList(String source, String separator) {
        return splitList(source, separator, false, null);
    }
426 
    /**
     * Splits {@code source} at each occurrence of {@code separator} into a new list.
     *
     * @param source
     *            the text to split
     * @param separator
     *            the separator text
     * @param trim
     *            whether each piece is trimmed
     * @return the result of {@code splitList(source, separator, trim, null)}
     */
    public static List<String> splitList(String source, String separator, boolean trim) {
        return splitList(source, separator, trim, null);
    }
430 
splitList(String source, String separator, boolean trim, List<String> output)431     public static List<String> splitList(String source, String separator, boolean trim, List<String> output) {
432         if (output == null) output = new ArrayList<>();
433         if (source.length() == 0) return output;
434         int pos = 0;
435         do {
436             int npos = source.indexOf(separator, pos);
437             if (npos < 0) npos = source.length();
438             String piece = source.substring(pos, npos);
439             if (trim) piece = piece.trim();
440             output.add(piece);
441             pos = npos + 1;
442         } while (pos < source.length());
443         return output;
444     }
445 
446     /**
447      * Protect a collection (as much as Java lets us!) from modification.
448      * Really, really ugly code, since Java doesn't let us do better.
449      */
450     @SuppressWarnings({ "rawtypes", "unchecked" })
protectCollection(T source)451     public static <T> T protectCollection(T source) {
452         // TODO - exclude UnmodifiableMap, Set, ...
453         if (source instanceof Map) {
454             Map<Object,Object> sourceMap = (Map) source;
455             ImmutableMap.Builder<Object,Object> builder = ImmutableMap.builder();
456             for (Entry<Object,Object> entry : sourceMap.entrySet()) {
457                 final Object key = entry.getKey();
458                 final Object value = entry.getValue();
459                 builder.put(protectCollection(key), protectCollection(value));
460             }
461             return (T) builder.build();
462         } else if (source instanceof Multimap) {
463             Multimap<Object,Object> sourceMap = (Multimap) source;
464             ImmutableMultimap.Builder<Object,Object> builder = ImmutableMultimap.builder();
465             for (Entry<Object,Object> entry : sourceMap.entries()) {
466                 builder.put(protectCollection(entry.getKey()), protectCollection(entry.getValue()));
467             }
468             return (T) builder.build();
469         } else if (source instanceof Collection) {
470             // TODO use ImmutableSet, List, ...
471             Collection sourceCollection = (Collection) source;
472             Collection<Object> resultCollection = clone(sourceCollection);
473             if (resultCollection == null) return (T) sourceCollection; // failed
474             resultCollection.clear();
475 
476             for (Object item : sourceCollection) {
477                 resultCollection.add(protectCollection(item));
478             }
479 
480             return sourceCollection instanceof List ? (T) Collections.unmodifiableList((List) sourceCollection)
481                 : sourceCollection instanceof SortedSet ? (T) Collections
482                     .unmodifiableSortedSet((SortedSet) sourceCollection)
483                     : sourceCollection instanceof Set ? (T) Collections.unmodifiableSet((Set) sourceCollection)
484                         : (T) Collections.unmodifiableCollection(sourceCollection);
485         } else if (source instanceof Freezable) {
486             Freezable freezableSource = (Freezable) source;
487             if (freezableSource.isFrozen()) return source;
488             return (T) ((Freezable) (freezableSource.cloneAsThawed())).freeze();
489         } else {
490             return source; // can't protect
491         }
492     }
493 
494     /**
495      * Protect a collections where we don't need to clone.
496      * @param source
497      * @return
498      */
499     @SuppressWarnings({ "rawtypes", "unchecked" })
protectCollectionX(T source)500     public static <T> T protectCollectionX(T source) {
501         // TODO - exclude UnmodifiableMap, Set, ...
502         if (isImmutable(source)) {
503             return source;
504         }
505         if (source instanceof Map) {
506             Map sourceMap = (Map) source;
507             // recurse
508             LinkedHashMap tempMap = new LinkedHashMap<>(sourceMap); // copy contents
509             sourceMap.clear();
510             for (Object key : tempMap.keySet()) {
511                 sourceMap.put(protectCollection(key), protectCollectionX(tempMap.get(key)));
512             }
513             return sourceMap instanceof SortedMap ? (T) Collections.unmodifiableSortedMap((SortedMap) sourceMap)
514                 : (T) Collections.unmodifiableMap(sourceMap);
515         } else if (source instanceof Collection) {
516             Collection sourceCollection = (Collection) source;
517             LinkedHashSet tempSet = new LinkedHashSet<>(sourceCollection); // copy contents
518 
519             sourceCollection.clear();
520             for (Object item : tempSet) {
521                 sourceCollection.add(protectCollectionX(item));
522             }
523 
524             return sourceCollection instanceof List ? (T) Collections.unmodifiableList((List) sourceCollection)
525                 : sourceCollection instanceof SortedSet ? (T) Collections
526                     .unmodifiableSortedSet((SortedSet) sourceCollection)
527                     : sourceCollection instanceof Set ? (T) Collections.unmodifiableSet((Set) sourceCollection)
528                         : (T) Collections.unmodifiableCollection(sourceCollection);
529         } else if (source instanceof Freezable) {
530             Freezable freezableSource = (Freezable) source;
531             return (T) freezableSource.freeze();
532         } else {
533             throw new IllegalArgumentException("Can’t protect: " + source.getClass().toString());
534         }
535     }
536 
537     private static final Set<Object> KNOWN_IMMUTABLES = new HashSet<>(Arrays.asList(
538         String.class));
539 
isImmutable(Object source)540     public static boolean isImmutable(Object source) {
541         return source == null
542             || source instanceof Enum
543             || source instanceof Number
544             || KNOWN_IMMUTABLES.contains(source.getClass());
545     }
546 
547     /**
548      * Clones T if we can; otherwise returns null.
549      *
550      * @param <T>
551      * @param source
552      * @return
553      */
554     @SuppressWarnings("unchecked")
clone(T source)555     private static <T> T clone(T source) {
556         final Class<? extends Object> class1 = source.getClass();
557         try {
558             final Method declaredMethod = class1.getDeclaredMethod("clone", (Class<?>) null);
559             return (T) declaredMethod.invoke(source, (Object) null);
560         } catch (Exception e) {
561         }
562         try {
563             final Constructor<? extends Object> declaredMethod = class1.getConstructor((Class<?>) null);
564             return (T) declaredMethod.newInstance((Object) null);
565         } catch (Exception e) {
566         }
567         return null; // uncloneable
568     }
569 
570     /**
571      * Appends two strings, inserting separator if either is empty
572      */
joinWithSeparation(String a, String separator, String b)573     public static String joinWithSeparation(String a, String separator, String b) {
574         if (a.length() == 0) return b;
575         if (b.length() == 0) return a;
576         return a + separator + b;
577     }
578 
579     /**
580      * Appends two strings, inserting separator if either is empty. Modifies first map
581      */
joinWithSeparation(Map<String, String> a, String separator, Map<String, String> b)582     public static Map<String, String> joinWithSeparation(Map<String, String> a, String separator, Map<String, String> b) {
583         for (Iterator<String> it = b.keySet().iterator(); it.hasNext();) {
584             String key = it.next();
585             String bvalue = b.get(key);
586             String avalue = a.get(key);
587             if (avalue != null) {
588                 if (avalue.trim().equals(bvalue.trim())) continue;
589                 bvalue = joinWithSeparation(avalue, separator, bvalue);
590             }
591             a.put(key, bvalue);
592         }
593         return a;
594     }
595 
    /**
     * Joins the items of a collection with a separator, without transforming the items.
     *
     * @param c
     *            the items to join
     * @param separator
     *            text placed between consecutive items
     * @return the result of {@code join(c, separator, null)}
     */
    public static <T> String join(Collection<T> c, String separator) {
        return join(c, separator, null);
    }
599 
    /**
     * Joins the elements of an array with a separator, without transforming the elements.
     *
     * @param c
     *            the elements to join
     * @param separator
     *            text placed between consecutive elements
     * @return the result of {@code join(c, separator, null)}
     */
    public static String join(Object[] c, String separator) {
        return join(c, separator, null);
    }
603 
join(Collection<T> c, String separator, Transform<T, String> transform)604     public static <T> String join(Collection<T> c, String separator, Transform<T, String> transform) {
605         StringBuffer output = new StringBuffer();
606         boolean isFirst = true;
607         for (T item : c) {
608             if (isFirst) {
609                 isFirst = false;
610             } else {
611                 output.append(separator);
612             }
613             output.append(transform != null ? transform.transform(item) : item);
614         }
615         return output.toString();
616     }
617 
    /**
     * Array form of the transforming join: views the array as a list with {@code Arrays.asList} and
     * delegates to the Collection overload.
     *
     * @param c
     *            the elements to join
     * @param separator
     *            text placed between consecutive elements
     * @param transform
     *            passed through to the Collection overload; may be null
     * @return the joined text
     */
    public static <T> String join(T[] c, String separator, Transform<T, String> transform) {
        return join(Arrays.asList(c), separator, transform);
    }
621 
622     /**
623      * Utility like Arrays.asList()
624      */
625     @SuppressWarnings("unchecked")
asMap(Object[][] source, Map<K, V> target, boolean reverse)626     public static <K, V> Map<K, V> asMap(Object[][] source, Map<K, V> target, boolean reverse) {
627         int from = 0, to = 1;
628         if (reverse) {
629             from = 1;
630             to = 0;
631         }
632         for (int i = 0; i < source.length; ++i) {
633             if (source[i].length != 2) {
634                 throw new IllegalArgumentException("Source must be array of pairs of strings: "
635                     + Arrays.asList(source[i]));
636             }
637             target.put((K) source[i][from], (V) source[i][to]);
638         }
639         return target;
640     }
641 
    /**
     * Builds a new HashMap from an array of pairs, using element 0 of each pair as the key and
     * element 1 as the value.
     *
     * @param source
     *            array of two-element arrays
     * @return the result of {@code asMap(source, new HashMap<K, V>(), false)}
     */
    public static <K, V> Map<K, V> asMap(Object[][] source) {
        return asMap(source, new HashMap<K, V>(), false);
    }
645 
646     /**
647      * Returns the canonical name for a file.
648      */
getCanonicalName(String file)649     public static String getCanonicalName(String file) {
650         try {
651             return PathUtilities.getNormalizedPathString(file);
652         } catch (Exception e) {
653             return file;
654         }
655     }
656 
    /**
     * Convert a UnicodeSet into a string that can be embedded into a Regex. Handles strings that are in the UnicodeSet,
     * Supplementary ranges, and escaping.
     * <p>
     * Shorthand for {@code toRegex(source, null, false)}: a null escaper selects the default regex
     * escaper, and supplementary ranges are not restricted to BMP-only form.
     *
     * @param source
     *            The source set
     * @return the regex text for the set
     */
    public static String toRegex(UnicodeSet source) {
        return toRegex(source, null, false);
    }
670 
    // Escaper used by toRegex when the caller passes none. Two rules: put a backslash in front of
    // '-', '\', '[' and ']', and convert control characters and ASCII whitespace/separators to hex.
    // The rule-set ID "foo" is just a placeholder name.
    private static final Transliterator DEFAULT_REGEX_ESCAPER = Transliterator.createFromRules(
        "foo",
        "([ \\- \\\\ \\[ \\] ]) > '\\' $1 ;"
            // + " ([:c:]) > &hex($1);"
            + " ([[:control:][[:z:]&[:ascii:]]]) > &hex($1);",
        Transliterator.FORWARD);
677 
678     /**
679      * Convert a UnicodeSet into a string that can be embedded into a Regex.
680      * Handles strings that are in the UnicodeSet, Supplementary ranges, and
681      * escaping
682      *
683      * @param source
684      *            The source set
685      * @param escaper
686      *            A transliterator that is used to escape the characters according
687      *            to the requirements of the regex. The default puts a \\ before [, -,
688      *            \, and ], and converts controls and Ascii whitespace to hex.
689      *            Alternatives can be supplied. Note that some Regex engines,
690      *            including Java 1.5, don't really deal with escaped supplementaries
691      *            well.
692      * @param onlyBmp
693      *            Set to true if the Regex only accepts BMP characters. In that
694      *            case, ranges of supplementary characters are converted to lists of
695      *            ranges. For example, [\uFFF0-\U0010000F \U0010100F-\U0010300F]
696      *            converts into:
697      *
698      *            <pre>
699      *          [\uD800][\uDC00-\uDFFF]
700      *          [\uD801-\uDBBF][\uDC00-\uDFFF]
701      *          [\uDBC0][\uDC00-\uDC0F]
702      * </pre>
703      *
704      *            and
705      *
706      *            <pre>
707      *          [\uDBC4][\uDC0F-\uDFFF]
708      *          [\uDBC5-\uDBCB][\uDC00-\uDFFF]
709      *          [\uDBCC][\uDC00-\uDC0F]
710      * </pre>
711      *
712      *            These are then coalesced into a list of alternatives by sharing
713      *            parts where feasible. For example, the above turns into 3 pairs of ranges:
714      *
715      *            <pre>
716      *          [\uDBC0\uDBCC][\uDC00-\uDC0F]|\uDBC4[\uDC0F-\uDFFF]|[\uD800-\uDBBF\uDBC5-\uDBCB][\uDC00-\uDFFF]
717      * </pre>
718      *
719      * @return escaped string. Something like [a-z] or (?:[a-m]|{zh}) if there is
720      *         a string zh in the set, or a more complicated case for
721      *         supplementaries. <br>
722      *         Special cases: [] returns "", single item returns a string
723      *         (escaped), like [a] => "a", or [{abc}] => "abc"<br>
724      *         Supplementaries are handled specially, as described under onlyBmp.
725      */
    public static String toRegex(UnicodeSet source, Transliterator escaper, boolean onlyBmp) {
        // Fall back to the class-wide default escaper when the caller supplies none.
        if (escaper == null) {
            escaper = DEFAULT_REGEX_ESCAPER;
        }
        UnicodeSetIterator it = new UnicodeSetIterator(source);
        // Trivial sets need no bracket or alternation syntax: an empty set yields "",
        // and a single item is returned on its own, escaped.
        if (source.size() == 0) {
            return "";
        }
        if (source.size() == 1) {
            it.next();
            return escaper.transliterate(it.getString());
        }
        // Otherwise the result is assembled from two parts:
        //   base       - a bracketed character class filled by addBmpRange
        //   alternates - "|"-prefixed alternatives for strings and for surrogate-pair ranges
        StringBuilder base = new StringBuilder("[");
        StringBuilder alternates = new StringBuilder();
        // Keyed by a set of trail surrogates; the value accumulates every lead-surrogate range
        // that was registered with exactly that trail set (see addSupplementalRange), so ranges
        // sharing a trail set collapse into a single alternative.
        Map<UnicodeSet, UnicodeSet> lastToFirst = new TreeMap<>(new UnicodeSetComparator());
        int alternateCount = 0;
        while (it.nextRange()) {
            if (it.codepoint == UnicodeSetIterator.IS_STRING) {
                // Multi-character strings cannot live inside a character class; emit as an alternative.
                ++alternateCount;
                alternates.append('|').append(escaper.transliterate(it.string));
            } else if (!onlyBmp || it.codepointEnd <= 0xFFFF) { // BMP
                // Either the whole range is within the BMP, or the caller did not ask for
                // supplementary ranges to be split into surrogates; add the range as-is.
                addBmpRange(it.codepoint, it.codepointEnd, escaper, base);
            } else { // supplementary
                if (it.codepoint <= 0xFFFF) {
                    // The range straddles the BMP boundary: emit the BMP part now and continue
                    // below with only the supplementary part.
                    addBmpRange(it.codepoint, 0xFFFF, escaper, base);
                    it.codepoint = 0x10000; // reset the range
                }
                // this gets a bit ugly; we are trying to minimize the extra ranges for supplementaries
                // we do this by breaking up X-Y based on the Lead and Trail values for X and Y
                // Lx [Tx - Ty]) (if Lx == Ly)
                // Lx [Tx - DFFF] | Ly [DC00-Ty] (if Lx == Ly - 1)
                // Lx [Tx - DFFF] | [Lx+1 - Ly-1][DC00-DFFF] | Ly [DC00-Ty] (otherwise)
                int leadX = UTF16.getLeadSurrogate(it.codepoint);
                int trailX = UTF16.getTrailSurrogate(it.codepoint);
                int leadY = UTF16.getLeadSurrogate(it.codepointEnd);
                int trailY = UTF16.getTrailSurrogate(it.codepointEnd);
                if (leadX == leadY) {
                    // Start and end share a lead surrogate: one lead, one trail range.
                    addSupplementalRange(leadX, leadX, trailX, trailY, escaper, lastToFirst);
                } else {
                    // First partial block: from the start's trail to the end of the trail range.
                    addSupplementalRange(leadX, leadX, trailX, 0xDFFF, escaper, lastToFirst);
                    if (leadX != leadY - 1) {
                        // Middle blocks: every lead strictly between the two, with all trails.
                        addSupplementalRange(leadX + 1, leadY - 1, 0xDC00, 0xDFFF, escaper, lastToFirst);
                    }
                    // Last partial block: from the first trail up to the end's trail.
                    addSupplementalRange(leadY, leadY, 0xDC00, trailY, escaper, lastToFirst);
                }
            }
        }
        // add in the supplementary ranges
        if (lastToFirst.size() != 0) {
            for (UnicodeSet last : lastToFirst.keySet()) {
                ++alternateCount;
                // Each alternative is the regex for the lead set followed by the regex for the
                // trail set, both produced by recursive calls on those sets.
                alternates.append('|').append(toRegex(lastToFirst.get(last), escaper, onlyBmp))
                    .append(toRegex(last, escaper, onlyBmp));
            }
        }
        // Return the output. We separate cases in order to get the minimal extra apparatus
        base.append("]");
        if (alternateCount == 0) {
            // Only a character class.
            return base.toString();
        } else if (base.length() > 2) {
            // base is longer than "[]", so it holds at least one range: combine the class with
            // the alternatives (substring(1) drops the leading '|').
            return "(?:" + base + "|" + alternates.substring(1) + ")";
        } else if (alternateCount == 1) {
            // Empty class and a single alternative: return it bare.
            return alternates.substring(1);
        } else {
            // Empty class and several alternatives: wrap them in a non-capturing group.
            return "(?:" + alternates.substring(1) + ")";
        }
    }
795 
addSupplementalRange(int leadX, int leadY, int trailX, int trailY, Transliterator escaper, Map<UnicodeSet, UnicodeSet> lastToFirst)796     private static void addSupplementalRange(int leadX, int leadY, int trailX, int trailY, Transliterator escaper,
797         Map<UnicodeSet, UnicodeSet> lastToFirst) {
798         System.out.println("\tadding: " + new UnicodeSet(leadX, leadY) + "\t" + new UnicodeSet(trailX, trailY));
799         UnicodeSet last = new UnicodeSet(trailX, trailY);
800         UnicodeSet first = lastToFirst.get(last);
801         if (first == null) {
802             lastToFirst.put(last, first = new UnicodeSet());
803         }
804         first.add(leadX, leadY);
805     }
806 
addBmpRange(int start, int limit, Transliterator escaper, StringBuilder base)807     private static void addBmpRange(int start, int limit, Transliterator escaper, StringBuilder base) {
808         base.append(escaper.transliterate(UTF16.valueOf(start)));
809         if (start != limit) {
810             base.append("-").append(escaper.transliterate(UTF16.valueOf(limit)));
811         }
812     }
813 
814     public static class UnicodeSetComparator implements Comparator<UnicodeSet> {
815         @Override
compare(UnicodeSet o1, UnicodeSet o2)816         public int compare(UnicodeSet o1, UnicodeSet o2) {
817             return o1.compareTo(o2);
818         }
819     }
820 
821     public static class CollectionComparator<T extends Comparable<T>> implements Comparator<Collection<T>> {
822         @Override
compare(Collection<T> o1, Collection<T> o2)823         public int compare(Collection<T> o1, Collection<T> o2) {
824             return UnicodeSet.compare(o1, o2, UnicodeSet.ComparisonStyle.SHORTER_FIRST);
825         }
826     }
827 
828     public static class ComparableComparator<T extends Comparable<T>> implements Comparator<T> {
829         @Override
compare(T arg0, T arg1)830         public int compare(T arg0, T arg1) {
831             return Utility.checkCompare(arg0, arg1);
832         }
833     }
834 
835     @SuppressWarnings({ "rawtypes", "unchecked" })
addTreeMapChain(Map coverageData, Object... objects)836     public static void addTreeMapChain(Map coverageData, Object... objects) {
837         Map<Object, Object> base = coverageData;
838         for (int i = 0; i < objects.length - 2; ++i) {
839             Map<Object, Object> nextOne = (Map<Object, Object>) base.get(objects[i]);
840             if (nextOne == null) base.put(objects[i], nextOne = new TreeMap<>());
841             base = nextOne;
842         }
843         base.put(objects[objects.length - 2], objects[objects.length - 1]);
844     }
845 
846     public static abstract class CollectionTransform<S, T> implements Transform<S, T> {
847         @Override
transform(S source)848         public abstract T transform(S source);
849 
transform(Collection<S> input, Collection<T> output)850         public Collection<T> transform(Collection<S> input, Collection<T> output) {
851             return CldrUtility.transform(input, this, output);
852         }
853 
transform(Collection<S> input)854         public Collection<T> transform(Collection<S> input) {
855             return transform(input, new ArrayList<T>());
856         }
857     }
858 
transform(SC source, Transform<S, T> transform, TC target)859     public static <S, T, SC extends Collection<S>, TC extends Collection<T>> TC transform(SC source, Transform<S, T> transform, TC target) {
860         for (S sourceItem : source) {
861             T targetItem = transform.transform(sourceItem);
862             if (targetItem != null) {
863                 target.add(targetItem);
864             }
865         }
866         return target;
867     }
868 
transform( SM source, Transform<SK, TK> transformKey, Transform<SV, TV> transformValue, TM target)869     public static <SK, SV, TK, TV, SM extends Map<SK, SV>, TM extends Map<TK, TV>> TM transform(
870         SM source, Transform<SK, TK> transformKey, Transform<SV, TV> transformValue, TM target) {
871         for (Entry<SK, SV> sourceEntry : source.entrySet()) {
872             TK targetKey = transformKey.transform(sourceEntry.getKey());
873             TV targetValue = transformValue.transform(sourceEntry.getValue());
874             if (targetKey != null && targetValue != null) {
875                 target.put(targetKey, targetValue);
876             }
877         }
878         return target;
879     }
880 
881     public static abstract class Apply<T> {
apply(T item)882         public abstract void apply(T item);
883 
applyTo(U collection)884         public <U extends Collection<T>> void applyTo(U collection) {
885             for (T item : collection) {
886                 apply(item);
887             }
888         }
889     }
890 
891     public static abstract class Filter<T> {
892 
contains(T item)893         public abstract boolean contains(T item);
894 
retainAll(U c)895         public <U extends Collection<T>> U retainAll(U c) {
896             for (Iterator<T> it = c.iterator(); it.hasNext();) {
897                 if (!contains(it.next())) it.remove();
898             }
899             return c;
900         }
901 
extractMatches(U c, U target)902         public <U extends Collection<T>> U extractMatches(U c, U target) {
903             for (Iterator<T> it = c.iterator(); it.hasNext();) {
904                 T item = it.next();
905                 if (contains(item)) {
906                     target.add(item);
907                 }
908             }
909             return target;
910         }
911 
removeAll(U c)912         public <U extends Collection<T>> U removeAll(U c) {
913             for (Iterator<T> it = c.iterator(); it.hasNext();) {
914                 if (contains(it.next())) it.remove();
915             }
916             return c;
917         }
918 
extractNonMatches(U c, U target)919         public <U extends Collection<T>> U extractNonMatches(U c, U target) {
920             for (Iterator<T> it = c.iterator(); it.hasNext();) {
921                 T item = it.next();
922                 if (!contains(item)) {
923                     target.add(item);
924                 }
925             }
926             return target;
927         }
928     }
929 
930     public static class MatcherFilter<T> extends Filter<T> {
931         private Matcher matcher;
932 
MatcherFilter(String pattern)933         public MatcherFilter(String pattern) {
934             this.matcher = PatternCache.get(pattern).matcher("");
935         }
936 
MatcherFilter(Matcher matcher)937         public MatcherFilter(Matcher matcher) {
938             this.matcher = matcher;
939         }
940 
set(Matcher matcher)941         public MatcherFilter<T> set(Matcher matcher) {
942             this.matcher = matcher;
943             return this;
944         }
945 
set(String pattern)946         public MatcherFilter<T> set(String pattern) {
947             this.matcher = PatternCache.get(pattern).matcher("");
948             return this;
949         }
950 
951         @Override
contains(T o)952         public boolean contains(T o) {
953             return matcher.reset(o.toString()).matches();
954         }
955     }
956 
957     // static final class HandlingTransform implements Transform<String, Handling> {
958     // @Override
959     // public Handling transform(String source) {
960     // return Handling.valueOf(source);
961     // }
962     // }
963 
964     public static final class PairComparator<K extends Comparable<K>, V extends Comparable<V>> implements java.util.Comparator<Pair<K, V>> {
965 
966         private Comparator<K> comp1;
967         private Comparator<V> comp2;
968 
PairComparator(Comparator<K> comp1, Comparator<V> comp2)969         public PairComparator(Comparator<K> comp1, Comparator<V> comp2) {
970             this.comp1 = comp1;
971             this.comp2 = comp2;
972         }
973 
974         @Override
compare(Pair<K, V> o1, Pair<K, V> o2)975         public int compare(Pair<K, V> o1, Pair<K, V> o2) {
976             {
977                 K o1First = o1.getFirst();
978                 K o2First = o2.getFirst();
979                 int diff = o1First == null ? (o2First == null ? 0 : -1)
980                     : o2First == null ? 1
981                         : comp1 == null ? o1First.compareTo(o2First)
982                             : comp1.compare(o1First, o2First);
983                 if (diff != 0) {
984                     return diff;
985                 }
986             }
987             V o1Second = o1.getSecond();
988             V o2Second = o2.getSecond();
989             return o1Second == null ? (o2Second == null ? 0 : -1)
990                 : o2Second == null ? 1
991                     : comp2 == null ? o1Second.compareTo(o2Second)
992                         : comp2.compare(o1Second, o2Second);
993         }
994 
995     }
996 
997     /**
998      * Fetch data from jar
999      *
1000      * @param name
1001      *            a name residing in the org/unicode/cldr/util/data/ directory, or loading from a jar will break.
1002      */
getUTF8Data(String name)1003     public static BufferedReader getUTF8Data(String name) {
1004         if (new File(name).isAbsolute()) {
1005             throw new IllegalArgumentException(
1006                 "Path must be relative to org/unicode/cldr/util/data  such as 'file.txt' or 'casing/file.txt', but got '"
1007                     + name + "'.");
1008         }
1009         return FileReaders.openFile(CldrUtility.class, "data/" + name);
1010     }
1011 
1012     /**
1013      * Fetch data from jar
1014      *
1015      * @param name
1016      *            a name residing in the org/unicode/cldr/util/data/ directory, or loading from a jar will break.
1017      */
getInputStream(String name)1018     public static InputStream getInputStream(String name) {
1019         if (new File(name).isAbsolute()) {
1020             throw new IllegalArgumentException(
1021                 "Path must be relative to org/unicode/cldr/util/data  such as 'file.txt' or 'casing/file.txt', but got '"
1022                     + name + "'.");
1023         }
1024         return getInputStream(CldrUtility.class, "data/" + name);
1025     }
1026 
getInputStream(Class<?> callingClass, String relativePath)1027     public static InputStream getInputStream(Class<?> callingClass, String relativePath) {
1028         InputStream is = callingClass.getResourceAsStream(relativePath);
1029         // add buffering
1030         return InputStreamFactory.buffer(is);
1031     }
1032 
1033     /**
1034      * Takes a Map that goes from Object to Set, and fills in the transpose
1035      *
1036      * @param source_key_valueSet
1037      * @param output_value_key
1038      */
putAllTransposed(Map<Object, Set<Object>> source_key_valueSet, Map<Object, Object> output_value_key)1039     public static void putAllTransposed(Map<Object, Set<Object>> source_key_valueSet, Map<Object, Object> output_value_key) {
1040         for (Iterator<Object> it = source_key_valueSet.keySet().iterator(); it.hasNext();) {
1041             Object key = it.next();
1042             Set<Object> values = source_key_valueSet.get(key);
1043             for (Iterator<Object> it2 = values.iterator(); it2.hasNext();) {
1044                 Object value = it2.next();
1045                 output_value_key.put(value, key);
1046             }
1047         }
1048     }
1049 
countInstances(String source, String substring)1050     public static int countInstances(String source, String substring) {
1051         int count = 0;
1052         int pos = 0;
1053         while (true) {
1054             pos = source.indexOf(substring, pos) + 1;
1055             if (pos <= 0) break;
1056             count++;
1057         }
1058         return count;
1059     }
1060 
registerTransliteratorFromFile(String id, String dir, String filename)1061     public static void registerTransliteratorFromFile(String id, String dir, String filename) {
1062         registerTransliteratorFromFile(id, dir, filename, Transliterator.FORWARD, true);
1063         registerTransliteratorFromFile(id, dir, filename, Transliterator.REVERSE, true);
1064     }
1065 
registerTransliteratorFromFile(String id, String dir, String filename, int direction, boolean reverseID)1066     public static void registerTransliteratorFromFile(String id, String dir, String filename, int direction,
1067         boolean reverseID) {
1068         if (filename == null) {
1069             filename = id.replace('-', '_');
1070             filename = filename.replace('/', '_');
1071             filename += ".txt";
1072         }
1073         String rules = getText(dir, filename);
1074         Transliterator t;
1075         int pos = id.indexOf('-');
1076         String rid;
1077         if (pos < 0) {
1078             rid = id + "-Any";
1079             id = "Any-" + id;
1080         } else {
1081             rid = id.substring(pos + 1) + "-" + id.substring(0, pos);
1082         }
1083         if (!reverseID) rid = id;
1084 
1085         if (direction == Transliterator.FORWARD) {
1086             Transliterator.unregister(id);
1087             t = Transliterator.createFromRules(id, rules, Transliterator.FORWARD);
1088             Transliterator.registerInstance(t);
1089             System.out.println("Registered new Transliterator: " + id);
1090         }
1091 
1092         /*
1093          * String test = "\u049A\u0430\u0437\u0430\u049B";
1094          * System.out.println(t.transliterate(test));
1095          * t = Transliterator.getInstance(id);
1096          * System.out.println(t.transliterate(test));
1097          */
1098 
1099         if (direction == Transliterator.REVERSE) {
1100             Transliterator.unregister(rid);
1101             t = Transliterator.createFromRules(rid, rules, Transliterator.REVERSE);
1102             Transliterator.registerInstance(t);
1103             System.out.println("Registered new Transliterator: " + rid);
1104         }
1105     }
1106 
getText(String dir, String filename)1107     public static String getText(String dir, String filename) {
1108         try {
1109             BufferedReader br = FileUtilities.openUTF8Reader(dir, filename);
1110             StringBuffer buffer = new StringBuffer();
1111             while (true) {
1112                 String line = br.readLine();
1113                 if (line == null) break;
1114                 if (line.length() > 0 && line.charAt(0) == '\uFEFF') line = line.substring(1);
1115                 if (line.startsWith("//")) continue;
1116                 buffer.append(line).append(CldrUtility.LINE_SEPARATOR);
1117             }
1118             br.close();
1119             String rules = buffer.toString();
1120             return rules;
1121         } catch (IOException e) {
1122             throw (IllegalArgumentException) new IllegalArgumentException("Can't open " + dir + ", " + filename)
1123                 .initCause(e);
1124         }
1125     }
1126 
callMethod(String methodNames, Class<?> cls)1127     public static void callMethod(String methodNames, Class<?> cls) {
1128         for (String methodName : methodNames.split(",")) {
1129             try {
1130                 Method method;
1131                 try {
1132                     method = cls.getMethod(methodName, (Class[]) null);
1133                     try {
1134                         method.invoke(null, (Object[]) null);
1135                     } catch (Exception e) {
1136                         e.printStackTrace();
1137                     }
1138                 } catch (Exception e) {
1139                     System.out.println("No such method: " + methodName);
1140                     showMethods(cls);
1141                 }
1142             } catch (ClassNotFoundException e) {
1143                 e.printStackTrace();
1144             }
1145         }
1146     }
1147 
showMethods(Class<?> cls)1148     public static void showMethods(Class<?> cls) throws ClassNotFoundException {
1149         System.out.println("Possible methods of " + cls.getCanonicalName() + " are: ");
1150         Method[] methods = cls.getMethods();
1151         Set<String> names = new TreeSet<>();
1152         for (int i = 0; i < methods.length; ++i) {
1153             if (methods[i].getGenericParameterTypes().length != 0) continue;
1154             //int mods = methods[i].getModifiers();
1155             // if (!Modifier.isStatic(mods)) continue;
1156             String name = methods[i].getName();
1157             names.add(name);
1158         }
1159         for (Iterator<String> it = names.iterator(); it.hasNext();) {
1160             System.out.println("\t" + it.next());
1161         }
1162     }
1163 
1164     /**
1165      * Breaks lines if they are too long, or if matcher.group(1) != last. Only breaks just before matcher.
1166      *
1167      * @param input
1168      * @param separator
1169      * @param matcher
1170      *            must match each possible item. The first group is significant; if different, will cause break
1171      * @return
1172      */
breakLines(CharSequence input, String separator, Matcher matcher, int width)1173     static public String breakLines(CharSequence input, String separator, Matcher matcher, int width) {
1174         StringBuffer output = new StringBuffer();
1175         String lastPrefix = "";
1176         int lastEnd = 0;
1177         int lastBreakPos = 0;
1178         matcher.reset(input);
1179         while (true) {
1180             boolean match = matcher.find();
1181             if (!match) {
1182                 output.append(input.subSequence(lastEnd, input.length()));
1183                 break;
1184             }
1185             String prefix = matcher.group(1);
1186             if (!prefix.equalsIgnoreCase(lastPrefix) || matcher.end() - lastBreakPos > width) { // break before?
1187                 output.append(separator);
1188                 lastBreakPos = lastEnd;
1189             } else if (lastEnd != 0) {
1190                 output.append(' ');
1191             }
1192             output.append(input.subSequence(lastEnd, matcher.end()).toString().trim());
1193             lastEnd = matcher.end();
1194             lastPrefix = prefix;
1195         }
1196         return output.toString();
1197     }
1198 
showOptions(String[] args)1199     public static void showOptions(String[] args) {
1200         // Properties props = System.getProperties();
1201         System.out.println("Arguments: " + join(args, " ")); // + (props == null ? "" : " " + props));
1202     }
1203 
roundToDecimals(double input, int places)1204     public static double roundToDecimals(double input, int places) {
1205         double log10 = Math.log10(input); // 15000 => 4.xxx
1206         double intLog10 = Math.floor(log10);
1207         double scale = Math.pow(10, intLog10 - places + 1);
1208         double factored = Math.round(input / scale) * scale;
1209         // System.out.println("###\t" +input + "\t" + factored);
1210         return factored;
1211     }
1212 
1213     /**
1214      * Get a property value, returning the value if there is one (eg -Dkey=value),
1215      * otherwise the default value (for either empty or null).
1216      *
1217      * @param key
1218      * @param valueIfNull
1219      * @param valueIfEmpty
1220      * @return
1221      */
getProperty(String key, String defaultValue)1222     public static String getProperty(String key, String defaultValue) {
1223         return getProperty(key, defaultValue, defaultValue);
1224     }
1225 
1226     /**
1227      * Get a property value, returning the value if there is one, otherwise null.
1228      */
getProperty(String key)1229     public static String getProperty(String key) {
1230         return getProperty(key, null, null);
1231     }
1232 
1233     /**
1234      * Get a property value, returning the value if there is one (eg -Dkey=value),
1235      * the valueIfEmpty if there is one with no value (eg -Dkey) and the valueIfNull
1236      * if there is no property.
1237      *
1238      * @param key
1239      * @param valueIfNull
1240      * @param valueIfEmpty
1241      * @return
1242      */
getProperty(String key, String valueIfNull, String valueIfEmpty)1243     public static String getProperty(String key, String valueIfNull, String valueIfEmpty) {
1244         String result = CLDRConfig.getInstance().getProperty(key);
1245         if (result == null) {
1246             result = valueIfNull;
1247         } else if (result.length() == 0) {
1248             result = valueIfEmpty;
1249         }
1250         return result;
1251     }
1252 
hex(byte[] bytes, int start, int end, String separator)1253     public static String hex(byte[] bytes, int start, int end, String separator) {
1254         StringBuilder result = new StringBuilder();
1255         for (int i = 0; i < end; ++i) {
1256             if (result.length() != 0) {
1257                 result.append(separator);
1258             }
1259             result.append(Utility.hex(bytes[i] & 0xFF, 2));
1260         }
1261         return result.toString();
1262     }
1263 
getProperty(String string, boolean b)1264     public static boolean getProperty(String string, boolean b) {
1265         return getProperty(string, b ? "true" : "false", "true").matches("(?i)T|TRUE");
1266     }
1267 
checkValidDirectory(String sourceDirectory)1268     public static String checkValidDirectory(String sourceDirectory) {
1269         return checkValidFile(sourceDirectory, true, null);
1270     }
1271 
checkValidDirectory(String sourceDirectory, String correction)1272     public static String checkValidDirectory(String sourceDirectory, String correction) {
1273         return checkValidFile(sourceDirectory, true, correction);
1274     }
1275 
checkValidFile(String sourceDirectory, boolean checkForDirectory, String correction)1276     public static String checkValidFile(String sourceDirectory, boolean checkForDirectory, String correction) {
1277         File file = null;
1278         String normalizedPath = null;
1279         try {
1280             file = new File(sourceDirectory);
1281             normalizedPath = PathUtilities.getNormalizedPathString(file) + File.separatorChar;
1282         } catch (Exception e) {
1283         }
1284         if (file == null || normalizedPath == null || checkForDirectory && !file.isDirectory()) {
1285             throw new RuntimeException("Directory not found: " + sourceDirectory
1286                 + (normalizedPath == null ? "" : " => " + normalizedPath)
1287                 + (correction == null ? "" : CldrUtility.LINE_SEPARATOR + correction));
1288         }
1289         return normalizedPath;
1290     }
1291 
1292     /**
1293      * Copy up to matching line (not included). If output is null, then just skip until.
1294      *
1295      * @param oldFile
1296      *            file to copy
1297      * @param readUntilPattern
1298      *            pattern to search for. If null, goes to end of file.
1299      * @param output
1300      *            into to copy into. If null, just skips in the input.
1301      * @param includeMatchingLine
1302      *            inclde the matching line when copying.
1303      * @throws IOException
1304      */
copyUpTo(BufferedReader oldFile, final Pattern readUntilPattern, final PrintWriter output, boolean includeMatchingLine)1305     public static void copyUpTo(BufferedReader oldFile, final Pattern readUntilPattern,
1306         final PrintWriter output, boolean includeMatchingLine) throws IOException {
1307         Matcher readUntil = readUntilPattern == null ? null : readUntilPattern.matcher("");
1308         while (true) {
1309             String line = oldFile.readLine();
1310             if (line == null) {
1311                 break;
1312             }
1313             if (line.startsWith("\uFEFF")) {
1314                 line = line.substring(1);
1315             }
1316             if (readUntil != null && readUntil.reset(line).matches()) {
1317                 if (includeMatchingLine && output != null) {
1318                     output.println(line);
1319                 }
1320                 break;
1321             }
1322             if (output != null) {
1323                 output.println(line);
1324             }
1325         }
1326     }
1327 
    // Shared formatter for date plus time; the pattern ends with the literal text "GMT".
    // isoFormat synchronizes on this instance while formatting.
    private static DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss 'GMT'");
    // Shared formatter for the date part only. isoFormatDateOnly synchronizes on this instance.
    private static DateFormat DATE_ONLY = new SimpleDateFormat("yyyy-MM-dd");
    static {
        // Both formatters use the GMT time zone, so the literal "GMT" suffix in df's pattern
        // agrees with the formatted time.
        df.setTimeZone(TimeZone.getTimeZone("GMT"));
        DATE_ONLY.setTimeZone(TimeZone.getTimeZone("GMT"));
    }
1334 
isoFormat(Date date)1335     public static String isoFormat(Date date) {
1336         synchronized (df) {
1337             return df.format(date);
1338         }
1339     }
1340 
isoFormatDateOnly(Date date)1341     public static String isoFormatDateOnly(Date date) {
1342         synchronized (DATE_ONLY) {
1343             return DATE_ONLY.format(date);
1344         }
1345     }
1346 
newConcurrentHashMap()1347     public static <K, V> ConcurrentHashMap<K, V> newConcurrentHashMap() {
1348         // http://ria101.wordpress.com/2011/12/12/concurrenthashmap-avoid-a-common-misuse/
1349         return new ConcurrentHashMap<>(4, 0.9f, 1);
1350     }
1351 
newConcurrentHashMap(Map<K, V> source)1352     public static <K, V> ConcurrentHashMap<K, V> newConcurrentHashMap(Map<K, V> source) {
1353         ConcurrentHashMap<K, V> result = newConcurrentHashMap();
1354         result.putAll(source);
1355         return result;
1356     }
1357 
equals(Object a, Object b)1358     public static boolean equals(Object a, Object b) {
1359         return a == b ? true
1360             : a == null || b == null ? false
1361                 : a.equals(b);
1362     }
1363 
getDoubleLink(String code)1364     public static String getDoubleLink(String code) {
1365         final String anchorSafe = TransliteratorUtilities.toHTML.transliterate(code).replace(" ", "_");
1366         return "<a name='" + anchorSafe + "' href='#" + anchorSafe + "'>";
1367     }
1368 
getDoubleLinkedText(String anchor, String anchorText)1369     public static String getDoubleLinkedText(String anchor, String anchorText) {
1370         return getDoubleLink(anchor) + TransliteratorUtilities.toHTML.transliterate(anchorText).replace("_", " ")
1371             + "</a>";
1372     }
1373 
getDoubleLinkedText(String anchor)1374     public static String getDoubleLinkedText(String anchor) {
1375         return getDoubleLinkedText(anchor, anchor);
1376     }
1377 
getDoubleLinkMsg()1378     public static String getDoubleLinkMsg() {
1379         return "<a name=''{0}'' href=''#{0}''>{0}</a>";
1380     }
1381 
getDoubleLinkMsg2()1382     public static String getDoubleLinkMsg2() {
1383         return "<a name=''{0}{1}'' href=''#{0}{1}''>{0}</a>";
1384     }
1385 
getCopyrightString()1386     public static String getCopyrightString() {
1387         return getCopyrightString("");
1388     }
1389 
getCopyrightString(String linePrefix)1390     public static String getCopyrightString(String linePrefix) {
1391         // now do the rest
1392         return linePrefix + "Copyright \u00A9 1991-" + Calendar.getInstance().get(Calendar.YEAR) + " Unicode, Inc." + CldrUtility.LINE_SEPARATOR
1393             + linePrefix + "For terms of use, see http://www.unicode.org/copyright.html" + CldrUtility.LINE_SEPARATOR
1394             + linePrefix + "Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries." + CldrUtility.LINE_SEPARATOR
1395             + linePrefix + "CLDR data files are interpreted according to the LDML specification " + "(http://unicode.org/reports/tr35/)";
1396     }
1397 
1398     // TODO Move to collection utilities
1399     /**
1400      * Type-safe get
1401      * @param map
1402      * @param key
1403      * @return value
1404      */
get(M map, K key)1405     public static <K, V, M extends Map<K, V>> V get(M map, K key) {
1406         return map.get(key);
1407     }
1408 
1409     /**
1410      * Type-safe contains
1411      * @param map
1412      * @param key
1413      * @return value
1414      */
contains(C collection, K key)1415     public static <K, C extends Collection<K>> boolean contains(C collection, K key) {
1416         return collection.contains(key);
1417     }
1418 
toEnumSet(Class<E> classValue, Collection<String> stringValues)1419     public static <E extends Enum<E>> EnumSet<E> toEnumSet(Class<E> classValue, Collection<String> stringValues) {
1420         EnumSet<E> result = EnumSet.noneOf(classValue);
1421         for (String s : stringValues) {
1422             result.add(Enum.valueOf(classValue, s));
1423         }
1424         return result;
1425     }
1426 
putNew(M map, K key, V value)1427     public static <K, V, M extends Map<K, V>> M putNew(M map, K key, V value) {
1428         if (!map.containsKey(key)) {
1429             map.put(key, value);
1430         }
1431         return map;
1432     }
1433 
cleanSemiFields(String line)1434     public static String[] cleanSemiFields(String line) {
1435         line = cleanLine(line);
1436         return line.isEmpty() ? null : SEMI_SPLIT.split(line);
1437     }
1438 
cleanLine(String line)1439     private static String cleanLine(String line) {
1440         int comment = line.indexOf("#");
1441         if (comment >= 0) {
1442             line = line.substring(0, comment);
1443         }
1444         if (line.startsWith("\uFEFF")) {
1445             line = line.substring(1);
1446         }
1447         return line.trim();
1448     }
1449 
handleFile(String filename, LineHandler handler)1450     public static void handleFile(String filename, LineHandler handler) throws IOException {
1451         try (BufferedReader in = getUTF8Data(filename);) {
1452             String line = null;
1453             while ((line = in.readLine()) != null) {
1454                 //                String line = in.readLine();
1455                 //                if (line == null) {
1456                 //                    break;
1457                 //                }
1458                 try {
1459                     if (!handler.handle(line)) {
1460                         if (HANDLEFILE_SHOW_SKIP) {
1461                             System.out.println("Skipping line: " + line);
1462                         }
1463                     }
1464                 } catch (Exception e) {
1465                     throw (RuntimeException) new IllegalArgumentException("Problem with line: " + line)
1466                         .initCause(e);
1467                 }
1468             }
1469         }
1470         //        in.close();
1471     }
1472 
ifNull(T x, T y)1473     public static <T> T ifNull(T x, T y) {
1474         return x == null
1475             ? y
1476             : x;
1477     }
1478 
ifSame(T source, T replaceIfSame, T replacement)1479     public static <T> T ifSame(T source, T replaceIfSame, T replacement) {
1480         return source == replaceIfSame ? replacement : source;
1481     }
1482 
ifEqual(T source, T replaceIfSame, T replacement)1483     public static <T> T ifEqual(T source, T replaceIfSame, T replacement) {
1484         return Objects.equals(source, replaceIfSame) ? replacement : source;
1485     }
1486 
intersect(Set<T> a, Collection<T> b)1487     public static <T> Set<T> intersect(Set<T> a, Collection<T> b) {
1488         Set<T> result = new LinkedHashSet<>(a);
1489         result.retainAll(b);
1490         return result;
1491     }
1492 
subtract(Set<T> a, Collection<T> b)1493     public static <T> Set<T> subtract(Set<T> a, Collection<T> b) {
1494         Set<T> result = new LinkedHashSet<>(a);
1495         result.removeAll(b);
1496         return result;
1497     }
1498 
deepEquals(Object... pairs)1499     public static boolean deepEquals(Object... pairs) {
1500         for (int item = 0; item < pairs.length;) {
1501             if (!Objects.deepEquals(pairs[item++], pairs[item++])) {
1502                 return false;
1503             }
1504         }
1505         return true;
1506     }
1507 
array(Splitter splitter, String source)1508     public static String[] array(Splitter splitter, String source) {
1509         List<String> list = splitter.splitToList(source);
1510         return list.toArray(new String[list.size()]);
1511     }
1512 
toHex(String in, boolean javaStyle)1513     public static String toHex(String in, boolean javaStyle) {
1514         StringBuilder result = new StringBuilder();
1515         for (int i = 0; i < in.length(); ++i) {
1516             result.append(toHex(in.charAt(i), javaStyle));
1517         }
1518         return result.toString();
1519     }
1520 
toHex(int j, boolean javaStyle)1521     public static String toHex(int j, boolean javaStyle) {
1522         if (j == '\"') {
1523             return "\\\"";
1524         } else if (j == '\\') {
1525             return "\\\\";
1526         } else if (0x20 < j && j < 0x7F) {
1527             return String.valueOf((char) j);
1528         }
1529         final String hexString = Integer.toHexString(j).toUpperCase();
1530         int gap = 4 - hexString.length();
1531         if (gap < 0) {
1532             gap = 0;
1533         }
1534         String prefix = javaStyle ? "\\u" : "U+";
1535         return prefix + "000".substring(0, gap) + hexString;
1536     }
1537 
1538     /**
1539      * get string format for debugging, since Java has a useless display for many items
1540      * @param item
1541      * @return
1542      */
toString(Object item)1543     public static String toString(Object item) {
1544         if (item instanceof Object[]) {
1545             return toString(Arrays.asList((Object[]) item));
1546         } else if (item instanceof Entry) {
1547             return toString(((Entry) item).getKey()) + "≔" + toString(((Entry) item).getValue());
1548         } else if (item instanceof Map) {
1549             return "{" + toString(((Map) item).entrySet()) + "}";
1550         } else if (item instanceof Collection) {
1551             List<String> result = new ArrayList<>();
1552             for (Object subitem : (Collection) item) {
1553                 result.add(toString(subitem));
1554             }
1555             return result.toString();
1556         }
1557         return item.toString();
1558     }
1559 
1560     /**
1561      * Return the git hash for the CLDR base directory.
1562      *
1563      * @return the hash, like "9786e05e95a2e4f02687fa3b84126782f9f698a3"
1564      */
getCldrBaseDirHash()1565     public static String getCldrBaseDirHash() {
1566         final File baseDir = CLDRConfig.getInstance().getCldrBaseDirectory();
1567         return getGitHashForDir(baseDir.toString());
1568     }
1569 
1570     /**
1571      * Return the git hash for a directory.
1572      *
1573      * @param dir the directory name
1574      * @return the hash, like "9786e05e95a2e4f02687fa3b84126782f9f698a3"
1575      */
getGitHashForDir(String dir)1576     public final static String getGitHashForDir(String dir) {
1577         final String GIT_HASH_COMMANDS[] = { "git",  "rev-parse", "HEAD" };
1578         try {
1579             if (dir == null) {
1580                 return CLDRURLS.UNKNOWN_REVISION; // no dir
1581             }
1582             File f = new File(dir);
1583             if (!f.isDirectory()) {
1584                 return CLDRURLS.UNKNOWN_REVISION; // does not exist
1585             }
1586             Process p = Runtime.getRuntime().exec(GIT_HASH_COMMANDS, null, f);
1587             try (BufferedReader is = new BufferedReader(new InputStreamReader(p.getInputStream()))) {
1588                 String str = is.readLine();
1589                 if (str.length() == 0) {
1590                     throw new Exception("git returned empty");
1591                 }
1592                 return str;
1593             }
1594         } catch(Throwable t) {
1595             // We do not expect this to be called frequently.
1596             System.err.println("While trying to get 'git' hash for " + dir + " : " + t.getMessage());
1597             t.printStackTrace();
1598             return CLDRURLS.UNKNOWN_REVISION;
1599         }
1600     }
1601 }
1602