1 /*
2  **********************************************************************
3  * Copyright (c) 2002-2013, International Business Machines
4  * Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  * Author: Mark Davis
7  **********************************************************************
8  */
9 package org.unicode.cldr.util;
10 
11 import java.io.BufferedReader;
12 import java.io.File;
13 import java.io.FileReader;
14 import java.io.IOException;
15 import java.io.InputStream;
16 import java.io.PrintWriter;
17 import java.lang.reflect.Constructor;
18 import java.lang.reflect.Method;
19 import java.nio.charset.Charset;
20 import java.util.ArrayList;
21 import java.util.Arrays;
22 import java.util.Calendar;
23 import java.util.Collection;
24 import java.util.Collections;
25 import java.util.Comparator;
26 import java.util.Date;
27 import java.util.EnumSet;
28 import java.util.HashMap;
29 import java.util.HashSet;
30 import java.util.Iterator;
31 import java.util.LinkedHashMap;
32 import java.util.LinkedHashSet;
33 import java.util.List;
34 import java.util.Map;
35 import java.util.Map.Entry;
36 import java.util.Objects;
37 import java.util.Set;
38 import java.util.SortedMap;
39 import java.util.SortedSet;
40 import java.util.TreeMap;
41 import java.util.TreeSet;
42 import java.util.concurrent.ConcurrentHashMap;
43 import java.util.regex.Matcher;
44 import java.util.regex.Pattern;
45 
46 import org.unicode.cldr.draft.FileUtilities;
47 import org.unicode.cldr.util.RegexLookup.Finder;
48 
49 import com.google.common.base.Splitter;
50 import com.ibm.icu.dev.test.TestFmwk;
51 import com.ibm.icu.impl.Utility;
52 import com.ibm.icu.text.DateFormat;
53 import com.ibm.icu.text.SimpleDateFormat;
54 import com.ibm.icu.text.Transform;
55 import com.ibm.icu.text.Transliterator;
56 import com.ibm.icu.text.UTF16;
57 import com.ibm.icu.text.UnicodeSet;
58 import com.ibm.icu.text.UnicodeSetIterator;
59 import com.ibm.icu.util.Freezable;
60 import com.ibm.icu.util.Output;
61 import com.ibm.icu.util.TimeZone;
62 
63 public class CldrUtility {
64 
65     public static final Charset UTF8 = Charset.forName("utf-8");
66     public static final boolean BETA = false;
67 
68     public static final String LINE_SEPARATOR = "\n";
69     public final static Pattern SEMI_SPLIT = PatternCache.get("\\s*;\\s*");
70 
71     private static final boolean HANDLEFILE_SHOW_SKIP = false;
72     // Constant for "∅∅∅". Indicates that a child locale has no value for a
73     // path even though a parent does.
74     public static final String NO_INHERITANCE_MARKER = new String(new char[] { 0x2205, 0x2205, 0x2205 });
75 
76     /**
77      * Define the constant INHERITANCE_MARKER for "↑↑↑", used by Survey Tool to indicate a "passthru" vote to the parent locale.
78      * If CLDRFile ever finds this value in a data field, writing of the field should be suppressed.
79      */
80     public static final String INHERITANCE_MARKER = new String(new char[] { 0x2191, 0x2191, 0x2191 });
81 
82     public static final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze();
83 
84     /**
85      * Very simple class, used to replace variables in a string. For example
86      * <p>
87      *
88      * <pre>
89      * static VariableReplacer langTag = new VariableReplacer()
90      * 			.add("$alpha", "[a-zA-Z]")
91      * 			.add("$digit", "[0-9]")
92      * 			.add("$alphanum", "[a-zA-Z0-9]")
93      * 			.add("$x", "[xX]");
94      * 			...
95      * 			String langTagPattern = langTag.replace(...);
96      * </pre>
97      */
98     public static class VariableReplacer {
99         // simple implementation for now
100         private Map<String, String> m = new TreeMap<String, String>(Collections.reverseOrder());
101 
add(String variable, String value)102         public VariableReplacer add(String variable, String value) {
103             m.put(variable, value);
104             return this;
105         }
106 
replace(String source)107         public String replace(String source) {
108             String oldSource;
109             do {
110                 oldSource = source;
111                 for (Iterator<String> it = m.keySet().iterator(); it.hasNext();) {
112                     String variable = it.next();
113                     String value = m.get(variable);
114                     source = replaceAll(source, variable, value);
115                 }
116             } while (!source.equals(oldSource));
117             return source;
118         }
119 
replaceAll(String source, String key, String value)120         public String replaceAll(String source, String key, String value) {
121             while (true) {
122                 int pos = source.indexOf(key);
123                 if (pos < 0) return source;
124                 source = source.substring(0, pos) + value + source.substring(pos + key.length());
125             }
126         }
127     }
128 
129     public interface LineHandler {
130         /**
131          * Return false if line was skipped
132          *
133          * @param line
134          * @return
135          */
handle(String line)136         boolean handle(String line) throws Exception;
137     }
138 
getPath(String path, String filename)139     public static String getPath(String path, String filename) {
140         if (path == null) {
141             return null;
142         }
143         final File file = filename == null ? new File(path)
144             : new File(path, filename);
145         try {
146             return file.getCanonicalPath() + File.separatorChar;
147         } catch (IOException e) {
148             return file.getPath() + File.separatorChar;
149         }
150     }
151 
getPath(String path)152     static String getPath(String path) {
153         return getPath(path, null);
154     }
155 
156     public static final String ANALYTICS = "<script type=\"text/javascript\">\n"
157         + "var gaJsHost = ((\"https:\" == document.location.protocol) ? \"https://ssl.\" : \"http://www.\");\n"
158         + "document.write(unescape(\"%3Cscript src='\" + gaJsHost + \"google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E\"));\n"
159         + "</script>\n"
160         + "<script type=\"text/javascript\">\n"
161         + "try {\n"
162         + "var pageTracker = _gat._getTracker(\"UA-7672775-1\");\n"
163         + "pageTracker._trackPageview();\n"
164         + "} catch(err) {}</script>";
165 
166     public static final List<String> MINIMUM_LANGUAGES = Arrays.asList(new String[] { "ar", "en", "de", "fr", "hi",
167         "it", "es", "pt", "ru", "zh", "ja" }); // plus language itself
168     public static final List<String> MINIMUM_TERRITORIES = Arrays.asList(new String[] { "US", "GB", "DE", "FR", "IT",
169         "JP", "CN", "IN", "RU", "BR" });
170 
171     public interface LineComparer {
172         static final int LINES_DIFFERENT = -1, LINES_SAME = 0, SKIP_FIRST = 1, SKIP_SECOND = 2;
173 
174         /**
175          * Returns LINES_DIFFERENT, LINES_SAME, or if one of the lines is ignorable, SKIP_FIRST or SKIP_SECOND
176          *
177          * @param line1
178          * @param line2
179          * @return
180          */
compare(String line1, String line2)181         int compare(String line1, String line2);
182     }
183 
184     public static class SimpleLineComparator implements LineComparer {
185         public static final int TRIM = 1, SKIP_SPACES = 2, SKIP_EMPTY = 4, SKIP_CVS_TAGS = 8;
186         StringIterator si1 = new StringIterator();
187         StringIterator si2 = new StringIterator();
188         int flags;
189 
SimpleLineComparator(int flags)190         public SimpleLineComparator(int flags) {
191             this.flags = flags;
192         }
193 
compare(String line1, String line2)194         public int compare(String line1, String line2) {
195             // first, see if we want to skip one or the other lines
196             int skipper = 0;
197             if (line1 == null) {
198                 skipper = SKIP_FIRST;
199             } else {
200                 if ((flags & TRIM) != 0) line1 = line1.trim();
201                 if ((flags & SKIP_EMPTY) != 0 && line1.length() == 0) skipper = SKIP_FIRST;
202             }
203             if (line2 == null) {
204                 skipper = SKIP_SECOND;
205             } else {
206                 if ((flags & TRIM) != 0) line2 = line2.trim();
207                 if ((flags & SKIP_EMPTY) != 0 && line2.length() == 0) skipper += SKIP_SECOND;
208             }
209             if (skipper != 0) {
210                 if (skipper == SKIP_FIRST + SKIP_SECOND) return LINES_SAME; // ok, don't skip both
211                 return skipper;
212             }
213 
214             // check for null
215             if (line1 == null) {
216                 if (line2 == null) return LINES_SAME;
217                 return LINES_DIFFERENT;
218             }
219             if (line2 == null) {
220                 return LINES_DIFFERENT;
221             }
222 
223             // now check equality
224             if (line1.equals(line2)) return LINES_SAME;
225 
226             // if not equal, see if we are skipping spaces
227             if ((flags & SKIP_CVS_TAGS) != 0) {
228                 if (line1.indexOf('$') >= 0 && line2.indexOf('$') >= 0) {
229                     line1 = stripTags(line1);
230                     line2 = stripTags(line2);
231                     if (line1.equals(line2)) return LINES_SAME;
232                 } else if (line1.startsWith("<!DOCTYPE ldml SYSTEM \"../../common/dtd/")
233                     && line2.startsWith("<!DOCTYPE ldml SYSTEM \"../../common/dtd/")) {
234                     return LINES_SAME;
235                 }
236             }
237             if ((flags & SKIP_SPACES) != 0 && si1.set(line1).matches(si2.set(line2))) return LINES_SAME;
238             return LINES_DIFFERENT;
239         }
240 
241         // private Matcher dtdMatcher = PatternCache.get(
242         // "\\Q<!DOCTYPE ldml SYSTEM \"http://www.unicode.org/cldr/dtd/\\E.*\\Q/ldml.dtd\">\\E").matcher("");
243 
244         private String[] CVS_TAGS = { "Revision", "Date" };
245 
stripTags(String line)246         private String stripTags(String line) {
247             // $
248             // Revision: 8994 $
249             // $
250             // Date: 2013-07-03 21:31:17 +0200 (Wed, 03 Jul 2013) $
251             int pos = line.indexOf('$');
252             if (pos < 0) return line;
253             pos++;
254             int endpos = line.indexOf('$', pos);
255             if (endpos < 0) return line;
256             for (int i = 0; i < CVS_TAGS.length; ++i) {
257                 if (!line.startsWith(CVS_TAGS[i], pos)) continue;
258                 line = line.substring(0, pos + CVS_TAGS[i].length()) + line.substring(endpos);
259             }
260             return line;
261         }
262 
263     }
264 
265     /**
266      *
267      * @param file1
268      * @param file2
269      * @param failureLines
270      *            on input, String[2], on output, failing lines
271      * @param lineComparer
272      * @return
273      * @throws IOException
274      */
areFileIdentical(String file1, String file2, String[] failureLines, LineComparer lineComparer)275     public static boolean areFileIdentical(String file1, String file2, String[] failureLines,
276         LineComparer lineComparer) throws IOException {
277         try (BufferedReader br1 = new BufferedReader(new FileReader(file1), 32 * 1024);
278             BufferedReader br2 = new BufferedReader(new FileReader(file2), 32 * 1024);) {
279             String line1 = "";
280             String line2 = "";
281             int skip = 0;
282 
283             while (true) {
284                 if ((skip & LineComparer.SKIP_FIRST) == 0) line1 = br1.readLine();
285                 if ((skip & LineComparer.SKIP_SECOND) == 0) line2 = br2.readLine();
286                 if (line1 == null && line2 == null) return true;
287                 if (line1 == null || line2 == null) {
288                     // System.out.println("debug");
289                 }
290                 skip = lineComparer.compare(line1, line2);
291                 if (skip == LineComparer.LINES_DIFFERENT) {
292                     break;
293                 }
294             }
295             failureLines[0] = line1 != null ? line1 : "<end of file>";
296             failureLines[1] = line2 != null ? line2 : "<end of file>";
297             return false;
298         }
299     }
300 
301     /*
302      * static String getLineWithoutFluff(BufferedReader br1, boolean first, int flags) throws IOException {
303      * while (true) {
304      * String line1 = br1.readLine();
305      * if (line1 == null) return line1;
306      * if ((flags & TRIM)!= 0) line1 = line1.trim();
307      * if ((flags & SKIP_EMPTY)!= 0 && line1.length() == 0) continue;
308      * return line1;
309      * }
310      * }
311      */
312 
313     public final static class StringIterator {
314         String string;
315         int position = 0;
316 
next()317         char next() {
318             while (true) {
319                 if (position >= string.length()) return '\uFFFF';
320                 char ch = string.charAt(position++);
321                 if (ch != ' ' && ch != '\t') return ch;
322             }
323         }
324 
reset()325         StringIterator reset() {
326             position = 0;
327             return this;
328         }
329 
set(String string)330         StringIterator set(String string) {
331             this.string = string;
332             position = 0;
333             return this;
334         }
335 
matches(StringIterator other)336         boolean matches(StringIterator other) {
337             while (true) {
338                 char c1 = next();
339                 char c2 = other.next();
340                 if (c1 != c2) return false;
341                 if (c1 == '\uFFFF') return true;
342             }
343         }
344 
345         /**
346          * @return Returns the position.
347          */
getPosition()348         public int getPosition() {
349             return position;
350         }
351     }
352 
splitArray(String source, char separator)353     public static String[] splitArray(String source, char separator) {
354         return splitArray(source, separator, false);
355     }
356 
splitArray(String source, char separator, boolean trim)357     public static String[] splitArray(String source, char separator, boolean trim) {
358         List<String> piecesList = splitList(source, separator, trim);
359         String[] pieces = new String[piecesList.size()];
360         piecesList.toArray(pieces);
361         return pieces;
362     }
363 
splitCommaSeparated(String line)364     public static String[] splitCommaSeparated(String line) {
365         // items are separated by ','
366         // each item is of the form abc...
367         // or "..." (required if a comma or quote is contained)
368         // " in a field is represented by ""
369         List<String> result = new ArrayList<String>();
370         StringBuilder item = new StringBuilder();
371         boolean inQuote = false;
372         for (int i = 0; i < line.length(); ++i) {
373             char ch = line.charAt(i); // don't worry about supplementaries
374             switch (ch) {
375             case '"':
376                 inQuote = !inQuote;
377                 // at start or end, that's enough
378                 // if get a quote when we are not in a quote, and not at start, then add it and return to inQuote
379                 if (inQuote && item.length() != 0) {
380                     item.append('"');
381                     inQuote = true;
382                 }
383                 break;
384             case ',':
385                 if (!inQuote) {
386                     result.add(item.toString());
387                     item.setLength(0);
388                 } else {
389                     item.append(ch);
390                 }
391                 break;
392             default:
393                 item.append(ch);
394                 break;
395             }
396         }
397         result.add(item.toString());
398         return result.toArray(new String[result.size()]);
399     }
400 
splitList(String source, char separator)401     public static List<String> splitList(String source, char separator) {
402         return splitList(source, separator, false, null);
403     }
404 
splitList(String source, char separator, boolean trim)405     public static List<String> splitList(String source, char separator, boolean trim) {
406         return splitList(source, separator, trim, null);
407     }
408 
splitList(String source, char separator, boolean trim, List<String> output)409     public static List<String> splitList(String source, char separator, boolean trim, List<String> output) {
410         return splitList(source, Character.toString(separator), trim, output);
411     }
412 
splitList(String source, String separator)413     public static List<String> splitList(String source, String separator) {
414         return splitList(source, separator, false, null);
415     }
416 
splitList(String source, String separator, boolean trim)417     public static List<String> splitList(String source, String separator, boolean trim) {
418         return splitList(source, separator, trim, null);
419     }
420 
splitList(String source, String separator, boolean trim, List<String> output)421     public static List<String> splitList(String source, String separator, boolean trim, List<String> output) {
422         if (output == null) output = new ArrayList<String>();
423         if (source.length() == 0) return output;
424         int pos = 0;
425         do {
426             int npos = source.indexOf(separator, pos);
427             if (npos < 0) npos = source.length();
428             String piece = source.substring(pos, npos);
429             if (trim) piece = piece.trim();
430             output.add(piece);
431             pos = npos + 1;
432         } while (pos < source.length());
433         return output;
434     }
435 
436     /**
437      * Protect a collection (as much as Java lets us!) from modification.
438      * Really, really ugly code, since Java doesn't let us do better.
439      */
440     @SuppressWarnings({ "rawtypes", "unchecked" })
protectCollection(T source)441     public static <T> T protectCollection(T source) {
442         // TODO - exclude UnmodifiableMap, Set, ...
443         if (source instanceof Map) {
444             Map sourceMap = (Map) source;
445             Map resultMap = clone(sourceMap);
446             if (resultMap == null) return (T) sourceMap; // failed
447             resultMap.clear();
448             for (Object key : sourceMap.keySet()) {
449                 resultMap.put(protectCollection(key), protectCollection(sourceMap.get(key)));
450             }
451             return resultMap instanceof SortedMap ? (T) Collections.unmodifiableSortedMap((SortedMap) resultMap)
452                 : (T) Collections.unmodifiableMap(resultMap);
453         } else if (source instanceof Collection) {
454             Collection sourceCollection = (Collection) source;
455             Collection<Object> resultCollection = clone(sourceCollection);
456             if (resultCollection == null) return (T) sourceCollection; // failed
457             resultCollection.clear();
458 
459             for (Object item : sourceCollection) {
460                 resultCollection.add(protectCollection(item));
461             }
462 
463             return sourceCollection instanceof List ? (T) Collections.unmodifiableList((List) sourceCollection)
464                 : sourceCollection instanceof SortedSet ? (T) Collections
465                     .unmodifiableSortedSet((SortedSet) sourceCollection)
466                     : sourceCollection instanceof Set ? (T) Collections.unmodifiableSet((Set) sourceCollection)
467                         : (T) Collections.unmodifiableCollection(sourceCollection);
468         } else if (source instanceof Freezable) {
469             Freezable freezableSource = (Freezable) source;
470             if (freezableSource.isFrozen()) return source;
471             return (T) ((Freezable) (freezableSource.cloneAsThawed())).freeze();
472         } else {
473             return source; // can't protect
474         }
475     }
476 
477     /**
478      * Protect a collections where we don't need to clone.
479      * @param source
480      * @return
481      */
482     @SuppressWarnings({ "rawtypes", "unchecked" })
protectCollectionX(T source)483     public static <T> T protectCollectionX(T source) {
484         // TODO - exclude UnmodifiableMap, Set, ...
485         if (isImmutable(source)) {
486             return source;
487         }
488         if (source instanceof Map) {
489             Map sourceMap = (Map) source;
490             // recurse
491             LinkedHashMap tempMap = new LinkedHashMap<>(sourceMap); // copy contents
492             sourceMap.clear();
493             for (Object key : tempMap.keySet()) {
494                 sourceMap.put(protectCollection(key), protectCollectionX(tempMap.get(key)));
495             }
496             return sourceMap instanceof SortedMap ? (T) Collections.unmodifiableSortedMap((SortedMap) sourceMap)
497                 : (T) Collections.unmodifiableMap(sourceMap);
498         } else if (source instanceof Collection) {
499             Collection sourceCollection = (Collection) source;
500             LinkedHashSet tempSet = new LinkedHashSet<>(sourceCollection); // copy contents
501 
502             sourceCollection.clear();
503             for (Object item : tempSet) {
504                 sourceCollection.add(protectCollectionX(item));
505             }
506 
507             return sourceCollection instanceof List ? (T) Collections.unmodifiableList((List) sourceCollection)
508                 : sourceCollection instanceof SortedSet ? (T) Collections
509                     .unmodifiableSortedSet((SortedSet) sourceCollection)
510                     : sourceCollection instanceof Set ? (T) Collections.unmodifiableSet((Set) sourceCollection)
511                         : (T) Collections.unmodifiableCollection(sourceCollection);
512         } else if (source instanceof Freezable) {
513             Freezable freezableSource = (Freezable) source;
514             return (T) freezableSource.freeze();
515         } else {
516             throw new IllegalArgumentException("Can’t protect: " + source.getClass().toString());
517         }
518     }
519 
520     private static final Set<Object> KNOWN_IMMUTABLES = new HashSet<Object>(Arrays.asList(
521         String.class));
522 
isImmutable(Object source)523     public static boolean isImmutable(Object source) {
524         return source == null
525             || source instanceof Enum
526             || source instanceof Number
527             || KNOWN_IMMUTABLES.contains(source.getClass());
528     }
529 
530     /**
531      * Clones T if we can; otherwise returns null.
532      *
533      * @param <T>
534      * @param source
535      * @return
536      */
537     @SuppressWarnings("unchecked")
clone(T source)538     private static <T> T clone(T source) {
539         final Class<? extends Object> class1 = source.getClass();
540         try {
541             final Method declaredMethod = class1.getDeclaredMethod("clone", (Class<?>) null);
542             return (T) declaredMethod.invoke(source, (Object) null);
543         } catch (Exception e) {
544         }
545         try {
546             final Constructor<? extends Object> declaredMethod = class1.getConstructor((Class<?>) null);
547             return (T) declaredMethod.newInstance((Object) null);
548         } catch (Exception e) {
549         }
550         return null; // uncloneable
551     }
552 
553     /**
554      * Appends two strings, inserting separator if either is empty
555      */
joinWithSeparation(String a, String separator, String b)556     public static String joinWithSeparation(String a, String separator, String b) {
557         if (a.length() == 0) return b;
558         if (b.length() == 0) return a;
559         return a + separator + b;
560     }
561 
562     /**
563      * Appends two strings, inserting separator if either is empty. Modifies first map
564      */
joinWithSeparation(Map<String, String> a, String separator, Map<String, String> b)565     public static Map<String, String> joinWithSeparation(Map<String, String> a, String separator, Map<String, String> b) {
566         for (Iterator<String> it = b.keySet().iterator(); it.hasNext();) {
567             String key = it.next();
568             String bvalue = b.get(key);
569             String avalue = a.get(key);
570             if (avalue != null) {
571                 if (avalue.trim().equals(bvalue.trim())) continue;
572                 bvalue = joinWithSeparation(avalue, separator, bvalue);
573             }
574             a.put(key, bvalue);
575         }
576         return a;
577     }
578 
join(Collection<T> c, String separator)579     public static <T> String join(Collection<T> c, String separator) {
580         return join(c, separator, null);
581     }
582 
join(Object[] c, String separator)583     public static String join(Object[] c, String separator) {
584         return join(c, separator, null);
585     }
586 
join(Collection<T> c, String separator, Transform<T, String> transform)587     public static <T> String join(Collection<T> c, String separator, Transform<T, String> transform) {
588         StringBuffer output = new StringBuffer();
589         boolean isFirst = true;
590         for (T item : c) {
591             if (isFirst) {
592                 isFirst = false;
593             } else {
594                 output.append(separator);
595             }
596             output.append(transform != null ? transform.transform(item) : item == null ? item : item.toString());
597         }
598         return output.toString();
599     }
600 
join(T[] c, String separator, Transform<T, String> transform)601     public static <T> String join(T[] c, String separator, Transform<T, String> transform) {
602         return join(Arrays.asList(c), separator, transform);
603     }
604 
605     /**
606      * Utility like Arrays.asList()
607      */
608     @SuppressWarnings("unchecked")
asMap(Object[][] source, Map<K, V> target, boolean reverse)609     public static <K, V> Map<K, V> asMap(Object[][] source, Map<K, V> target, boolean reverse) {
610         int from = 0, to = 1;
611         if (reverse) {
612             from = 1;
613             to = 0;
614         }
615         for (int i = 0; i < source.length; ++i) {
616             if (source[i].length != 2) {
617                 throw new IllegalArgumentException("Source must be array of pairs of strings: "
618                     + Arrays.asList(source[i]));
619             }
620             target.put((K) source[i][from], (V) source[i][to]);
621         }
622         return target;
623     }
624 
asMap(Object[][] source)625     public static <K, V> Map<K, V> asMap(Object[][] source) {
626         return asMap(source, new HashMap<K, V>(), false);
627     }
628 
629     /**
630      * Returns the canonical name for a file.
631      */
getCanonicalName(String file)632     public static String getCanonicalName(String file) {
633         try {
634             return new File(file).getCanonicalPath();
635         } catch (Exception e) {
636             return file;
637         }
638     }
639 
640     /**
641      * Convert a UnicodeSet into a string that can be embedded into a Regex. Handles strings that are in the UnicodeSet,
642      * Supplementary ranges, and escaping
643      *
644      * @param source
645      *            The source set
646      * @param escaper
647      *            A transliterator that is used to escape the characters according to the requirements of the regex.
648      * @return
649      */
toRegex(UnicodeSet source)650     public static String toRegex(UnicodeSet source) {
651         return toRegex(source, null, false);
652     }
653 
654     private static final Transliterator DEFAULT_REGEX_ESCAPER = Transliterator.createFromRules(
655         "foo",
656         "([ \\- \\\\ \\[ \\] ]) > '\\' $1 ;"
657             // + " ([:c:]) > &hex($1);"
658             + " ([[:control:][[:z:]&[:ascii:]]]) > &hex($1);",
659         Transliterator.FORWARD);
660 
661     /**
662      * Convert a UnicodeSet into a string that can be embedded into a Regex.
663      * Handles strings that are in the UnicodeSet, Supplementary ranges, and
664      * escaping
665      *
666      * @param source
667      *            The source set
668      * @param escaper
669      *            A transliterator that is used to escape the characters according
670      *            to the requirements of the regex. The default puts a \\ before [, -,
671      *            \, and ], and converts controls and Ascii whitespace to hex.
672      *            Alternatives can be supplied. Note that some Regex engines,
673      *            including Java 1.5, don't really deal with escaped supplementaries
674      *            well.
675      * @param onlyBmp
676      *            Set to true if the Regex only accepts BMP characters. In that
677      *            case, ranges of supplementary characters are converted to lists of
678      *            ranges. For example, [\uFFF0-\U0010000F \U0010100F-\U0010300F]
679      *            converts into:
680      *
681      *            <pre>
682      *          [\uD800][\uDC00-\uDFFF]
683      *          [\uD801-\uDBBF][\uDC00-\uDFFF]
684      *          [\uDBC0][\uDC00-\uDC0F]
685      * </pre>
686      *
687      *            and
688      *
689      *            <pre>
690      *          [\uDBC4][\uDC0F-\uDFFF]
691      *          [\uDBC5-\uDBCB][\uDC00-\uDFFF]
692      *          [\uDBCC][\uDC00-\uDC0F]
693      * </pre>
694      *
695      *            These are then coalesced into a list of alternatives by sharing
696      *            parts where feasible. For example, the above turns into 3 pairs of ranges:
697      *
698      *            <pre>
699      *          [\uDBC0\uDBCC][\uDC00-\uDC0F]|\uDBC4[\uDC0F-\uDFFF]|[\uD800-\uDBBF\uDBC5-\uDBCB][\uDC00-\uDFFF]
700      * </pre>
701      *
702      * @return escaped string. Something like [a-z] or (?:[a-m]|{zh}) if there is
703      *         a string zh in the set, or a more complicated case for
704      *         supplementaries. <br>
705      *         Special cases: [] returns "", single item returns a string
706      *         (escaped), like [a] => "a", or [{abc}] => "abc"<br>
707      *         Supplementaries are handled specially, as described under onlyBmp.
708      */
    public static String toRegex(UnicodeSet source, Transliterator escaper, boolean onlyBmp) {
        // Fall back to the class-wide default escaper when the caller passes none.
        if (escaper == null) {
            escaper = DEFAULT_REGEX_ESCAPER;
        }
        UnicodeSetIterator it = new UnicodeSetIterator(source);
        // Special cases: an empty set yields "", a single item yields just that item (escaped),
        // with no brackets or grouping around it.
        if (source.size() == 0) {
            return "";
        }
        if (source.size() == 1) {
            it.next();
            return escaper.transliterate(it.getString());
        }
        // Otherwise collect the pieces of the result:
        //   base        - the character class "[...]" holding the ranges written directly
        //   alternates  - "|"-prefixed alternatives (strings and supplementary lead/trail pairs)
        //   lastToFirst - maps a set of trail values to the set of lead values that share it
        StringBuilder base = new StringBuilder("[");
        StringBuilder alternates = new StringBuilder();
        Map<UnicodeSet, UnicodeSet> lastToFirst = new TreeMap<UnicodeSet, UnicodeSet>(new UnicodeSetComparator());
        int alternateCount = 0; // number of alternatives appended to 'alternates'
        while (it.nextRange()) {
            if (it.codepoint == UnicodeSetIterator.IS_STRING) {
                // multi-character strings cannot live in a character class; each is an alternative
                ++alternateCount;
                alternates.append('|').append(escaper.transliterate(it.string));
            } else if (!onlyBmp || it.codepointEnd <= 0xFFFF) { // BMP
                // Either the whole range is at or below U+FFFF, or onlyBmp is false and the
                // range is written into the class as-is.
                addBmpRange(it.codepoint, it.codepointEnd, escaper, base);
            } else { // supplementary
                if (it.codepoint <= 0xFFFF) {
                    // The range straddles U+FFFF: emit the lower part as a normal range, then
                    // overwrite the iterator's current start so only the part from U+10000 up
                    // is split into lead/trail values below.
                    addBmpRange(it.codepoint, 0xFFFF, escaper, base);
                    it.codepoint = 0x10000; // reset the range
                }
                // this gets a bit ugly; we are trying to minimize the extra ranges for supplementaries
                // we do this by breaking up X-Y based on the Lead and Trail values for X and Y
                // Lx [Tx - Ty]) (if Lx == Ly)
                // Lx [Tx - DFFF] | Ly [DC00-Ty] (if Lx == Ly - 1)
                // Lx [Tx - DFFF] | [Lx+1 - Ly-1][DC00-DFFF] | Ly [DC00-Ty] (otherwise)
                int leadX = UTF16.getLeadSurrogate(it.codepoint);
                int trailX = UTF16.getTrailSurrogate(it.codepoint);
                int leadY = UTF16.getLeadSurrogate(it.codepointEnd);
                int trailY = UTF16.getTrailSurrogate(it.codepointEnd);
                if (leadX == leadY) {
                    addSupplementalRange(leadX, leadX, trailX, trailY, escaper, lastToFirst);
                } else {
                    addSupplementalRange(leadX, leadX, trailX, 0xDFFF, escaper, lastToFirst);
                    if (leadX != leadY - 1) {
                        addSupplementalRange(leadX + 1, leadY - 1, 0xDC00, 0xDFFF, escaper, lastToFirst);
                    }
                    addSupplementalRange(leadY, leadY, 0xDC00, trailY, escaper, lastToFirst);
                }
            }
        }
        // add in the supplementary ranges: each map entry becomes one alternative of the form
        // <regex for the lead set><regex for the trail set>, both produced by recursive calls
        if (lastToFirst.size() != 0) {
            for (UnicodeSet last : lastToFirst.keySet()) {
                ++alternateCount;
                alternates.append('|').append(toRegex(lastToFirst.get(last), escaper, onlyBmp))
                    .append(toRegex(last, escaper, onlyBmp));
            }
        }
        // Return the output. We separate cases in order to get the minimal extra apparatus
        base.append("]");
        if (alternateCount == 0) {
            // only the character class
            return base.toString();
        } else if (base.length() > 2) {
            // the class is non-empty (longer than "[]"): group it together with the alternatives;
            // substring(1) drops the leading '|' of 'alternates'
            return "(?:" + base + "|" + alternates.substring(1) + ")";
        } else if (alternateCount == 1) {
            // empty class and exactly one alternative: no grouping needed
            return alternates.substring(1);
        } else {
            // empty class and several alternatives
            return "(?:" + alternates.substring(1) + ")";
        }
    }
778 
addSupplementalRange(int leadX, int leadY, int trailX, int trailY, Transliterator escaper, Map<UnicodeSet, UnicodeSet> lastToFirst)779     private static void addSupplementalRange(int leadX, int leadY, int trailX, int trailY, Transliterator escaper,
780         Map<UnicodeSet, UnicodeSet> lastToFirst) {
781         System.out.println("\tadding: " + new UnicodeSet(leadX, leadY) + "\t" + new UnicodeSet(trailX, trailY));
782         UnicodeSet last = new UnicodeSet(trailX, trailY);
783         UnicodeSet first = lastToFirst.get(last);
784         if (first == null) {
785             lastToFirst.put(last, first = new UnicodeSet());
786         }
787         first.add(leadX, leadY);
788     }
789 
addBmpRange(int start, int limit, Transliterator escaper, StringBuilder base)790     private static void addBmpRange(int start, int limit, Transliterator escaper, StringBuilder base) {
791         base.append(escaper.transliterate(UTF16.valueOf(start)));
792         if (start != limit) {
793             base.append("-").append(escaper.transliterate(UTF16.valueOf(limit)));
794         }
795     }
796 
797     public static class UnicodeSetComparator implements Comparator<UnicodeSet> {
compare(UnicodeSet o1, UnicodeSet o2)798         public int compare(UnicodeSet o1, UnicodeSet o2) {
799             return o1.compareTo(o2);
800         }
801     }
802 
803     public static class CollectionComparator<T extends Comparable<T>> implements Comparator<Collection<T>> {
compare(Collection<T> o1, Collection<T> o2)804         public int compare(Collection<T> o1, Collection<T> o2) {
805             return UnicodeSet.compare(o1, o2, UnicodeSet.ComparisonStyle.SHORTER_FIRST);
806         }
807     }
808 
809     public static class ComparableComparator<T extends Comparable<T>> implements Comparator<T> {
compare(T arg0, T arg1)810         public int compare(T arg0, T arg1) {
811             return Utility.checkCompare(arg0, arg1);
812         }
813     }
814 
815     @SuppressWarnings({ "rawtypes", "unchecked" })
addTreeMapChain(Map coverageData, Object... objects)816     public static void addTreeMapChain(Map coverageData, Object... objects) {
817         Map<Object, Object> base = coverageData;
818         for (int i = 0; i < objects.length - 2; ++i) {
819             Map<Object, Object> nextOne = (Map<Object, Object>) base.get(objects[i]);
820             if (nextOne == null) base.put(objects[i], nextOne = new TreeMap<Object, Object>());
821             base = nextOne;
822         }
823         base.put(objects[objects.length - 2], objects[objects.length - 1]);
824     }
825 
826     public static abstract class CollectionTransform<S, T> implements Transform<S, T> {
transform(S source)827         public abstract T transform(S source);
828 
transform(Collection<S> input, Collection<T> output)829         public Collection<T> transform(Collection<S> input, Collection<T> output) {
830             return CldrUtility.transform(input, this, output);
831         }
832 
transform(Collection<S> input)833         public Collection<T> transform(Collection<S> input) {
834             return transform(input, new ArrayList<T>());
835         }
836     }
837 
transform(SC source, Transform<S, T> transform, TC target)838     public static <S, T, SC extends Collection<S>, TC extends Collection<T>> TC transform(SC source, Transform<S, T> transform, TC target) {
839         for (S sourceItem : source) {
840             T targetItem = transform.transform(sourceItem);
841             if (targetItem != null) {
842                 target.add(targetItem);
843             }
844         }
845         return target;
846     }
847 
transform( SM source, Transform<SK, TK> transformKey, Transform<SV, TV> transformValue, TM target)848     public static <SK, SV, TK, TV, SM extends Map<SK, SV>, TM extends Map<TK, TV>> TM transform(
849         SM source, Transform<SK, TK> transformKey, Transform<SV, TV> transformValue, TM target) {
850         for (Entry<SK, SV> sourceEntry : source.entrySet()) {
851             TK targetKey = transformKey.transform(sourceEntry.getKey());
852             TV targetValue = transformValue.transform(sourceEntry.getValue());
853             if (targetKey != null && targetValue != null) {
854                 target.put(targetKey, targetValue);
855             }
856         }
857         return target;
858     }
859 
860     public static abstract class Apply<T> {
apply(T item)861         public abstract void apply(T item);
862 
applyTo(U collection)863         public <U extends Collection<T>> void applyTo(U collection) {
864             for (T item : collection) {
865                 apply(item);
866             }
867         }
868     }
869 
870     public static abstract class Filter<T> {
871 
contains(T item)872         public abstract boolean contains(T item);
873 
retainAll(U c)874         public <U extends Collection<T>> U retainAll(U c) {
875             for (Iterator<T> it = c.iterator(); it.hasNext();) {
876                 if (!contains(it.next())) it.remove();
877             }
878             return c;
879         }
880 
extractMatches(U c, U target)881         public <U extends Collection<T>> U extractMatches(U c, U target) {
882             for (Iterator<T> it = c.iterator(); it.hasNext();) {
883                 T item = it.next();
884                 if (contains(item)) {
885                     target.add(item);
886                 }
887             }
888             return target;
889         }
890 
removeAll(U c)891         public <U extends Collection<T>> U removeAll(U c) {
892             for (Iterator<T> it = c.iterator(); it.hasNext();) {
893                 if (contains(it.next())) it.remove();
894             }
895             return c;
896         }
897 
extractNonMatches(U c, U target)898         public <U extends Collection<T>> U extractNonMatches(U c, U target) {
899             for (Iterator<T> it = c.iterator(); it.hasNext();) {
900                 T item = it.next();
901                 if (!contains(item)) {
902                     target.add(item);
903                 }
904             }
905             return target;
906         }
907     }
908 
909     public static class MatcherFilter<T> extends Filter<T> {
910         private Matcher matcher;
911 
MatcherFilter(String pattern)912         public MatcherFilter(String pattern) {
913             this.matcher = PatternCache.get(pattern).matcher("");
914         }
915 
MatcherFilter(Matcher matcher)916         public MatcherFilter(Matcher matcher) {
917             this.matcher = matcher;
918         }
919 
set(Matcher matcher)920         public MatcherFilter<T> set(Matcher matcher) {
921             this.matcher = matcher;
922             return this;
923         }
924 
set(String pattern)925         public MatcherFilter<T> set(String pattern) {
926             this.matcher = PatternCache.get(pattern).matcher("");
927             return this;
928         }
929 
contains(T o)930         public boolean contains(T o) {
931             return matcher.reset(o.toString()).matches();
932         }
933     }
934 
935     // static final class HandlingTransform implements Transform<String, Handling> {
936     // @Override
937     // public Handling transform(String source) {
938     // return Handling.valueOf(source);
939     // }
940     // }
941 
942     public static final class PairComparator<K extends Comparable<K>, V extends Comparable<V>> implements java.util.Comparator<Pair<K, V>> {
943 
944         private Comparator<K> comp1;
945         private Comparator<V> comp2;
946 
PairComparator(Comparator<K> comp1, Comparator<V> comp2)947         public PairComparator(Comparator<K> comp1, Comparator<V> comp2) {
948             this.comp1 = comp1;
949             this.comp2 = comp2;
950         }
951 
952         @Override
compare(Pair<K, V> o1, Pair<K, V> o2)953         public int compare(Pair<K, V> o1, Pair<K, V> o2) {
954             {
955                 K o1First = o1.getFirst();
956                 K o2First = o2.getFirst();
957                 int diff = o1First == null ? (o2First == null ? 0 : -1)
958                     : o2First == null ? 1
959                         : comp1 == null ? o1First.compareTo(o2First)
960                             : comp1.compare(o1First, o2First);
961                 if (diff != 0) {
962                     return diff;
963                 }
964             }
965             V o1Second = o1.getSecond();
966             V o2Second = o2.getSecond();
967             return o1Second == null ? (o2Second == null ? 0 : -1)
968                 : o2Second == null ? 1
969                     : comp2 == null ? o1Second.compareTo(o2Second)
970                         : comp2.compare(o1Second, o2Second);
971         }
972 
973     }
974 
975     /**
976      * Fetch data from jar
977      *
978      * @param name
979      *            a name residing in the org/unicode/cldr/util/data/ directory, or loading from a jar will break.
980      */
getUTF8Data(String name)981     public static BufferedReader getUTF8Data(String name) {
982         if (new File(name).isAbsolute()) {
983             throw new IllegalArgumentException(
984                 "Path must be relative to org/unicode/cldr/util/data  such as 'file.txt' or 'casing/file.txt', but got '"
985                     + name + "'.");
986         }
987 
988         return FileReaders.openFile(CldrUtility.class, "data/" + name);
989     }
990 
991     /**
992      * Fetch data from jar
993      *
994      * @param name
995      *            a name residing in the org/unicode/cldr/util/data/ directory, or loading from a jar will break.
996      */
getInputStream(String name)997     public static InputStream getInputStream(String name) {
998         if (new File(name).isAbsolute()) {
999             throw new IllegalArgumentException(
1000                 "Path must be relative to org/unicode/cldr/util/data  such as 'file.txt' or 'casing/file.txt', but got '"
1001                     + name + "'.");
1002         }
1003         return getInputStream(CldrUtility.class, "data/" + name);
1004     }
1005 
1006     @SuppressWarnings("resource")
getInputStream(Class<?> callingClass, String relativePath)1007     public static InputStream getInputStream(Class<?> callingClass, String relativePath) {
1008         InputStream is = callingClass.getResourceAsStream(relativePath);
1009         // add buffering
1010         return InputStreamFactory.buffer(is);
1011     }
1012 
1013     /**
1014      * Takes a Map that goes from Object to Set, and fills in the transpose
1015      *
1016      * @param source_key_valueSet
1017      * @param output_value_key
1018      */
putAllTransposed(Map<Object, Set<Object>> source_key_valueSet, Map<Object, Object> output_value_key)1019     public static void putAllTransposed(Map<Object, Set<Object>> source_key_valueSet, Map<Object, Object> output_value_key) {
1020         for (Iterator<Object> it = source_key_valueSet.keySet().iterator(); it.hasNext();) {
1021             Object key = it.next();
1022             Set<Object> values = source_key_valueSet.get(key);
1023             for (Iterator<Object> it2 = values.iterator(); it2.hasNext();) {
1024                 Object value = it2.next();
1025                 output_value_key.put(value, key);
1026             }
1027         }
1028     }
1029 
countInstances(String source, String substring)1030     public static int countInstances(String source, String substring) {
1031         int count = 0;
1032         int pos = 0;
1033         while (true) {
1034             pos = source.indexOf(substring, pos) + 1;
1035             if (pos <= 0) break;
1036             count++;
1037         }
1038         return count;
1039     }
1040 
registerTransliteratorFromFile(String id, String dir, String filename)1041     public static void registerTransliteratorFromFile(String id, String dir, String filename) {
1042         registerTransliteratorFromFile(id, dir, filename, Transliterator.FORWARD, true);
1043         registerTransliteratorFromFile(id, dir, filename, Transliterator.REVERSE, true);
1044     }
1045 
registerTransliteratorFromFile(String id, String dir, String filename, int direction, boolean reverseID)1046     public static void registerTransliteratorFromFile(String id, String dir, String filename, int direction,
1047         boolean reverseID) {
1048         if (filename == null) {
1049             filename = id.replace('-', '_');
1050             filename = filename.replace('/', '_');
1051             filename += ".txt";
1052         }
1053         String rules = getText(dir, filename);
1054         Transliterator t;
1055         int pos = id.indexOf('-');
1056         String rid;
1057         if (pos < 0) {
1058             rid = id + "-Any";
1059             id = "Any-" + id;
1060         } else {
1061             rid = id.substring(pos + 1) + "-" + id.substring(0, pos);
1062         }
1063         if (!reverseID) rid = id;
1064 
1065         if (direction == Transliterator.FORWARD) {
1066             Transliterator.unregister(id);
1067             t = Transliterator.createFromRules(id, rules, Transliterator.FORWARD);
1068             Transliterator.registerInstance(t);
1069             System.out.println("Registered new Transliterator: " + id);
1070         }
1071 
1072         /*
1073          * String test = "\u049A\u0430\u0437\u0430\u049B";
1074          * System.out.println(t.transliterate(test));
1075          * t = Transliterator.getInstance(id);
1076          * System.out.println(t.transliterate(test));
1077          */
1078 
1079         if (direction == Transliterator.REVERSE) {
1080             Transliterator.unregister(rid);
1081             t = Transliterator.createFromRules(rid, rules, Transliterator.REVERSE);
1082             Transliterator.registerInstance(t);
1083             System.out.println("Registered new Transliterator: " + rid);
1084         }
1085     }
1086 
getText(String dir, String filename)1087     public static String getText(String dir, String filename) {
1088         try {
1089             BufferedReader br = FileUtilities.openUTF8Reader(dir, filename);
1090             StringBuffer buffer = new StringBuffer();
1091             while (true) {
1092                 String line = br.readLine();
1093                 if (line == null) break;
1094                 if (line.length() > 0 && line.charAt(0) == '\uFEFF') line = line.substring(1);
1095                 if (line.startsWith("//")) continue;
1096                 buffer.append(line).append(CldrUtility.LINE_SEPARATOR);
1097             }
1098             br.close();
1099             String rules = buffer.toString();
1100             return rules;
1101         } catch (IOException e) {
1102             throw (IllegalArgumentException) new IllegalArgumentException("Can't open " + dir + ", " + filename)
1103                 .initCause(e);
1104         }
1105     }
1106 
callMethod(String methodNames, Class<?> cls)1107     public static void callMethod(String methodNames, Class<?> cls) {
1108         for (String methodName : methodNames.split(",")) {
1109             try {
1110                 Method method;
1111                 try {
1112                     method = cls.getMethod(methodName, (Class[]) null);
1113                     try {
1114                         method.invoke(null, (Object[]) null);
1115                     } catch (Exception e) {
1116                         e.printStackTrace();
1117                     }
1118                 } catch (Exception e) {
1119                     System.out.println("No such method: " + methodName);
1120                     showMethods(cls);
1121                 }
1122             } catch (ClassNotFoundException e) {
1123                 e.printStackTrace();
1124             }
1125         }
1126     }
1127 
showMethods(Class<?> cls)1128     public static void showMethods(Class<?> cls) throws ClassNotFoundException {
1129         System.out.println("Possible methods of " + cls.getCanonicalName() + " are: ");
1130         Method[] methods = cls.getMethods();
1131         Set<String> names = new TreeSet<String>();
1132         for (int i = 0; i < methods.length; ++i) {
1133             if (methods[i].getGenericParameterTypes().length != 0) continue;
1134             //int mods = methods[i].getModifiers();
1135             // if (!Modifier.isStatic(mods)) continue;
1136             String name = methods[i].getName();
1137             names.add(name);
1138         }
1139         for (Iterator<String> it = names.iterator(); it.hasNext();) {
1140             System.out.println("\t" + it.next());
1141         }
1142     }
1143 
1144     /**
1145      * Breaks lines if they are too long, or if matcher.group(1) != last. Only breaks just before matcher.
1146      *
1147      * @param input
1148      * @param separator
1149      * @param matcher
1150      *            must match each possible item. The first group is significant; if different, will cause break
1151      * @return
1152      */
breakLines(CharSequence input, String separator, Matcher matcher, int width)1153     static public String breakLines(CharSequence input, String separator, Matcher matcher, int width) {
1154         StringBuffer output = new StringBuffer();
1155         String lastPrefix = "";
1156         int lastEnd = 0;
1157         int lastBreakPos = 0;
1158         matcher.reset(input);
1159         while (true) {
1160             boolean match = matcher.find();
1161             if (!match) {
1162                 output.append(input.subSequence(lastEnd, input.length()));
1163                 break;
1164             }
1165             String prefix = matcher.group(1);
1166             if (!prefix.equalsIgnoreCase(lastPrefix) || matcher.end() - lastBreakPos > width) { // break before?
1167                 output.append(separator);
1168                 lastBreakPos = lastEnd;
1169             } else if (lastEnd != 0) {
1170                 output.append(' ');
1171             }
1172             output.append(input.subSequence(lastEnd, matcher.end()).toString().trim());
1173             lastEnd = matcher.end();
1174             lastPrefix = prefix;
1175         }
1176         return output.toString();
1177     }
1178 
showOptions(String[] args)1179     public static void showOptions(String[] args) {
1180         // Properties props = System.getProperties();
1181         System.out.println("Arguments: " + join(args, " ")); // + (props == null ? "" : " " + props));
1182     }
1183 
roundToDecimals(double input, int places)1184     public static double roundToDecimals(double input, int places) {
1185         double log10 = Math.log10(input); // 15000 => 4.xxx
1186         double intLog10 = Math.floor(log10);
1187         double scale = Math.pow(10, intLog10 - places + 1);
1188         double factored = Math.round(input / scale) * scale;
1189         // System.out.println("###\t" +input + "\t" + factored);
1190         return factored;
1191     }
1192 
1193     /**
1194      * Get a property value, returning the value if there is one (eg -Dkey=value),
1195      * otherwise the default value (for either empty or null).
1196      *
1197      * @param key
1198      * @param valueIfNull
1199      * @param valueIfEmpty
1200      * @return
1201      */
getProperty(String key, String defaultValue)1202     public static String getProperty(String key, String defaultValue) {
1203         return getProperty(key, defaultValue, defaultValue);
1204     }
1205 
1206     /**
1207      * Get a property value, returning the value if there is one, otherwise null.
1208      */
getProperty(String key)1209     public static String getProperty(String key) {
1210         return getProperty(key, null, null);
1211     }
1212 
1213     /**
1214      * Get a property value, returning the value if there is one (eg -Dkey=value),
1215      * the valueIfEmpty if there is one with no value (eg -Dkey) and the valueIfNull
1216      * if there is no property.
1217      *
1218      * @param key
1219      * @param valueIfNull
1220      * @param valueIfEmpty
1221      * @return
1222      */
getProperty(String key, String valueIfNull, String valueIfEmpty)1223     public static String getProperty(String key, String valueIfNull, String valueIfEmpty) {
1224         String result = CLDRConfig.getInstance().getProperty(key);
1225         if (result == null) {
1226             result = valueIfNull;
1227         } else if (result.length() == 0) {
1228             result = valueIfEmpty;
1229         }
1230         return result;
1231     }
1232 
hex(byte[] bytes, int start, int end, String separator)1233     public static String hex(byte[] bytes, int start, int end, String separator) {
1234         StringBuilder result = new StringBuilder();
1235         for (int i = 0; i < end; ++i) {
1236             if (result.length() != 0) {
1237                 result.append(separator);
1238             }
1239             result.append(Utility.hex(bytes[i] & 0xFF, 2));
1240         }
1241         return result.toString();
1242     }
1243 
getProperty(String string, boolean b)1244     public static boolean getProperty(String string, boolean b) {
1245         return getProperty(string, b ? "true" : "false", "true").matches("(?i)T|TRUE");
1246     }
1247 
checkValidDirectory(String sourceDirectory)1248     public static String checkValidDirectory(String sourceDirectory) {
1249         return checkValidFile(sourceDirectory, true, null);
1250     }
1251 
checkValidDirectory(String sourceDirectory, String correction)1252     public static String checkValidDirectory(String sourceDirectory, String correction) {
1253         return checkValidFile(sourceDirectory, true, correction);
1254     }
1255 
checkValidFile(String sourceDirectory, boolean checkForDirectory, String correction)1256     public static String checkValidFile(String sourceDirectory, boolean checkForDirectory, String correction) {
1257         File file = null;
1258         String canonicalPath = null;
1259         try {
1260             file = new File(sourceDirectory);
1261             canonicalPath = file.getCanonicalPath() + File.separatorChar;
1262         } catch (Exception e) {
1263         }
1264         if (file == null || canonicalPath == null || checkForDirectory && !file.isDirectory()) {
1265             throw new RuntimeException("Directory not found: " + sourceDirectory
1266                 + (canonicalPath == null ? "" : " => " + canonicalPath)
1267                 + (correction == null ? "" : CldrUtility.LINE_SEPARATOR + correction));
1268         }
1269         return canonicalPath;
1270     }
1271 
1272     /**
1273      * Copy up to matching line (not included). If output is null, then just skip until.
1274      *
1275      * @param oldFile
1276      *            file to copy
1277      * @param readUntilPattern
1278      *            pattern to search for. If null, goes to end of file.
1279      * @param output
1280      *            into to copy into. If null, just skips in the input.
1281      * @param includeMatchingLine
1282      *            inclde the matching line when copying.
1283      * @throws IOException
1284      */
copyUpTo(BufferedReader oldFile, final Pattern readUntilPattern, final PrintWriter output, boolean includeMatchingLine)1285     public static void copyUpTo(BufferedReader oldFile, final Pattern readUntilPattern,
1286         final PrintWriter output, boolean includeMatchingLine) throws IOException {
1287         Matcher readUntil = readUntilPattern == null ? null : readUntilPattern.matcher("");
1288         while (true) {
1289             String line = oldFile.readLine();
1290             if (line == null) {
1291                 break;
1292             }
1293             if (line.startsWith("\uFEFF")) {
1294                 line = line.substring(1);
1295             }
1296             if (readUntil != null && readUntil.reset(line).matches()) {
1297                 if (includeMatchingLine && output != null) {
1298                     output.println(line);
1299                 }
1300                 break;
1301             }
1302             if (output != null) {
1303                 output.println(line);
1304             }
1305         }
1306     }
1307 
    // Formatter for full timestamps, pattern "yyyy-MM-dd HH:mm:ss 'GMT'" (the 'GMT' suffix is a
    // literal). Shared instance: isoFormat() synchronizes on it before formatting.
    private static DateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss 'GMT'");
    // Formatter for the date part only, pattern "yyyy-MM-dd". Shared instance:
    // isoFormatDateOnly() synchronizes on it before formatting.
    private static DateFormat DATE_ONLY = new SimpleDateFormat("yyyy-MM-dd");
    static {
        // Both formatters are pinned to GMT so that the output matches the literal 'GMT' suffix
        // and does not depend on the default time zone.
        df.setTimeZone(TimeZone.getTimeZone("GMT"));
        DATE_ONLY.setTimeZone(TimeZone.getTimeZone("GMT"));
    }
1314 
isoFormat(Date date)1315     public static String isoFormat(Date date) {
1316         synchronized (df) {
1317             return df.format(date);
1318         }
1319     }
1320 
isoFormatDateOnly(Date date)1321     public static String isoFormatDateOnly(Date date) {
1322         synchronized (DATE_ONLY) {
1323             return DATE_ONLY.format(date);
1324         }
1325     }
1326 
newConcurrentHashMap()1327     public static <K, V> ConcurrentHashMap<K, V> newConcurrentHashMap() {
1328         // http://ria101.wordpress.com/2011/12/12/concurrenthashmap-avoid-a-common-misuse/
1329         return new ConcurrentHashMap<K, V>(4, 0.9f, 1);
1330     }
1331 
newConcurrentHashMap(Map<K, V> source)1332     public static <K, V> ConcurrentHashMap<K, V> newConcurrentHashMap(Map<K, V> source) {
1333         ConcurrentHashMap<K, V> result = newConcurrentHashMap();
1334         result.putAll(source);
1335         return result;
1336     }
1337 
equals(Object a, Object b)1338     public static boolean equals(Object a, Object b) {
1339         return a == b ? true
1340             : a == null || b == null ? false
1341                 : a.equals(b);
1342     }
1343 
getDoubleLink(String code)1344     public static String getDoubleLink(String code) {
1345         final String anchorSafe = TransliteratorUtilities.toHTML.transliterate(code).replace(" ", "_");
1346         return "<a name='" + anchorSafe + "' href='#" + anchorSafe + "'>";
1347     }
1348 
getDoubleLinkedText(String anchor, String anchorText)1349     public static String getDoubleLinkedText(String anchor, String anchorText) {
1350         return getDoubleLink(anchor) + TransliteratorUtilities.toHTML.transliterate(anchorText).replace("_", " ")
1351             + "</a>";
1352     }
1353 
getDoubleLinkedText(String anchor)1354     public static String getDoubleLinkedText(String anchor) {
1355         return getDoubleLinkedText(anchor, anchor);
1356     }
1357 
getDoubleLinkMsg()1358     public static String getDoubleLinkMsg() {
1359         return "<a name=''{0}'' href=''#{0}''>{0}</a>";
1360     }
1361 
getDoubleLinkMsg2()1362     public static String getDoubleLinkMsg2() {
1363         return "<a name=''{0}{1}'' href=''#{0}{1}''>{0}</a>";
1364     }
1365 
getCopyrightString()1366     public static String getCopyrightString() {
1367         // now do the rest
1368         return "Copyright \u00A9 1991-" + Calendar.getInstance().get(Calendar.YEAR) + " Unicode, Inc." + CldrUtility.LINE_SEPARATOR
1369             + "For terms of use, see http://www.unicode.org/copyright.html" + CldrUtility.LINE_SEPARATOR
1370             + "Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries." + CldrUtility.LINE_SEPARATOR
1371             + "CLDR data files are interpreted according to the LDML specification " + "(http://unicode.org/reports/tr35/)";
1372     }
1373 
1374     // TODO Move to collection utilities
1375     /**
1376      * Type-safe get
1377      * @param map
1378      * @param key
1379      * @return value
1380      */
get(M map, K key)1381     public static <K, V, M extends Map<K, V>> V get(M map, K key) {
1382         return map.get(key);
1383     }
1384 
1385     /**
1386      * Type-safe contains
1387      * @param map
1388      * @param key
1389      * @return value
1390      */
contains(C collection, K key)1391     public static <K, C extends Collection<K>> boolean contains(C collection, K key) {
1392         return collection.contains(key);
1393     }
1394 
toEnumSet(Class<E> classValue, Collection<String> stringValues)1395     public static <E extends Enum<E>> EnumSet<E> toEnumSet(Class<E> classValue, Collection<String> stringValues) {
1396         EnumSet<E> result = EnumSet.noneOf(classValue);
1397         for (String s : stringValues) {
1398             result.add(Enum.valueOf(classValue, s));
1399         }
1400         return result;
1401     }
1402 
putNew(M map, K key, V value)1403     public static <K, V, M extends Map<K, V>> M putNew(M map, K key, V value) {
1404         if (!map.containsKey(key)) {
1405             map.put(key, value);
1406         }
1407         return map;
1408     }
1409 
cleanSemiFields(String line)1410     public static String[] cleanSemiFields(String line) {
1411         line = cleanLine(line);
1412         return line.isEmpty() ? null : SEMI_SPLIT.split(line);
1413     }
1414 
cleanLine(String line)1415     private static String cleanLine(String line) {
1416         int comment = line.indexOf("#");
1417         if (comment >= 0) {
1418             line = line.substring(0, comment);
1419         }
1420         if (line.startsWith("\uFEFF")) {
1421             line = line.substring(1);
1422         }
1423         return line.trim();
1424     }
1425 
handleFile(String filename, LineHandler handler)1426     public static void handleFile(String filename, LineHandler handler) throws IOException {
1427         try (BufferedReader in = getUTF8Data(filename);) {
1428             String line = null;
1429             while ((line = in.readLine()) != null) {
1430                 //                String line = in.readLine();
1431                 //                if (line == null) {
1432                 //                    break;
1433                 //                }
1434                 try {
1435                     if (!handler.handle(line)) {
1436                         if (HANDLEFILE_SHOW_SKIP) {
1437                             System.out.println("Skipping line: " + line);
1438                         }
1439                     }
1440                 } catch (Exception e) {
1441                     throw (RuntimeException) new IllegalArgumentException("Problem with line: " + line)
1442                         .initCause(e);
1443                 }
1444             }
1445         }
1446         //        in.close();
1447     }
1448 
ifNull(T x, T y)1449     public static <T> T ifNull(T x, T y) {
1450         return x == null
1451             ? y
1452             : x;
1453     }
1454 
ifSame(T source, T replaceIfSame, T replacement)1455     public static <T> T ifSame(T source, T replaceIfSame, T replacement) {
1456         return source == replaceIfSame ? replacement : source;
1457     }
1458 
ifEqual(T source, T replaceIfSame, T replacement)1459     public static <T> T ifEqual(T source, T replaceIfSame, T replacement) {
1460         return Objects.equals(source, replaceIfSame) ? replacement : source;
1461     }
1462 
intersect(Set<T> a, Collection<T> b)1463     public static <T> Set<T> intersect(Set<T> a, Collection<T> b) {
1464         Set<T> result = new LinkedHashSet<>(a);
1465         result.retainAll(b);
1466         return result;
1467     }
1468 
subtract(Set<T> a, Collection<T> b)1469     public static <T> Set<T> subtract(Set<T> a, Collection<T> b) {
1470         Set<T> result = new LinkedHashSet<>(a);
1471         result.removeAll(b);
1472         return result;
1473     }
1474 
logRegexLookup(TestFmwk testFramework, RegexLookup<T> lookup, String toLookup)1475     public static <T> void logRegexLookup(TestFmwk testFramework, RegexLookup<T> lookup, String toLookup) {
1476         Output<String[]> arguments = new Output<>();
1477         Output<Finder> matcherFound = new Output<>();
1478         List<String> failures = new ArrayList<String>();
1479         lookup.get(toLookup, null, arguments, matcherFound, failures);
1480         testFramework.logln("lookup arguments: " + (arguments.value == null ? "null" : Arrays.asList(arguments.value)));
1481         testFramework.logln("lookup matcherFound: " + matcherFound);
1482         for (String s : failures) {
1483             testFramework.logln(s);
1484         }
1485     }
1486 
deepEquals(Object... pairs)1487     public static boolean deepEquals(Object... pairs) {
1488         for (int item = 0; item < pairs.length;) {
1489             if (!Objects.deepEquals(pairs[item++], pairs[item++])) {
1490                 return false;
1491             }
1492         }
1493         return true;
1494     }
1495 
array(Splitter splitter, String source)1496     public static String[] array(Splitter splitter, String source) {
1497         List<String> list = splitter.splitToList(source);
1498         return list.toArray(new String[list.size()]);
1499     }
1500 
toHex(String in, boolean javaStyle)1501     public static String toHex(String in, boolean javaStyle) {
1502         StringBuilder result = new StringBuilder();
1503         for (int i = 0; i < in.length(); ++i) {
1504             result.append(toHex(in.charAt(i), javaStyle));
1505         }
1506         return result.toString();
1507     }
1508 
toHex(int j, boolean javaStyle)1509     public static String toHex(int j, boolean javaStyle) {
1510         if (j == '\"') {
1511             return "\\\"";
1512         } else if (j == '\\') {
1513             return "\\\\";
1514         } else if (0x20 < j && j < 0x7F) {
1515             return String.valueOf((char) j);
1516         }
1517         final String hexString = Integer.toHexString(j).toUpperCase();
1518         int gap = 4 - hexString.length();
1519         if (gap < 0) {
1520             gap = 0;
1521         }
1522         String prefix = javaStyle ? "\\u" : "U+";
1523         return prefix + "000".substring(0, gap) + hexString;
1524     }
1525 
1526     /**
1527      * get string format for debugging, since Java has a useless display for many items
1528      * @param item
1529      * @return
1530      */
toString(Object item)1531     public static String toString(Object item) {
1532         if (item instanceof Object[]) {
1533             return toString(Arrays.asList((Object[]) item));
1534         } else if (item instanceof Entry) {
1535             return toString(((Entry) item).getKey()) + "≔" + toString(((Entry) item).getValue());
1536         } else if (item instanceof Map) {
1537             return "{" + toString(((Map) item).entrySet()) + "}";
1538         } else if (item instanceof Collection) {
1539             List<String> result = new ArrayList<>();
1540             for (Object subitem : (Collection) item) {
1541                 result.add(toString(subitem));
1542             }
1543             return result.toString();
1544         }
1545         return item.toString();
1546     }
1547 }
1548