1 package org.unicode.cldr.util;
2 
3 import java.util.Arrays;
4 import java.util.Collections;
5 import java.util.EnumMap;
6 import java.util.HashMap;
7 import java.util.HashSet;
8 import java.util.Iterator;
9 import java.util.LinkedHashMap;
10 import java.util.LinkedHashSet;
11 import java.util.List;
12 import java.util.Locale;
13 import java.util.Map;
14 import java.util.Map.Entry;
15 import java.util.Set;
16 import java.util.TreeMap;
17 import java.util.TreeSet;
18 import java.util.regex.Matcher;
19 import java.util.regex.Pattern;
20 
21 import org.unicode.cldr.draft.ScriptMetadata;
22 import org.unicode.cldr.draft.ScriptMetadata.Info;
23 import org.unicode.cldr.tool.LikelySubtags;
24 import org.unicode.cldr.util.RegexLookup.Finder;
25 import org.unicode.cldr.util.With.SimpleIterator;
26 
27 import com.google.common.base.Splitter;
28 import com.ibm.icu.dev.util.CollectionUtilities;
29 import com.ibm.icu.impl.Relation;
30 import com.ibm.icu.impl.Row;
31 import com.ibm.icu.lang.UCharacter;
32 import com.ibm.icu.text.Collator;
33 import com.ibm.icu.text.Transform;
34 import com.ibm.icu.util.ICUException;
35 import com.ibm.icu.util.Output;
36 import com.ibm.icu.util.ULocale;
37 
38 /**
39  * Provides a mechanism for dividing up LDML paths into understandable
40  * categories, eg for the Survey tool.
41  */
42 public class PathHeader implements Comparable<PathHeader> {
43     /**
44      * Link to a section. Commenting out the page switch for now.
45      */
46     public static final String SECTION_LINK = "<a " + /* "target='CLDR_ST-SECTION' "+*/"href='";
47     static boolean UNIFORM_CONTINENTS = true;
48     static Factory factorySingleton = null;
49 
50     static final boolean SKIP_ORIGINAL_PATH = true;
51 
/**
 * How the Survey Tool should treat a path. Where noted, the value can be
 * overridden in Phase.getAction().
 */
public enum SurveyToolStatus {
    /** Never show. */
    DEPRECATED,

    /** Hide. Can be overridden in Phase.getAction(). */
    HIDE,

    /**
     * No Change box (except for TC); a ticket is shown instead, but votes
     * are still allowed. Can be overridden in Phase.getAction().
     */
    READ_ONLY,

    /** Change box and votes are both allowed. Can be overridden in Phase.getAction(). */
    READ_WRITE,

    /**
     * Editable exactly like READ_WRITE, but the field is always displayed
     * left-to-right, even in RTL locales (used for patterns).
     */
    LTR_ALWAYS
}
80 
81     private static EnumNames<SectionId> SectionIdNames = new EnumNames<SectionId>();
82 
83     /**
84      * The Section for a path. Don't change these without committee buy-in. The
85      * 'name' may be 'Core_Data' and the toString is 'Core Data' toString gives
86      * the human name
87      */
88     public enum SectionId {
89         Core_Data("Core Data"), Locale_Display_Names("Locale Display Names"), DateTime("Date & Time"), Timezones, Numbers, Currencies, Units, Characters, Misc(
90             "Miscellaneous"), BCP47, Supplemental, Special;
91 
SectionId(String... alternateNames)92         private SectionId(String... alternateNames) {
93             SectionIdNames.add(this, alternateNames);
94         }
95 
forString(String name)96         public static SectionId forString(String name) {
97             return SectionIdNames.forString(name);
98         }
99 
toString()100         public String toString() {
101             return SectionIdNames.toString(this);
102         }
103     }
104 
105     private static EnumNames<PageId> PageIdNames = new EnumNames<PageId>();
106     private static Relation<SectionId, PageId> SectionIdToPageIds = Relation.of(new TreeMap<SectionId, Set<PageId>>(),
107         TreeSet.class);
108 
109     private static class SubstringOrder implements Comparable<SubstringOrder> {
110         final String mainOrder;
111         final int order;
112 
SubstringOrder(String source)113         public SubstringOrder(String source) {
114             int pos = source.lastIndexOf('-') + 1;
115             int ordering = COUNTS.indexOf(source.substring(pos));
116             // account for digits, and "some" future proofing.
117             order = ordering < 0
118                 ? source.charAt(pos)
119                     : 0x10000 + ordering;
120                 mainOrder = source.substring(0, pos);
121         }
122 
123         @Override
124         public String toString() {
125             return "{" + mainOrder + ", " + order + "}";
126         }
127 
128         @Override
129         public int compareTo(SubstringOrder other) {
130             int diff = alphabeticCompare(mainOrder, other.mainOrder);
131             if (diff != 0) {
132                 return diff;
133             }
134             return order - other.order;
135         }
136     }
137 
138     /**
139      * The Page for a path (within a Section). Don't change these without
140      * committee buy-in. the name is for example WAsia where toString gives
141      * Western Asia
142      */
143     public enum PageId {
144         Alphabetic_Information(SectionId.Core_Data, "Alphabetic Information"), Numbering_Systems(SectionId.Core_Data,
145             "Numbering Systems"), Locale_Name_Patterns(SectionId.Locale_Display_Names, "Locale Name Patterns"), Languages_A_D(SectionId.Locale_Display_Names,
146                 "Languages (A-D)"), Languages_E_J(SectionId.Locale_Display_Names, "Languages (E-J)"), Languages_K_N(SectionId.Locale_Display_Names,
147                     "Languages (K-N)"), Languages_O_S(SectionId.Locale_Display_Names, "Languages (O-S)"), Languages_T_Z(SectionId.Locale_Display_Names,
148                         "Languages (T-Z)"), Scripts(SectionId.Locale_Display_Names), Territories(SectionId.Locale_Display_Names,
149                             "Geographic Regions"), T_NAmerica(SectionId.Locale_Display_Names, "Territories (North America)"), T_SAmerica(
150                                 SectionId.Locale_Display_Names,
151                                 "Territories (South America)"), T_Africa(SectionId.Locale_Display_Names, "Territories (Africa)"), T_Europe(
152                                     SectionId.Locale_Display_Names,
153                                     "Territories (Europe)"), T_Asia(SectionId.Locale_Display_Names, "Territories (Asia)"), T_Oceania(
154                                         SectionId.Locale_Display_Names,
155                                         "Territories (Oceania)"), Locale_Variants(SectionId.Locale_Display_Names, "Locale Variants"), Keys(
156                                             SectionId.Locale_Display_Names), Fields(SectionId.DateTime), Gregorian(SectionId.DateTime), Generic(
157                                                 SectionId.DateTime), Buddhist(SectionId.DateTime), Chinese(SectionId.DateTime), Coptic(
158                                                     SectionId.DateTime), Dangi(SectionId.DateTime), Ethiopic(SectionId.DateTime), Ethiopic_Amete_Alem(
159                                                         SectionId.DateTime, "Ethiopic-Amete-Alem"), Hebrew(SectionId.DateTime), Indian(
160                                                             SectionId.DateTime), Islamic(SectionId.DateTime), Japanese(SectionId.DateTime), Persian(
161                                                                 SectionId.DateTime), Minguo(SectionId.DateTime), Timezone_Display_Patterns(SectionId.Timezones,
162                                                                     "Timezone Display Patterns"), NAmerica(SectionId.Timezones, "North America"), SAmerica(
163                                                                         SectionId.Timezones, "South America"), Africa(SectionId.Timezones), Europe(
164                                                                             SectionId.Timezones), Russia(SectionId.Timezones), WAsia(SectionId.Timezones,
165                                                                                 "Western Asia"), CAsia(SectionId.Timezones, "Central Asia"), EAsia(
166                                                                                     SectionId.Timezones,
167                                                                                     "Eastern Asia"), SAsia(SectionId.Timezones, "Southern Asia"), SEAsia(
168                                                                                         SectionId.Timezones,
169                                                                                         "Southeast Asia"), Australasia(SectionId.Timezones), Antarctica(
170                                                                                             SectionId.Timezones), Oceania(SectionId.Timezones), UnknownT(
171                                                                                                 SectionId.Timezones,
172                                                                                                 "Unknown Region"), Overrides(SectionId.Timezones), Symbols(
173                                                                                                     SectionId.Numbers), MinimalPairs(SectionId.Numbers,
174                                                                                                         "Minimal Pairs"), Number_Formatting_Patterns(
175                                                                                                             SectionId.Numbers,
176                                                                                                             "Number Formatting Patterns"), Compact_Decimal_Formatting(
177                                                                                                                 SectionId.Numbers,
178                                                                                                                 "Compact Decimal Formatting"), Compact_Decimal_Formatting_Other(
179                                                                                                                     SectionId.Numbers,
180                                                                                                                     "Compact Decimal Formatting (Other Numbering Systems)"), Measurement_Systems(
181                                                                                                                         SectionId.Units,
182                                                                                                                         "Measurement Systems"), Duration(
183                                                                                                                             SectionId.Units), Length(
184                                                                                                                                 SectionId.Units), Area(
185                                                                                                                                     SectionId.Units), Volume(
186                                                                                                                                         SectionId.Units), SpeedAcceleration(
187                                                                                                                                             SectionId.Units,
188                                                                                                                                             "Speed and Acceleration"), MassWeight(
189                                                                                                                                                 SectionId.Units,
190                                                                                                                                                 "Mass and Weight"), EnergyPower(
191                                                                                                                                                     SectionId.Units,
192                                                                                                                                                     "Energy and Power"), ElectricalFrequency(
193                                                                                                                                                         SectionId.Units,
194                                                                                                                                                         "Electrical and Frequency"), Weather(
195                                                                                                                                                             SectionId.Units), Digital(
196                                                                                                                                                                 SectionId.Units), Coordinates(
197                                                                                                                                                                     SectionId.Units), OtherUnits(
198                                                                                                                                                                         SectionId.Units,
199                                                                                                                                                                         "Other Units"), CompoundUnits(
200                                                                                                                                                                             SectionId.Units,
201                                                                                                                                                                             "Compound Units"), Displaying_Lists(
202                                                                                                                                                                                 SectionId.Misc,
203                                                                                                                                                                                 "Displaying Lists"), LinguisticElements(
204                                                                                                                                                                                     SectionId.Misc,
205                                                                                                                                                                                     "Linguistic Elements"), Transforms(
206                                                                                                                                                                                         SectionId.Misc), Identity(
207                                                                                                                                                                                             SectionId.Special), Version(
208                                                                                                                                                                                                 SectionId.Special), Suppress(
209                                                                                                                                                                                                     SectionId.Special), Deprecated(
210                                                                                                                                                                                                         SectionId.Special), Unknown(
211                                                                                                                                                                                                             SectionId.Special), C_NAmerica(
212                                                                                                                                                                                                                 SectionId.Currencies,
213                                                                                                                                                                                                                 "North America (C)"), //need to add (C) to differentiate from Timezone territories
214         C_SAmerica(SectionId.Currencies, "South America (C)"), C_NWEurope(SectionId.Currencies, "Northern/Western Europe"), C_SEEurope(SectionId.Currencies,
215             "Southern/Eastern Europe"), C_NAfrica(SectionId.Currencies, "Northern Africa"), C_WAfrica(SectionId.Currencies, "Western Africa"), C_MAfrica(
216                 SectionId.Currencies, "Middle Africa"), C_EAfrica(SectionId.Currencies, "Eastern Africa"), C_SAfrica(SectionId.Currencies,
217                     "Southern Africa"), C_WAsia(SectionId.Currencies, "Western Asia (C)"), C_CAsia(SectionId.Currencies, "Central Asia (C)"), C_EAsia(
218                         SectionId.Currencies, "Eastern Asia (C)"), C_SAsia(SectionId.Currencies, "Southern Asia (C)"), C_SEAsia(SectionId.Currencies,
219                             "Southeast Asia (C)"), C_Oceania(SectionId.Currencies, "Oceania (C)"), C_Unknown(SectionId.Currencies, "Unknown Region (C)"),
220         // BCP47
221         u_Extension(SectionId.BCP47), t_Extension(SectionId.BCP47),
222         // Supplemental
223         Alias(SectionId.Supplemental), IdValidity(SectionId.Supplemental), Locale(SectionId.Supplemental), RegionMapping(SectionId.Supplemental), WZoneMapping(
224             SectionId.Supplemental), Transform(SectionId.Supplemental), UnitPreferences(SectionId.Supplemental), Likely(SectionId.Supplemental), LanguageMatch(
225                 SectionId.Supplemental), TerritoryInfo(SectionId.Supplemental), LanguageInfo(SectionId.Supplemental), LanguageGroup(
226                     SectionId.Supplemental), Fallback(SectionId.Supplemental), Gender(SectionId.Supplemental), Metazone(SectionId.Supplemental), NumberSystem(
227                         SectionId.Supplemental), Plural(SectionId.Supplemental), PluralRange(SectionId.Supplemental), Containment(
228                             SectionId.Supplemental), Currency(SectionId.Supplemental), Calendar(SectionId.Supplemental), WeekData(
229                                 SectionId.Supplemental), Measurement(SectionId.Supplemental), Language(SectionId.Supplemental), RBNF(
230                                     SectionId.Supplemental), Segmentation(SectionId.Supplemental), DayPeriod(SectionId.Supplemental),
231 
232         Category(SectionId.Characters),
233         // [Smileys, People, Animals & Nature, Food & Drink, Travel & Places, Activities, Objects, Symbols, Flags]
234         Smileys(SectionId.Characters), People(SectionId.Characters), Animals_Nature(SectionId.Characters, "Animals & Nature"), Food_Drink(SectionId.Characters,
235             "Food & Drink"), Travel_Places(SectionId.Characters, "Travel & Places"), Activities(SectionId.Characters), Objects(
236                 SectionId.Characters), Symbols2(SectionId.Characters), Flags(SectionId.Characters), Component(SectionId.Characters),
237 
238         Typography(SectionId.Characters),
239         ;
240 
241         private final SectionId sectionId;
242 
243         private PageId(SectionId sectionId, String... alternateNames) {
244             this.sectionId = sectionId;
245             SectionIdToPageIds.put(sectionId, this);
246             PageIdNames.add(this, alternateNames);
247         }
248 
249         /**
250          * Construct a pageId given a string
251          *
252          * @param name
253          * @return
254          */
255         public static PageId forString(String name) {
256             try {
257                 return PageIdNames.forString(name);
258             } catch (Exception e) {
259                 throw new ICUException("No PageId for " + name, e);
260             }
261         }
262 
263         /**
264          * Returns the page id
265          *
266          * @return a page ID, such as 'Languages'
267          */
268         public String toString() {
269             return PageIdNames.toString(this);
270         }
271 
272         /**
273          * Get the containing section id, such as 'Code Lists'
274          *
275          * @return the containing section ID
276          */
277         public SectionId getSectionId() {
278             return sectionId;
279         }
280     }
281 
282     private final SectionId sectionId;
283     private final PageId pageId;
284     private final String header;
285     private final String code;
286     private final String originalPath;
287     private final SurveyToolStatus status;
288 
289     // Used for ordering
290     private final int headerOrder;
291     private final int codeOrder;
292     private final SubstringOrder codeSuborder;
293 
294     static final Pattern SEMI = PatternCache.get("\\s*;\\s*");
295     static final Matcher ALT_MATCHER = PatternCache.get(
296         "\\[@alt=\"([^\"]*+)\"]")
297         .matcher("");
298 
299     static final Collator alphabetic = CLDRConfig.getInstance().getCollatorRoot();
300 
301 //    static final RuleBasedCollator alphabetic = (RuleBasedCollator) Collator
302 //            .getInstance(ULocale.ENGLISH);
303 //    static {
304 //        alphabetic.setNumericCollation(true);
305 //        alphabetic.freeze();
306 //    }
307 
308     static final SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo.getInstance();
309     static final Map<String, String> metazoneToContinent = supplementalDataInfo
310         .getMetazoneToContinentMap();
311     static final StandardCodes standardCode = StandardCodes.make();
312     static final Map<String, String> metazoneToPageTerritory = new HashMap<String, String>();
313     static {
314         Map<String, Map<String, String>> metazoneToRegionToZone = supplementalDataInfo.getMetazoneToRegionToZone();
315         for (Entry<String, Map<String, String>> metazoneEntry : metazoneToRegionToZone.entrySet()) {
316             String metazone = metazoneEntry.getKey();
317             String worldZone = metazoneEntry.getValue().get("001");
318             String territory = Containment.getRegionFromZone(worldZone);
319             if (territory == null) {
320                 territory = "ZZ";
321             }
322             // Russia, Antarctica => territory
323             // in Australasia, Asia, S. America => subcontinent
324             // in N. America => N. America (grouping of 3 subcontinents)
325             // in everything else => continent
326             if (territory.equals("RU") || territory.equals("AQ")) {
327                 metazoneToPageTerritory.put(metazone, territory);
328             } else {
329                 String continent = Containment.getContinent(territory);
330                 String subcontinent = Containment.getSubcontinent(territory);
331                 if (continent.equals("142")) { // Asia
332                     metazoneToPageTerritory.put(metazone, subcontinent);
333                 } else if (continent.equals("019")) { // Americas
334                     metazoneToPageTerritory.put(metazone, subcontinent.equals("005") ? subcontinent : "003");
335                 } else if (subcontinent.equals("053")) { // Australasia
336                     metazoneToPageTerritory.put(metazone, subcontinent);
337                 } else {
338                     metazoneToPageTerritory.put(metazone, continent);
339                 }
340             }
341         }
342     }
343 
/**
 * Stores the given values unchanged; no validation is performed.
 *
 * @param sectionId    section the path belongs to
 * @param pageId       page within the section
 * @param header       header text (getHeader() maps null to "")
 * @param headerOrder  integer sort key compared before the header text
 * @param code         code text
 * @param codeOrder    integer sort key compared before the code text
 * @param suborder     optional secondary sort key for the code; may be null
 * @param status       Survey Tool status for the path
 * @param originalPath the path this header was created for
 */
private PathHeader(SectionId sectionId, PageId pageId, String header,
    int headerOrder, String code, int codeOrder, SubstringOrder suborder, SurveyToolStatus status,
    String originalPath) {
    // Where the path lives.
    this.sectionId = sectionId;
    this.pageId = pageId;

    // What is displayed.
    this.header = header;
    this.code = code;
    this.originalPath = originalPath;
    this.status = status;

    // How it sorts.
    this.headerOrder = headerOrder;
    this.codeOrder = codeOrder;
    this.codeSuborder = suborder;
}
369 
370     /**
371      * Return a factory for use in creating the headers. This should be cached.
372      * The calls are thread-safe. The englishFile sets a static for now; after
373      * the first time, null can be passed.
374      *
375      * @param englishFile
376      */
377     public static Factory getFactory(CLDRFile englishFile) {
378         if (factorySingleton == null) {
379             if (englishFile == null) {
380                 throw new IllegalArgumentException("English CLDRFile must not be null");
381             }
382             if (!englishFile.getLocaleID().equals(ULocale.ENGLISH.getBaseName())) {
383                 throw new IllegalArgumentException("PathHeader's CLDRFile must be '" +
384                     ULocale.ENGLISH.getBaseName() + "', but found '" + englishFile.getLocaleID() + "'");
385             }
386             factorySingleton = new Factory(englishFile);
387         }
388         return factorySingleton;
389     }
390 
391     /**
392      * @deprecated
393      */
394     public String getSection() {
395         return sectionId.toString();
396     }
397 
398     public SectionId getSectionId() {
399         return sectionId;
400     }
401 
402     /**
403      * @deprecated
404      */
405     public String getPage() {
406         return pageId.toString();
407     }
408 
409     public PageId getPageId() {
410         return pageId;
411     }
412 
413     public String getHeader() {
414         return header == null ? "" : header;
415     }
416 
417     public String getCode() {
418         return code;
419     }
420 
421     public String getHeaderCode() {
422         return getHeader() + ": " + getCode();
423     }
424 
425     public String getOriginalPath() {
426         return originalPath;
427     }
428 
429     public SurveyToolStatus getSurveyToolStatus() {
430         return status;
431     }
432 
433     @Override
434     public String toString() {
435         return sectionId
436             + "\t" + pageId
437             + "\t" + header // + "\t" + headerOrder
438             + "\t" + code // + "\t" + codeOrder
439             ;
440     }
441 
442     @Override
443     public int compareTo(PathHeader other) {
444         // Within each section, order alphabetically if the integer orders are
445         // not different.
446         try {
447             int result;
448             if (0 != (result = sectionId.compareTo(other.sectionId))) {
449                 return result;
450             }
451             if (0 != (result = pageId.compareTo(other.pageId))) {
452                 return result;
453             }
454             if (0 != (result = headerOrder - other.headerOrder)) {
455                 return result;
456             }
457             if (0 != (result = alphabeticCompare(header, other.header))) {
458                 return result;
459             }
460             if (0 != (result = codeOrder - other.codeOrder)) {
461                 return result;
462             }
463             if (codeSuborder != null) { // do all three cases, for transitivity
464                 if (other.codeSuborder != null) {
465                     if (0 != (result = codeSuborder.compareTo(other.codeSuborder))) {
466                         return result;
467                     }
468                 } else {
469                     return 1; // if codeSuborder != null (and other.codeSuborder
470                     // == null), it is greater
471                 }
472             } else if (other.codeSuborder != null) {
473                 return -1; // if codeSuborder == null (and other.codeSuborder !=
474                 // null), it is greater
475             }
476             if (0 != (result = alphabeticCompare(code, other.code))) {
477                 return result;
478             }
479             if (!SKIP_ORIGINAL_PATH && 0 != (result = alphabeticCompare(originalPath, other.originalPath))) {
480                 return result;
481             }
482             return 0;
483         } catch (RuntimeException e) {
484             throw new IllegalArgumentException("Internal problem comparing " + this + " and " + other, e);
485         }
486     }
487 
488     public int compareHeader(PathHeader other) {
489         int result;
490         if (0 != (result = headerOrder - other.headerOrder)) {
491             return result;
492         }
493         if (0 != (result = alphabeticCompare(header, other.header))) {
494             return result;
495         }
496         return result;
497     }
498 
499     public int compareCode(PathHeader other) {
500         int result;
501         if (0 != (result = codeOrder - other.codeOrder)) {
502             return result;
503         }
504         if (codeSuborder != null) { // do all three cases, for transitivity
505             if (other.codeSuborder != null) {
506                 if (0 != (result = codeSuborder.compareTo(other.codeSuborder))) {
507                     return result;
508                 }
509             } else {
510                 return 1; // if codeSuborder != null (and other.codeSuborder
511                 // == null), it is greater
512             }
513         } else if (other.codeSuborder != null) {
514             return -1; // if codeSuborder == null (and other.codeSuborder !=
515             // null), it is greater
516         }
517         if (0 != (result = alphabeticCompare(code, other.code))) {
518             return result;
519         }
520         return result;
521     }
522 
523     @Override
524     public boolean equals(Object obj) {
525         PathHeader other;
526         try {
527             other = (PathHeader) obj;
528         } catch (Exception e) {
529             return false;
530         }
531         return sectionId == other.sectionId && pageId == other.pageId
532             && header.equals(other.header) && code.equals(other.code);
533     }
534 
535     @Override
536     public int hashCode() {
537         return sectionId.hashCode() ^ pageId.hashCode() ^ header.hashCode() ^ code.hashCode();
538     }
539 
540     public static class Factory implements Transform<String, PathHeader> {
        // Regex table loaded from data/PathHeader.txt (resolved relative to
        // PathHeader.class). Each entry's value is parsed into a RawData by
        // PathHeaderTransform. Also serves as the lock object for the fields
        // marked "synchronized with lookup" below.
        static final RegexLookup<RawData> lookup = RegexLookup
            .of(new PathHeaderTransform())
            .setPatternTransform(
                RegexLookup.RegexFinderTransformPath)
            .loadFromFile(
                PathHeader.class,
                "data/PathHeader.txt");
        // synchronized with lookup
        // Scratch output handed to lookup.get() on every fromPath call.
        static final Output<String[]> args = new Output<String[]>();
        // synchronized with lookup
        // Number of times each RawData entry has been matched by fromPath.
        static final Counter<RawData> counter = new Counter<RawData>();
        // synchronized with lookup
        // First cleaned path seen for each RawData entry.
        static final Map<RawData, String> samples = new HashMap<RawData, String>();
        // synchronized with lookup
        // Side-channel results: written by the functions registered in
        // functionMap and read back by fromPath while it builds a PathHeader.
        static int order;
        static SubstringOrder suborder;

        // Original path -> PathHeader. Also the lock object for the two
        // section/page structures below.
        static final Map<String, PathHeader> cache = new HashMap<String, PathHeader>();
        // synchronized with cache
        // Section -> page -> the SectionPage key used in sectionPageToPaths.
        static final Map<SectionId, Map<PageId, SectionPage>> sectionToPageToSectionPage = new EnumMap<SectionId, Map<PageId, SectionPage>>(
            SectionId.class);
        // SectionPage -> every path for which fromPath produced that section/page.
        static final Relation<SectionPage, String> sectionPageToPaths = Relation
            .of(new TreeMap<SectionPage, Set<String>>(),
                HashSet.class);
        // Shared English file; assigned at most once, by setEnglishCLDRFileIfNotSet.
        private static CLDRFile englishFile;
        // Per-factory record of the string form of every matcher that fromPath hit.
        private Set<String> matchersFound = new HashSet<String>();
567 
568         /**
569          * Create a factory for creating PathHeaders.
570          *
571          * @param englishFile
572          *            - only sets the file (statically!) if not already set.
573          */
574         private Factory(CLDRFile englishFile) {
575             setEnglishCLDRFileIfNotSet(englishFile); // temporary
576         }
577 
578         /**
579          * Returns true if we set it, false if set before.
580          *
581          * @param englishFile2
582          * @return
583          */
584         private static boolean setEnglishCLDRFileIfNotSet(CLDRFile englishFile2) {
585             synchronized (Factory.class) {
586                 if (englishFile != null) {
587                     return false;
588                 }
589                 englishFile = englishFile2;
590                 return true;
591             }
592         }
593 
594         /**
595          * Use only when trying to find unmatched patterns
596          */
597         public void clearCache() {
598             synchronized (cache) {
599                 cache.clear();
600             }
601         }
602 
603         /**
604          * Return the PathHeader for a given path. Thread-safe.
605          */
606         public PathHeader fromPath(String path) {
607             return fromPath(path, null);
608         }
609 
610         /**
611          * Return the PathHeader for a given path. Thread-safe.
612          */
613         public PathHeader transform(String path) {
614             return fromPath(path, null);
615         }
616 
617         /**
618          * Return the PathHeader for a given path. Thread-safe.
619          * @param failures a list of failures to add to.
620          */
621         public PathHeader fromPath(String path, List<String> failures) {
622             if (path == null) {
623                 throw new NullPointerException("Path cannot be null");
624             }
625             synchronized (cache) {
626                 PathHeader old = cache.get(path);
627                 if (old != null) {
628                     return old;
629                 }
630             }
631             synchronized (lookup) {
632                 String cleanPath = path;
633                 // special handling for alt
634                 String alt = null;
635                 int altPos = cleanPath.indexOf("[@alt=");
636                 if (altPos >= 0 && !cleanPath.endsWith("/symbol[@alt=\"narrow\"]")) {
637                     if (ALT_MATCHER.reset(cleanPath).find()) {
638                         alt = ALT_MATCHER.group(1);
639                         cleanPath = cleanPath.substring(0, ALT_MATCHER.start())
640                             + cleanPath.substring(ALT_MATCHER.end());
641                         int pos = alt.indexOf("proposed");
642                         if (pos >= 0 && !path.startsWith("//ldml/collations")) {
643                             alt = pos == 0 ? null : alt.substring(0, pos - 1);
644                             // drop "proposed",
645                             // change "xxx-proposed" to xxx.
646                         }
647                     } else {
648                         throw new IllegalArgumentException();
649                     }
650                 }
651                 Output<Finder> matcherFound = new Output<Finder>();
652                 RawData data = lookup.get(cleanPath, null, args, matcherFound, failures);
653                 if (data == null) {
654                     return null;
655                 }
656                 matchersFound.add(matcherFound.value.toString());
657                 counter.add(data, 1);
658                 if (!samples.containsKey(data)) {
659                     samples.put(data, cleanPath);
660                 }
661                 try {
662                     PathHeader result = new PathHeader(
663                         SectionId.forString(fix(data.section, 0)),
664                         PageId.forString(fix(data.page, 0)),
665                         fix(data.header, data.headerOrder),
666                         order, // only valid after call to fix. TODO, make
667                         // this cleaner
668                         fix(data.code + (alt == null ? "" : ("-" + alt)), data.codeOrder),
669                         order, // only valid after call to fix
670                         suborder,
671                         data.status,
672                         path);
673                     synchronized (cache) {
674                         PathHeader old = cache.get(path);
675                         if (old == null) {
676                             cache.put(path, result);
677                         } else {
678                             result = old;
679                         }
680                         Map<PageId, SectionPage> pageToPathHeaders = sectionToPageToSectionPage
681                             .get(result.sectionId);
682                         if (pageToPathHeaders == null) {
683                             sectionToPageToSectionPage.put(result.sectionId, pageToPathHeaders = new EnumMap<PageId, SectionPage>(PageId.class));
684                         }
685                         SectionPage sectionPage = pageToPathHeaders.get(result.pageId);
686                         if (sectionPage == null) {
687                             sectionPage = new SectionPage(result.sectionId, result.pageId);
688                             pageToPathHeaders.put(result.pageId, sectionPage);
689                         }
690                         sectionPageToPaths.put(sectionPage, path);
691                     }
692                     return result;
693                 } catch (Exception e) {
694                     throw new IllegalArgumentException(
695                         "Probably mismatch in Page/Section enum, or too few capturing groups in regex for " + cleanPath,
696                         e);
697                 }
698             }
699         }
700 
701         private static class SectionPage implements Comparable<SectionPage> {
702             private final SectionId sectionId;
703             private final PageId pageId;
704 
SectionPage(SectionId sectionId, PageId pageId)705             public SectionPage(SectionId sectionId, PageId pageId) {
706                 this.sectionId = sectionId;
707                 this.pageId = pageId;
708             }
709 
710             @Override
compareTo(SectionPage other)711             public int compareTo(SectionPage other) {
712                 // Within each section, order alphabetically if the integer
713                 // orders are
714                 // not different.
715                 int result;
716                 if (0 != (result = sectionId.compareTo(other.sectionId))) {
717                     return result;
718                 }
719                 if (0 != (result = pageId.compareTo(other.pageId))) {
720                     return result;
721                 }
722                 return 0;
723             }
724 
725             @Override
equals(Object obj)726             public boolean equals(Object obj) {
727                 PathHeader other;
728                 try {
729                     other = (PathHeader) obj;
730                 } catch (Exception e) {
731                     return false;
732                 }
733                 return sectionId == other.sectionId && pageId == other.pageId;
734             }
735 
736             @Override
hashCode()737             public int hashCode() {
738                 return sectionId.hashCode() ^ pageId.hashCode();
739             }
740         }
741 
742         /**
743          * Returns a set of paths currently associated with the given section
744          * and page.
745          * <p>
746          * <b>Warning:</b>
747          * <ol>
748          * <li>The set may not be complete for a cldrFile unless all of paths in
749          * the file have had fromPath called. And this includes getExtraPaths().
750          * </li>
751          * <li>The set may include paths that have no value in the current
752          * cldrFile.</li>
753          * <li>The set may be empty, if the section/page aren't valid.</li>
754          * </ol>
755          * Thread-safe.
756          *
757          * @target a collection where the paths are to be returned.
758          */
getCachedPaths(SectionId sectionId, PageId page)759         public static Set<String> getCachedPaths(SectionId sectionId, PageId page) {
760             Set<String> target = new HashSet<String>();
761             synchronized (cache) {
762                 Map<PageId, SectionPage> pageToSectionPage = sectionToPageToSectionPage
763                     .get(sectionId);
764                 if (pageToSectionPage == null) {
765                     return target;
766                 }
767                 SectionPage sectionPage = pageToSectionPage.get(page);
768                 if (sectionPage == null) {
769                     return target;
770                 }
771                 Set<String> set = sectionPageToPaths.getAll(sectionPage);
772                 target.addAll(set);
773             }
774             return target;
775         }
776 
777         /**
778          * Return the Sections and Pages that are in defined, for display in
779          * menus. Both are ordered.
780          */
getSectionIdsToPageIds()781         public static Relation<SectionId, PageId> getSectionIdsToPageIds() {
782             SectionIdToPageIds.freeze(); // just in case
783             return SectionIdToPageIds;
784         }
785 
786         /**
787          * Return paths that have the designated section and page.
788          *
789          * @param sectionId
790          * @param pageId
791          * @param file
792          */
filterCldr(SectionId sectionId, PageId pageId, CLDRFile file)793         public Iterable<String> filterCldr(SectionId sectionId, PageId pageId, CLDRFile file) {
794             return new FilteredIterable(sectionId, pageId, file);
795         }
796 
797         /**
798          * Return the names for Sections and Pages that are defined, for display
799          * in menus. Both are ordered.
800          *
801          * @deprecated Use getSectionIdsToPageIds
802          */
getSectionsToPages()803         public static LinkedHashMap<String, Set<String>> getSectionsToPages() {
804             LinkedHashMap<String, Set<String>> sectionsToPages = new LinkedHashMap<String, Set<String>>();
805             for (PageId pageId : PageId.values()) {
806                 String sectionId2 = pageId.getSectionId().toString();
807                 Set<String> pages = sectionsToPages.get(sectionId2);
808                 if (pages == null) {
809                     sectionsToPages.put(sectionId2, pages = new LinkedHashSet<String>());
810                 }
811                 pages.add(pageId.toString());
812             }
813             return sectionsToPages;
814         }
815 
816         /**
817          * @deprecated, use the filterCldr with the section/page ids.
818          */
filterCldr(String section, String page, CLDRFile file)819         public Iterable<String> filterCldr(String section, String page, CLDRFile file) {
820             return new FilteredIterable(section, page, file);
821         }
822 
823         private class FilteredIterable implements Iterable<String>, SimpleIterator<String> {
824             private final SectionId sectionId;
825             private final PageId pageId;
826             private final Iterator<String> fileIterator;
827 
FilteredIterable(SectionId sectionId, PageId pageId, CLDRFile file)828             FilteredIterable(SectionId sectionId, PageId pageId, CLDRFile file) {
829                 this.sectionId = sectionId;
830                 this.pageId = pageId;
831                 this.fileIterator = file.fullIterable().iterator();
832             }
833 
FilteredIterable(String section, String page, CLDRFile file)834             public FilteredIterable(String section, String page, CLDRFile file) {
835                 this(SectionId.forString(section), PageId.forString(page), file);
836             }
837 
838             @Override
iterator()839             public Iterator<String> iterator() {
840                 return With.toIterator(this);
841             }
842 
843             @Override
next()844             public String next() {
845                 while (fileIterator.hasNext()) {
846                     String path = fileIterator.next();
847                     PathHeader pathHeader = fromPath(path);
848                     if (sectionId == pathHeader.sectionId && pageId == pathHeader.pageId) {
849                         return path;
850                     }
851                 }
852                 return null;
853             }
854         }
855 
856         private static class ChronologicalOrder {
857             private Map<String, Integer> map = new HashMap<String, Integer>();
858             private String item;
859             private int order;
860             private ChronologicalOrder toClear;
861 
ChronologicalOrder(ChronologicalOrder toClear)862             ChronologicalOrder(ChronologicalOrder toClear) {
863                 this.toClear = toClear;
864             }
865 
getOrder()866             int getOrder() {
867                 return order;
868             }
869 
set(String itemToOrder)870             public String set(String itemToOrder) {
871                 if (itemToOrder.startsWith("*")) {
872                     item = itemToOrder.substring(1, itemToOrder.length());
873                     return item; // keep old order
874                 }
875                 item = itemToOrder;
876                 Integer old = map.get(item);
877                 if (old != null) {
878                     order = old.intValue();
879                 } else {
880                     order = map.size();
881                     map.put(item, order);
882                     clearLower();
883                 }
884                 return item;
885             }
886 
clearLower()887             private void clearLower() {
888                 if (toClear != null) {
889                     toClear.map.clear();
890                     toClear.order = 0;
891                     toClear.clearLower();
892                 }
893             }
894         }
895 
896         static class RawData {
897             static ChronologicalOrder codeOrdering = new ChronologicalOrder(null);
898             static ChronologicalOrder headerOrdering = new ChronologicalOrder(codeOrdering);
899 
RawData(String source)900             public RawData(String source) {
901                 String[] split = SEMI.split(source);
902                 section = split[0];
903                 // HACK
904                 if (section.equals("Timezones") && split[1].equals("Indian")) {
905                     page = "Indian2";
906                 } else {
907                     page = split[1];
908                 }
909 
910                 header = headerOrdering.set(split[2]);
911                 headerOrder = headerOrdering.getOrder();
912 
913                 code = codeOrdering.set(split[3]);
914                 codeOrder = codeOrdering.getOrder();
915 
916                 status = split.length < 5 ? SurveyToolStatus.READ_WRITE : SurveyToolStatus.valueOf(split[4]);
917             }
918 
919             public final String section;
920             public final String page;
921             public final String header;
922             public final int headerOrder;
923             public final String code;
924             public final int codeOrder;
925             public final SurveyToolStatus status;
926 
927             @Override
928             public String toString() {
929                 return section + "\t"
930                     + page + "\t"
931                     + header + "\t" + headerOrder + "\t"
932                     + code + "\t" + codeOrder + "\t"
933                     + status;
934             }
935         }
936 
937         static class PathHeaderTransform implements Transform<String, RawData> {
938             @Override
939             public RawData transform(String source) {
940                 return new RawData(source);
941             }
942         }
943 
944         /**
945          * Internal data, for testing and debugging.
946          *
947          * @deprecated
948          */
949         public class CounterData extends Row.R4<String, RawData, String, String> {
950             public CounterData(String a, RawData b, String c) {
951                 super(a, b, c == null ? "no sample" : c, c == null ? "no sample" : fromPath(c)
952                     .toString());
953             }
954         }
955 
956         /**
957          * Get the internal data, for testing and debugging.
958          *
959          * @deprecated
960          */
961         public Counter<CounterData> getInternalCounter() {
962             synchronized (lookup) {
963                 Counter<CounterData> result = new Counter<CounterData>();
964                 for (Map.Entry<Finder, RawData> foo : lookup) {
965                     Finder finder = foo.getKey();
966                     RawData data = foo.getValue();
967                     long count = counter.get(data);
968                     result.add(new CounterData(finder.toString(), data, samples.get(data)), count);
969                 }
970                 return result;
971             }
972         }
973 
        // Named functions that may be used in PathHeader.txt; filled in by the
        // static initializer that follows these fields.
        static Map<String, Transform<String, String>> functionMap = new HashMap<String, Transform<String, String>>();
        // Month abbreviations, indexed by month number minus one (see the
        // "month" function); the 13th entry is "Und".
        static String[] months = { "Jan", "Feb", "Mar",
            "Apr", "May", "Jun",
            "Jul", "Aug", "Sep",
            "Oct", "Nov", "Dec",
        "Und" };
        // Day keys; the "day" function uses an entry's index as its order.
        static List<String> days = Arrays.asList("sun", "mon",
            "tue", "wed", "thu",
            "fri", "sat");
        // Unit ordering taken from DtdData.
        static List<String> unitOrder = DtdData.unitOrder.getOrder();
        // Known day-period keys; the "dayPeriod" function takes its order from
        // this comparator's numeric order.
        static final MapComparator<String> dayPeriods = new MapComparator<String>().add(
            "am", "pm", "midnight", "noon",
            "morning1", "morning2", "afternoon1", "afternoon2", "evening1", "evening2", "night1", "night2").freeze();
        // static Map<String, String> likelySubtags =
        // supplementalDataInfo.getLikelySubtags();
        // Used by the "scriptFromLanguage" function to find a language's likely script.
        static LikelySubtags likelySubtags = new LikelySubtags();
        // Used by several functions to split their input before lookup.
        static HyphenSplitter hyphenSplitter = new HyphenSplitter();
        // Assigned in the static initializer when "categoryFromTerritory" is registered.
        static Transform<String, String> catFromTerritory;
        // NOTE(review): presumably assigned the same way for a timezone
        // function later in the static initializer -- confirm.
        static Transform<String, String> catFromTimezone;
993         static {
994             // Put any new functions used in PathHeader.txt in here.
995             // To change the order of items within a section or heading, set
996             // order/suborder to be the relative position of the current item.
997             functionMap.put("month", new Transform<String, String>() {
998                 public String transform(String source) {
999                     int m = Integer.parseInt(source);
1000                     order = m;
1001                     return months[m - 1];
1002                 }
1003             });
1004             functionMap.put("count", new Transform<String, String>() {
1005                 public String transform(String source) {
1006                     suborder = new SubstringOrder(source);
1007                     return source;
1008                 }
1009             });
1010             functionMap.put("count2", new Transform<String, String>() {
1011                 public String transform(String source) {
1012                     int pos = source.indexOf('-');
1013                     source = pos + source.substring(pos);
1014                     suborder = new SubstringOrder(source); // make 10000-...
1015                     // into 5-
1016                     return source;
1017                 }
1018             });
1019             functionMap.put("currencySymbol", new Transform<String, String>() {
1020                 public String transform(String source) {
1021                     order = 901;
1022                     if (source.endsWith("narrow")) {
1023                         order = 902;
1024                     }
1025                     if (source.endsWith("variant")) {
1026                         order = 903;
1027                     }
1028                     return source;
1029                 }
1030             });
1031             functionMap.put("unitCount", new Transform<String, String>() {
1032                 public String transform(String source) {
1033                     String[] unitLengths = { "long", "short", "narrow" };
1034                     int pos = 9;
1035                     for (int i = 0; i < unitLengths.length; i++) {
1036                         if (source.startsWith(unitLengths[i])) {
1037                             pos = i;
1038                             continue;
1039                         }
1040                     }
1041                     order = pos;
1042                     suborder = new SubstringOrder(pos + "-" + source); //
1043                     return source;
1044                 }
1045             });
1046             functionMap.put("day", new Transform<String, String>() {
1047                 public String transform(String source) {
1048                     int m = days.indexOf(source);
1049                     order = m;
1050                     return source;
1051                 }
1052             });
1053             functionMap.put("dayPeriod", new Transform<String, String>() {
1054                 public String transform(String source) {
1055                     try {
1056                         order = dayPeriods.getNumericOrder(source);
1057                     } catch (Exception e) {
1058                         // if an old item is tried, like "evening", this will fail.
1059                         // so that old data still works, hack this.
1060                         order = Math.abs(source.hashCode() << 16);
1061                     }
1062                     return source;
1063                 }
1064             });
1065             functionMap.put("calendar", new Transform<String, String>() {
1066                 Map<String, String> fixNames = Builder.with(new HashMap<String, String>())
1067                     .put("islamicc", "Islamic Civil")
1068                     .put("roc", "Minguo")
1069                     .put("Ethioaa", "Ethiopic Amete Alem")
1070                     .put("Gregory", "Gregorian")
1071                     .put("iso8601", "ISO 8601")
1072                     .freeze();
1073 
1074                 public String transform(String source) {
1075                     String result = fixNames.get(source);
1076                     return result != null ? result : UCharacter.toTitleCase(source, null);
1077                 }
1078             });
1079 
1080             functionMap.put("calField", new Transform<String, String>() {
1081                 public String transform(String source) {
1082                     String[] fields = source.split(":", 3);
1083                     order = 0;
1084                     final List<String> widthValues = Arrays.asList(
1085                         "wide", "abbreviated", "short", "narrow");
1086                     final List<String> calendarFieldValues = Arrays.asList(
1087                         "Eras",
1088                         "Quarters",
1089                         "Months",
1090                         "Days",
1091                         "DayPeriods",
1092                         "Formats");
1093                     final List<String> calendarFormatTypes = Arrays.asList(
1094                         "Standard",
1095                         "Flexible",
1096                         "Intervals");
1097                     final List<String> calendarContextTypes = Arrays.asList(
1098                         "none",
1099                         "format",
1100                         "stand-alone");
1101                     final List<String> calendarFormatSubtypes = Arrays.asList(
1102                         "date",
1103                         "time",
1104                         "time12",
1105                         "time24",
1106                         "dateTime",
1107                         "fallback");
1108 
1109                     Map<String, String> fixNames = Builder.with(new HashMap<String, String>())
1110                         .put("DayPeriods", "Day Periods")
1111                         .put("format", "Formatting")
1112                         .put("stand-alone", "Standalone")
1113                         .put("none", "")
1114                         .put("date", "Date Formats")
1115                         .put("time", "Time Formats")
1116                         .put("time12", "12 Hour Time Formats")
1117                         .put("time24", "24 Hour Time Formats")
1118                         .put("dateTime", "Date & Time Combination Formats")
1119                         .freeze();
1120 
1121                     if (calendarFieldValues.contains(fields[0])) {
1122                         order = calendarFieldValues.indexOf(fields[0]) * 100;
1123                     } else {
1124                         order = calendarFieldValues.size() * 100;
1125                     }
1126 
1127                     if (fields[0].equals("Formats")) {
1128                         if (calendarFormatTypes.contains(fields[1])) {
1129                             order += calendarFormatTypes.indexOf(fields[1]) * 10;
1130                         } else {
1131                             order += calendarFormatTypes.size() * 10;
1132                         }
1133                         if (calendarFormatSubtypes.contains(fields[2])) {
1134                             order += calendarFormatSubtypes.indexOf(fields[2]);
1135                         } else {
1136                             order += calendarFormatSubtypes.size();
1137                         }
1138                     } else {
1139                         if (widthValues.contains(fields[1])) {
1140                             order += widthValues.indexOf(fields[1]) * 10;
1141                         } else {
1142                             order += widthValues.size() * 10;
1143                         }
1144                         if (calendarContextTypes.contains(fields[2])) {
1145                             order += calendarContextTypes.indexOf(fields[2]);
1146                         } else {
1147                             order += calendarContextTypes.size();
1148                         }
1149                     }
1150 
1151                     String[] fixedFields = new String[fields.length];
1152                     for (int i = 0; i < fields.length; i++) {
1153                         String s = fixNames.get(fields[i]);
1154                         fixedFields[i] = s != null ? s : fields[i];
1155                     }
1156 
1157                     return fixedFields[0] +
1158                         " - " + fixedFields[1] +
1159                         (fixedFields[2].length() > 0 ? " - " + fixedFields[2] : "");
1160                 }
1161             });
1162 
1163             functionMap.put("titlecase", new Transform<String, String>() {
1164                 public String transform(String source) {
1165                     return UCharacter.toTitleCase(source, null);
1166                 }
1167             });
1168             functionMap.put("categoryFromScript", new Transform<String, String>() {
1169                 public String transform(String source) {
1170                     String script = hyphenSplitter.split(source);
1171                     Info info = ScriptMetadata.getInfo(script);
1172                     if (info == null) {
1173                         info = ScriptMetadata.getInfo("Zzzz");
1174                     }
1175                     order = 100 - info.idUsage.ordinal();
1176                     return info.idUsage.name;
1177                 }
1178             });
1179             functionMap.put("categoryFromKey", new Transform<String, String>() {
1180                 Map<String, String> fixNames = Builder.with(new HashMap<String, String>())
1181                     .put("lb", "Line Break")
1182                     .put("hc", "Hour Cycle")
1183                     .put("ms", "Measurement System")
1184                     .put("cf", "Currency Format")
1185                     .freeze();
1186 
1187                 public String transform(String source) {
1188                     String fixedName = fixNames.get(source);
1189                     return fixedName != null ? fixedName : source;
1190                 }
1191             });
1192             functionMap.put("languageSection", new Transform<String, String>() {
1193                 char[] languageRangeStartPoints = { 'A', 'E', 'K', 'O', 'T' };
1194                 char[] languageRangeEndPoints = { 'D', 'J', 'N', 'S', 'Z' };
1195 
1196                 public String transform(String source0) {
1197                     char firstLetter = getEnglishFirstLetter(source0).charAt(0);
1198                     for (int i = 0; i < languageRangeStartPoints.length; i++) {
1199                         if (firstLetter >= languageRangeStartPoints[i] && firstLetter <= languageRangeEndPoints[i]) {
1200                             return "Languages (" + Character.toUpperCase(languageRangeStartPoints[i]) + "-" + Character.toUpperCase(languageRangeEndPoints[i])
1201                             + ")";
1202                         }
1203                     }
1204                     return "Languages";
1205                 }
1206             });
1207             functionMap.put("firstLetter", new Transform<String, String>() {
1208                 public String transform(String source0) {
1209                     return getEnglishFirstLetter(source0);
1210                 }
1211             });
1212             functionMap.put("languageSort", new Transform<String, String>() {
1213                 public String transform(String source0) {
1214                     String languageOnlyPart;
1215                     int underscorePos = source0.indexOf("_");
1216                     if (underscorePos > 0) {
1217                         languageOnlyPart = source0.substring(0, underscorePos);
1218                     } else {
1219                         languageOnlyPart = source0;
1220                     }
1221 
1222                     return englishFile.getName(CLDRFile.LANGUAGE_NAME, languageOnlyPart) + " \u25BA " + source0;
1223                 }
1224             });
1225             functionMap.put("scriptFromLanguage", new Transform<String, String>() {
1226                 public String transform(String source0) {
1227                     String language = hyphenSplitter.split(source0);
1228                     String script = likelySubtags.getLikelyScript(language);
1229                     if (script == null) {
1230                         script = likelySubtags.getLikelyScript(language);
1231                     }
1232                     String scriptName = englishFile.getName(CLDRFile.SCRIPT_NAME, script);
1233                     return "Languages in " + (script.equals("Hans") || script.equals("Hant") ? "Han Script"
1234                         : scriptName.endsWith(" Script") ? scriptName
1235                             : scriptName + " Script");
1236                 }
1237             });
1238             functionMap.put("categoryFromTerritory",
1239                 catFromTerritory = new Transform<String, String>() {
1240                 public String transform(String source) {
1241                     String territory = getSubdivisionsTerritory(source, null);
1242                     String container = Containment.getContainer(territory);
1243                     order = Containment.getOrder(territory);
1244                     return englishFile.getName(CLDRFile.TERRITORY_NAME, container);
1245                 }
1246             });
1247             functionMap.put("territorySection", new Transform<String, String>() {
1248                 final Set<String> specialRegions = new HashSet<String>(Arrays.asList("EZ", "EU", "QO", "UN", "ZZ"));
1249 
1250                 public String transform(String source0) {
1251                     // support subdivisions
1252                     String theTerritory = getSubdivisionsTerritory(source0, null);
1253                     try {
1254                         if (specialRegions.contains(theTerritory)
1255                             || theTerritory.charAt(0) < 'A' && Integer.valueOf(theTerritory) > 0) {
1256                             return "Geographic Regions";
1257                         }
1258                     } catch (NumberFormatException ex) {
1259                     }
1260                     String theContinent = Containment.getContinent(theTerritory);
1261                     String theSubContinent;
1262                     switch (theContinent) { // was Integer.valueOf
1263                     case "019": // Americas - For the territorySection, we just group North America & South America
1264                         final String subcontinent = Containment.getSubcontinent(theTerritory);
1265                         theSubContinent = subcontinent.equals("005") ? "005" : "003"; // was Integer.valueOf(subcontinent) == 5
1266                         return "Territories (" + englishFile.getName(CLDRFile.TERRITORY_NAME, theSubContinent) + ")";
1267                     case "001":
1268                     case "ZZ":
1269                         return "Geographic Regions"; // not in containment
1270                     default:
1271                         return "Territories (" + englishFile.getName(CLDRFile.TERRITORY_NAME, theContinent) + ")";
1272                     }
1273                 }
1274             });
1275             functionMap.put("categoryFromTimezone",
1276                 catFromTimezone = new Transform<String, String>() {
1277                 public String transform(String source0) {
1278                     String territory = Containment.getRegionFromZone(source0);
1279                     if (territory == null) {
1280                         territory = "ZZ";
1281                     }
1282                     return catFromTerritory.transform(territory);
1283                 }
1284             });
1285             functionMap.put("timeZonePage", new Transform<String, String>() {
1286                 Set<String> singlePageTerritories = new HashSet<String>(Arrays.asList("AQ", "RU", "ZZ"));
1287 
1288                 public String transform(String source0) {
1289                     String theTerritory = Containment.getRegionFromZone(source0);
1290                     if (theTerritory == null || theTerritory == "001") {
1291                         theTerritory = "ZZ";
1292                     }
1293                     if (singlePageTerritories.contains(theTerritory)) {
1294                         return englishFile.getName(CLDRFile.TERRITORY_NAME, theTerritory);
1295                     }
1296                     String theContinent = Containment.getContinent(theTerritory);
1297                     final String subcontinent = Containment.getSubcontinent(theTerritory);
1298                     String theSubContinent;
1299                     switch (Integer.valueOf(theContinent)) {
1300                     case 9: // Oceania - For the timeZonePage, we group Australasia on one page, and the rest of Oceania on the other.
1301                         try {
1302                             theSubContinent = subcontinent.equals("053") ? "053" : "009"; // was Integer.valueOf(subcontinent) == 53
1303                         } catch (NumberFormatException ex) {
1304                             theSubContinent = "009";
1305                         }
1306                         return englishFile.getName(CLDRFile.TERRITORY_NAME, theSubContinent);
1307                     case 19: // Americas - For the timeZonePage, we just group North America & South America
1308                         theSubContinent = Integer.valueOf(subcontinent) == 5 ? "005" : "003";
1309                         return englishFile.getName(CLDRFile.TERRITORY_NAME, theSubContinent);
1310                     case 142: // Asia
1311                         return englishFile.getName(CLDRFile.TERRITORY_NAME, subcontinent);
1312                     default:
1313                         return englishFile.getName(CLDRFile.TERRITORY_NAME, theContinent);
1314                     }
1315                 }
1316             });
1317 
1318             functionMap.put("timezoneSorting", new Transform<String, String>() {
1319                 public String transform(String source) {
1320                     final List<String> codeValues = Arrays.asList(
1321                         "generic-long",
1322                         "generic-short",
1323                         "standard-long",
1324                         "standard-short",
1325                         "daylight-long",
1326                         "daylight-short");
1327                     if (codeValues.contains(source)) {
1328                         order = codeValues.indexOf(source);
1329                     } else {
1330                         order = codeValues.size();
1331                     }
1332                     return source;
1333                 }
1334             });
1335 
1336             functionMap.put("tzdpField", new Transform<String, String>() {
1337                 public String transform(String source) {
1338                     Map<String, String> fieldNames = Builder.with(new HashMap<String, String>())
1339                         .put("regionFormat", "Region Format - Generic")
1340                         .put("regionFormat-standard", "Region Format - Standard")
1341                         .put("regionFormat-daylight", "Region Format - Daylight")
1342                         .put("gmtFormat", "GMT Format")
1343                         .put("hourFormat", "GMT Hours/Minutes Format")
1344                         .put("gmtZeroFormat", "GMT Zero Format")
1345                         .put("fallbackFormat", "Location Fallback Format")
1346                         .freeze();
1347                     final List<String> fieldOrder = Arrays.asList(
1348                         "regionFormat",
1349                         "regionFormat-standard",
1350                         "regionFormat-daylight",
1351                         "gmtFormat",
1352                         "hourFormat",
1353                         "gmtZeroFormat",
1354                         "fallbackFormat");
1355 
1356                     if (fieldOrder.contains(source)) {
1357                         order = fieldOrder.indexOf(source);
1358                     } else {
1359                         order = fieldOrder.size();
1360                     }
1361 
1362                     String result = fieldNames.get(source);
1363                     return result == null ? source : result;
1364                 }
1365             });
1366             functionMap.put("unit", new Transform<String, String>() {
1367                 public String transform(String source) {
1368                     int m = unitOrder.indexOf(source);
1369                     order = m;
1370                     return source.substring(source.indexOf('-') + 1);
1371                 }
1372             });
1373 
1374             functionMap.put("numericSort", new Transform<String, String>() {
1375                 // Probably only works well for small values, like -5 through +4.
1376                 public String transform(String source) {
1377                     Integer pos = Integer.valueOf(source) + 5;
1378                     suborder = new SubstringOrder(pos.toString());
1379                     return source;
1380                 }
1381             });
1382 
1383             functionMap.put("metazone", new Transform<String, String>() {
1384 
1385                 public String transform(String source) {
1386                     if (PathHeader.UNIFORM_CONTINENTS) {
1387                         String container = getMetazonePageTerritory(source);
1388                         order = Containment.getOrder(container);
1389                         return englishFile.getName(CLDRFile.TERRITORY_NAME, container);
1390                     } else {
1391                         String continent = metazoneToContinent.get(source);
1392                         if (continent == null) {
1393                             continent = "UnknownT";
1394                         }
1395                         return continent;
1396                     }
1397                 }
1398             });
1399 
1400             Object[][] ctto = {
1401                 { "BUK", "MM" },
1402                 { "CSD", "RS" },
1403                 { "CSK", "CZ" },
1404                 { "DDM", "DE" },
1405                 { "EUR", "ZZ" },
1406                 { "RHD", "ZW" },
1407                 { "SUR", "RU" },
1408                 { "TPE", "TL" },
1409                 { "XAG", "ZZ" },
1410                 { "XAU", "ZZ" },
1411                 { "XBA", "ZZ" },
1412                 { "XBB", "ZZ" },
1413                 { "XBC", "ZZ" },
1414                 { "XBD", "ZZ" },
1415                 { "XDR", "ZZ" },
1416                 { "XEU", "ZZ" },
1417                 { "XFO", "ZZ" },
1418                 { "XFU", "ZZ" },
1419                 { "XPD", "ZZ" },
1420                 { "XPT", "ZZ" },
1421                 { "XRE", "ZZ" },
1422                 { "XSU", "ZZ" },
1423                 { "XTS", "ZZ" },
1424                 { "XUA", "ZZ" },
1425                 { "XXX", "ZZ" },
1426                 { "YDD", "YE" },
1427                 { "YUD", "RS" },
1428                 { "YUM", "RS" },
1429                 { "YUN", "RS" },
1430                 { "YUR", "RS" },
1431                 { "ZRN", "CD" },
1432                 { "ZRZ", "CD" },
1433             };
1434 
1435             Object[][] sctc = {
1436                 { "Northern America", "North America (C)" },
1437                 { "Central America", "North America (C)" },
1438                 { "Caribbean", "North America (C)" },
1439                 { "South America", "South America (C)" },
1440                 { "Northern Africa", "Northern Africa" },
1441                 { "Western Africa", "Western Africa" },
1442                 { "Middle Africa", "Middle Africa" },
1443                 { "Eastern Africa", "Eastern Africa" },
1444                 { "Southern Africa", "Southern Africa" },
1445                 { "Europe", "Northern/Western Europe" },
1446                 { "Northern Europe", "Northern/Western Europe" },
1447                 { "Western Europe", "Northern/Western Europe" },
1448                 { "Eastern Europe", "Southern/Eastern Europe" },
1449                 { "Southern Europe", "Southern/Eastern Europe" },
1450                 { "Western Asia", "Western Asia (C)" },
1451                 { "Central Asia", "Central Asia (C)" },
1452                 { "Eastern Asia", "Eastern Asia (C)" },
1453                 { "Southern Asia", "Southern Asia (C)" },
1454                 { "Southeast Asia", "Southeast Asia (C)" },
1455                 { "Australasia", "Oceania (C)" },
1456                 { "Melanesia", "Oceania (C)" },
1457                 { "Micronesian Region", "Oceania (C)" }, // HACK
1458                 { "Polynesia", "Oceania (C)" },
1459                 { "Unknown Region", "Unknown Region (C)" },
1460             };
1461 
1462             final Map<String, String> currencyToTerritoryOverrides = CldrUtility.asMap(ctto);
1463             final Map<String, String> subContinentToContinent = CldrUtility.asMap(sctc);
1464             final Set<String> fundCurrencies = new HashSet<String>(Arrays.asList("CHE", "CHW", "CLF", "COU", "ECV", "MXV", "USN", "USS", "UYI", "XEU", "ZAL"));
1465             final Set<String> offshoreCurrencies = new HashSet<String>(Arrays.asList("CNH"));
1466             // TODO: Put this into supplementalDataInfo ?
1467 
1468             functionMap.put("categoryFromCurrency", new Transform<String, String>() {
1469                 public String transform(String source0) {
1470                     String tenderOrNot = "";
1471                     String territory = likelySubtags.getLikelyTerritoryFromCurrency(source0);
1472                     if (territory == null) {
1473                         String tag;
1474                         if (fundCurrencies.contains(source0)) {
1475                             tag = " (fund)";
1476                         } else if (offshoreCurrencies.contains(source0)) {
1477                             tag = " (offshore)";
1478                         } else {
1479                             tag = " (old)";
1480                         }
1481                         tenderOrNot = ": " + source0 + tag;
1482                     }
1483                     if (currencyToTerritoryOverrides.keySet().contains(source0)) {
1484                         territory = currencyToTerritoryOverrides.get(source0);
1485                     } else if (territory == null) {
1486                         territory = source0.substring(0, 2);
1487                     }
1488 
1489                     if (territory.equals("ZZ")) {
1490                         order = 999;
1491                         return englishFile.getName(CLDRFile.TERRITORY_NAME, territory) + ": " + source0;
1492                     } else {
1493                         return catFromTerritory.transform(territory) + ": "
1494                             + englishFile.getName(CLDRFile.TERRITORY_NAME, territory)
1495                             + tenderOrNot;
1496                     }
1497                 }
1498             });
1499             functionMap.put("continentFromCurrency", new Transform<String, String>() {
1500                 public String transform(String source0) {
1501                     String subContinent;
1502                     String territory = likelySubtags.getLikelyTerritoryFromCurrency(source0);
1503                     if (currencyToTerritoryOverrides.keySet().contains(source0)) {
1504                         territory = currencyToTerritoryOverrides.get(source0);
1505                     } else if (territory == null) {
1506                         territory = source0.substring(0, 2);
1507                     }
1508 
1509                     if (territory.equals("ZZ")) {
1510                         order = 999;
1511                         subContinent = englishFile.getName(CLDRFile.TERRITORY_NAME, territory);
1512                     } else {
1513                         subContinent = catFromTerritory.transform(territory);
1514                     }
1515 
1516                     String result = subContinentToContinent.get(subContinent); //the continent is the last word in the territory representation
1517                     return result;
1518                 }
1519             });
1520             functionMap.put("numberingSystem", new Transform<String, String>() {
1521                 public String transform(String source0) {
1522                     if ("latn".equals(source0)) {
1523                         return "";
1524                     }
1525                     String displayName = englishFile.getStringValue("//ldml/localeDisplayNames/types/type[@key=\"numbers\"][@type=\""
1526                         + source0 + "\"]");
1527                     return "using " + (displayName == null ? source0 : displayName + " (" + source0 + ")");
1528                 }
1529             });
1530 
1531             functionMap.put("datefield", new Transform<String, String>() {
1532                 private final String[] datefield = {
1533                     "era", "era-short", "era-narrow",
1534                     "century", "century-short", "century-narrow",
1535                     "year", "year-short", "year-narrow",
1536                     "quarter", "quarter-short", "quarter-narrow",
1537                     "month", "month-short", "month-narrow",
1538                     "week", "week-short", "week-narrow",
1539                     "weekOfMonth", "weekOfMonth-short", "weekOfMonth-narrow",
1540                     "day", "day-short", "day-narrow",
1541                     "dayOfYear", "dayOfYear-short", "dayOfYear-narrow",
1542                     "weekday", "weekday-short", "weekday-narrow",
1543                     "weekdayOfMonth", "weekdayOfMonth-short", "weekdayOfMonth-narrow",
1544                     "dayperiod", "dayperiod-short", "dayperiod-narrow",
1545                     "zone", "zone-short", "zone-narrow",
1546                     "hour", "hour-short", "hour-narrow",
1547                     "minute", "minute-short", "minute-narrow",
1548                     "second", "second-short", "second-narrow",
1549                     "millisecond", "millisecond-short", "millisecond-narrow",
1550                     "microsecond", "microsecond-short", "microsecond-narrow",
1551                     "nanosecond", "nanosecond-short", "nanosecond-narrow",
1552 
1553                 };
1554 
1555                 public String transform(String source) {
1556                     order = getIndex(source, datefield);
1557                     return source;
1558                 }
1559             });
1560             // //ldml/dates/fields/field[@type="%A"]/relative[@type="%A"]
1561             functionMap.put("relativeDate", new Transform<String, String>() {
1562                 private final String[] relativeDateField = {
1563                     "year", "year-short", "year-narrow",
1564                     "quarter", "quarter-short", "quarter-narrow",
1565                     "month", "month-short", "month-narrow",
1566                     "week", "week-short", "week-narrow",
1567                     "day", "day-short", "day-narrow",
1568                     "hour", "hour-short", "hour-narrow",
1569                     "minute", "minute-short", "minute-narrow",
1570                     "second", "second-short", "second-narrow",
1571                     "sun", "sun-short", "sun-narrow",
1572                     "mon", "mon-short", "mon-narrow",
1573                     "tue", "tue-short", "tue-narrow",
1574                     "wed", "wed-short", "wed-narrow",
1575                     "thu", "thu-short", "thu-narrow",
1576                     "fri", "fri-short", "fri-narrow",
1577                     "sat", "sat-short", "sat-narrow",
1578                 };
1579                 private final String[] longNames = {
1580                     "Year", "Year Short", "Year Narrow",
1581                     "Quarter", "Quarter Short", "Quarter Narrow",
1582                     "Month", "Month Short", "Month Narrow",
1583                     "Week", "Week Short", "Week Narrow",
1584                     "Day", "Day Short", "Day Narrow",
1585                     "Hour", "Hour Short", "Hour Narrow",
1586                     "Minute", "Minute Short", "Minute Narrow",
1587                     "Second", "Second Short", "Second Narrow",
1588                     "Sunday", "Sunday Short", "Sunday Narrow",
1589                     "Monday", "Monday Short", "Monday Narrow",
1590                     "Tuesday", "Tuesday Short", "Tuesday Narrow",
1591                     "Wednesday", "Wednesday Short", "Wednesday Narrow",
1592                     "Thursday", "Thursday Short", "Thursday Narrow",
1593                     "Friday", "Friday Short", "Friday Narrow",
1594                     "Saturday", "Saturday Short", "Saturday Narrow",
1595                 };
1596 
1597                 public String transform(String source) {
1598                     order = getIndex(source, relativeDateField) + 100;
1599                     return "Relative " + longNames[getIndex(source, relativeDateField)];
1600                 }
1601             });
1602             // Sorts numberSystem items (except for decimal formats).
1603             functionMap.put("number", new Transform<String, String>() {
1604                 private final String[] symbols = { "decimal", "group",
1605                     "plusSign", "minusSign", "percentSign", "perMille",
1606                     "exponential", "superscriptingExponent",
1607                     "infinity", "nan", "list", "currencies"
1608                 };
1609 
1610                 public String transform(String source) {
1611                     String[] parts = source.split("-");
1612                     order = getIndex(parts[0], symbols);
1613                     // e.g. "currencies-one"
1614                     if (parts.length > 1) {
1615                         suborder = new SubstringOrder(parts[1]);
1616                     }
1617                     return source;
1618                 }
1619             });
1620             functionMap.put("numberFormat", new Transform<String, String>() {
1621                 public String transform(String source) {
1622                     final List<String> fieldOrder = Arrays.asList(
1623                         "standard-decimal",
1624                         "standard-currency",
1625                         "standard-currency-accounting",
1626                         "standard-percent",
1627                         "standard-scientific");
1628 
1629                     if (fieldOrder.contains(source)) {
1630                         order = fieldOrder.indexOf(source);
1631                     } else {
1632                         order = fieldOrder.size();
1633                     }
1634 
1635                     return source;
1636                 }
1637             });
1638 
1639             functionMap.put("localePattern", new Transform<String, String>() {
1640                 public String transform(String source) {
1641                     // Put localeKeyTypePattern behind localePattern and
1642                     // localeSeparator.
1643                     if (source.equals("localeKeyTypePattern")) {
1644                         order = 10;
1645                     }
1646                     return source;
1647                 }
1648             });
1649             functionMap.put("listOrder", new Transform<String, String>() {
1650                 private String[] listParts = { "2", "start", "middle", "end" };
1651 
1652                 @Override
1653                 public String transform(String source) {
1654                     order = getIndex(source, listParts);
1655                     return source;
1656                 }
1657             });
1658             functionMap.put("alphaOrder", new Transform<String, String>() {
1659                 @Override
1660                 public String transform(String source) {
1661                     order = 0;
1662                     return source;
1663                 }
1664             });
1665             functionMap.put("transform", new Transform<String, String>() {
1666                 Splitter commas = Splitter.on(',').trimResults();
1667 
1668                 @Override
1669                 public String transform(String source) {
1670                     List<String> parts = commas.splitToList(source);
1671                     return parts.get(1)
1672                         + (parts.get(0).equals("both") ? "↔︎" : "→")
1673                         + parts.get(2)
1674                         + (parts.size() > 3 ? "/" + parts.get(3) : "");
1675                 }
1676             });
1677             functionMap.put("major", new Transform<String, String>() {
1678                 @Override
1679                 public String transform(String source) {
1680                     String major = Emoji.getMajorCategory(source);
1681                     // check that result is reasonable by running through PageId.
1682                     switch(major) {
1683                     default:
1684                         PageId pageId2 = PageId.forString(major);
1685                         if (pageId2.getSectionId() != SectionId.Characters) {
1686                             if (pageId2 == PageId.Symbols) {
1687                                 pageId2 = PageId.Symbols2;
1688                             }
1689                         }
1690                         return pageId2.toString();
1691                     case "Smileys & People":
1692                         String minorCat = Emoji.getMinorCategory(source);
1693                         if (minorCat.equals("skin-tone") || minorCat.equals("hair-style")) {
1694                             return PageId.Component.toString();
1695                         } else if (!minorCat.contains("face")) {
1696                             return PageId.People.toString();
1697                         } else {
1698                             return PageId.Smileys.toString();
1699                         }
1700                     }
1701                 }
1702             });
1703             functionMap.put("minor", new Transform<String, String>() {
1704                 @Override
1705                 public String transform(String source) {
1706                     String minorCat = Emoji.getMinorCategory(source);
1707                     order = Emoji.getMinorToOrder(minorCat);
1708                     return minorCat;
1709                 }
1710             });
1711 
1712         }
1713 
1714         private static int getIndex(String item, String[] array) {
1715             for (int i = 0; i < array.length; i++) {
1716                 if (item.equals(array[i])) {
1717                     return i;
1718                 }
1719             }
1720             return -1;
1721         }
1722 
1723         private static String getEnglishFirstLetter(String s) {
1724             String languageOnlyPart;
1725             int underscorePos = s.indexOf("_");
1726             if (underscorePos > 0) {
1727                 languageOnlyPart = s.substring(0, underscorePos);
1728             } else {
1729                 languageOnlyPart = s;
1730             }
1731             return englishFile.getName(CLDRFile.LANGUAGE_NAME, languageOnlyPart).substring(0, 1).toUpperCase();
1732         }
1733 
1734         static class HyphenSplitter {
1735             String main;
1736             String extras;
1737 
1738             String split(String source) {
1739                 int hyphenPos = source.indexOf('-');
1740                 if (hyphenPos < 0) {
1741                     main = source;
1742                     extras = "";
1743                 } else {
1744                     main = source.substring(0, hyphenPos);
1745                     extras = source.substring(hyphenPos);
1746                 }
1747                 return main;
1748             }
1749         }
1750 
1751         /**
1752          * This converts "functions", like &month, and sets the order.
1753          *
1754          * @param input
1755          * @param order
1756          * @return
1757          */
1758         private static String fix(String input, int orderIn) {
1759             if (input.contains("��")) {
1760                 int debug = 0;
1761             }
1762             String oldInput = input;
1763             input = RegexLookup.replace(input, args.value);
1764             order = orderIn;
1765             suborder = null;
1766             int pos = 0;
1767             while (true) {
1768                 int functionStart = input.indexOf('&', pos);
1769                 if (functionStart < 0) {
1770                     return input;
1771                 }
1772                 int functionEnd = input.indexOf('(', functionStart);
1773                 int argEnd = input.indexOf(')', functionEnd);
1774                 Transform<String, String> func = functionMap.get(input.substring(functionStart + 1,
1775                     functionEnd));
1776                 final String arg = input.substring(functionEnd + 1, argEnd);
1777                 String temp = func.transform(arg);
1778                 if (temp == null) {
1779                     func.transform(arg);
1780                     throw new IllegalArgumentException("Function returns invalid results for «" + arg + "».");
1781                 }
1782                 input = input.substring(0, functionStart) + temp + input.substring(argEnd + 1);
1783                 pos = functionStart + temp.length();
1784             }
1785         }
1786 
1787         /**
1788          * Collect all the paths for a CLDRFile, and make sure that they have
1789          * cached PathHeaders
1790          *
1791          * @param file
1792          * @return immutable set of paths in the file
1793          */
1794         public Set<String> pathsForFile(CLDRFile file) {
1795             // make sure we cache all the path headers
1796             Set<String> filePaths = CollectionUtilities.addAll(file.fullIterable().iterator(), new HashSet<String>());
1797             for (String path : filePaths) {
1798                 try {
1799                     fromPath(path); // call to make sure cached
1800                 } catch (Throwable t) {
1801                     // ... some other exception
1802                 }
1803             }
1804             return Collections.unmodifiableSet(filePaths);
1805         }
1806 
1807         /**
1808          * Returns those regexes that were never matched.
1809          * @return
1810          */
1811         public Set<String> getUnmatchedRegexes() {
1812             Map<String, RawData> outputUnmatched = new LinkedHashMap<String, RawData>();
1813             lookup.getUnmatchedPatterns(matchersFound, outputUnmatched);
1814             return outputUnmatched.keySet();
1815         }
1816 
1817         public String getRegexInfo() {
1818             return lookup.toString();
1819         }
1820     }
1821 
1822     /**
1823      * Return the territory used for the title of the Metazone page in the
1824      * Survey Tool.
1825      *
1826      * @param source
1827      * @return
1828      */
1829     public static String getMetazonePageTerritory(String source) {
1830         String result = metazoneToPageTerritory.get(source);
1831         return result == null ? "ZZ" : result;
1832     }
1833 
    // Fixed sequence of count keys: "displayName", the plural categories zero..other, then "per".
    // NOTE(review): presumably the position in this list is used for ordering count-based items — confirm against its users.
    private static final List<String> COUNTS = Arrays.asList("displayName", "zero", "one", "two", "few", "many", "other", "per");
1835 
1836     private static int alphabeticCompare(String aa, String bb) {
1837         // A frozen Collator is thread-safe.
1838         return alphabetic.compare(aa, bb);
1839     }
1840 
1841     public enum BaseUrl {
1842         //http://st.unicode.org/smoketest/survey?_=af&strid=55053dffac611328
1843         //http://st.unicode.org/cldr-apps/survey?_=en&strid=3cd31261bf6738e1
1844         SMOKE("http://st.unicode.org/smoketest/survey"), PRODUCTION("http://st.unicode.org/cldr-apps/survey");
1845         final String base;
1846 
1847         private BaseUrl(String url) {
1848             base = url;
1849         }
1850     }
1851 
1852     /**
1853      * @deprecated, use CLDRConfig.urls().forPathHeader() instead.
1854      * @param baseUrl
1855      * @param locale
1856      * @return
1857      */
1858     public String getUrl(BaseUrl baseUrl, String locale) {
1859         return getUrl(baseUrl.base, locale);
1860     }
1861 
1862     /**
1863      * @deprecated, use CLDRConfig.urls().forPathHeader() instead.
1864      * @param baseUrl
1865      * @param locale
1866      * @return
1867      */
1868     public String getUrl(String baseUrl, String locale) {
1869         return getUrl(baseUrl, locale, getOriginalPath());
1870     }
1871 
1872     /**
1873      * Map http://st.unicode.org/smoketest/survey  to http://st.unicode.org/smoketest etc
1874      * @param str
1875      * @return
1876      */
1877     public static String trimLast(String str) {
1878         int n = str.lastIndexOf('/');
1879         if (n == -1) return "";
1880         return str.substring(0, n + 1);
1881     }
1882 
1883     /**
1884      * @deprecated use CLDRConfig.urls()
1885      * @param baseUrl
1886      * @param locale
1887      * @param path
1888      * @return
1889      */
1890     public static String getUrl(String baseUrl, String locale, String path) {
1891         return trimLast(baseUrl) + "v#/" + locale + "//" + StringId.getHexId(path);
1892     }
1893 
1894     // eg http://st.unicode.org/cldr-apps/survey?_=fr&x=Locale%20Name%20Patterns
1895     /**
1896      * @deprecated use CLDRConfig.urls()
1897      * @param baseUrl
1898      * @param locale
1899      * @param subsection
1900      * @return
1901      */
1902     public static String getPageUrl(String baseUrl, String locale, PageId subsection) {
1903         return trimLast(baseUrl) + "v#/" + locale + "/" + subsection + "/";
1904     }
1905 
1906     /**
1907      * @deprecated use CLDRConfig.urls()
1908      * @param baseUrl
1909      * @param file
1910      * @param path
1911      * @return
1912      */
1913     public static String getLinkedView(String baseUrl, CLDRFile file, String path) {
1914         String value = file.getStringValue(path);
1915         if (value == null) {
1916             return null;
1917         }
1918         return SECTION_LINK + PathHeader.getUrl(baseUrl, file.getLocaleID(), path) + "'><em>view</em></a>";
1919     }
1920 
1921     /**
1922      * If a subdivision, return the (uppercased) territory and if suffix != null, the suffix. Otherwise return the input as is.
1923      * @param input
1924      * @param suffix
1925      * @return
1926      */
1927     private static String getSubdivisionsTerritory(String input, Output<String> suffix) {
1928         String theTerritory;
1929         if (StandardCodes.LstrType.subdivision.isWellFormed(input)) {
1930             int territoryEnd = input.charAt(0) < 'A' ? 3 : 2;
1931             theTerritory = input.substring(0, territoryEnd).toUpperCase(Locale.ROOT);
1932             if (suffix != null) {
1933                 suffix.value = input.substring(territoryEnd);
1934             }
1935         } else {
1936             theTerritory = input;
1937             if (suffix != null) {
1938                 suffix.value = "";
1939             }
1940         }
1941         return theTerritory;
1942     }
1943 }
1944