1 /*
2  *******************************************************************************
3  * Copyright (C) 2011-2015, International Business Machines Corporation        *
4  * All Rights Reserved.                                                        *
5  *******************************************************************************
6  */
7 package com.ibm.icu.util;
8 
9 import java.util.ArrayList;
10 import java.util.Arrays;
11 import java.util.Collections;
12 import java.util.HashMap;
13 import java.util.List;
14 import java.util.Map;
15 import java.util.Set;
16 import java.util.TreeSet;
17 
18 import com.ibm.icu.impl.ICUResourceBundle;
19 
20 /**
 * <code>Region</code> is the class representing a Unicode Region Code, also known as a
 * Unicode Region Subtag, which is defined based upon the BCP 47 standard. We often think of
 * "regions" as "countries" when defining the characteristics of a locale.  There are different
 * types of region codes that are important to distinguish.
25  * <p>
 *  Macroregion - A code for a "macro geographical (continental) region, geographical sub-region, or
 *  selected economic and other grouping" as defined in
 *  UN M.49 (http://unstats.un.org/unsd/methods/m49/m49regin.htm).
 *  These are typically 3-digit codes, but contain some 2-letter codes, such as the LDML code QO
 *  added for Outlying Oceania.  Not all UN M.49 codes are defined in LDML, but most of them are.
 *  Macroregions are represented in ICU by one of three region types: WORLD (region code 001),
 *  CONTINENT (regions contained directly by WORLD), and SUBCONTINENT (regions contained directly
 *  by a continent).
34  *  <p>
35  *  TERRITORY - A Region that is not a Macroregion. These are typically codes for countries, but also
36  *  include areas that are not separate countries, such as the code "AQ" for Antarctica or the code
37  *  "HK" for Hong Kong (SAR China). Overseas dependencies of countries may or may not have separate
38  *  codes. The codes are typically 2-letter codes aligned with the ISO 3166 standard, but BCP47 allows
39  *  for the use of 3-digit codes in the future.
40  *  <p>
41  *  UNKNOWN - The code ZZ is defined by Unicode LDML for use to indicate that the Region is unknown,
42  *  or that the value supplied as a region was invalid.
43  *  <p>
44  *  DEPRECATED - Region codes that have been defined in the past but are no longer in modern usage,
45  *  usually due to a country splitting into multiple territories or changing its name.
46  *  <p>
 *  GROUPING - A widely understood grouping of territories that has a well defined membership such
 *  that a region code has been assigned for it.  Some of these are UN M.49 codes that don't fall into
 *  the world/continent/sub-continent hierarchy, while others are just well known groupings that have
 *  their own region code. Region "EU" (European Union) is one such region code that is a grouping.
 *  Groupings will never be returned by the getContainingRegion() API, since a different type of region
 *  (WORLD, CONTINENT, or SUBCONTINENT) will always be the containing region instead.
53  *
54  * @author       John Emmons
55  * @stable ICU 50
56  */
57 
58 public class Region implements Comparable<Region> {
59 
60     /**
61      * RegionType is an enumeration defining the different types of regions.  Current possible
62      * values are WORLD, CONTINENT, SUBCONTINENT, TERRITORY, GROUPING, DEPRECATED, and UNKNOWN.
63      *
64      * @stable ICU 50
65      */
66 
67     public enum RegionType {
68         /**
69          * Type representing the unknown region.
70          * @stable ICU 50
71          */
72         UNKNOWN,
73 
74         /**
75          * Type representing a territory.
76          * @stable ICU 50
77          */
78         TERRITORY,
79 
80         /**
81          * Type representing the whole world.
82          * @stable ICU 50
83          */
84         WORLD,
85         /**
86          * Type representing a continent.
87          * @stable ICU 50
88          */
89         CONTINENT,
90         /**
91          * Type representing a sub-continent.
92          * @stable ICU 50
93          */
94         SUBCONTINENT,
95         /**
96          * Type representing a grouping of territories that is not to be used in
97          * the normal WORLD/CONTINENT/SUBCONTINENT/TERRITORY containment tree.
98          * @stable ICU 50
99          */
100         GROUPING,
101         /**
102          * Type representing a region whose code has been deprecated, usually
103          * due to a country splitting into multiple territories or changing its name.
104          * @stable ICU 50
105          */
106         DEPRECATED,
107     }
108 
109     private String id;
110     private int code;
111     private RegionType type;
112     private Region containingRegion = null;
113     private Set<Region> containedRegions = new TreeSet<Region>();
114     private List<Region> preferredValues = null;
115 
116     private static boolean regionDataIsLoaded = false;
117 
118     private static Map<String,Region> regionIDMap = null;       // Map from ID the regions
119     private static Map<Integer,Region> numericCodeMap = null;   // Map from numeric code to the regions
120     private static Map<String,Region> regionAliases = null;     // Aliases
121 
122     private static ArrayList<Region> regions = null;            // This is the main data structure where the Regions are stored.
123     private static ArrayList<Set<Region>> availableRegions = null;
124 
125     private static final String UNKNOWN_REGION_ID = "ZZ";
126     private static final String OUTLYING_OCEANIA_REGION_ID = "QO";
127     private static final String WORLD_ID = "001";
128 
129     /*
130      * Private default constructor.  Use factory methods only.
131      */
Region()132     private Region () {}
133 
134     /*
135      * Initializes the region data from the ICU resource bundles.  The region data
136      * contains the basic relationships such as which regions are known, what the numeric
137      * codes are, any known aliases, and the territory containment data.
138      *
139      * If the region data has already loaded, then this method simply returns without doing
140      * anything meaningful.
141      *
142      */
loadRegionData()143     private static synchronized void loadRegionData() {
144 
145         if ( regionDataIsLoaded ) {
146             return;
147         }
148 
149         regionAliases = new HashMap<String,Region>();
150         regionIDMap = new HashMap<String,Region>();
151         numericCodeMap = new HashMap<Integer,Region>();
152 
153         availableRegions = new ArrayList<Set<Region>>(RegionType.values().length);
154 
155 
156         UResourceBundle regionCodes = null;
157         UResourceBundle metadataAlias = null;
158         UResourceBundle territoryAlias = null;
159         UResourceBundle codeMappings = null;
160         UResourceBundle worldContainment = null;
161         UResourceBundle territoryContainment = null;
162         UResourceBundle groupingContainment = null;
163 
164         UResourceBundle metadata = UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BASE_NAME,"metadata",ICUResourceBundle.ICU_DATA_CLASS_LOADER);
165         regionCodes = metadata.get("regionCodes");
166         metadataAlias = metadata.get("alias");
167         territoryAlias = metadataAlias.get("territory");
168 
169         UResourceBundle supplementalData = UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_BASE_NAME,"supplementalData", ICUResourceBundle.ICU_DATA_CLASS_LOADER);
170         codeMappings = supplementalData.get("codeMappings");
171 
172         territoryContainment = supplementalData.get("territoryContainment");
173         worldContainment = territoryContainment.get("001");
174         groupingContainment = territoryContainment.get("grouping");
175 
176         String[] continentsArr = worldContainment.getStringArray();
177         List<String> continents = Arrays.asList(continentsArr);
178         String[] groupingArr = groupingContainment.getStringArray();
179         List<String> groupings = Arrays.asList(groupingArr);
180 
181         // First process the region codes and create the master array of regions.
182         int regionCodeSize = regionCodes.getSize();
183         regions = new ArrayList<Region>(regionCodeSize);
184         for ( int i = 0 ; i < regionCodeSize ; i++ ) {
185             Region r = new Region();
186             String id = regionCodes.getString(i);
187             r.id = id;
188             r.type = RegionType.TERRITORY; // Only temporary - figure out the real type later once the aliases are known.
189             regionIDMap.put(id, r);
190             if ( id.matches("[0-9]{3}")) {
191                 r.code = Integer.valueOf(id).intValue();
192                 numericCodeMap.put(r.code, r);
193                 r.type = RegionType.SUBCONTINENT;
194             } else {
195                 r.code = -1;
196             }
197             regions.add(r);
198         }
199 
200 
201         // Process the territory aliases
202         for ( int i = 0 ; i < territoryAlias.getSize(); i++ ) {
203             UResourceBundle res = territoryAlias.get(i);
204             String aliasFrom = res.getKey();
205             String aliasTo = res.get("replacement").getString();
206 
207             if ( regionIDMap.containsKey(aliasTo) && !regionIDMap.containsKey(aliasFrom) ) { // This is just an alias from some string to a region
208                 regionAliases.put(aliasFrom, regionIDMap.get(aliasTo));
209             } else {
210                 Region r;
211                 if ( regionIDMap.containsKey(aliasFrom) ) {  // This is a deprecated region
212                     r = regionIDMap.get(aliasFrom);
213                 } else { // Deprecated region code not in the master codes list - so need to create a deprecated region for it.
214                     r = new Region();
215                     r.id = aliasFrom;
216                     regionIDMap.put(aliasFrom, r);
217                     if ( aliasFrom.matches("[0-9]{3}")) {
218                         r.code = Integer.valueOf(aliasFrom).intValue();
219                         numericCodeMap.put(r.code, r);
220                     } else {
221                         r.code = -1;
222                     }
223                     regions.add(r);
224                 }
225                 r.type = RegionType.DEPRECATED;
226                 List<String> aliasToRegionStrings = Arrays.asList(aliasTo.split(" "));
227                 r.preferredValues = new ArrayList<Region>();
228                 for ( String s : aliasToRegionStrings ) {
229                     if (regionIDMap.containsKey(s)) {
230                         r.preferredValues.add(regionIDMap.get(s));
231                     }
232                 }
233             }
234         }
235 
236         // Process the code mappings - This will allow us to assign numeric codes to most of the territories.
237         for ( int i = 0 ; i < codeMappings.getSize(); i++ ) {
238             UResourceBundle mapping = codeMappings.get(i);
239             if ( mapping.getType() == UResourceBundle.ARRAY ) {
240                 String [] codeMappingStrings = mapping.getStringArray();
241                 String codeMappingID = codeMappingStrings[0];
242                 Integer codeMappingNumber = Integer.valueOf(codeMappingStrings[1]);
243                 String codeMapping3Letter = codeMappingStrings[2];
244 
245                 if ( regionIDMap.containsKey(codeMappingID)) {
246                     Region r = regionIDMap.get(codeMappingID);
247                     r.code = codeMappingNumber.intValue();
248                     numericCodeMap.put(r.code, r);
249                     regionAliases.put(codeMapping3Letter, r);
250                 }
251             }
252         }
253 
254         // Now fill in the special cases for WORLD, UNKNOWN, CONTINENTS, and GROUPINGS
255         Region r;
256         if ( regionIDMap.containsKey(WORLD_ID)) {
257             r = regionIDMap.get(WORLD_ID);
258             r.type = RegionType.WORLD;
259         }
260 
261         if ( regionIDMap.containsKey(UNKNOWN_REGION_ID)) {
262             r = regionIDMap.get(UNKNOWN_REGION_ID);
263             r.type = RegionType.UNKNOWN;
264         }
265 
266         for ( String continent : continents ) {
267             if (regionIDMap.containsKey(continent)) {
268                 r = regionIDMap.get(continent);
269                 r.type = RegionType.CONTINENT;
270             }
271         }
272 
273         for ( String grouping : groupings ) {
274             if (regionIDMap.containsKey(grouping)) {
275                 r = regionIDMap.get(grouping);
276                 r.type = RegionType.GROUPING;
277             }
278         }
279 
280         // Special case: The region code "QO" (Outlying Oceania) is a subcontinent code added by CLDR
281         // even though it looks like a territory code.  Need to handle it here.
282 
283         if ( regionIDMap.containsKey(OUTLYING_OCEANIA_REGION_ID)) {
284             r = regionIDMap.get(OUTLYING_OCEANIA_REGION_ID);
285             r.type = RegionType.SUBCONTINENT;
286         }
287 
288         // Load territory containment info from the supplemental data.
289         for ( int i = 0 ; i < territoryContainment.getSize(); i++ ) {
290             UResourceBundle mapping = territoryContainment.get(i);
291             String parent = mapping.getKey();
292             if (parent.equals("containedGroupings") || parent.equals("deprecated")) {
293                 continue; // handle new pseudo-parent types added in ICU data per cldrbug 7808; for now just skip.
294                 // #11232 is to do something useful with these.
295             }
296             Region parentRegion = regionIDMap.get(parent);
297             for ( int j = 0 ; j < mapping.getSize(); j++ ) {
298                 String child = mapping.getString(j);
299                 Region childRegion = regionIDMap.get(child);
300                 if ( parentRegion != null && childRegion != null ) {
301 
302                     // Add the child region to the set of regions contained by the parent
303                     parentRegion.containedRegions.add(childRegion);
304 
305                     // Set the parent region to be the containing region of the child.
306                     // Regions of type GROUPING can't be set as the parent, since another region
307                     // such as a SUBCONTINENT, CONTINENT, or WORLD must always be the parent.
308                     if ( parentRegion.getType() != RegionType.GROUPING) {
309                         childRegion.containingRegion = parentRegion;
310                     }
311                 }
312             }
313         }
314 
315         // Create the availableRegions lists
316 
317         for (int i = 0 ; i < RegionType.values().length ; i++) {
318             availableRegions.add(new TreeSet<Region>());
319         }
320 
321         for ( Region ar : regions ) {
322             Set<Region> currentSet = availableRegions.get(ar.type.ordinal());
323             currentSet.add(ar);
324             availableRegions.set(ar.type.ordinal(),currentSet);
325         }
326 
327         regionDataIsLoaded = true;
328     }
329 
330     /** Returns a Region using the given region ID.  The region ID can be either a 2-letter ISO code,
331      * 3-letter ISO code,  UNM.49 numeric code, or other valid Unicode Region Code as defined by the CLDR.
332      * @param id The id of the region to be retrieved.
333      * @return The corresponding region.
334      * @throws NullPointerException if the supplied id is null.
335      * @throws IllegalArgumentException if the supplied ID cannot be canonicalized to a Region ID that is known by ICU.
336      * @stable ICU 50
337      */
338 
getInstance(String id)339     public static Region getInstance(String id) {
340 
341         if ( id == null ) {
342             throw new NullPointerException();
343         }
344 
345         loadRegionData();
346 
347         Region r = regionIDMap.get(id);
348 
349         if ( r == null ) {
350             r = regionAliases.get(id);
351         }
352 
353         if ( r == null ) {
354             throw new IllegalArgumentException("Unknown region id: " + id);
355         }
356 
357         if ( r.type == RegionType.DEPRECATED && r.preferredValues.size() == 1) {
358             r = r.preferredValues.get(0);
359         }
360 
361         return r;
362     }
363 
364 
365     /** Returns a Region using the given numeric code as defined by UNM.49
366      * @param code The numeric code of the region to be retrieved.
367      * @return The corresponding region.
368      * @throws IllegalArgumentException if the supplied numeric code is not recognized.
369      * @stable ICU 50
370      */
371 
getInstance(int code)372     public static Region getInstance(int code) {
373 
374         loadRegionData();
375 
376         Region r = numericCodeMap.get(code);
377 
378         if ( r == null ) { // Just in case there's an alias that's numeric, try to find it.
379             String pad = "";
380             if ( code < 10 ) {
381                 pad = "00";
382             } else if ( code < 100 ) {
383                 pad = "0";
384             }
385             String id = pad + Integer.toString(code);
386             r = regionAliases.get(id);
387         }
388 
389         if ( r == null ) {
390             throw new IllegalArgumentException("Unknown region code: " + code);
391         }
392 
393         if ( r.type == RegionType.DEPRECATED && r.preferredValues.size() == 1) {
394             r = r.preferredValues.get(0);
395         }
396 
397         return r;
398     }
399 
400 
401     /** Used to retrieve all available regions of a specific type.
402      *
403      * @param type The type of regions to be returned ( TERRITORY, MACROREGION, etc. )
404      * @return An unmodifiable set of all known regions that match the given type.
405      * @stable ICU 50
406      */
407 
getAvailable(RegionType type)408     public static Set<Region> getAvailable(RegionType type) {
409 
410         loadRegionData();
411         return Collections.unmodifiableSet(availableRegions.get(type.ordinal()));
412     }
413 
414 
415     /** Used to determine the macroregion that geographically contains this region.
416      *
417      * @return The region that geographically contains this region.  Returns NULL if this region is
418      *  code "001" (World) or "ZZ" (Unknown region).  For example, calling this method with region "IT" (Italy)
419      *  returns the region "039" (Southern Europe).
420      * @stable ICU 50
421      */
422 
getContainingRegion()423     public Region getContainingRegion() {
424         loadRegionData();
425         return containingRegion;
426     }
427 
428     /** Used to determine the macroregion that geographically contains this region and that matches the given type.
429      *
430      * @return The region that geographically contains this region and matches the given type.  May return NULL if
431      *  no containing region can be found that matches the given type.  For example, calling this method with region "IT" (Italy)
432      *  and type CONTINENT returns the region "150" (Europe).
433      * @stable ICU 50
434      */
435 
getContainingRegion(RegionType type)436     public Region getContainingRegion(RegionType type) {
437         loadRegionData();
438         if ( containingRegion == null ) {
439             return null;
440         }
441         if ( containingRegion.type.equals(type)) {
442             return containingRegion;
443         } else {
444             return containingRegion.getContainingRegion(type);
445         }
446     }
447 
448     /** Used to determine the sub-regions that are contained within this region.
449      *
450      * @return An unmodifiable set containing all the regions that are immediate children
451      * of this region in the region hierarchy.  These returned regions could be either macro
452      * regions, territories, or a mixture of the two, depending on the containment data as defined
453      * in CLDR.  This API may return an empty set if this region doesn't have any sub-regions.
454      * For example, calling this method with region "150" (Europe) returns a set containing
455      * the various sub regions of Europe - "039" (Southern Europe) - "151" (Eastern Europe)
456      * - "154" (Northern Europe) and "155" (Western Europe).
457      *
458      * @stable ICU 50
459      */
460 
getContainedRegions()461     public Set<Region> getContainedRegions() {
462         loadRegionData();
463         return Collections.unmodifiableSet(containedRegions);
464     }
465 
466     /** Used to determine all the regions that are contained within this region and that match the given type
467      *
468      * @return An unmodifiable set containing all the regions that are children of this region
469      * anywhere in the region hierarchy and match the given type.  This API may return an empty set
470      * if this region doesn't have any sub-regions that match the given type.
471      * For example, calling this method with region "150" (Europe) and type "TERRITORY" returns a set
472      *  containing all the territories in Europe ( "FR" (France) - "IT" (Italy) - "DE" (Germany) etc. )
473      * @stable ICU 50
474      */
475 
getContainedRegions(RegionType type)476     public Set<Region> getContainedRegions(RegionType type) {
477 
478         loadRegionData();
479 
480         Set<Region> result = new TreeSet<Region>();
481         Set<Region> cr = getContainedRegions();
482 
483         for ( Region r : cr ) {
484             if ( r.getType() == type ) {
485                 result.add(r);
486             } else {
487                 result.addAll(r.getContainedRegions(type));
488             }
489         }
490         return Collections.unmodifiableSet(result);
491     }
492 
493     /**
494      * @return For deprecated regions, return an unmodifiable list of the regions that are the preferred replacement regions for this region.
495      * Returns null for a non-deprecated region.  For example, calling this method with region "SU" (Soviet Union) would
496      * return a list of the regions containing "RU" (Russia), "AM" (Armenia), "AZ" (Azerbaijan), etc...
497      *
498      * @stable ICU 50
499      */
getPreferredValues()500     public List<Region> getPreferredValues() {
501 
502         loadRegionData();
503 
504         if ( type == RegionType.DEPRECATED) {
505             return Collections.unmodifiableList(preferredValues);
506         } else {
507             return null;
508         }
509     }
510 
511     /**
512      * @return Returns true if this region contains the supplied other region anywhere in the region hierarchy.
513      *
514      * @stable ICU 50
515      */
contains(Region other)516     public boolean contains(Region other) {
517 
518         loadRegionData();
519 
520         if (containedRegions.contains(other)) {
521             return true;
522         } else {
523             for (Region cr : containedRegions) {
524                 if (cr.contains(other)) {
525                     return true;
526                 }
527             }
528         }
529 
530         return false;
531     }
532 
533     /** Returns the string representation of this region
534      *
535      * @return The string representation of this region, which is its ID.
536      *
537      * @stable ICU 50
538      */
539 
toString()540     public String toString() {
541         return id;
542     }
543 
544     /**
545      * Returns the numeric code for this region
546      *
547      * @return The numeric code for this region. Returns a negative value if the given region does not have a numeric
548      *         code assigned to it. This is a very rare case and only occurs for a few very small territories.
549      *
550      * @stable ICU 50
551      */
552 
getNumericCode()553     public int getNumericCode() {
554         return code;
555     }
556 
557     /** Returns this region's type.
558      *
559      * @return This region's type classification, such as MACROREGION or TERRITORY.
560      *
561      * @stable ICU 50
562      */
563 
getType()564     public RegionType getType() {
565         return type;
566     }
567 
568     /**
569      * {@inheritDoc}
570      * @stable ICU 50
571      */
compareTo(Region other)572     public int compareTo(Region other) {
573         return id.compareTo(other.id);
574     }
575 }
576