/*
 *******************************************************************************
 * Copyright (C) 2011-2015, International Business Machines Corporation        *
 * All Rights Reserved.                                                        *
 *******************************************************************************
 */
package com.ibm.icu.util;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Pattern;

import com.ibm.icu.impl.ICUResourceBundle;

/**
 * <code>Region</code> is the class representing a Unicode Region Code, also known as a
 * Unicode Region Subtag, which is defined based upon the BCP 47 standard. We often think of
 * "regions" as "countries" when defining the characteristics of a locale. There are different
 * types of region codes that are important to distinguish.
 * <p>
 *  Macroregion - A code for a "macro geographical (continental) region, geographical sub-region, or
 *  selected economic and other grouping" as defined in
 *  UN M.49 (http://unstats.un.org/unsd/methods/m49/m49regin.htm).
 *  These are typically 3-digit codes, but contain some 2-letter codes, such as the LDML code QO
 *  added for Outlying Oceania.  Not all UNM.49 codes are defined in LDML, but most of them are.
 *  Macroregions are represented in ICU by one of three region types: WORLD ( region code 001 ),
 *  CONTINENTS ( regions contained directly by WORLD ), and SUBCONTINENTS ( things contained directly
 *  by a continent ).
 *  <p>
 *  TERRITORY - A Region that is not a Macroregion. These are typically codes for countries, but also
 *  include areas that are not separate countries, such as the code "AQ" for Antarctica or the code
 *  "HK" for Hong Kong (SAR China). Overseas dependencies of countries may or may not have separate
 *  codes. The codes are typically 2-letter codes aligned with the ISO 3166 standard, but BCP47 allows
 *  for the use of 3-digit codes in the future.
 *  <p>
 *  UNKNOWN - The code ZZ is defined by Unicode LDML for use to indicate that the Region is unknown,
 *  or that the value supplied as a region was invalid.
 *  <p>
 *  DEPRECATED - Region codes that have been defined in the past but are no longer in modern usage,
 *  usually due to a country splitting into multiple territories or changing its name.
 *  <p>
 *  GROUPING - A widely understood grouping of territories that has a well defined membership such
 *  that a region code has been assigned for it.  Some of these are UNM.49 codes that don't fall into
 *  the world/continent/sub-continent hierarchy, while others are just well known groupings that have
 *  their own region code. Region "EU" (European Union) is one such region code that is a grouping.
 *  Groupings will never be returned by the getContainingRegion() API, since a different type of region
 *  ( WORLD, CONTINENT, or SUBCONTINENT ) will always be the containing region instead.
 *
 * @author       John Emmons
 * @stable ICU 50
 */
public class Region implements Comparable<Region> {

    /**
     * RegionType is an enumeration defining the different types of regions.  Current possible
     * values are WORLD, CONTINENT, SUBCONTINENT, TERRITORY, GROUPING, DEPRECATED, and UNKNOWN.
     *
     * @stable ICU 50
     */
    public enum RegionType {
        /**
         * Type representing the unknown region.
         * @stable ICU 50
         */
        UNKNOWN,

        /**
         * Type representing a territory.
         * @stable ICU 50
         */
        TERRITORY,

        /**
         * Type representing the whole world.
         * @stable ICU 50
         */
        WORLD,

        /**
         * Type representing a continent.
         * @stable ICU 50
         */
        CONTINENT,

        /**
         * Type representing a sub-continent.
         * @stable ICU 50
         */
        SUBCONTINENT,

        /**
         * Type representing a grouping of territories that is not to be used in
         * the normal WORLD/CONTINENT/SUBCONTINENT/TERRITORY containment tree.
         * @stable ICU 50
         */
        GROUPING,

        /**
         * Type representing a region whose code has been deprecated, usually
         * due to a country splitting into multiple territories or changing its name.
         * @stable ICU 50
         */
        DEPRECATED
    }

    /** Value stored in {@link #code} when a region has no numeric code. */
    private static final int NO_NUMERIC_CODE = -1;

    private static final String UNKNOWN_REGION_ID = "ZZ";
    private static final String OUTLYING_OCEANIA_REGION_ID = "QO";
    private static final String WORLD_ID = "001";

    /** Matches IDs made of exactly three ASCII digits; compiled once instead of per loop iteration. */
    private static final Pattern NUMERIC_ID_PATTERN = Pattern.compile("[0-9]{3}");

    private String id;
    private int code;
    private RegionType type;
    private Region containingRegion = null;
    private final Set<Region> containedRegions = new TreeSet<Region>();
    private List<Region> preferredValues = null;

    // All static state below is only written inside the synchronized loadRegionData().
    private static boolean regionDataIsLoaded = false;

    private static Map<String,Region> regionIDMap = null;       // Map from ID to the regions
    private static Map<Integer,Region> numericCodeMap = null;   // Map from numeric code to the regions
    private static Map<String,Region> regionAliases = null;     // Aliases

    private static List<Region> regions = null;                 // This is the main data structure where the Regions are stored.
    private static Map<RegionType,Set<Region>> availableRegions = null; // Regions bucketed by their final type

    /*
     * Private default constructor.  Use factory methods only.
     */
    private Region() {}

    /*
     * Initializes the region data from the ICU resource bundles.  The region data
     * contains the basic relationships such as which regions are known, what the numeric
     * codes are, any known aliases, and the territory containment data.
     *
     * If the region data has already loaded, then this method simply returns without doing
     * anything meaningful.
     */
    private static synchronized void loadRegionData() {

        if ( regionDataIsLoaded ) {
            return;
        }

        regionAliases = new HashMap<String,Region>();
        regionIDMap = new HashMap<String,Region>();
        numericCodeMap = new HashMap<Integer,Region>();
        availableRegions = new EnumMap<RegionType,Set<Region>>(RegionType.class);

        UResourceBundle metadata = UResourceBundle.getBundleInstance(
                ICUResourceBundle.ICU_BASE_NAME, "metadata", ICUResourceBundle.ICU_DATA_CLASS_LOADER);
        UResourceBundle regionCodes = metadata.get("regionCodes");
        UResourceBundle metadataAlias = metadata.get("alias");
        UResourceBundle territoryAlias = metadataAlias.get("territory");

        UResourceBundle supplementalData = UResourceBundle.getBundleInstance(
                ICUResourceBundle.ICU_BASE_NAME, "supplementalData", ICUResourceBundle.ICU_DATA_CLASS_LOADER);
        UResourceBundle codeMappings = supplementalData.get("codeMappings");

        UResourceBundle territoryContainment = supplementalData.get("territoryContainment");
        UResourceBundle worldContainment = territoryContainment.get(WORLD_ID);
        UResourceBundle groupingContainment = territoryContainment.get("grouping");

        List<String> continents = Arrays.asList(worldContainment.getStringArray());
        List<String> groupings = Arrays.asList(groupingContainment.getStringArray());

        // First process the region codes and create the master array of regions.
        int regionCodeSize = regionCodes.getSize();
        regions = new ArrayList<Region>(regionCodeSize);
        for ( int i = 0 ; i < regionCodeSize ; i++ ) {
            // TERRITORY is only temporary - the real type is figured out later once the aliases are known.
            Region r = registerRegion(regionCodes.getString(i), RegionType.TERRITORY);
            if ( r.code != NO_NUMERIC_CODE ) {
                // Three-digit IDs default to SUBCONTINENT; WORLD, CONTINENT and GROUPING are patched in below.
                r.type = RegionType.SUBCONTINENT;
            }
        }

        // Process the territory aliases
        for ( int i = 0 ; i < territoryAlias.getSize(); i++ ) {
            UResourceBundle res = territoryAlias.get(i);
            String aliasFrom = res.getKey();
            String aliasTo = res.get("replacement").getString();

            Region replacement = regionIDMap.get(aliasTo);
            Region deprecated = regionIDMap.get(aliasFrom);

            if ( replacement != null && deprecated == null ) {
                // This is just an alias from some string to a region
                regionAliases.put(aliasFrom, replacement);
                continue;
            }

            if ( deprecated == null ) {
                // Deprecated region code not in the master codes list - so need to create a deprecated region for it.
                deprecated = registerRegion(aliasFrom, RegionType.DEPRECATED);
            }
            deprecated.type = RegionType.DEPRECATED;

            // The replacement may be a space-separated list of IDs; IDs that are not known regions are dropped.
            deprecated.preferredValues = new ArrayList<Region>();
            for ( String s : aliasTo.split(" ") ) {
                Region preferred = regionIDMap.get(s);
                if ( preferred != null ) {
                    deprecated.preferredValues.add(preferred);
                }
            }
        }

        // Process the code mappings - This will allow us to assign numeric codes to most of the territories.
        for ( int i = 0 ; i < codeMappings.getSize(); i++ ) {
            UResourceBundle mapping = codeMappings.get(i);
            if ( mapping.getType() != UResourceBundle.ARRAY ) {
                continue;
            }
            String[] codeMappingStrings = mapping.getStringArray();
            String codeMappingID = codeMappingStrings[0];
            int codeMappingNumber = Integer.parseInt(codeMappingStrings[1]);
            String codeMapping3Letter = codeMappingStrings[2];

            Region r = regionIDMap.get(codeMappingID);
            if ( r != null ) {
                r.code = codeMappingNumber;
                numericCodeMap.put(r.code, r);
                regionAliases.put(codeMapping3Letter, r);
            }
        }

        // Now fill in the special cases for WORLD, UNKNOWN, CONTINENTS, and GROUPINGS
        assignType(WORLD_ID, RegionType.WORLD);
        assignType(UNKNOWN_REGION_ID, RegionType.UNKNOWN);

        for ( String continent : continents ) {
            assignType(continent, RegionType.CONTINENT);
        }

        for ( String grouping : groupings ) {
            assignType(grouping, RegionType.GROUPING);
        }

        // Special case: The region code "QO" (Outlying Oceania) is a subcontinent code added by CLDR
        // even though it looks like a territory code.  Need to handle it here.
        assignType(OUTLYING_OCEANIA_REGION_ID, RegionType.SUBCONTINENT);

        // Load territory containment info from the supplemental data.
        for ( int i = 0 ; i < territoryContainment.getSize(); i++ ) {
            UResourceBundle mapping = territoryContainment.get(i);
            String parent = mapping.getKey();
            if (parent.equals("containedGroupings") || parent.equals("deprecated")) {
                continue; // handle new pseudo-parent types added in ICU data per cldrbug 7808; for now just skip.
                          // #11232 is to do something useful with these.
            }
            Region parentRegion = regionIDMap.get(parent);
            for ( int j = 0 ; j < mapping.getSize(); j++ ) {
                String child = mapping.getString(j);
                Region childRegion = regionIDMap.get(child);
                if ( parentRegion != null && childRegion != null ) {

                    // Add the child region to the set of regions contained by the parent
                    parentRegion.containedRegions.add(childRegion);

                    // Set the parent region to be the containing region of the child.
                    // Regions of type GROUPING can't be set as the parent, since another region
                    // such as a SUBCONTINENT, CONTINENT, or WORLD must always be the parent.
                    if ( parentRegion.getType() != RegionType.GROUPING ) {
                        childRegion.containingRegion = parentRegion;
                    }
                }
            }
        }

        // Create the availableRegions sets, one per type, so every type has a (possibly empty) entry.
        for ( RegionType regionType : RegionType.values() ) {
            availableRegions.put(regionType, new TreeSet<Region>());
        }

        for ( Region ar : regions ) {
            availableRegions.get(ar.type).add(ar);
        }

        regionDataIsLoaded = true;
    }

    /*
     * Creates a region with the given ID and type and records it in regionIDMap and the
     * master regions list. An ID of exactly three ASCII digits is also parsed as the region's
     * numeric code and recorded in numericCodeMap; any other ID gets NO_NUMERIC_CODE.
     * Only called from loadRegionData(), after the static maps and list have been created.
     */
    private static Region registerRegion(String id, RegionType type) {
        Region r = new Region();
        r.id = id;
        r.type = type;
        regionIDMap.put(id, r);
        if ( NUMERIC_ID_PATTERN.matcher(id).matches() ) {
            r.code = Integer.parseInt(id);
            numericCodeMap.put(r.code, r);
        } else {
            r.code = NO_NUMERIC_CODE;
        }
        regions.add(r);
        return r;
    }

    /*
     * Sets the type of the region with the given ID, if such a region is known.
     * IDs that are not in regionIDMap are silently ignored.
     */
    private static void assignType(String id, RegionType type) {
        Region r = regionIDMap.get(id);
        if ( r != null ) {
            r.type = type;
        }
    }

    /*
     * Maps a deprecated region that has exactly one preferred replacement to that replacement.
     * Any other region (including a deprecated region with zero or several replacements)
     * is returned unchanged.
     */
    private static Region resolveDeprecated(Region r) {
        if ( r.type == RegionType.DEPRECATED && r.preferredValues.size() == 1 ) {
            return r.preferredValues.get(0);
        }
        return r;
    }

    /** Returns a Region using the given region ID.  The region ID can be either a 2-letter ISO code,
     * 3-letter ISO code, UNM.49 numeric code, or other valid Unicode Region Code as defined by the CLDR.
     * A deprecated ID that has exactly one preferred replacement yields the replacement region.
     * @param id The id of the region to be retrieved.
     * @return The corresponding region.
     * @throws NullPointerException if the supplied id is null.
     * @throws IllegalArgumentException if the supplied ID cannot be canonicalized to a Region ID that is known by ICU.
     * @stable ICU 50
     */
    public static Region getInstance(String id) {

        if ( id == null ) {
            throw new NullPointerException();
        }

        loadRegionData();

        Region r = regionIDMap.get(id);

        if ( r == null ) {
            r = regionAliases.get(id);
        }

        if ( r == null ) {
            throw new IllegalArgumentException("Unknown region id: " + id);
        }

        return resolveDeprecated(r);
    }

    /** Returns a Region using the given numeric code as defined by UNM.49.
     * A deprecated code that has exactly one preferred replacement yields the replacement region.
     * @param code The numeric code of the region to be retrieved.
     * @return The corresponding region.
     * @throws IllegalArgumentException if the supplied numeric code is not recognized.
     * @stable ICU 50
     */
    public static Region getInstance(int code) {

        loadRegionData();

        Region r = numericCodeMap.get(code);

        if ( r == null ) { // Just in case there's an alias that's numeric, try to find it.
            // Aliases are keyed by strings, so left-pad the code with zeros to three digits.
            String pad = "";
            if ( code < 10 ) {
                pad = "00";
            } else if ( code < 100 ) {
                pad = "0";
            }
            String id = pad + Integer.toString(code);
            r = regionAliases.get(id);
        }

        if ( r == null ) {
            throw new IllegalArgumentException("Unknown region code: " + code);
        }

        return resolveDeprecated(r);
    }

    /** Used to retrieve all available regions of a specific type.
     *
     * @param type The type of regions to be returned ( TERRITORY, CONTINENT, SUBCONTINENT, etc. )
     * @return An unmodifiable set of all known regions that match the given type.
     * @stable ICU 50
     */
    public static Set<Region> getAvailable(RegionType type) {

        loadRegionData();
        return Collections.unmodifiableSet(availableRegions.get(type));
    }

    /** Used to determine the macroregion that geographically contains this region.
     *
     * @return The region that geographically contains this region.  Returns NULL if this region is
     *  code "001" (World) or "ZZ" (Unknown region).  For example, calling this method with region "IT" (Italy)
     *  returns the region "039" (Southern Europe).
     * @stable ICU 50
     */
    public Region getContainingRegion() {
        loadRegionData();
        return containingRegion;
    }

    /** Used to determine the macroregion that geographically contains this region and that matches the given type.
     *
     * @param type The type that the returned containing region must have.
     * @return The region that geographically contains this region and matches the given type.  May return NULL if
     *  no containing region can be found that matches the given type.  For example, calling this method with region "IT" (Italy)
     *  and type CONTINENT returns the region "150" (Europe).
     * @stable ICU 50
     */
    public Region getContainingRegion(RegionType type) {
        loadRegionData();
        // Walk up the containment chain until an ancestor of the requested type is found.
        for ( Region ancestor = containingRegion ; ancestor != null ; ancestor = ancestor.containingRegion ) {
            if ( ancestor.type == type ) {
                return ancestor;
            }
        }
        return null;
    }

    /** Used to determine the sub-regions that are contained within this region.
     *
     * @return An unmodifiable set containing all the regions that are immediate children
     *  of this region in the region hierarchy.  These returned regions could be either macro
     *  regions, territories, or a mixture of the two, depending on the containment data as defined
     *  in CLDR.  This API may return an empty set if this region doesn't have any sub-regions.
     *  For example, calling this method with region "150" (Europe) returns a set containing
     *  the various sub regions of Europe - "039" (Southern Europe) - "151" (Eastern Europe)
     *  - "154" (Northern Europe) and "155" (Western Europe).
     *
     * @stable ICU 50
     */
    public Set<Region> getContainedRegions() {
        loadRegionData();
        return Collections.unmodifiableSet(containedRegions);
    }

    /** Used to determine all the regions that are contained within this region and that match the given type.
     *
     * @param type The type that the returned regions must have.
     * @return An unmodifiable set containing all the regions that are children of this region
     *  anywhere in the region hierarchy and match the given type.  This API may return an empty set
     *  if this region doesn't have any sub-regions that match the given type.
     *  For example, calling this method with region "150" (Europe) and type "TERRITORY" returns a set
     *  containing all the territories in Europe ( "FR" (France) - "IT" (Italy) - "DE" (Germany) etc. )
     * @stable ICU 50
     */
    public Set<Region> getContainedRegions(RegionType type) {

        loadRegionData();

        Set<Region> result = new TreeSet<Region>();

        for ( Region r : getContainedRegions() ) {
            if ( r.getType() == type ) {
                result.add(r);
            } else {
                // Only descend into children that are not themselves of the requested type.
                result.addAll(r.getContainedRegions(type));
            }
        }
        return Collections.unmodifiableSet(result);
    }

    /**
     * Returns the preferred replacement regions for a deprecated region.
     *
     * @return For deprecated regions, return an unmodifiable list of the regions that are the preferred replacement regions for this region.
     *  Returns null for a non-deprecated region.  For example, calling this method with region "SU" (Soviet Union) would
     *  return a list of the regions containing "RU" (Russia), "AM" (Armenia), "AZ" (Azerbaijan), etc...
     *
     * @stable ICU 50
     */
    public List<Region> getPreferredValues() {

        loadRegionData();

        if ( type == RegionType.DEPRECATED ) {
            return Collections.unmodifiableList(preferredValues);
        } else {
            return null;
        }
    }

    /**
     * Tests whether the supplied region lies anywhere below this region in the region hierarchy.
     *
     * @param other The region to look for among the direct and indirect children of this region.
     * @return Returns true if this region contains the supplied other region anywhere in the region hierarchy.
     *
     * @stable ICU 50
     */
    public boolean contains(Region other) {

        loadRegionData();

        if ( containedRegions.contains(other) ) {
            return true;
        }

        for ( Region cr : containedRegions ) {
            if ( cr.contains(other) ) {
                return true;
            }
        }

        return false;
    }

    /** Returns the string representation of this region
     *
     * @return The string representation of this region, which is its ID.
     *
     * @stable ICU 50
     */
    @Override
    public String toString() {
        return id;
    }

    /**
     * Returns the numeric code for this region
     *
     * @return The numeric code for this region.   Returns a negative value if the given region does not have a numeric
     * code assigned to it.  This is a very rare case and only occurs for a few very small territories.
     *
     * @stable ICU 50
     */
    public int getNumericCode() {
        return code;
    }

    /** Returns this region's type.
     *
     * @return This region's type classification, such as CONTINENT or TERRITORY.
     *
     * @stable ICU 50
     */
    public RegionType getType() {
        return type;
    }

    /**
     * {@inheritDoc}
     * @stable ICU 50
     */
    @Override
    public int compareTo(Region other) {
        return id.compareTo(other.id);
    }
}