1 /* 2 ******************************************************************************* 3 * Copyright (C) 2009-2010, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 */ 7 package com.ibm.icu.impl.locale; 8 9 import java.util.ArrayList; 10 import java.util.HashMap; 11 import java.util.HashSet; 12 import java.util.List; 13 import java.util.Set; 14 15 public final class InternalLocaleBuilder { 16 17 private static final boolean JDKIMPL = false; 18 19 private String _language = ""; 20 private String _script = ""; 21 private String _region = ""; 22 private String _variant = ""; 23 24 private static final CaseInsensitiveChar PRIVUSE_KEY = new CaseInsensitiveChar(LanguageTag.PRIVATEUSE.charAt(0)); 25 26 private HashMap<CaseInsensitiveChar, String> _extensions; 27 private HashSet<CaseInsensitiveString> _uattributes; 28 private HashMap<CaseInsensitiveString, String> _ukeywords; 29 30 InternalLocaleBuilder()31 public InternalLocaleBuilder() { 32 } 33 setLanguage(String language)34 public InternalLocaleBuilder setLanguage(String language) throws LocaleSyntaxException { 35 if (language == null || language.length() == 0) { 36 _language = ""; 37 } else { 38 if (!LanguageTag.isLanguage(language)) { 39 throw new LocaleSyntaxException("Ill-formed language: " + language, 0); 40 } 41 _language = language; 42 } 43 return this; 44 } 45 setScript(String script)46 public InternalLocaleBuilder setScript(String script) throws LocaleSyntaxException { 47 if (script == null || script.length() == 0) { 48 _script = ""; 49 } else { 50 if (!LanguageTag.isScript(script)) { 51 throw new LocaleSyntaxException("Ill-formed script: " + script, 0); 52 } 53 _script = script; 54 } 55 return this; 56 } 57 setRegion(String region)58 public InternalLocaleBuilder setRegion(String region) throws LocaleSyntaxException { 59 if (region == null || region.length() == 0) { 60 _region = ""; 61 } else { 62 if (!LanguageTag.isRegion(region)) { 63 throw new LocaleSyntaxException("Ill-formed region: " + region, 0); 64 } 65 _region = region; 66 } 67 return this; 68 } 69 setVariant(String variant)70 public InternalLocaleBuilder setVariant(String variant) throws LocaleSyntaxException { 71 if (variant == null || variant.length() == 0) { 72 _variant = ""; 73 } else { 74 // normalize separators to "_" 75 String var = variant.replaceAll(LanguageTag.SEP, BaseLocale.SEP); 76 int errIdx = checkVariants(var, BaseLocale.SEP); 77 if (errIdx != -1) { 78 throw new LocaleSyntaxException("Ill-formed variant: " + variant, errIdx); 79 } 80 _variant = var; 81 } 82 return this; 83 } 84 addUnicodeLocaleAttribute(String attribute)85 public InternalLocaleBuilder addUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException { 86 if (attribute == null || !UnicodeLocaleExtension.isAttribute(attribute)) { 87 throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute); 88 } 89 // Use case insensitive string to prevent duplication 90 if (_uattributes == null) { 91 _uattributes = new HashSet<CaseInsensitiveString>(4); 92 } 93 _uattributes.add(new CaseInsensitiveString(attribute)); 94 return this; 95 } 96 removeUnicodeLocaleAttribute(String attribute)97 public InternalLocaleBuilder removeUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException { 98 if (attribute == null || !UnicodeLocaleExtension.isAttribute(attribute)) { 99 throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute); 100 } 101 if (_uattributes != null) { 102 _uattributes.remove(new CaseInsensitiveString(attribute)); 103 } 104 return this; 105 } 106 setUnicodeLocaleKeyword(String key, String type)107 public InternalLocaleBuilder setUnicodeLocaleKeyword(String key, String type) throws LocaleSyntaxException { 108 if (!UnicodeLocaleExtension.isKey(key)) { 109 throw new LocaleSyntaxException("Ill-formed Unicode locale keyword key: " + key); 110 } 111 112 CaseInsensitiveString cikey = new CaseInsensitiveString(key); 113 if (type == null) { 114 if (_ukeywords != null) { 115 // null type is used for remove the key 116 _ukeywords.remove(cikey); 117 } 118 } else { 119 if (type.length() != 0) { 120 // normalize separator to "-" 121 String tp = type.replaceAll(BaseLocale.SEP, LanguageTag.SEP); 122 // validate 123 StringTokenIterator itr = new StringTokenIterator(tp, LanguageTag.SEP); 124 while (!itr.isDone()) { 125 String s = itr.current(); 126 if (!UnicodeLocaleExtension.isTypeSubtag(s)) { 127 throw new LocaleSyntaxException("Ill-formed Unicode locale keyword type: " + type, itr.currentStart()); 128 } 129 itr.next(); 130 } 131 } 132 if (_ukeywords == null) { 133 _ukeywords = new HashMap<CaseInsensitiveString, String>(4); 134 } 135 _ukeywords.put(cikey, type); 136 } 137 return this; 138 } 139 setExtension(char singleton, String value)140 public InternalLocaleBuilder setExtension(char singleton, String value) throws LocaleSyntaxException { 141 // validate key 142 boolean isBcpPrivateuse = LanguageTag.isPrivateusePrefixChar(singleton); 143 if (!isBcpPrivateuse && !LanguageTag.isExtensionSingletonChar(singleton)) { 144 throw new LocaleSyntaxException("Ill-formed extension key: " + singleton); 145 } 146 147 boolean remove = (value == null || value.length() == 0); 148 CaseInsensitiveChar key = new CaseInsensitiveChar(singleton); 149 150 if (remove) { 151 if (UnicodeLocaleExtension.isSingletonChar(key.value())) { 152 // clear entire Unicode locale extension 153 if (_uattributes != null) { 154 _uattributes.clear(); 155 } 156 if (_ukeywords != null) { 157 _ukeywords.clear(); 158 } 159 } else { 160 if (_extensions != null && _extensions.containsKey(key)) { 161 _extensions.remove(key); 162 } 163 } 164 } else { 165 // validate value 166 String val = value.replaceAll(BaseLocale.SEP, LanguageTag.SEP); 167 StringTokenIterator itr = new StringTokenIterator(val, LanguageTag.SEP); 168 while (!itr.isDone()) { 169 String s = itr.current(); 170 boolean validSubtag; 171 if (isBcpPrivateuse) { 172 validSubtag = LanguageTag.isPrivateuseSubtag(s); 173 } else { 174 validSubtag = LanguageTag.isExtensionSubtag(s); 175 } 176 if (!validSubtag) { 177 throw new LocaleSyntaxException("Ill-formed extension value: " + s, itr.currentStart()); 178 } 179 itr.next(); 180 } 181 182 if (UnicodeLocaleExtension.isSingletonChar(key.value())) { 183 setUnicodeLocaleExtension(val); 184 } else { 185 if (_extensions == null) { 186 _extensions = new HashMap<CaseInsensitiveChar, String>(4); 187 } 188 _extensions.put(key, val); 189 } 190 } 191 return this; 192 } 193 194 /* 195 * Set extension/private subtags in a single string representation 196 */ setExtensions(String subtags)197 public InternalLocaleBuilder setExtensions(String subtags) throws LocaleSyntaxException { 198 if (subtags == null || subtags.length() == 0) { 199 clearExtensions(); 200 return this; 201 } 202 subtags = subtags.replaceAll(BaseLocale.SEP, LanguageTag.SEP); 203 StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP); 204 205 List<String> extensions = null; 206 String privateuse = null; 207 208 int parsed = 0; 209 int start; 210 211 // Make a list of extension subtags 212 while (!itr.isDone()) { 213 String s = itr.current(); 214 if (LanguageTag.isExtensionSingleton(s)) { 215 start = itr.currentStart(); 216 String singleton = s; 217 StringBuilder sb = new StringBuilder(singleton); 218 219 itr.next(); 220 while (!itr.isDone()) { 221 s = itr.current(); 222 if (LanguageTag.isExtensionSubtag(s)) { 223 sb.append(LanguageTag.SEP).append(s); 224 parsed = itr.currentEnd(); 225 } else { 226 break; 227 } 228 itr.next(); 229 } 230 231 if (parsed < start) { 232 throw new LocaleSyntaxException("Incomplete extension '" + singleton + "'", start); 233 } 234 235 if (extensions == null) { 236 extensions = new ArrayList<String>(4); 237 } 238 extensions.add(sb.toString()); 239 } else { 240 break; 241 } 242 } 243 if (!itr.isDone()) { 244 String s = itr.current(); 245 if (LanguageTag.isPrivateusePrefix(s)) { 246 start = itr.currentStart(); 247 StringBuilder sb = new StringBuilder(s); 248 249 itr.next(); 250 while (!itr.isDone()) { 251 s = itr.current(); 252 if (!LanguageTag.isPrivateuseSubtag(s)) { 253 break; 254 } 255 sb.append(LanguageTag.SEP).append(s); 256 parsed = itr.currentEnd(); 257 258 itr.next(); 259 } 260 if (parsed <= start) { 261 throw new LocaleSyntaxException("Incomplete privateuse:" + subtags.substring(start), start); 262 } else { 263 privateuse = sb.toString(); 264 } 265 } 266 } 267 268 if (!itr.isDone()) { 269 throw new LocaleSyntaxException("Ill-formed extension subtags:" + subtags.substring(itr.currentStart()), itr.currentStart()); 270 } 271 272 return setExtensions(extensions, privateuse); 273 } 274 275 /* 276 * Set a list of BCP47 extensions and private use subtags 277 * BCP47 extensions are already validated and well-formed, but may contain duplicates 278 */ setExtensions(List<String> bcpExtensions, String privateuse)279 private InternalLocaleBuilder setExtensions(List<String> bcpExtensions, String privateuse) { 280 clearExtensions(); 281 282 if (bcpExtensions != null && bcpExtensions.size() > 0) { 283 HashSet<CaseInsensitiveChar> processedExtensions = new HashSet<CaseInsensitiveChar>(bcpExtensions.size()); 284 for (String bcpExt : bcpExtensions) { 285 CaseInsensitiveChar key = new CaseInsensitiveChar(bcpExt.charAt(0)); 286 // ignore duplicates 287 if (!processedExtensions.contains(key)) { 288 // each extension string contains singleton, e.g. "a-abc-def" 289 if (UnicodeLocaleExtension.isSingletonChar(key.value())) { 290 setUnicodeLocaleExtension(bcpExt.substring(2)); 291 } else { 292 if (_extensions == null) { 293 _extensions = new HashMap<CaseInsensitiveChar, String>(4); 294 } 295 _extensions.put(key, bcpExt.substring(2)); 296 } 297 } 298 } 299 } 300 if (privateuse != null && privateuse.length() > 0) { 301 // privateuse string contains prefix, e.g. "x-abc-def" 302 if (_extensions == null) { 303 _extensions = new HashMap<CaseInsensitiveChar, String>(1); 304 } 305 _extensions.put(new CaseInsensitiveChar(privateuse.charAt(0)), privateuse.substring(2)); 306 } 307 308 return this; 309 } 310 311 /* 312 * Reset Builder's internal state with the given language tag 313 */ setLanguageTag(LanguageTag langtag)314 public InternalLocaleBuilder setLanguageTag(LanguageTag langtag) { 315 clear(); 316 if (langtag.getExtlangs().size() > 0) { 317 _language = langtag.getExtlangs().get(0); 318 } else { 319 String language = langtag.getLanguage(); 320 if (!language.equals(LanguageTag.UNDETERMINED)) { 321 _language = language; 322 } 323 } 324 _script = langtag.getScript(); 325 _region = langtag.getRegion(); 326 327 List<String> bcpVariants = langtag.getVariants(); 328 if (bcpVariants.size() > 0) { 329 StringBuilder var = new StringBuilder(bcpVariants.get(0)); 330 for (int i = 1; i < bcpVariants.size(); i++) { 331 var.append(BaseLocale.SEP).append(bcpVariants.get(i)); 332 } 333 _variant = var.toString(); 334 } 335 336 setExtensions(langtag.getExtensions(), langtag.getPrivateuse()); 337 338 return this; 339 } 340 setLocale(BaseLocale base, LocaleExtensions extensions)341 public InternalLocaleBuilder setLocale(BaseLocale base, LocaleExtensions extensions) throws LocaleSyntaxException { 342 String language = base.getLanguage(); 343 String script = base.getScript(); 344 String region = base.getRegion(); 345 String variant = base.getVariant(); 346 347 if (JDKIMPL) { 348 // Special backward compatibility support 349 350 // Exception 1 - ja_JP_JP 351 if (language.equals("ja") && region.equals("JP") && variant.equals("JP")) { 352 // When locale ja_JP_JP is created, ca-japanese is always there. 353 // The builder ignores the variant "JP" 354 assert("japanese".equals(extensions.getUnicodeLocaleType("ca"))); 355 variant = ""; 356 } 357 // Exception 2 - th_TH_TH 358 else if (language.equals("th") && region.equals("TH") && variant.equals("TH")) { 359 // When locale th_TH_TH is created, nu-thai is always there. 360 // The builder ignores the variant "TH" 361 assert("thai".equals(extensions.getUnicodeLocaleType("nu"))); 362 variant = ""; 363 } 364 // Exception 3 - no_NO_NY 365 else if (language.equals("no") && region.equals("NO") && variant.equals("NY")) { 366 // no_NO_NY is a valid locale and used by Java 6 or older versions. 367 // The build ignores the variant "NY" and change the language to "nn". 368 language = "nn"; 369 variant = ""; 370 } 371 } 372 373 // Validate base locale fields before updating internal state. 374 // LocaleExtensions always store validated/canonicalized values, 375 // so no checks are necessary. 376 if (language.length() > 0 && !LanguageTag.isLanguage(language)) { 377 throw new LocaleSyntaxException("Ill-formed language: " + language); 378 } 379 380 if (script.length() > 0 && !LanguageTag.isScript(script)) { 381 throw new LocaleSyntaxException("Ill-formed script: " + script); 382 } 383 384 if (region.length() > 0 && !LanguageTag.isRegion(region)) { 385 throw new LocaleSyntaxException("Ill-formed region: " + region); 386 } 387 388 if (variant.length() > 0) { 389 int errIdx = checkVariants(variant, BaseLocale.SEP); 390 if (errIdx != -1) { 391 throw new LocaleSyntaxException("Ill-formed variant: " + variant, errIdx); 392 } 393 } 394 395 // The input locale is validated at this point. 396 // Now, updating builder's internal fields. 397 _language = language; 398 _script = script; 399 _region = region; 400 _variant = variant; 401 clearExtensions(); 402 403 Set<Character> extKeys = (extensions == null) ? null : extensions.getKeys(); 404 if (extKeys != null) { 405 // map extensions back to builder's internal format 406 for (Character key : extKeys) { 407 Extension e = extensions.getExtension(key); 408 if (e instanceof UnicodeLocaleExtension) { 409 UnicodeLocaleExtension ue = (UnicodeLocaleExtension)e; 410 for (String uatr : ue.getUnicodeLocaleAttributes()) { 411 if (_uattributes == null) { 412 _uattributes = new HashSet<CaseInsensitiveString>(4); 413 } 414 _uattributes.add(new CaseInsensitiveString(uatr)); 415 } 416 for (String ukey : ue.getUnicodeLocaleKeys()) { 417 if (_ukeywords == null) { 418 _ukeywords = new HashMap<CaseInsensitiveString, String>(4); 419 } 420 _ukeywords.put(new CaseInsensitiveString(ukey), ue.getUnicodeLocaleType(ukey)); 421 } 422 } else { 423 if (_extensions == null) { 424 _extensions = new HashMap<CaseInsensitiveChar, String>(4); 425 } 426 _extensions.put(new CaseInsensitiveChar(key.charValue()), e.getValue()); 427 } 428 } 429 } 430 return this; 431 } 432 clear()433 public InternalLocaleBuilder clear() { 434 _language = ""; 435 _script = ""; 436 _region = ""; 437 _variant = ""; 438 clearExtensions(); 439 return this; 440 } 441 clearExtensions()442 public InternalLocaleBuilder clearExtensions() { 443 if (_extensions != null) { 444 _extensions.clear(); 445 } 446 if (_uattributes != null) { 447 _uattributes.clear(); 448 } 449 if (_ukeywords != null) { 450 _ukeywords.clear(); 451 } 452 return this; 453 } 454 getBaseLocale()455 public BaseLocale getBaseLocale() { 456 String language = _language; 457 String script = _script; 458 String region = _region; 459 String variant = _variant; 460 461 // Special private use subtag sequence identified by "lvariant" will be 462 // interpreted as Java variant. 463 if (_extensions != null) { 464 String privuse = _extensions.get(PRIVUSE_KEY); 465 if (privuse != null) { 466 StringTokenIterator itr = new StringTokenIterator(privuse, LanguageTag.SEP); 467 boolean sawPrefix = false; 468 int privVarStart = -1; 469 while (!itr.isDone()) { 470 if (sawPrefix) { 471 privVarStart = itr.currentStart(); 472 break; 473 } 474 if (AsciiUtil.caseIgnoreMatch(itr.current(), LanguageTag.PRIVUSE_VARIANT_PREFIX)) { 475 sawPrefix = true; 476 } 477 itr.next(); 478 } 479 if (privVarStart != -1) { 480 StringBuilder sb = new StringBuilder(variant); 481 if (sb.length() != 0) { 482 sb.append(BaseLocale.SEP); 483 } 484 sb.append(privuse.substring(privVarStart).replaceAll(LanguageTag.SEP, BaseLocale.SEP)); 485 variant = sb.toString(); 486 } 487 } 488 } 489 490 return BaseLocale.getInstance(language, script, region, variant); 491 } 492 getLocaleExtensions()493 public LocaleExtensions getLocaleExtensions() { 494 if ((_extensions == null || _extensions.size() == 0) 495 && (_uattributes == null || _uattributes.size() == 0) 496 && (_ukeywords == null || _ukeywords.size() == 0)) { 497 return LocaleExtensions.EMPTY_EXTENSIONS; 498 } 499 500 return new LocaleExtensions(_extensions, _uattributes, _ukeywords); 501 } 502 503 /* 504 * Remove special private use subtag sequence identified by "lvariant" 505 * and return the rest. Only used by LocaleExtensions 506 */ removePrivateuseVariant(String privuseVal)507 static String removePrivateuseVariant(String privuseVal) { 508 StringTokenIterator itr = new StringTokenIterator(privuseVal, LanguageTag.SEP); 509 510 // Note: privateuse value "abc-lvariant" is unchanged 511 // because no subtags after "lvariant". 512 513 int prefixStart = -1; 514 boolean sawPrivuseVar = false; 515 while (!itr.isDone()) { 516 if (prefixStart != -1) { 517 // Note: privateuse value "abc-lvariant" is unchanged 518 // because no subtags after "lvariant". 519 sawPrivuseVar = true; 520 break; 521 } 522 if (AsciiUtil.caseIgnoreMatch(itr.current(), LanguageTag.PRIVUSE_VARIANT_PREFIX)) { 523 prefixStart = itr.currentStart(); 524 } 525 itr.next(); 526 } 527 if (!sawPrivuseVar) { 528 return privuseVal; 529 } 530 531 assert(prefixStart == 0 || prefixStart > 1); 532 return (prefixStart == 0) ? null : privuseVal.substring(0, prefixStart -1); 533 } 534 535 /* 536 * Check if the given variant subtags separated by the given 537 * separator(s) are valid 538 */ checkVariants(String variants, String sep)539 private int checkVariants(String variants, String sep) { 540 StringTokenIterator itr = new StringTokenIterator(variants, sep); 541 while (!itr.isDone()) { 542 String s = itr.current(); 543 if (!LanguageTag.isVariant(s)) { 544 return itr.currentStart(); 545 } 546 itr.next(); 547 } 548 return -1; 549 } 550 551 /* 552 * Private methods parsing Unicode Locale Extension subtags. 553 * Duplicated attributes/keywords will be ignored. 554 * The input must be a valid extension subtags (excluding singleton). 555 */ setUnicodeLocaleExtension(String subtags)556 private void setUnicodeLocaleExtension(String subtags) { 557 // wipe out existing attributes/keywords 558 if (_uattributes != null) { 559 _uattributes.clear(); 560 } 561 if (_ukeywords != null) { 562 _ukeywords.clear(); 563 } 564 565 StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP); 566 567 // parse attributes 568 while (!itr.isDone()) { 569 if (!UnicodeLocaleExtension.isAttribute(itr.current())) { 570 break; 571 } 572 if (_uattributes == null) { 573 _uattributes = new HashSet<CaseInsensitiveString>(4); 574 } 575 _uattributes.add(new CaseInsensitiveString(itr.current())); 576 itr.next(); 577 } 578 579 // parse keywords 580 CaseInsensitiveString key = null; 581 String type; 582 int typeStart = -1; 583 int typeEnd = -1; 584 while (!itr.isDone()) { 585 if (key != null) { 586 if (UnicodeLocaleExtension.isKey(itr.current())) { 587 // next keyword - emit previous one 588 assert(typeStart == -1 || typeEnd != -1); 589 type = (typeStart == -1) ? "" : subtags.substring(typeStart, typeEnd); 590 if (_ukeywords == null) { 591 _ukeywords = new HashMap<CaseInsensitiveString, String>(4); 592 } 593 _ukeywords.put(key, type); 594 595 // reset keyword info 596 CaseInsensitiveString tmpKey = new CaseInsensitiveString(itr.current()); 597 key = _ukeywords.containsKey(tmpKey) ? null : tmpKey; 598 typeStart = typeEnd = -1; 599 } else { 600 if (typeStart == -1) { 601 typeStart = itr.currentStart(); 602 } 603 typeEnd = itr.currentEnd(); 604 } 605 } else if (UnicodeLocaleExtension.isKey(itr.current())) { 606 // 1. first keyword or 607 // 2. next keyword, but previous one was duplicate 608 key = new CaseInsensitiveString(itr.current()); 609 if (_ukeywords != null && _ukeywords.containsKey(key)) { 610 // duplicate 611 key = null; 612 } 613 } 614 615 if (!itr.hasNext()) { 616 if (key != null) { 617 // last keyword 618 assert(typeStart == -1 || typeEnd != -1); 619 type = (typeStart == -1) ? "" : subtags.substring(typeStart, typeEnd); 620 if (_ukeywords == null) { 621 _ukeywords = new HashMap<CaseInsensitiveString, String>(4); 622 } 623 _ukeywords.put(key, type); 624 } 625 break; 626 } 627 628 itr.next(); 629 } 630 } 631 632 static class CaseInsensitiveString { 633 private String _s; 634 CaseInsensitiveString(String s)635 CaseInsensitiveString(String s) { 636 _s = s; 637 } 638 value()639 public String value() { 640 return _s; 641 } 642 hashCode()643 public int hashCode() { 644 return AsciiUtil.toLowerString(_s).hashCode(); 645 } 646 equals(Object obj)647 public boolean equals(Object obj) { 648 if (this == obj) { 649 return true; 650 } 651 if (!(obj instanceof CaseInsensitiveString)) { 652 return false; 653 } 654 return AsciiUtil.caseIgnoreMatch(_s, ((CaseInsensitiveString)obj).value()); 655 } 656 } 657 658 static class CaseInsensitiveChar { 659 private char _c; 660 CaseInsensitiveChar(char c)661 CaseInsensitiveChar(char c) { 662 _c = c; 663 } 664 value()665 public char value() { 666 return _c; 667 } 668 hashCode()669 public int hashCode() { 670 return AsciiUtil.toLower(_c); 671 } 672 equals(Object obj)673 public boolean equals(Object obj) { 674 if (this == obj) { 675 return true; 676 } 677 if (!(obj instanceof CaseInsensitiveChar)) { 678 return false; 679 } 680 return _c == AsciiUtil.toLower(((CaseInsensitiveChar)obj).value()); 681 } 682 683 } 684 } 685