1 /* 2 ******************************************************************************* 3 * Copyright (C) 1996-2014, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 */ 7 package org.unicode.cldr.util.props; 8 9 import java.io.PrintWriter; 10 import java.io.StringWriter; 11 import java.text.ParsePosition; 12 import java.util.ArrayList; 13 import java.util.Arrays; 14 import java.util.Collection; 15 import java.util.Comparator; 16 import java.util.HashMap; 17 import java.util.Iterator; 18 import java.util.LinkedHashSet; 19 import java.util.List; 20 import java.util.Map; 21 import java.util.Set; 22 import java.util.TreeMap; 23 import java.util.function.Predicate; 24 import java.util.regex.Pattern; 25 26 import com.ibm.icu.dev.util.UnicodeMap; 27 import com.ibm.icu.impl.Utility; 28 import com.ibm.icu.text.SymbolTable; 29 import com.ibm.icu.text.UFormat; 30 import com.ibm.icu.text.UTF16; 31 import com.ibm.icu.text.UnicodeMatcher; 32 import com.ibm.icu.text.UnicodeSet; 33 import com.ibm.icu.text.UnicodeSetIterator; 34 35 public abstract class UnicodeProperty extends UnicodeLabel { 36 37 public static final UnicodeSet NONCHARACTERS = new UnicodeSet("[:noncharactercodepoint:]").freeze(); 38 public static final UnicodeSet PRIVATE_USE = new UnicodeSet("[:gc=privateuse:]").freeze(); 39 public static final UnicodeSet SURROGATE = new UnicodeSet("[:gc=surrogate:]").freeze(); 40 41 public static final UnicodeSet HIGH_SURROGATES = new UnicodeSet("[\\uD800-\\uDB7F]").freeze(); 42 public static final int SAMPLE_HIGH_SURROGATE = HIGH_SURROGATES.charAt(0); 43 public static final UnicodeSet HIGH_PRIVATE_USE_SURROGATES = new UnicodeSet("[\\uDB80-\\uDBFF]").freeze(); 44 public static final int SAMPLE_HIGH_PRIVATE_USE_SURROGATE = HIGH_PRIVATE_USE_SURROGATES.charAt(0); 45 public static final UnicodeSet LOW_SURROGATES = new UnicodeSet("[\\uDC00-\\uDFFF]").freeze(); 46 public static final int SAMPLE_LOW_SURROGATE = LOW_SURROGATES.charAt(0); 47 48 public static final UnicodeSet PRIVATE_USE_AREA = new UnicodeSet("[\\uE000-\\uF8FF]").freeze(); 49 public static final int SAMPLE_PRIVATE_USE_AREA = PRIVATE_USE_AREA.charAt(0); 50 public static final UnicodeSet PRIVATE_USE_AREA_A = new UnicodeSet("[\\U000F0000-\\U000FFFFD]").freeze(); 51 public static final int SAMPLE_PRIVATE_USE_AREA_A = PRIVATE_USE_AREA_A.charAt(0); 52 public static final UnicodeSet PRIVATE_USE_AREA_B = new UnicodeSet("[\\U00100000-\\U0010FFFD]").freeze(); 53 public static final int SAMPLE_PRIVATE_USE_AREA_B = PRIVATE_USE_AREA_B.charAt(0); 54 55 // The following are special. They are used for performance, but must be changed if the version of Unicode for the UnicodeProperty changes. 56 private static UnicodeSet UNASSIGNED; 57 private static int SAMPLE_UNASSIGNED; 58 private static UnicodeSet SPECIALS; 59 private static UnicodeSet STUFF_TO_TEST; 60 private static UnicodeSet STUFF_TO_TEST_WITH_UNASSIGNED; 61 getUNASSIGNED()62 public static synchronized UnicodeSet getUNASSIGNED() { 63 if (UNASSIGNED == null) { 64 UNASSIGNED = new UnicodeSet("[:gc=unassigned:]").freeze(); 65 } 66 return UNASSIGNED; 67 } 68 contractUNASSIGNED(UnicodeSet toBeUnassigned)69 public static synchronized UnicodeSet contractUNASSIGNED(UnicodeSet toBeUnassigned) { 70 UnicodeSet temp = UNASSIGNED; 71 ResetCacheProperties(); 72 UNASSIGNED = temp == null ? toBeUnassigned.freeze() : new UnicodeSet(temp).retainAll(toBeUnassigned).freeze(); 73 return UNASSIGNED; 74 } 75 getSAMPLE_UNASSIGNED()76 public static synchronized int getSAMPLE_UNASSIGNED() { 77 if (SAMPLE_UNASSIGNED == 0) { 78 SAMPLE_UNASSIGNED = getUNASSIGNED().charAt(0); 79 } 80 return SAMPLE_UNASSIGNED; 81 } 82 getSPECIALS()83 public static synchronized UnicodeSet getSPECIALS() { 84 if (SPECIALS == null) { 85 SPECIALS = new UnicodeSet(getUNASSIGNED()).addAll(PRIVATE_USE).addAll(SURROGATE).freeze(); 86 } 87 return SPECIALS; 88 } 89 getSTUFF_TO_TEST()90 public static synchronized UnicodeSet getSTUFF_TO_TEST() { 91 if (STUFF_TO_TEST == null) { 92 STUFF_TO_TEST = new UnicodeSet(getSPECIALS()).complement() 93 .addAll(NONCHARACTERS) 94 .add(getSAMPLE_UNASSIGNED()) 95 .add(SAMPLE_HIGH_SURROGATE) 96 .add(SAMPLE_HIGH_PRIVATE_USE_SURROGATE) 97 .add(SAMPLE_LOW_SURROGATE) 98 .add(SAMPLE_PRIVATE_USE_AREA) 99 .add(SAMPLE_PRIVATE_USE_AREA_A) 100 .add(SAMPLE_PRIVATE_USE_AREA_B) 101 .freeze(); 102 } 103 return STUFF_TO_TEST; 104 } 105 getSTUFF_TO_TEST_WITH_UNASSIGNED()106 public static synchronized UnicodeSet getSTUFF_TO_TEST_WITH_UNASSIGNED() { 107 if (STUFF_TO_TEST_WITH_UNASSIGNED == null) { 108 STUFF_TO_TEST_WITH_UNASSIGNED = new UnicodeSet(getSTUFF_TO_TEST()).addAll(getUNASSIGNED()).freeze(); 109 } 110 return STUFF_TO_TEST_WITH_UNASSIGNED; 111 } 112 113 /** 114 * Reset the cache properties. Must be done if the version of Unicode is different than the ICU one, AND any UnicodeProperty has already been instantiated. 115 * TODO make this a bit more robust. 116 * @internal 117 */ ResetCacheProperties()118 public static synchronized void ResetCacheProperties() { 119 UNASSIGNED = null; 120 SAMPLE_UNASSIGNED = 0; 121 SPECIALS = null; 122 STUFF_TO_TEST = null; 123 STUFF_TO_TEST_WITH_UNASSIGNED = null; 124 } 125 126 public static boolean DEBUG = false; 127 128 public static String CHECK_NAME = "FC_NFKC_Closure"; 129 130 public static int CHECK_VALUE = 0x037A; 131 132 private String name; 133 134 private String firstNameAlias = null; 135 136 private int type; 137 138 private Map valueToFirstValueAlias = null; 139 140 private boolean hasUniformUnassigned = true; 141 142 /* 143 * Name: Unicode_1_Name Name: ISO_Comment Name: Name Name: Unicode_1_Name 144 * 145 */ 146 147 public static final int UNKNOWN = 0, BINARY = 2, EXTENDED_BINARY = 3, 148 ENUMERATED = 4, EXTENDED_ENUMERATED = 5, CATALOG = 6, 149 EXTENDED_CATALOG = 7, MISC = 8, EXTENDED_MISC = 9, STRING = 10, 150 EXTENDED_STRING = 11, NUMERIC = 12, EXTENDED_NUMERIC = 13, 151 START_TYPE = 2, LIMIT_TYPE = 14, EXTENDED_MASK = 1, 152 CORE_MASK = ~EXTENDED_MASK, BINARY_MASK = (1 << BINARY) 153 | (1 << EXTENDED_BINARY), STRING_MASK = (1 << STRING) 154 | (1 << EXTENDED_STRING), 155 STRING_OR_MISC_MASK = (1 << STRING) | (1 << EXTENDED_STRING) 156 | (1 << MISC) | (1 << EXTENDED_MISC), 157 ENUMERATED_OR_CATALOG_MASK = (1 << ENUMERATED) 158 | (1 << EXTENDED_ENUMERATED) | (1 << CATALOG) 159 | (1 << EXTENDED_CATALOG); 160 161 private static final String[] TYPE_NAMES = { "Unknown", "Unknown", 162 "Binary", "Extended Binary", "Enumerated", "Extended Enumerated", 163 "Catalog", "Extended Catalog", "Miscellaneous", 164 "Extended Miscellaneous", "String", "Extended String", "Numeric", 165 "Extended Numeric", }; 166 getTypeName(int propType)167 public static String getTypeName(int propType) { 168 return TYPE_NAMES[propType]; 169 } 170 getName()171 public final String getName() { 172 return name; 173 } 174 getType()175 public final int getType() { 176 return type; 177 } 178 getTypeName()179 public String getTypeName() { 180 return TYPE_NAMES[type]; 181 } 182 isType(int mask)183 public final boolean isType(int mask) { 184 return ((1 << type) & mask) != 0; 185 } 186 setName(String string)187 protected final void setName(String string) { 188 if (string == null) 189 throw new IllegalArgumentException("Name must not be null"); 190 name = string; 191 } 192 setType(int i)193 protected final void setType(int i) { 194 type = i; 195 } 196 getVersion()197 public String getVersion() { 198 return _getVersion(); 199 } 200 getValue(int codepoint)201 public String getValue(int codepoint) { 202 if (DEBUG && CHECK_VALUE == codepoint && CHECK_NAME.equals(getName())) { 203 String value = _getValue(codepoint); 204 System.out.println(getName() + "(" + Utility.hex(codepoint) + "):" 205 + (getType() == STRING ? Utility.hex(value) : value)); 206 return value; 207 } 208 return _getValue(codepoint); 209 } 210 211 // public String getValue(int codepoint, boolean isShort) { 212 // return getValue(codepoint); 213 // } 214 getNameAliases(List<String> result)215 public List<String> getNameAliases(List<String> result) { 216 if (result == null) 217 result = new ArrayList(1); 218 return _getNameAliases(result); 219 } 220 getValueAliases(String valueAlias, List<String> result)221 public List<String> getValueAliases(String valueAlias, List<String> result) { 222 if (result == null) 223 result = new ArrayList(1); 224 result = _getValueAliases(valueAlias, result); 225 if (!result.contains(valueAlias)) { // FIX && type < NUMERIC 226 result = _getValueAliases(valueAlias, result); // for debugging 227 throw new IllegalArgumentException("Internal error: " + getName() 228 + " doesn't contain " + valueAlias + ": " 229 + new BagFormatter().join(result)); 230 } 231 return result; 232 } 233 getAvailableValues(List<String> result)234 public List<String> getAvailableValues(List<String> result) { 235 if (result == null) 236 result = new ArrayList(1); 237 return _getAvailableValues(result); 238 } 239 _getVersion()240 protected abstract String _getVersion(); 241 _getValue(int codepoint)242 protected abstract String _getValue(int codepoint); 243 _getNameAliases(List<String> result)244 protected abstract List<String> _getNameAliases(List<String> result); 245 _getValueAliases(String valueAlias, List<String> result)246 protected abstract List<String> _getValueAliases(String valueAlias, List<String> result); 247 _getAvailableValues(List<String> result)248 protected abstract List<String> _getAvailableValues(List<String> result); 249 250 // conveniences getNameAliases()251 public final List<String> getNameAliases() { 252 return getNameAliases(null); 253 } 254 getValueAliases(String valueAlias)255 public final List<String> getValueAliases(String valueAlias) { 256 return getValueAliases(valueAlias, null); 257 } 258 getAvailableValues()259 public final List<String> getAvailableValues() { 260 return getAvailableValues(null); 261 } 262 263 @Override getValue(int codepoint, boolean getShortest)264 public final String getValue(int codepoint, boolean getShortest) { 265 String result = getValue(codepoint); 266 if (type >= MISC || result == null || !getShortest) 267 return result; 268 return getFirstValueAlias(result); 269 } 270 getFirstNameAlias()271 public final String getFirstNameAlias() { 272 if (firstNameAlias == null) { 273 firstNameAlias = getNameAliases().get(0); 274 } 275 return firstNameAlias; 276 } 277 getFirstValueAlias(String value)278 public final String getFirstValueAlias(String value) { 279 if (valueToFirstValueAlias == null) 280 _getFirstValueAliasCache(); 281 return valueToFirstValueAlias.get(value).toString(); 282 } 283 _getFirstValueAliasCache()284 private void _getFirstValueAliasCache() { 285 maxValueWidth = 0; 286 maxFirstValueAliasWidth = 0; 287 valueToFirstValueAlias = new HashMap(1); 288 Iterator it = getAvailableValues().iterator(); 289 while (it.hasNext()) { 290 String value = (String) it.next(); 291 String first = getValueAliases(value).get(0); 292 if (first == null) { // internal error 293 throw new IllegalArgumentException( 294 "Value not in value aliases: " + value); 295 } 296 if (DEBUG && CHECK_NAME.equals(getName())) { 297 System.out.println("First Alias: " + getName() + ": " + value 298 + " => " + first 299 + new BagFormatter().join(getValueAliases(value))); 300 } 301 valueToFirstValueAlias.put(value, first); 302 if (value.length() > maxValueWidth) { 303 maxValueWidth = value.length(); 304 } 305 if (first.length() > maxFirstValueAliasWidth) { 306 maxFirstValueAliasWidth = first.length(); 307 } 308 } 309 } 310 311 private int maxValueWidth = -1; 312 313 private int maxFirstValueAliasWidth = -1; 314 315 @Override getMaxWidth(boolean getShortest)316 public int getMaxWidth(boolean getShortest) { 317 if (maxValueWidth < 0) 318 _getFirstValueAliasCache(); 319 if (getShortest) 320 return maxFirstValueAliasWidth; 321 return maxValueWidth; 322 } 323 getSet(String propertyValue)324 public final UnicodeSet getSet(String propertyValue) { 325 return getSet(propertyValue, null); 326 } 327 getSet(PatternMatcher matcher)328 public final UnicodeSet getSet(PatternMatcher matcher) { 329 return getSet(matcher, null); 330 } 331 332 /** Adds the property value set to the result. Clear the result first if you don't want to keep the original contents. 333 */ getSet(String propertyValue, UnicodeSet result)334 public final UnicodeSet getSet(String propertyValue, UnicodeSet result) { 335 if (isType(BINARY_MASK)) { 336 if (binaryYesSet == null) { 337 binaryYesSet = getSet(YES_MATCHER, null); 338 } 339 Boolean binValue = binaryValueOrNull(propertyValue); 340 if (binValue == Boolean.TRUE) { 341 if (result == null) { 342 return binaryYesSet.cloneAsThawed(); 343 } else { 344 return result.addAll(binaryYesSet); 345 } 346 } else if (binValue == Boolean.FALSE && !unicodeMapHasStringKeys) { 347 // We could cache this directly, too. 348 UnicodeSet inverse = binaryYesSet.cloneAsThawed().complement(); 349 if (result == null) { 350 return inverse; 351 } else { 352 return result.addAll(inverse); 353 } 354 } 355 } 356 Predicate<String> matcher = isType(STRING_OR_MISC_MASK) ? 357 new StringEqualityMatcher(propertyValue) : new NameMatcher(propertyValue); 358 return getSet(matcher, result); 359 } 360 binaryValueOrNull(String value)361 private static final Boolean binaryValueOrNull(String value) { 362 if ("Yes".equals(value)) { // fastpath 363 return Boolean.TRUE; 364 } 365 if (value == null) { 366 return null; 367 } 368 switch (toSkeleton(value)) { 369 case "n": 370 case "no": 371 case "f": 372 case "false": 373 return Boolean.FALSE; 374 case "y": 375 case "yes": 376 case "t": 377 case "true": 378 return Boolean.TRUE; 379 default: 380 return null; 381 } 382 } 383 384 private UnicodeMap unicodeMap = null; 385 private boolean unicodeMapHasStringKeys = false; 386 private UnicodeSet binaryYesSet = null; 387 388 public static final String UNUSED = "??"; 389 getSet(Predicate matcher, UnicodeSet result)390 public UnicodeSet getSet(Predicate matcher, UnicodeSet result) { 391 if (result == null) 392 result = new UnicodeSet(); 393 boolean uniformUnassigned = hasUniformUnassigned(); 394 if (isType(STRING_OR_MISC_MASK)) { 395 for (UnicodeSetIterator usi = getStuffToTest(uniformUnassigned); usi.next();) { // int i = 0; i <= 0x10FFFF; ++i 396 int i = usi.codepoint; 397 String value = getValue(i); 398 if (value != null && matcher.test(value)) { 399 result.add(i); 400 } 401 } 402 return addUntested(result, uniformUnassigned); 403 } 404 List temp = new ArrayList(1); // to avoid reallocating... 405 UnicodeMap um = getUnicodeMap_internal(); 406 Iterator it = um.getAvailableValues(null).iterator(); 407 main: while (it.hasNext()) { 408 String value = (String) it.next(); 409 if (matcher.test(value)) { // fastpath 410 um.keySet(value, result); 411 continue main; 412 } 413 temp.clear(); 414 Iterator it2 = getValueAliases(value, temp).iterator(); 415 while (it2.hasNext()) { 416 String value2 = (String) it2.next(); 417 // System.out.println("Values:" + value2); 418 if (matcher.test(value2) 419 || matcher.test(toSkeleton(value2))) { 420 um.keySet(value, result); 421 continue main; 422 } 423 } 424 } 425 return result; 426 } 427 428 /* 429 * public UnicodeSet getMatchSet(UnicodeSet result) { if (result == null) 430 * result = new UnicodeSet(); addAll(matchIterator, result); return result; } 431 * 432 * public void setMatchSet(UnicodeSet set) { matchIterator = new 433 * UnicodeSetIterator(set); } 434 */ 435 436 /** 437 * Utility for debugging 438 */ getStack()439 public static String getStack() { 440 Exception e = new Exception(); 441 StringWriter sw = new StringWriter(); 442 PrintWriter pw = new PrintWriter(sw); 443 e.printStackTrace(pw); 444 pw.flush(); 445 return "Showing Stack with fake " + sw.getBuffer().toString(); 446 } 447 448 // TODO use this instead of plain strings 449 public static class Name implements Comparable { 450 private String skeleton; 451 452 private String pretty; 453 454 public final int RAW = 0, TITLE = 1, NORMAL = 2; 455 Name(String name, int style)456 public Name(String name, int style) { 457 if (name == null) 458 name = ""; 459 if (style == RAW) { 460 skeleton = pretty = name; 461 } else { 462 pretty = regularize(name, style == TITLE); 463 skeleton = toSkeleton(pretty); 464 } 465 } 466 467 @Override compareTo(Object o)468 public int compareTo(Object o) { 469 return skeleton.compareTo(((Name) o).skeleton); 470 } 471 472 @Override equals(Object o)473 public boolean equals(Object o) { 474 return skeleton.equals(((Name) o).skeleton); 475 } 476 477 @Override hashCode()478 public int hashCode() { 479 return skeleton.hashCode(); 480 } 481 482 @Override toString()483 public String toString() { 484 return pretty; 485 } 486 } 487 488 /** 489 * @return the unicode map 490 */ getUnicodeMap()491 public UnicodeMap getUnicodeMap() { 492 return getUnicodeMap(false); 493 } 494 495 /** 496 * @return the unicode map 497 */ getUnicodeMap(boolean getShortest)498 public UnicodeMap getUnicodeMap(boolean getShortest) { 499 if (!getShortest) 500 return getUnicodeMap_internal().cloneAsThawed(); 501 UnicodeMap result = new UnicodeMap(); 502 boolean uniformUnassigned = hasUniformUnassigned(); 503 504 for (UnicodeSetIterator usi = getStuffToTest(uniformUnassigned); usi.next();) { // int i = 0; i <= 0x10FFFF; ++i 505 int i = usi.codepoint; 506 // if (DEBUG && i == 0x41) System.out.println(i + "\t" + 507 // getValue(i)); 508 String value = getValue(i, true); 509 result.put(i, value); 510 } 511 return addUntested(result, uniformUnassigned); 512 } 513 514 /** 515 * @return the unicode map 516 */ getUnicodeMap_internal()517 public UnicodeMap getUnicodeMap_internal() { 518 if (unicodeMap == null) { 519 unicodeMap = _getUnicodeMap(); 520 Set<String> stringKeys = unicodeMap.stringKeys(); 521 unicodeMapHasStringKeys = stringKeys != null && !stringKeys.isEmpty(); 522 } 523 return unicodeMap; 524 } 525 _getUnicodeMap()526 protected UnicodeMap _getUnicodeMap() { 527 UnicodeMap result = new UnicodeMap(); 528 HashMap myIntern = new HashMap(); 529 boolean uniformUnassigned = hasUniformUnassigned(); 530 531 for (UnicodeSetIterator usi = getStuffToTest(uniformUnassigned); usi.next();) { // int i = 0; i <= 0x10FFFF; ++i 532 int i = usi.codepoint; 533 // if (DEBUG && i == 0x41) System.out.println(i + "\t" + 534 // getValue(i)); 535 String value = getValue(i); 536 String iValue = (String) myIntern.get(value); 537 if (iValue == null) 538 myIntern.put(value, iValue = value); 539 result.put(i, iValue); 540 } 541 addUntested(result, uniformUnassigned); 542 543 if (DEBUG) { 544 for (UnicodeSetIterator usi = getStuffToTest(uniformUnassigned); usi.next();) { // int i = 0; i <= 0x10FFFF; ++i 545 int i = usi.codepoint; 546 // if (DEBUG && i == 0x41) System.out.println(i + "\t" + 547 // getValue(i)); 548 String value = getValue(i); 549 String resultValue = (String) result.getValue(i); 550 if (!value.equals(resultValue)) { 551 throw new RuntimeException("Value failure at: " 552 + Utility.hex(i)); 553 } 554 } 555 } 556 if (DEBUG && CHECK_NAME.equals(getName())) { 557 System.out.println(getName() + ":\t" + getClass().getName() + "\t" 558 + getVersion()); 559 System.out.println(getStack()); 560 System.out.println(result); 561 } 562 return result; 563 } 564 getStuffToTest(boolean uniformUnassigned)565 private static UnicodeSetIterator getStuffToTest(boolean uniformUnassigned) { 566 return new UnicodeSetIterator(uniformUnassigned ? getSTUFF_TO_TEST() : getSTUFF_TO_TEST_WITH_UNASSIGNED()); 567 } 568 569 /** 570 * Really ought to create a Collection UniqueList, that forces uniqueness. 571 * But for now... 572 */ addUnique(Object obj, Collection result)573 public static Collection addUnique(Object obj, Collection result) { 574 if (obj != null && !result.contains(obj)) 575 result.add(obj); 576 return result; 577 } 578 579 /** 580 * Utility for managing property & non-string value aliases 581 */ 582 public static final Comparator PROPERTY_COMPARATOR = new Comparator() { 583 @Override 584 public int compare(Object o1, Object o2) { 585 return compareNames((String) o1, (String) o2); 586 } 587 }; 588 589 /** 590 * Utility for managing property & non-string value aliases 591 * 592 */ 593 // TODO optimize equalNames(String a, String b)594 public static boolean equalNames(String a, String b) { 595 if (a == b) 596 return true; 597 if (a == null) 598 return false; 599 return toSkeleton(a).equals(toSkeleton(b)); 600 } 601 602 /** 603 * Utility for managing property & non-string value aliases 604 */ 605 // TODO optimize compareNames(String a, String b)606 public static int compareNames(String a, String b) { 607 if (a == b) 608 return 0; 609 if (a == null) 610 return -1; 611 if (b == null) 612 return 1; 613 return toSkeleton(a).compareTo(toSkeleton(b)); 614 } 615 616 /** 617 * Utility for managing property & non-string value aliases 618 */ 619 // TODO account for special names, tibetan, hangul toSkeleton(String source)620 public static String toSkeleton(String source) { 621 if (source == null) 622 return null; 623 StringBuilder skeletonBuffer = new StringBuilder(); 624 boolean gotOne = false; 625 // remove spaces, '_', '-' 626 // we can do this with char, since no surrogates are involved 627 for (int i = 0; i < source.length(); ++i) { 628 char ch = source.charAt(i); 629 if (i > 0 && (ch == '_' || ch == ' ' || ch == '-')) { 630 gotOne = true; 631 } else { 632 char ch2 = Character.toLowerCase(ch); 633 if (ch2 != ch) { 634 gotOne = true; 635 skeletonBuffer.append(ch2); 636 } else { 637 skeletonBuffer.append(ch); 638 } 639 } 640 } 641 if (!gotOne) 642 return source; // avoid string creation 643 return skeletonBuffer.toString(); 644 } 645 646 // get the name skeleton toNameSkeleton(String source)647 public static String toNameSkeleton(String source) { 648 if (source == null) 649 return null; 650 StringBuffer result = new StringBuffer(); 651 // remove spaces, medial '-' 652 // we can do this with char, since no surrogates are involved 653 for (int i = 0; i < source.length(); ++i) { 654 char ch = source.charAt(i); 655 if (('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'Z') 656 || ch == '<' || ch == '>') { 657 result.append(ch); 658 } else if (ch == ' ') { 659 // don't copy ever 660 } else if (ch == '-') { 661 // only copy non-medials AND trailing O-E 662 if (0 == i 663 || i == source.length() - 1 664 || source.charAt(i - 1) == ' ' 665 || source.charAt(i + 1) == ' ' 666 || (i == source.length() - 2 667 && source.charAt(i - 1) == 'O' && source 668 .charAt(i + 1) == 'E')) { 669 System.out.println("****** EXCEPTION " + source); 670 result.append(ch); 671 } 672 // otherwise don't copy 673 } else { 674 throw new IllegalArgumentException("Illegal Name Char: U+" 675 + Utility.hex(ch) + ", " + ch); 676 } 677 } 678 return result.toString(); 679 } 680 681 /** 682 * These routines use the Java functions, because they only need to act on 683 * ASCII Changes space, - into _, inserts _ between lower and UPPER. 684 */ regularize(String source, boolean titlecaseStart)685 public static String regularize(String source, boolean titlecaseStart) { 686 if (source == null) 687 return source; 688 /* 689 * if (source.equals("noBreak")) { // HACK if (titlecaseStart) return 690 * "NoBreak"; return source; } 691 */ 692 StringBuffer result = new StringBuffer(); 693 int lastCat = -1; 694 boolean haveFirstCased = true; 695 for (int i = 0; i < source.length(); ++i) { 696 char c = source.charAt(i); 697 if (c == ' ' || c == '-' || c == '_') { 698 c = '_'; 699 haveFirstCased = true; 700 } 701 if (c == '=') 702 haveFirstCased = true; 703 int cat = Character.getType(c); 704 if (lastCat == Character.LOWERCASE_LETTER 705 && cat == Character.UPPERCASE_LETTER) { 706 result.append('_'); 707 } 708 if (haveFirstCased 709 && (cat == Character.LOWERCASE_LETTER 710 || cat == Character.TITLECASE_LETTER || cat == Character.UPPERCASE_LETTER)) { 711 if (titlecaseStart) { 712 c = Character.toUpperCase(c); 713 } 714 haveFirstCased = false; 715 } 716 result.append(c); 717 lastCat = cat; 718 } 719 return result.toString(); 720 } 721 722 /** 723 * Utility function for comparing codepoint to string without generating new 724 * string. 725 * 726 * @param codepoint 727 * @param other 728 * @return true if the codepoint equals the string 729 */ equals(int codepoint, String other)730 public static final boolean equals(int codepoint, String other) { 731 if (other == null) return false; 732 if (other.length() == 1) { 733 return codepoint == other.charAt(0); 734 } 735 if (other.length() == 2) { 736 return other.equals(UTF16.valueOf(codepoint)); 737 } 738 return false; 739 } 740 741 /** 742 * Utility function for comparing objects that may be null 743 * string. 744 */ equals(T a, T b)745 public static final <T extends Object> boolean equals(T a, T b) { 746 return a == null ? b == null 747 : b == null ? false 748 : a.equals(b); 749 } 750 751 /** 752 * Utility that should be on UnicodeSet 753 * 754 * @param source 755 * @param result 756 */ addAll(UnicodeSetIterator source, UnicodeSet result)757 static public void addAll(UnicodeSetIterator source, UnicodeSet result) { 758 while (source.nextRange()) { 759 if (source.codepoint == UnicodeSetIterator.IS_STRING) { 760 result.add(source.string); 761 } else { 762 result.add(source.codepoint, source.codepointEnd); 763 } 764 } 765 } 766 767 /** 768 * Really ought to create a Collection UniqueList, that forces uniqueness. 769 * But for now... 770 */ addAllUnique(Collection source, Collection result)771 public static Collection addAllUnique(Collection source, Collection result) { 772 for (Iterator it = source.iterator(); it.hasNext();) { 773 addUnique(it.next(), result); 774 } 775 return result; 776 } 777 778 /** 779 * Really ought to create a Collection UniqueList, that forces uniqueness. 780 * But for now... 781 */ addAllUnique(Object[] source, Collection result)782 public static Collection addAllUnique(Object[] source, Collection result) { 783 for (int i = 0; i < source.length; ++i) { 784 addUnique(source[i], result); 785 } 786 return result; 787 } 788 789 static public class Factory { 790 static boolean DEBUG = false; 791 792 Map<String, UnicodeProperty> canonicalNames = new TreeMap<>(); 793 794 Map skeletonNames = new TreeMap(); 795 796 Map propertyCache = new HashMap(1); 797 add(UnicodeProperty sp)798 public final Factory add(UnicodeProperty sp) { 799 String name2 = sp.getName(); 800 if (name2.length() == 0) { 801 throw new IllegalArgumentException(); 802 } 803 canonicalNames.put(name2, sp); 804 skeletonNames.put(toSkeleton(name2), sp); 805 List c = sp.getNameAliases(new ArrayList(1)); 806 Iterator it = c.iterator(); 807 while (it.hasNext()) { 808 skeletonNames.put(toSkeleton((String) it.next()), sp); 809 } 810 return this; 811 } 812 getProperty(String propertyAlias)813 public UnicodeProperty getProperty(String propertyAlias) { 814 return (UnicodeProperty) skeletonNames 815 .get(toSkeleton(propertyAlias)); 816 } 817 getAvailableNames()818 public final List<String> getAvailableNames() { 819 return getAvailableNames(null); 820 } 821 getAvailableNames(List<String> result)822 public final List<String> getAvailableNames(List<String> result) { 823 if (result == null) 824 result = new ArrayList(1); 825 Iterator it = canonicalNames.keySet().iterator(); 826 while (it.hasNext()) { 827 addUnique(it.next(), result); 828 } 829 return result; 830 } 831 getAvailableNames(int propertyTypeMask)832 public final List getAvailableNames(int propertyTypeMask) { 833 return getAvailableNames(propertyTypeMask, null); 834 } 835 getAvailableNames(int propertyTypeMask, List result)836 public final List getAvailableNames(int propertyTypeMask, List result) { 837 if (result == null) 838 result = new ArrayList(1); 839 Iterator it = canonicalNames.keySet().iterator(); 840 while (it.hasNext()) { 841 String item = (String) it.next(); 842 UnicodeProperty property = getProperty(item); 843 if (DEBUG) 844 System.out.println("Properties: " + item + "," 845 + property.getType()); 846 if (!property.isType(propertyTypeMask)) { 847 // System.out.println("Masking: " + property.getType() + "," 848 // + propertyTypeMask); 849 continue; 850 } 851 addUnique(property.getName(), result); 852 } 853 return result; 854 } 855 856 InversePatternMatcher inverseMatcher = new InversePatternMatcher(); 857 858 /** 859 * Format is: propname ('=' | '!=') propvalue ( '|' propValue )* 860 */ getSet(String propAndValue, PatternMatcher matcher, UnicodeSet result)861 public final UnicodeSet getSet(String propAndValue, 862 PatternMatcher matcher, UnicodeSet result) { 863 int equalPos = propAndValue.indexOf('='); 864 String prop = propAndValue.substring(0, equalPos); 865 String value = propAndValue.substring(equalPos + 1); 866 boolean negative = false; 867 if (prop.endsWith("!")) { 868 prop = prop.substring(0, prop.length() - 1); 869 negative = true; 870 } 871 prop = prop.trim(); 872 UnicodeProperty up = getProperty(prop); 873 if (matcher == null) { 874 matcher = new SimpleMatcher(value, up 875 .isType(STRING_OR_MISC_MASK) ? null 876 : PROPERTY_COMPARATOR); 877 } 878 if (negative) { 879 inverseMatcher.set(matcher); 880 matcher = inverseMatcher; 881 } 882 return up.getSet(matcher.set(value), result); 883 } 884 getSet(String propAndValue, PatternMatcher matcher)885 public final UnicodeSet getSet(String propAndValue, 886 PatternMatcher matcher) { 887 return getSet(propAndValue, matcher, null); 888 } 889 getSet(String propAndValue)890 public final UnicodeSet getSet(String propAndValue) { 891 return getSet(propAndValue, null, null); 892 } 893 getSymbolTable(String prefix)894 public final SymbolTable getSymbolTable(String prefix) { 895 return new PropertySymbolTable(prefix); 896 } 897 898 private class MyXSymbolTable extends UnicodeSet.XSymbolTable { 899 @Override applyPropertyAlias(String propertyName, String propertyValue, UnicodeSet result)900 public boolean applyPropertyAlias(String propertyName, 901 String propertyValue, UnicodeSet result) { 902 if (false) 903 System.out.println(propertyName + "=" + propertyValue); 904 UnicodeProperty prop = getProperty(propertyName); 905 if (prop == null) 906 return false; 907 result.clear(); 908 UnicodeSet x = prop.getSet(propertyValue, result); 909 return x.size() != 0; 910 } 911 } 912 getXSymbolTable()913 public final UnicodeSet.XSymbolTable getXSymbolTable() { 914 return new MyXSymbolTable(); 915 } 916 917 private class PropertySymbolTable implements SymbolTable { 918 static final boolean DEBUG = false; 919 920 private String prefix; 921 922 RegexMatcher regexMatcher = new RegexMatcher(); 923 PropertySymbolTable(String prefix)924 PropertySymbolTable(String prefix) { 925 this.prefix = prefix; 926 } 927 928 @Override lookup(String s)929 public char[] lookup(String s) { 930 if (DEBUG) 931 System.out.println("\t(" + prefix + ")Looking up " + s); 932 // ensure, again, that prefix matches 933 int start = prefix.length(); 934 if (!s.regionMatches(true, 0, prefix, 0, start)) 935 return null; 936 937 int pos = s.indexOf(':', start); 938 if (pos < 0) { // should never happen 939 throw new IllegalArgumentException( 940 "Internal Error: missing =: " + s + "\r\n"); 941 } 942 UnicodeProperty prop = getProperty(s.substring(start, pos)); 943 if (prop == null) { 944 throw new IllegalArgumentException("Invalid Property in: " 945 + s + "\r\nUse " + showSet(getAvailableNames())); 946 } 947 String value = s.substring(pos + 1); 948 UnicodeSet set; 949 if (value.startsWith("\u00AB")) { // regex! 950 set = prop.getSet(regexMatcher.set(value.substring(1, value 951 .length() - 1))); 952 } else { 953 set = prop.getSet(value); 954 } 955 if (set.size() == 0) { 956 throw new IllegalArgumentException( 957 "Empty Property-Value in: " + s + "\r\nUse " 958 + showSet(prop.getAvailableValues())); 959 } 960 if (DEBUG) 961 System.out.println("\t(" + prefix + ")Returning " 962 + set.toPattern(true)); 963 return set.toPattern(true).toCharArray(); // really ugly 964 } 965 showSet(List list)966 private String showSet(List list) { 967 StringBuffer result = new StringBuffer("["); 968 boolean first = true; 969 for (Iterator it = list.iterator(); it.hasNext();) { 970 if (!first) 971 result.append(", "); 972 else 973 first = false; 974 result.append(it.next().toString()); 975 } 976 result.append("]"); 977 return result.toString(); 978 } 979 980 @Override lookupMatcher(int ch)981 public UnicodeMatcher lookupMatcher(int ch) { 982 return null; 983 } 984 985 @Override parseReference(String text, ParsePosition pos, int limit)986 public String parseReference(String text, ParsePosition pos, 987 int limit) { 988 if (DEBUG) 989 System.out.println("\t(" + prefix + ")Parsing <" 990 + text.substring(pos.getIndex(), limit) + ">"); 991 int start = pos.getIndex(); 992 // ensure that it starts with 'prefix' 993 if (!text 994 .regionMatches(true, start, prefix, 0, prefix.length())) 995 return null; 996 start += prefix.length(); 997 // now see if it is of the form identifier:identifier 998 int i = getIdentifier(text, start, limit); 999 if (i == start) 1000 return null; 1001 String prop = text.substring(start, i); 1002 String value = "true"; 1003 if (i < limit) { 1004 if (text.charAt(i) == ':') { 1005 int j; 1006 if (text.charAt(i + 1) == '\u00AB') { // regular 1007 // expression 1008 j = text.indexOf('\u00BB', i + 2) + 1; // include 1009 // last 1010 // character 1011 if (j <= 0) 1012 return null; 1013 } else { 1014 j = getIdentifier(text, i + 1, limit); 1015 } 1016 value = text.substring(i + 1, j); 1017 i = j; 1018 } 1019 } 1020 pos.setIndex(i); 1021 if (DEBUG) 1022 System.out.println("\t(" + prefix + ")Parsed <" + prop 1023 + ">=<" + value + ">"); 1024 return prefix + prop + ":" + value; 1025 } 1026 getIdentifier(String text, int start, int limit)1027 private int getIdentifier(String text, int start, int limit) { 1028 if (DEBUG) 1029 System.out.println("\tGetID <" 1030 + text.substring(start, limit) + ">"); 1031 int cp = 0; 1032 int i; 1033 for (i = start; i < limit; i += UTF16.getCharCount(cp)) { 1034 cp = UTF16.charAt(text, i); 1035 if (!com.ibm.icu.lang.UCharacter 1036 .isUnicodeIdentifierPart(cp) 1037 && cp != '.') { 1038 break; 1039 } 1040 } 1041 if (DEBUG) 1042 System.out.println("\tGotID <" + text.substring(start, i) 1043 + ">"); 1044 return i; 1045 } 1046 } 1047 } 1048 1049 public static class FilteredProperty extends UnicodeProperty { 1050 private UnicodeProperty property; 1051 1052 protected StringFilter filter; 1053 1054 protected UnicodeSetIterator matchIterator = new UnicodeSetIterator( 1055 new UnicodeSet(0, 0x10FFFF)); 1056 1057 protected HashMap backmap; 1058 1059 boolean allowValueAliasCollisions = false; 1060 FilteredProperty(UnicodeProperty property, StringFilter filter)1061 public FilteredProperty(UnicodeProperty property, StringFilter filter) { 1062 this.property = property; 1063 this.filter = filter; 1064 } 1065 getFilter()1066 public StringFilter getFilter() { 1067 return filter; 1068 } 1069 setFilter(StringFilter filter)1070 public UnicodeProperty setFilter(StringFilter filter) { 1071 this.filter = filter; 1072 return this; 1073 } 1074 1075 List temp = new ArrayList(1); 1076 1077 @Override _getAvailableValues(List result)1078 public List _getAvailableValues(List result) { 1079 temp.clear(); 1080 return filter.addUnique(property.getAvailableValues(temp), result); 1081 } 1082 1083 @Override _getNameAliases(List result)1084 public List _getNameAliases(List result) { 1085 temp.clear(); 1086 return filter.addUnique(property.getNameAliases(temp), result); 1087 } 1088 1089 @Override _getValue(int codepoint)1090 public String _getValue(int codepoint) { 1091 return filter.remap(property.getValue(codepoint)); 1092 } 1093 1094 @Override _getValueAliases(String valueAlias, List result)1095 public List _getValueAliases(String valueAlias, List result) { 1096 if (backmap == null) { 1097 backmap = new HashMap(1); 1098 temp.clear(); 1099 Iterator it = property.getAvailableValues(temp).iterator(); 1100 while (it.hasNext()) { 1101 String item = (String) it.next(); 1102 String mappedItem = filter.remap(item); 1103 if (backmap.get(mappedItem) != null 1104 && !allowValueAliasCollisions) { 1105 throw new IllegalArgumentException( 1106 "Filter makes values collide! " + item + ", " 1107 + mappedItem); 1108 } 1109 backmap.put(mappedItem, item); 1110 } 1111 } 1112 valueAlias = (String) backmap.get(valueAlias); 1113 temp.clear(); 1114 return filter.addUnique(property.getValueAliases(valueAlias, temp), 1115 result); 1116 } 1117 1118 @Override _getVersion()1119 public String _getVersion() { 1120 return property.getVersion(); 1121 } 1122 isAllowValueAliasCollisions()1123 public boolean isAllowValueAliasCollisions() { 1124 return allowValueAliasCollisions; 1125 } 1126 setAllowValueAliasCollisions(boolean b)1127 public FilteredProperty setAllowValueAliasCollisions(boolean b) { 1128 allowValueAliasCollisions = b; 1129 return this; 1130 } 1131 1132 } 1133 1134 public static abstract class StringFilter implements Cloneable { remap(String original)1135 public abstract String remap(String original); 1136 addUnique(Collection source, List result)1137 public final List addUnique(Collection source, List result) { 1138 if (result == null) 1139 result = new ArrayList(1); 1140 Iterator it = source.iterator(); 1141 while (it.hasNext()) { 1142 UnicodeProperty.addUnique(remap((String) it.next()), result); 1143 } 1144 return result; 1145 } 1146 /* 1147 * public Object clone() { try { return super.clone(); } catch 1148 * (CloneNotSupportedException e) { throw new 1149 * IllegalStateException("Should never happen."); } } 1150 */ 1151 } 1152 1153 public static class MapFilter extends StringFilter { 1154 private Map valueMap; 1155 MapFilter(Map valueMap)1156 public MapFilter(Map valueMap) { 1157 this.valueMap = valueMap; 1158 } 1159 1160 @Override remap(String original)1161 public String remap(String original) { 1162 Object changed = valueMap.get(original); 1163 return changed == null ? original : (String) changed; 1164 } 1165 getMap()1166 public Map getMap() { 1167 return valueMap; 1168 } 1169 } 1170 1171 /** Matches using .equals(). */ 1172 private static final class StringEqualityMatcher implements Predicate<String> { 1173 private final String pattern; 1174 StringEqualityMatcher(String pattern)1175 StringEqualityMatcher(String pattern) { 1176 this.pattern = pattern; 1177 } 1178 1179 @Override test(String value)1180 public boolean test(String value) { 1181 return pattern.equals(value); 1182 } 1183 } 1184 1185 /** Matches skeleton strings. Computes the pattern skeleton only once. */ 1186 private static final class NameMatcher implements Predicate<String> { 1187 private final String pattern; 1188 private final String skeleton; 1189 NameMatcher(String pattern)1190 NameMatcher(String pattern) { 1191 this.pattern = pattern; 1192 this.skeleton = toSkeleton(pattern); 1193 } 1194 1195 @Override test(String value)1196 public boolean test(String value) { 1197 return pattern.equals(value) || skeleton.equals(toSkeleton(value)); 1198 } 1199 } 1200 1201 private static final NameMatcher YES_MATCHER = new NameMatcher("Yes"); 1202 1203 public interface PatternMatcher extends Predicate { set(String pattern)1204 PatternMatcher set(String pattern); 1205 } 1206 1207 public static class InversePatternMatcher implements PatternMatcher { 1208 PatternMatcher other; 1209 set(PatternMatcher toInverse)1210 public PatternMatcher set(PatternMatcher toInverse) { 1211 other = toInverse; 1212 return this; 1213 } 1214 1215 @Override test(Object value)1216 public boolean test(Object value) { 1217 return !other.test(value); 1218 } 1219 1220 @Override set(String pattern)1221 public PatternMatcher set(String pattern) { 1222 other.set(pattern); 1223 return this; 1224 } 1225 } 1226 1227 public static class SimpleMatcher implements PatternMatcher { 1228 Comparator comparator; 1229 1230 String pattern; 1231 SimpleMatcher(String pattern, Comparator comparator)1232 public SimpleMatcher(String pattern, Comparator comparator) { 1233 this.comparator = comparator; 1234 this.pattern = pattern; 1235 } 1236 1237 @Override test(Object value)1238 public boolean test(Object value) { 1239 if (comparator == null) 1240 return pattern.equals(value); 1241 return comparator.compare(pattern, value) == 0; 1242 } 1243 1244 @Override set(String pattern)1245 public PatternMatcher set(String pattern) { 1246 this.pattern = pattern; 1247 return this; 1248 } 1249 } 1250 1251 public static class RegexMatcher implements UnicodeProperty.PatternMatcher { 1252 private java.util.regex.Matcher matcher; 1253 1254 @Override set(String pattern)1255 public UnicodeProperty.PatternMatcher set(String pattern) { 1256 matcher = Pattern.compile(pattern).matcher(""); 1257 return this; 1258 } 1259 UFormat foo; 1260 @Override test(Object value)1261 public boolean test(Object value) { 1262 matcher.reset(value.toString()); 1263 return matcher.find(); 1264 } 1265 } 1266 1267 public enum AliasAddAction {IGNORE_IF_MISSING, REQUIRE_MAIN_ALIAS, ADD_MAIN_ALIAS} 1268 1269 public static abstract class BaseProperty extends UnicodeProperty { 1270 private static final String[] NO_VALUES = {"No", "N", "F", "False"}; 1271 1272 private static final String[] YES_VALUES = {"Yes", "Y", "T", "True"}; 1273 1274 /** 1275 * 1276 */ 1277 private static final String[][] YES_NO_ALIASES = new String[][] {YES_VALUES, NO_VALUES}; 1278 1279 protected List propertyAliases = new ArrayList(1); 1280 1281 protected Map toValueAliases; 1282 1283 protected String version; 1284 setMain(String alias, String shortAlias, int propertyType, String version)1285 public BaseProperty setMain(String alias, String shortAlias, 1286 int propertyType, String version) { 1287 setName(alias); 1288 setType(propertyType); 1289 propertyAliases.add(shortAlias); 1290 propertyAliases.add(alias); 1291 if (propertyType == BINARY) { 1292 addValueAliases(YES_NO_ALIASES, AliasAddAction.ADD_MAIN_ALIAS); 1293 } 1294 this.version = version; 1295 return this; 1296 } 1297 1298 @Override _getVersion()1299 public String _getVersion() { 1300 return version; 1301 } 1302 1303 @Override _getNameAliases(List result)1304 public List _getNameAliases(List result) { 1305 addAllUnique(propertyAliases, result); 1306 return result; 1307 } 1308 addValueAliases(String[][] valueAndAlternates, AliasAddAction aliasAddAction)1309 public BaseProperty addValueAliases(String[][] valueAndAlternates, 1310 AliasAddAction aliasAddAction) { 1311 if (toValueAliases == null) 1312 _fixValueAliases(); 1313 for (int i = 0; i < valueAndAlternates.length; ++i) { 1314 for (int j = 1; j < valueAndAlternates[0].length; ++j) { 1315 addValueAlias(valueAndAlternates[i][0], 1316 valueAndAlternates[i][j], aliasAddAction); 1317 } 1318 } 1319 return this; 1320 } 1321 addValueAlias(String value, String valueAlias, AliasAddAction aliasAddAction)1322 public void addValueAlias(String value, String valueAlias, 1323 AliasAddAction aliasAddAction) { 1324 List result = (List) toValueAliases.get(value); 1325 if (result == null) { 1326 switch(aliasAddAction) { 1327 case IGNORE_IF_MISSING: return; 1328 case REQUIRE_MAIN_ALIAS: throw new IllegalArgumentException("Can't add alias for mising value: " + value); 1329 case ADD_MAIN_ALIAS: 1330 toValueAliases.put(value, result = new ArrayList(0)); 1331 break; 1332 } 1333 } 1334 addUnique(value, result); 1335 addUnique(valueAlias, result); 1336 } 1337 1338 @Override _getValueAliases(String valueAlias, List result)1339 protected List _getValueAliases(String valueAlias, List result) { 1340 if (toValueAliases == null) 1341 _fixValueAliases(); 1342 List a = (List) toValueAliases.get(valueAlias); 1343 if (a != null) 1344 addAllUnique(a, result); 1345 return result; 1346 } 1347 _fixValueAliases()1348 protected void _fixValueAliases() { 1349 if (toValueAliases == null) 1350 toValueAliases = new HashMap(1); 1351 for (Iterator it = getAvailableValues().iterator(); it.hasNext();) { 1352 Object value = it.next(); 1353 _ensureValueInAliases(value); 1354 } 1355 } 1356 _ensureValueInAliases(Object value)1357 protected void _ensureValueInAliases(Object value) { 1358 List result = (List) toValueAliases.get(value); 1359 if (result == null) 1360 toValueAliases.put(value, result = new ArrayList(1)); 1361 addUnique(value, result); 1362 } 1363 swapFirst2ValueAliases()1364 public BaseProperty swapFirst2ValueAliases() { 1365 for (Iterator it = toValueAliases.keySet().iterator(); it.hasNext();) { 1366 List list = (List) toValueAliases.get(it.next()); 1367 if (list.size() < 2) 1368 continue; 1369 Object first = list.get(0); 1370 list.set(0, list.get(1)); 1371 list.set(1, first); 1372 } 1373 return this; 1374 } 1375 1376 /** 1377 * @param string 1378 * @return 1379 */ addName(String string)1380 public UnicodeProperty addName(String string) { 1381 throw new UnsupportedOperationException(); 1382 } 1383 1384 } 1385 1386 public static abstract class SimpleProperty extends BaseProperty { 1387 LinkedHashSet values; 1388 1389 @Override addName(String alias)1390 public UnicodeProperty addName(String alias) { 1391 propertyAliases.add(alias); 1392 return this; 1393 } 1394 setValues(String valueAlias)1395 public SimpleProperty setValues(String valueAlias) { 1396 _addToValues(valueAlias, null); 1397 return this; 1398 } 1399 addAliases(String valueAlias, String... aliases)1400 public SimpleProperty addAliases(String valueAlias, String... aliases) { 1401 _addToValues(valueAlias, null); 1402 return this; 1403 } 1404 setValues(String[] valueAliases, String[] alternateValueAliases)1405 public SimpleProperty setValues(String[] valueAliases, 1406 String[] alternateValueAliases) { 1407 for (int i = 0; i < valueAliases.length; ++i) { 1408 if (valueAliases[i].equals(UNUSED)) 1409 continue; 1410 _addToValues( 1411 valueAliases[i], 1412 alternateValueAliases != null ? alternateValueAliases[i] 1413 : null); 1414 } 1415 return this; 1416 } 1417 setValues(List valueAliases)1418 public SimpleProperty setValues(List valueAliases) { 1419 this.values = new LinkedHashSet(valueAliases); 1420 for (Iterator it = this.values.iterator(); it.hasNext();) { 1421 _addToValues((String) it.next(), null); 1422 } 1423 return this; 1424 } 1425 1426 @Override _getAvailableValues(List result)1427 public List _getAvailableValues(List result) { 1428 if (values == null) 1429 _fillValues(); 1430 result.addAll(values); 1431 return result; 1432 } 1433 _fillValues()1434 protected void _fillValues() { 1435 List newvalues = (List) getUnicodeMap_internal() 1436 .getAvailableValues(new ArrayList()); 1437 for (Iterator it = newvalues.iterator(); it.hasNext();) { 1438 _addToValues((String) it.next(), null); 1439 } 1440 } 1441 _addToValues(String item, String alias)1442 private void _addToValues(String item, String alias) { 1443 if (values == null) 1444 values = new LinkedHashSet(); 1445 if (toValueAliases == null) 1446 _fixValueAliases(); 1447 addUnique(item, values); 1448 _ensureValueInAliases(item); 1449 addValueAlias(item, alias, AliasAddAction.REQUIRE_MAIN_ALIAS); 1450 } 1451 /* public String _getVersion() { 1452 return version; 1453 } 1454 */ 1455 } 1456 1457 public static class UnicodeMapProperty extends BaseProperty { 1458 /* 1459 * Example of usage: 1460 * new UnicodeProperty.UnicodeMapProperty() { 1461 { 1462 unicodeMap = new UnicodeMap(); 1463 unicodeMap.setErrorOnReset(true); 1464 unicodeMap.put(0xD, "CR"); 1465 unicodeMap.put(0xA, "LF"); 1466 UnicodeProperty cat = getProperty("General_Category"); 1467 UnicodeSet temp = cat.getSet("Line_Separator") 1468 .addAll(cat.getSet("Paragraph_Separator")) 1469 .addAll(cat.getSet("Control")) 1470 .addAll(cat.getSet("Format")) 1471 .remove(0xD).remove(0xA).remove(0x200C).remove(0x200D); 1472 unicodeMap.putAll(temp, "Control"); 1473 UnicodeSet graphemeExtend = getProperty("Grapheme_Extend").getSet("true"); 1474 unicodeMap.putAll(graphemeExtend,"Extend"); 1475 UnicodeProperty hangul = getProperty("Hangul_Syllable_Type"); 1476 unicodeMap.putAll(hangul.getSet("L"),"L"); 1477 unicodeMap.putAll(hangul.getSet("V"),"V"); 1478 unicodeMap.putAll(hangul.getSet("T"),"T"); 1479 unicodeMap.putAll(hangul.getSet("LV"),"LV"); 1480 unicodeMap.putAll(hangul.getSet("LVT"),"LVT"); 1481 unicodeMap.setMissing("Other"); 1482 } 1483 }.setMain("Grapheme_Cluster_Break", "GCB", UnicodeProperty.ENUMERATED, version) 1484 */ 1485 protected UnicodeMap unicodeMap; 1486 1487 @Override _getUnicodeMap()1488 protected UnicodeMap _getUnicodeMap() { 1489 return unicodeMap; 1490 } 1491 set(UnicodeMap map)1492 public UnicodeMapProperty set(UnicodeMap map) { 1493 unicodeMap = map.freeze(); 1494 return this; 1495 } 1496 1497 @Override _getValue(int codepoint)1498 protected String _getValue(int codepoint) { 1499 return (String) unicodeMap.getValue(codepoint); 1500 } 1501 1502 /* protected List _getValueAliases(String valueAlias, List result) { 1503 if (!unicodeMap.getAvailableValues().contains(valueAlias)) return result; 1504 result.add(valueAlias); 1505 return result; // no other aliases 1506 } 1507 */@Override _getAvailableValues(List result)1508 protected List _getAvailableValues(List result) { 1509 unicodeMap.getAvailableValues(result); 1510 if (toValueAliases != null) { 1511 for (Object s : toValueAliases.keySet()) { 1512 if (!result.contains(s)) { 1513 result.add(s); 1514 } 1515 } 1516 } 1517 return result; 1518 } 1519 } 1520 isValidValue(String propertyValue)1521 public boolean isValidValue(String propertyValue) { 1522 if (isType(STRING_OR_MISC_MASK)) { 1523 return true; 1524 } 1525 Collection<String> values = getAvailableValues(); 1526 for (String valueAlias : values) { 1527 if (UnicodeProperty.compareNames(valueAlias, propertyValue) == 0) { 1528 return true; 1529 } 1530 for (String valueAlias2 : (Collection<String>) getValueAliases(valueAlias)) { 1531 if (UnicodeProperty.compareNames(valueAlias2, propertyValue) == 0) { 1532 return true; 1533 } 1534 } 1535 } 1536 return false; 1537 } 1538 getValueAliases()1539 public List<String> getValueAliases() { 1540 List<String> result = new ArrayList(); 1541 if (isType(STRING_OR_MISC_MASK)) { 1542 return result; 1543 } 1544 Collection<String> values = getAvailableValues(); 1545 for (String valueAlias : values) { 1546 UnicodeProperty.addAllUnique(getValueAliases(valueAlias), result); 1547 } 1548 result.removeAll(values); 1549 return result; 1550 } 1551 1552 addUntested(UnicodeSet result, boolean uniformUnassigned)1553 public static UnicodeSet addUntested(UnicodeSet result, boolean uniformUnassigned) { 1554 if (uniformUnassigned && result.contains(UnicodeProperty.getSAMPLE_UNASSIGNED())) { 1555 result.addAll(UnicodeProperty.getUNASSIGNED()); 1556 } 1557 1558 if (result.contains(UnicodeProperty.SAMPLE_HIGH_SURROGATE)) { 1559 result.addAll(UnicodeProperty.HIGH_SURROGATES); 1560 } 1561 if (result.contains(UnicodeProperty.SAMPLE_HIGH_PRIVATE_USE_SURROGATE)) { 1562 result.addAll(UnicodeProperty.HIGH_PRIVATE_USE_SURROGATES); 1563 } 1564 if (result.contains(UnicodeProperty.SAMPLE_LOW_SURROGATE)) { 1565 result.addAll(UnicodeProperty.LOW_SURROGATES); 1566 } 1567 1568 if (result.contains(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA)) { 1569 result.addAll(UnicodeProperty.PRIVATE_USE_AREA); 1570 } 1571 if (result.contains(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA_A)) { 1572 result.addAll(UnicodeProperty.PRIVATE_USE_AREA_A); 1573 } 1574 if (result.contains(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA_B)) { 1575 result.addAll(UnicodeProperty.PRIVATE_USE_AREA_B); 1576 } 1577 1578 return result; 1579 } 1580 addUntested(UnicodeMap result, boolean uniformUnassigned)1581 public static UnicodeMap addUntested(UnicodeMap result, boolean uniformUnassigned) { 1582 Object temp; 1583 if (uniformUnassigned && null != (temp = result.get(UnicodeProperty.getSAMPLE_UNASSIGNED()))) { 1584 result.putAll(UnicodeProperty.getUNASSIGNED(), temp); 1585 } 1586 1587 if (null != (temp = result.get(UnicodeProperty.SAMPLE_HIGH_SURROGATE))) { 1588 result.putAll(UnicodeProperty.HIGH_SURROGATES, temp); 1589 } 1590 if (null != (temp = result.get(UnicodeProperty.SAMPLE_HIGH_PRIVATE_USE_SURROGATE))) { 1591 result.putAll(UnicodeProperty.HIGH_PRIVATE_USE_SURROGATES, temp); 1592 } 1593 if (null != (temp = result.get(UnicodeProperty.SAMPLE_LOW_SURROGATE))) { 1594 result.putAll(UnicodeProperty.LOW_SURROGATES, temp); 1595 } 1596 1597 if (null != (temp = result.get(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA))) { 1598 result.putAll(UnicodeProperty.PRIVATE_USE_AREA, temp); 1599 } 1600 if (null != (temp = result.get(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA_A))) { 1601 result.putAll(UnicodeProperty.PRIVATE_USE_AREA_A, temp); 1602 } 1603 if (null != (temp = result.get(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA_B))) { 1604 result.putAll(UnicodeProperty.PRIVATE_USE_AREA_B, temp); 1605 } 1606 return result; 1607 } 1608 isDefault(int cp)1609 public boolean isDefault(int cp) { 1610 String value = getValue(cp); 1611 if (isType(STRING_OR_MISC_MASK)) { 1612 return equals(cp, value); 1613 } 1614 String defaultValue = getValue(getSAMPLE_UNASSIGNED()); 1615 return defaultValue == null ? value == null : defaultValue.equals(value); 1616 } 1617 hasUniformUnassigned()1618 public boolean hasUniformUnassigned() { 1619 return hasUniformUnassigned; 1620 } setUniformUnassigned(boolean hasUniformUnassigned)1621 protected UnicodeProperty setUniformUnassigned(boolean hasUniformUnassigned) { 1622 this.hasUniformUnassigned = hasUniformUnassigned; 1623 return this; 1624 } 1625 1626 public static class UnicodeSetProperty extends BaseProperty { 1627 protected UnicodeSet unicodeSet; 1628 private static final String[] YESNO_ARRAY = new String[]{"Yes", "No"}; 1629 private static final List YESNO = Arrays.asList(YESNO_ARRAY); 1630 set(UnicodeSet set)1631 public UnicodeSetProperty set(UnicodeSet set) { 1632 unicodeSet = set.freeze(); 1633 return this; 1634 } 1635 set(String string)1636 public UnicodeSetProperty set(String string) { 1637 // TODO Auto-generated method stub 1638 return set(new UnicodeSet(string).freeze()); 1639 } 1640 1641 @Override _getValue(int codepoint)1642 protected String _getValue(int codepoint) { 1643 return YESNO_ARRAY[unicodeSet.contains(codepoint) ? 0 : 1]; 1644 } 1645 1646 @Override _getAvailableValues(List result)1647 protected List _getAvailableValues(List result) { 1648 return YESNO; 1649 } 1650 } 1651 1652 // private static class StringTransformProperty extends SimpleProperty { 1653 // Transform<String,String> transform; 1654 // 1655 // public StringTransformProperty(Transform<String,String> transform, boolean hasUniformUnassigned) { 1656 // this.transform = transform; 1657 // setUniformUnassigned(hasUniformUnassigned); 1658 // } 1659 // protected String _getValue(int codepoint) { 1660 // return transform.transform(UTF16.valueOf(codepoint)); 1661 // } 1662 // } 1663 // 1664 // private static class CodepointTransformProperty extends SimpleProperty { 1665 // Transform<Integer,String> transform; 1666 // 1667 // public CodepointTransformProperty(Transform<Integer,String> transform, boolean hasUniformUnassigned) { 1668 // this.transform = transform; 1669 // setUniformUnassigned(hasUniformUnassigned); 1670 // } 1671 // protected String _getValue(int codepoint) { 1672 // return transform.transform(codepoint); 1673 // } 1674 // } 1675 } 1676 1677