1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * Copyright (C) 1996-2016, International Business Machines Corporation and * 6 * others. All Rights Reserved. * 7 ******************************************************************************* 8 */ 9 package com.ibm.icu.dev.util; 10 11 import java.util.Collection; 12 import java.util.Collections; 13 import java.util.Comparator; 14 import java.util.HashSet; 15 import java.util.Iterator; 16 import java.util.LinkedHashSet; 17 import java.util.Map; 18 import java.util.Map.Entry; 19 import java.util.Set; 20 import java.util.TreeMap; 21 import java.util.TreeSet; 22 23 import com.ibm.icu.impl.Utility; 24 import com.ibm.icu.text.StringTransform; 25 import com.ibm.icu.text.UTF16; 26 import com.ibm.icu.text.UnicodeSet; 27 import com.ibm.icu.text.UnicodeSetIterator; 28 import com.ibm.icu.util.Freezable; 29 30 /** 31 * Class for mapping Unicode characters and strings to values, optimized for single code points, 32 * where ranges of code points have the same value. 33 * Much smaller storage than using HashMap, and much faster and more compact than 34 * a list of UnicodeSets. The API design mimics Map<String,T> but can't extend it due to some 35 * necessary changes (much as UnicodeSet mimics Set<String>). Note that nulls are not permitted as values; 36 * that is, a put(x,null) is the same as remove(x).<br> 37 * At this point "" is also not allowed as a key, although that may change. 38 * @author markdavis 39 * 40 * @internal CLDR 41 */ 42 43 public final class UnicodeMap<T> implements Cloneable, Freezable<UnicodeMap<T>>, StringTransform, Iterable<String> { 44 /** 45 * For serialization 46 */ 47 //private static final long serialVersionUID = -6540936876295804105L; 48 static final boolean ASSERTIONS = false; 49 static final long GROWTH_PERCENT = 200; // 100 is no growth! 50 static final long GROWTH_GAP = 10; // extra bump! 51 52 private int length; 53 // two parallel arrays to save memory. Wish Java had structs. 54 private int[] transitions; 55 /* package private */ T[] values; 56 57 private LinkedHashSet<T> availableValues = new LinkedHashSet<T>(); 58 private transient boolean staleAvailableValues; 59 60 private transient boolean errorOnReset; 61 private volatile transient boolean locked; 62 private int lastIndex; 63 private TreeMap<String,T> stringMap; 64 clear()65 { clear(); } 66 UnicodeMap()67 public UnicodeMap() { 68 } 69 UnicodeMap(UnicodeMap other)70 public UnicodeMap(UnicodeMap other) { 71 this.putAll(other); 72 } 73 clear()74 public UnicodeMap<T> clear() { 75 if (locked) { 76 throw new UnsupportedOperationException("Attempt to modify locked object"); 77 } 78 length = 2; 79 transitions = new int[] {0,0x110000,0,0,0,0,0,0,0,0}; 80 values = (T[]) new Object[10]; 81 82 availableValues.clear(); 83 staleAvailableValues = false; 84 85 errorOnReset = false; 86 lastIndex = 0; 87 stringMap = null; 88 return this; 89 } 90 91 /* Boilerplate */ equals(Object other)92 public boolean equals(Object other) { 93 if (other == null) return false; 94 try { 95 UnicodeMap that = (UnicodeMap) other; 96 if (length != that.length) return false; 97 for (int i = 0; i < length-1; ++i) { 98 if (transitions[i] != that.transitions[i]) return false; 99 if (!areEqual(values[i], that.values[i])) return false; 100 } 101 return true; 102 } catch (ClassCastException e) { 103 return false; 104 } 105 } 106 areEqual(Object a , Object b)107 public static boolean areEqual(Object a , Object b) { 108 if (a == b) return true; 109 if (a == null || b == null) return false; 110 return a.equals(b); 111 } 112 hashCode()113 public int hashCode() { 114 int result = length; 115 // TODO might want to abbreviate this for speed. 116 for (int i = 0; i < length-1; ++i) { 117 result = 37*result + transitions[i]; 118 result = 37*result; 119 if (values[i] != null) { 120 result += values[i].hashCode(); 121 } 122 } 123 if (stringMap != null) { 124 result = 37*result + stringMap.hashCode(); 125 } 126 return result; 127 } 128 129 /** 130 * Standard clone. Warning, as with Collections, does not do deep clone. 131 */ cloneAsThawed()132 public UnicodeMap<T> cloneAsThawed() { 133 UnicodeMap<T> that = new UnicodeMap<T>(); 134 that.length = length; 135 that.transitions = (int[]) transitions.clone(); 136 that.values = (T[]) values.clone(); 137 that.availableValues = new LinkedHashSet<T>(availableValues); 138 that.locked = false; 139 that.stringMap = stringMap == null ? null : (TreeMap<String, T>) stringMap.clone(); 140 return that; 141 } 142 143 /* for internal consistency checking */ 144 _checkInvariants()145 void _checkInvariants() { 146 if (length < 2 147 || length > transitions.length 148 || transitions.length != values.length) { 149 throw new IllegalArgumentException("Invariant failed: Lengths bad"); 150 } 151 for (int i = 1; i < length-1; ++i) { 152 if (areEqual(values[i-1], values[i])) { 153 throw new IllegalArgumentException("Invariant failed: values shared at " 154 + "\t" + Utility.hex(i-1) + ": <" + values[i-1] + ">" 155 + "\t" + Utility.hex(i) + ": <" + values[i] + ">" 156 ); 157 } 158 } 159 if (transitions[0] != 0 || transitions[length-1] != 0x110000) { 160 throw new IllegalArgumentException("Invariant failed: bounds set wrong"); 161 } 162 for (int i = 1; i < length-1; ++i) { 163 if (transitions[i-1] >= transitions[i]) { 164 throw new IllegalArgumentException("Invariant failed: not monotonic" 165 + "\t" + Utility.hex(i-1) + ": " + transitions[i-1] 166 + "\t" + Utility.hex(i) + ": " + transitions[i] 167 ); 168 } 169 } 170 } 171 172 /** 173 * Finds an index such that inversionList[i] <= codepoint < inversionList[i+1] 174 * Assumes that 0 <= codepoint <= 0x10FFFF 175 * @param codepoint 176 * @return the index 177 */ _findIndex(int c)178 private int _findIndex(int c) { 179 int lo = 0; 180 int hi = length - 1; 181 int i = (lo + hi) >>> 1; 182 // invariant: c >= list[lo] 183 // invariant: c < list[hi] 184 while (i != lo) { 185 if (c < transitions[i]) { 186 hi = i; 187 } else { 188 lo = i; 189 } 190 i = (lo + hi) >>> 1; 191 } 192 if (ASSERTIONS) _checkFind(c, lo); 193 return lo; 194 } 195 _checkFind(int codepoint, int value)196 private void _checkFind(int codepoint, int value) { 197 int other = __findIndex(codepoint); 198 if (other != value) { 199 throw new IllegalArgumentException("Invariant failed: binary search" 200 + "\t" + Utility.hex(codepoint) + ": " + value 201 + "\tshould be: " + other); 202 } 203 } 204 __findIndex(int codepoint)205 private int __findIndex(int codepoint) { 206 for (int i = length-1; i > 0; --i) { 207 if (transitions[i] <= codepoint) return i; 208 } 209 return 0; 210 } 211 212 /* 213 * Try indexed lookup 214 215 static final int SHIFT = 8; 216 int[] starts = new int[0x10FFFF>>SHIFT]; // lowest transition index where codepoint>>x can be found 217 boolean startsValid = false; 218 private int findIndex(int codepoint) { 219 if (!startsValid) { 220 int start = 0; 221 for (int i = 1; i < length; ++i) { 222 223 } 224 } 225 for (int i = length-1; i > 0; --i) { 226 if (transitions[i] <= codepoint) return i; 227 } 228 return 0; 229 } 230 */ 231 232 /** 233 * Remove the items from index through index+count-1. 234 * Logically reduces the size of the internal arrays. 235 * @param index 236 * @param count 237 */ _removeAt(int index, int count)238 private void _removeAt(int index, int count) { 239 for (int i = index + count; i < length; ++i) { 240 transitions[i-count] = transitions[i]; 241 values[i-count] = values[i]; 242 } 243 length -= count; 244 } 245 /** 246 * Add a gap from index to index+count-1. 247 * The values there are undefined, and must be set. 248 * Logically grows arrays to accomodate. Actual growth is limited 249 * @param index 250 * @param count 251 */ _insertGapAt(int index, int count)252 private void _insertGapAt(int index, int count) { 253 int newLength = length + count; 254 int[] oldtransitions = transitions; 255 T[] oldvalues = values; 256 if (newLength > transitions.length) { 257 int allocation = (int) (GROWTH_GAP + (newLength * GROWTH_PERCENT) / 100); 258 transitions = new int[allocation]; 259 values = (T[]) new Object[allocation]; 260 for (int i = 0; i < index; ++i) { 261 transitions[i] = oldtransitions[i]; 262 values[i] = oldvalues[i]; 263 } 264 } 265 for (int i = length - 1; i >= index; --i) { 266 transitions[i+count] = oldtransitions[i]; 267 values[i+count] = oldvalues[i]; 268 } 269 length = newLength; 270 } 271 272 /** 273 * Associates code point with value. Removes any previous association. 274 * All code that calls this MUST check for frozen first! 275 * @param codepoint 276 * @param value 277 * @return this, for chaining 278 */ _put(int codepoint, T value)279 private UnicodeMap _put(int codepoint, T value) { 280 // Warning: baseIndex is an invariant; must 281 // be defined such that transitions[baseIndex] < codepoint 282 // at end of this routine. 283 int baseIndex; 284 if (transitions[lastIndex] <= codepoint 285 && codepoint < transitions[lastIndex+1]) { 286 baseIndex = lastIndex; 287 } else { 288 baseIndex = _findIndex(codepoint); 289 } 290 int limitIndex = baseIndex + 1; 291 // cases are (a) value is already set 292 if (areEqual(values[baseIndex], value)) return this; 293 if (locked) { 294 throw new UnsupportedOperationException("Attempt to modify locked object"); 295 } 296 if (errorOnReset && values[baseIndex] != null) { 297 throw new UnsupportedOperationException("Attempt to reset value for " + Utility.hex(codepoint) 298 + " when that is disallowed. Old: " + values[baseIndex] + "; New: " + value); 299 } 300 301 // adjust the available values 302 staleAvailableValues = true; 303 availableValues.add(value); // add if not there already 304 305 int baseCP = transitions[baseIndex]; 306 int limitCP = transitions[limitIndex]; 307 // we now start walking through the difference case, 308 // based on whether we are at the start or end of range 309 // and whether the range is a single character or multiple 310 311 if (baseCP == codepoint) { 312 // CASE: At very start of range 313 boolean connectsWithPrevious = 314 baseIndex != 0 && areEqual(value, values[baseIndex-1]); 315 316 if (limitCP == codepoint + 1) { 317 // CASE: Single codepoint range 318 boolean connectsWithFollowing = 319 baseIndex < length - 2 && areEqual(value, values[limitIndex]); // was -1 320 321 if (connectsWithPrevious) { 322 // A1a connects with previous & following, so remove index 323 if (connectsWithFollowing) { 324 _removeAt(baseIndex, 2); 325 } else { 326 _removeAt(baseIndex, 1); // extend previous 327 } 328 --baseIndex; // fix up 329 } else if (connectsWithFollowing) { 330 _removeAt(baseIndex, 1); // extend following backwards 331 transitions[baseIndex] = codepoint; 332 } else { 333 // doesn't connect on either side, just reset 334 values[baseIndex] = value; 335 } 336 } else if (connectsWithPrevious) { 337 // A.1: start of multi codepoint range 338 // if connects 339 ++transitions[baseIndex]; // extend previous 340 } else { 341 // otherwise insert new transition 342 transitions[baseIndex] = codepoint+1; // fix following range 343 _insertGapAt(baseIndex, 1); 344 values[baseIndex] = value; 345 transitions[baseIndex] = codepoint; 346 } 347 } else if (limitCP == codepoint + 1) { 348 // CASE: at end of range 349 // if connects, just back up range 350 boolean connectsWithFollowing = 351 baseIndex < length - 2 && areEqual(value, values[limitIndex]); // was -1 352 353 if (connectsWithFollowing) { 354 --transitions[limitIndex]; 355 return this; 356 } else { 357 _insertGapAt(limitIndex, 1); 358 transitions[limitIndex] = codepoint; 359 values[limitIndex] = value; 360 } 361 } else { 362 // CASE: in middle of range 363 // insert gap, then set the new range 364 _insertGapAt(++baseIndex,2); 365 transitions[baseIndex] = codepoint; 366 values[baseIndex] = value; 367 transitions[baseIndex+1] = codepoint + 1; 368 values[baseIndex+1] = values[baseIndex-1]; // copy lower range values 369 } 370 lastIndex = baseIndex; // store for next time 371 return this; 372 } 373 374 private UnicodeMap _putAll(int startCodePoint, int endCodePoint, T value) { 375 // TODO optimize 376 for (int i = startCodePoint; i <= endCodePoint; ++i) { 377 _put(i, value); 378 if (ASSERTIONS) _checkInvariants(); 379 } 380 return this; 381 } 382 383 /** 384 * Sets the codepoint value. 385 * @param codepoint 386 * @param value 387 * @return this (for chaining) 388 */ 389 public UnicodeMap<T> put(int codepoint, T value) { 390 if (codepoint < 0 || codepoint > 0x10FFFF) { 391 throw new IllegalArgumentException("Codepoint out of range: " + codepoint); 392 } 393 _put(codepoint, value); 394 if (ASSERTIONS) _checkInvariants(); 395 return this; 396 } 397 398 /** 399 * Sets the codepoint value. 400 * @param codepoint 401 * @param value 402 * @return this (for chaining) 403 */ 404 public UnicodeMap<T> put(String string, T value) { 405 int v = UnicodeSet.getSingleCodePoint(string); 406 if (v == Integer.MAX_VALUE) { 407 if (locked) { 408 throw new UnsupportedOperationException("Attempt to modify locked object"); 409 } 410 if (value != null) { 411 if (stringMap == null) { 412 stringMap = new TreeMap<String,T>(); 413 } 414 stringMap.put(string, value); 415 staleAvailableValues = true; 416 } else if (stringMap != null) { 417 if (stringMap.remove(string) != null) { 418 staleAvailableValues = true; 419 } 420 } 421 return this; 422 } 423 return put(v, value); 424 } 425 426 /** 427 * Adds bunch o' codepoints; otherwise like put. 428 * @param codepoints 429 * @param value 430 * @return this (for chaining) 431 */ 432 public UnicodeMap<T> putAll(UnicodeSet codepoints, T value) { 433 UnicodeSetIterator it = new UnicodeSetIterator(codepoints); 434 while (it.nextRange()) { 435 if (it.string == null) { 436 _putAll(it.codepoint, it.codepointEnd, value); 437 } else { 438 put(it.string, value); 439 } 440 } 441 return this; 442 } 443 444 /** 445 * Adds bunch o' codepoints; otherwise like add. 446 * @param startCodePoint 447 * @param endCodePoint 448 * @param value 449 * @return this (for chaining) 450 */ 451 public UnicodeMap<T> putAll(int startCodePoint, int endCodePoint, T value) { 452 if (locked) { 453 throw new UnsupportedOperationException("Attempt to modify locked object"); 454 } 455 if (startCodePoint < 0 || endCodePoint > 0x10FFFF) { 456 throw new IllegalArgumentException("Codepoint out of range: " 457 + Utility.hex(startCodePoint) + ".." + Utility.hex(endCodePoint)); 458 } 459 return _putAll(startCodePoint, endCodePoint, value); 460 } 461 462 /** 463 * Add all the (main) values from a UnicodeMap 464 * @param unicodeMap the property to add to the map 465 * @return this (for chaining) 466 */ 467 public UnicodeMap<T> putAll(UnicodeMap<T> unicodeMap) { 468 for (int i = 0; i < unicodeMap.length; ++i) { 469 T value = unicodeMap.values[i]; 470 if (value != null) { 471 _putAll(unicodeMap.transitions[i], unicodeMap.transitions[i+1]-1, value); 472 } 473 if (ASSERTIONS) _checkInvariants(); 474 } 475 if (unicodeMap.stringMap != null && !unicodeMap.stringMap.isEmpty()) { 476 if (stringMap == null) { 477 stringMap = new TreeMap<String,T>(); 478 } 479 stringMap.putAll(unicodeMap.stringMap); 480 } 481 return this; 482 } 483 484 /** 485 * Add all the (main) values from a Unicode property 486 * @param prop the property to add to the map 487 * @return this (for chaining) 488 */ 489 public UnicodeMap<T> putAllFiltered(UnicodeMap<T> prop, UnicodeSet filter) { 490 // TODO optimize 491 for (UnicodeSetIterator it = new UnicodeSetIterator(filter); it.next();) { 492 if (it.codepoint != UnicodeSetIterator.IS_STRING) { 493 T value = prop.getValue(it.codepoint); 494 if (value != null) { 495 _put(it.codepoint, value); 496 } 497 } 498 } 499 // now do the strings 500 for (String key : filter.strings()) { 501 T value = prop.get(key); 502 if (value != null) { 503 put(key, value); 504 } 505 } 506 return this; 507 } 508 509 /** 510 * Set the currently unmapped Unicode code points to the given value. 511 * @param value the value to set 512 * @return this (for chaining) 513 */ 514 public UnicodeMap<T> setMissing(T value) { 515 // fast path, if value not yet present 516 if (!getAvailableValues().contains(value)) { 517 staleAvailableValues = true; 518 availableValues.add(value); 519 for (int i = 0; i < length; ++i) { 520 if (values[i] == null) values[i] = value; 521 } 522 return this; 523 } else { 524 return putAll(keySet(null), value); 525 } 526 } 527 /** 528 * Returns the keyset consisting of all the keys that would produce the given value. Deposits into 529 * result if it is not null. Remember to clear if you just want 530 * the new values. 531 */ 532 public UnicodeSet keySet(T value, UnicodeSet result) { 533 if (result == null) result = new UnicodeSet(); 534 for (int i = 0; i < length - 1; ++i) { 535 if (areEqual(value, values[i])) { 536 result.add(transitions[i], transitions[i+1]-1); 537 } 538 } 539 if (value != null && stringMap != null) { 540 for (String key : stringMap.keySet()) { 541 T newValue = stringMap.get(key); 542 if (value.equals(newValue)) { 543 result.add((String)key); 544 } 545 } 546 } 547 return result; 548 } 549 550 /** 551 * Returns the keyset consisting of all the keys that would produce the given value. 552 * the new values. 553 */ 554 public UnicodeSet keySet(T value) { 555 return keySet(value,null); 556 } 557 558 /** 559 * Returns the keyset consisting of all the keys that would produce (non-null) values. 560 */ 561 public UnicodeSet keySet() { 562 UnicodeSet result = new UnicodeSet(); 563 for (int i = 0; i < length - 1; ++i) { 564 if (values[i] != null) { 565 result.add(transitions[i], transitions[i+1]-1); 566 } 567 } 568 if (stringMap != null) { 569 result.addAll(stringMap.keySet()); 570 } 571 return result; 572 } 573 574 /** 575 * Returns the list of possible values. Deposits each non-null value into 576 * result. Creates result if it is null. Remember to clear result if 577 * you are not appending to existing collection. 578 * @param result 579 * @return result 580 */ 581 public <U extends Collection<T>> U values(U result) { 582 if (staleAvailableValues) { 583 // collect all the current values 584 // retain them in the availableValues 585 Set<T> temp = new HashSet<T>(); 586 for (int i = 0; i < length - 1; ++i) { 587 if (values[i] != null) temp.add(values[i]); 588 } 589 availableValues.retainAll(temp); 590 if (stringMap != null) { 591 availableValues.addAll(stringMap.values()); 592 } 593 staleAvailableValues = false; 594 } 595 if (result == null) { 596 result = (U) new LinkedHashSet<T>(availableValues.size()); 597 } 598 result.addAll(availableValues); 599 return result; 600 } 601 602 /** 603 * Convenience method 604 */ 605 public Set<T> values() { 606 return getAvailableValues(null); 607 } 608 /** 609 * Gets the value associated with a given code point. 610 * Returns null, if there is no such value. 611 * @param codepoint 612 * @return the value 613 */ 614 public T get(int codepoint) { 615 if (codepoint < 0 || codepoint > 0x10FFFF) { 616 throw new IllegalArgumentException("Codepoint out of range: " + codepoint); 617 } 618 return values[_findIndex(codepoint)]; 619 } 620 621 /** 622 * Gets the value associated with a given code point. 623 * Returns null, if there is no such value. 624 * @param codepoint 625 * @return the value 626 */ 627 public T get(String value) { 628 if (UTF16.hasMoreCodePointsThan(value, 1)) { 629 if (stringMap == null) { 630 return null; 631 } 632 return stringMap.get(value); 633 } 634 return getValue(UTF16.charAt(value, 0)); 635 } 636 637 638 /** 639 * Change a new string from the source string according to the mappings. 640 * For each code point cp, if getValue(cp) is null, append the character, otherwise append getValue(cp).toString() 641 * TODO: extend to strings 642 * @param source 643 * @return 644 */ 645 public String transform(String source) { 646 StringBuffer result = new StringBuffer(); 647 int cp; 648 for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) { 649 cp = UTF16.charAt(source, i); 650 T mResult = getValue(cp); 651 if (mResult != null) { 652 result.append(mResult); 653 } else { 654 UTF16.append(result, cp); 655 } 656 } 657 return result.toString(); 658 } 659 660 /** 661 * Used to add complex values, where the value isn't replaced but in some sense composed 662 * @author markdavis 663 */ 664 public abstract static class Composer<T> { 665 /** 666 * This will be called with either a string or a code point. The result is the new value for that item. 667 * If the codepoint is used, the string is null; if the string is used, the codepoint is -1. 668 * @param a 669 * @param b 670 */ 671 public abstract T compose(int codePoint, String string, T a, T b); 672 } 673 674 public UnicodeMap<T> composeWith(UnicodeMap<T> other, Composer<T> composer) { 675 for (T value : other.getAvailableValues()) { 676 UnicodeSet set = other.keySet(value); 677 composeWith(set, value, composer); 678 } 679 return this; 680 } 681 682 public UnicodeMap<T> composeWith(UnicodeSet set, T value, Composer<T> composer) { 683 for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) { 684 int i = it.codepoint; 685 if (i == UnicodeSetIterator.IS_STRING) { 686 String s = it.string; 687 T v1 = getValue(s); 688 T v3 = composer.compose(-1, s, v1, value); 689 if (v1 != v3 && (v1 == null || !v1.equals(v3))) { 690 put(s, v3); 691 } 692 } else { 693 T v1 = getValue(i); 694 T v3 = composer.compose(i, null, v1, value); 695 if (v1 != v3 && (v1 == null || !v1.equals(v3))) { 696 put(i, v3); 697 } 698 } 699 } 700 return this; 701 } 702 703 public String toString() { 704 return toString(null); 705 } 706 707 public String toString(Comparator<T> collected) { 708 StringBuffer result = new StringBuffer(); 709 if (collected == null) { 710 for (int i = 0; i < length-1; ++i) { 711 T value = values[i]; 712 if (value == null) continue; 713 int start = transitions[i]; 714 int end = transitions[i+1]-1; 715 result.append(Utility.hex(start)); 716 if (start != end) result.append("-").append(Utility.hex(end)); 717 result.append("=").append(value.toString()).append("\n"); 718 } 719 if (stringMap != null) { 720 for (String s : stringMap.keySet()) { 721 result.append(Utility.hex(s)).append("=").append(stringMap.get(s).toString()).append("\n"); 722 } 723 } 724 } else { 725 Set<T> set = values(new TreeSet<T>(collected)); 726 for (Iterator<T> it = set.iterator(); it.hasNext();) { 727 T value = it.next(); 728 UnicodeSet s = keySet(value); 729 result.append(value).append("=").append(s.toString()).append("\n"); 730 } 731 } 732 return result.toString(); 733 } 734 /** 735 * @return Returns the errorOnReset value. 736 */ 737 public boolean getErrorOnReset() { 738 return errorOnReset; 739 } 740 /** 741 * Puts the UnicodeMap into a state whereby new mappings are accepted, but changes to old mappings cause an exception. 742 * @param errorOnReset The errorOnReset to set. 743 */ 744 public UnicodeMap<T> setErrorOnReset(boolean errorOnReset) { 745 this.errorOnReset = errorOnReset; 746 return this; 747 } 748 749 /* (non-Javadoc) 750 * @see com.ibm.icu.dev.test.util.Freezable#isFrozen() 751 */ 752 public boolean isFrozen() { 753 // TODO Auto-generated method stub 754 return locked; 755 } 756 757 /* (non-Javadoc) 758 * @see com.ibm.icu.dev.test.util.Freezable#lock() 759 */ 760 public UnicodeMap<T> freeze() { 761 locked = true; 762 return this; 763 } 764 765 /** 766 * Utility to find the maximal common prefix of two strings. 767 * TODO: fix supplemental support 768 */ 769 static public int findCommonPrefix(String last, String s) { 770 int minLen = Math.min(last.length(), s.length()); 771 for (int i = 0; i < minLen; ++i) { 772 if (last.charAt(i) != s.charAt(i)) return i; 773 } 774 return minLen; 775 } 776 777 /** 778 * Get the number of ranges; used for getRangeStart/End. The ranges together cover all of the single-codepoint keys in the UnicodeMap. Other keys can be gotten with getStrings(). 779 */ 780 public int getRangeCount() { 781 return length-1; 782 } 783 784 /** 785 * Get the start of a range. All code points between start and end are in the UnicodeMap's keyset. 786 */ 787 public int getRangeStart(int range) { 788 return transitions[range]; 789 } 790 791 /** 792 * Get the start of a range. All code points between start and end are in the UnicodeMap's keyset. 793 */ 794 public int getRangeEnd(int range) { 795 return transitions[range+1] - 1; 796 } 797 798 /** 799 * Get the value for the range. 800 */ 801 public T getRangeValue(int range) { 802 return values[range]; 803 } 804 805 /** 806 * Get the strings that are not in the ranges. Returns null if there are none. 807 * @return 808 */ 809 public Set<String> getNonRangeStrings() { 810 if (stringMap == null || stringMap.isEmpty()) { 811 return null; 812 } 813 return Collections.unmodifiableSet(stringMap.keySet()); 814 } 815 816 static final boolean DEBUG_WRITE = false; 817 818 /* (non-Javadoc) 819 * @see java.util.Map#containsKey(java.lang.Object) 820 */ 821 public boolean containsKey(String key) { 822 return getValue(key) != null; 823 } 824 825 /* (non-Javadoc) 826 * @see java.util.Map#containsKey(java.lang.Object) 827 */ 828 public boolean containsKey(int key) { 829 return getValue(key) != null; 830 } 831 832 /* (non-Javadoc) 833 * @see java.util.Map#containsValue(java.lang.Object) 834 */ 835 public boolean containsValue(T value) { 836 // TODO Optimize 837 return getAvailableValues().contains(value); 838 } 839 840 /* (non-Javadoc) 841 * @see java.util.Map#isEmpty() 842 */ 843 public boolean isEmpty() { 844 return size() == 0; 845 } 846 847 /* (non-Javadoc) 848 * @see java.util.Map#putAll(java.util.Map) 849 */ 850 public UnicodeMap<T> putAll(Map<? extends String, ? extends T> map) { 851 for (String key : map.keySet()) { 852 put(key,map.get(key)); 853 } 854 return this; 855 } 856 857 /** 858 * Utility for extracting map 859 * @deprecated 860 */ 861 public UnicodeMap<T> putAllIn(Map<? super String, ? super T> map) { 862 for (String key : keySet()) { 863 map.put(key, get(key)); 864 } 865 return this; 866 } 867 868 /** 869 * Utility for extracting map 870 */ 871 public <U extends Map<String, T>> U putAllInto(U map) { 872 for (EntryRange<T> entry : entryRanges()) { 873 if (entry.string != null) { 874 break; 875 } 876 for (int cp = entry.codepoint; cp <= entry.codepointEnd; ++cp) { 877 map.put(UTF16.valueOf(cp), entry.value); 878 } 879 } 880 map.putAll(stringMap); 881 return map; 882 } 883 884 /** 885 * Utility for extracting map 886 */ 887 public <U extends Map<Integer, T>> U putAllCodepointsInto(U map) { 888 for (EntryRange<T> entry : entryRanges()) { 889 if (entry.string != null) { 890 break; 891 } 892 for (int cp = entry.codepoint; cp <= entry.codepointEnd; ++cp) { 893 map.put(cp, entry.value); 894 } 895 } 896 return map; 897 } 898 899 /* (non-Javadoc) 900 * @see java.util.Map#remove(java.lang.Object) 901 */ 902 public UnicodeMap<T> remove(String key) { 903 return put(key, null); 904 } 905 906 /* (non-Javadoc) 907 * @see java.util.Map#remove(java.lang.Object) 908 */ 909 public UnicodeMap<T> remove(int key) { 910 return put(key, null); 911 } 912 913 /* (non-Javadoc) 914 * @see java.util.Map#size() 915 */ 916 public int size() { 917 int result = stringMap == null ? 0 : stringMap.size(); 918 for (int i = 0; i < length-1; ++i) { 919 T value = values[i]; 920 if (value == null) continue; 921 result += transitions[i+1] - transitions[i]; 922 } 923 return result; 924 } 925 926 /* (non-Javadoc) 927 * @see java.util.Map#entrySet() 928 */ 929 public Iterable<Entry<String,T>> entrySet() { 930 return new EntrySetX(); 931 } 932 933 private class EntrySetX implements Iterable<Entry<String, T>> { 934 public Iterator<Entry<String, T>> iterator() { 935 return new IteratorX(); 936 } 937 public String toString() { 938 StringBuffer b = new StringBuffer(); 939 for (Iterator it = iterator(); it.hasNext();) { 940 Object item = it.next(); 941 b.append(item.toString()).append(' '); 942 } 943 return b.toString(); 944 } 945 } 946 947 private class IteratorX implements Iterator<Entry<String, T>> { 948 Iterator<String> iterator = keySet().iterator(); 949 950 /* (non-Javadoc) 951 * @see java.util.Iterator#hasNext() 952 */ 953 public boolean hasNext() { 954 return iterator.hasNext(); 955 } 956 957 /* (non-Javadoc) 958 * @see java.util.Iterator#next() 959 */ 960 public Entry<String, T> next() { 961 String key = iterator.next(); 962 return new ImmutableEntry(key, get(key)); 963 } 964 965 /* (non-Javadoc) 966 * @see java.util.Iterator#remove() 967 */ 968 public void remove() { 969 throw new UnsupportedOperationException(); 970 } 971 972 } 973 974 /** 975 * Struct-like class used to iterate over a UnicodeMap in a for loop. 976 * If the value is a string, then codepoint == codepointEnd == -1. Otherwise the string is null; 977 * Caution: The contents may change during the iteration! 978 */ 979 public static class EntryRange<T> { 980 public int codepoint; 981 public int codepointEnd; 982 public String string; 983 public T value; 984 @Override 985 public String toString() { 986 return (string != null ? Utility.hex(string) 987 : Utility.hex(codepoint) + (codepoint == codepointEnd ? "" : ".." + Utility.hex(codepointEnd))) 988 + "=" + value; 989 } 990 } 991 992 /** 993 * Returns an Iterable over EntryRange, designed for efficient for loops over UnicodeMaps. 994 * Caution: For efficiency, the EntryRange may be reused, so the EntryRange may change on each iteration! 995 * The value is guaranteed never to be null. The entryRange.string values (non-null) are after all the ranges. 996 * @return entry range, for for loops 997 */ 998 public Iterable<EntryRange<T>> entryRanges() { 999 return new EntryRanges(); 1000 } 1001 1002 private class EntryRanges implements Iterable<EntryRange<T>>, Iterator<EntryRange<T>> { 1003 private int pos; 1004 private EntryRange<T> result = new EntryRange<T>(); 1005 private int lastRealRange = values[length-2] == null ? length - 2 : length - 1; 1006 private Iterator<Entry<String, T>> stringIterator = stringMap == null ? null : stringMap.entrySet().iterator(); 1007 1008 public Iterator<EntryRange<T>> iterator() { 1009 return this; 1010 } 1011 public boolean hasNext() { 1012 return pos < lastRealRange || (stringIterator != null && stringIterator.hasNext()); 1013 } 1014 public EntryRange<T> next() { 1015 // a range may be null, but then the next one must not be (except the final range) 1016 if (pos < lastRealRange) { 1017 T temp = values[pos]; 1018 if (temp == null) { 1019 temp = values[++pos]; 1020 } 1021 result.codepoint = transitions[pos]; 1022 result.codepointEnd = transitions[pos+1]-1; 1023 result.string = null; 1024 result.value = temp; 1025 ++pos; 1026 } else { 1027 Entry<String, T> entry = stringIterator.next(); 1028 result.codepoint = result.codepointEnd = -1; 1029 result.string = entry.getKey(); 1030 result.value = entry.getValue(); 1031 } 1032 return result; 1033 } 1034 public void remove() { 1035 throw new UnsupportedOperationException(); 1036 } 1037 } 1038 1039 /* (non-Javadoc) 1040 * @see java.lang.Iterable#iterator() 1041 */ 1042 public Iterator<String> iterator() { 1043 return keySet().iterator(); 1044 } 1045 1046 /** 1047 * Old form for compatibility 1048 */ 1049 public T getValue(String key) { 1050 return get(key); 1051 } 1052 1053 /** 1054 * Old form for compatibility 1055 */ 1056 public T getValue(int key) { 1057 // TODO Auto-generated method stub 1058 return get(key); 1059 } 1060 1061 /** 1062 * Old form for compatibility 1063 */ 1064 public Collection<T> getAvailableValues() { 1065 return values(); 1066 } 1067 1068 /** 1069 * Old form for compatibility 1070 */ 1071 public <U extends Collection<T>> U getAvailableValues(U result) { 1072 return values(result); 1073 } 1074 1075 /** 1076 * Old form for compatibility 1077 */ 1078 public UnicodeSet getSet(T value) { 1079 return keySet(value); 1080 } 1081 1082 /** 1083 * Old form for compatibility 1084 */ 1085 public UnicodeSet getSet(T value, UnicodeSet result) { 1086 return keySet(value, result); 1087 } 1088 1089 // This is to support compressed serialization. It works; just commented out for now as we shift to Generics 1090 // TODO Fix once generics are cleaned up. 1091 // // TODO Fix to serialize more than just strings. 1092 // // Only if all the items are strings will we do the following compression 1093 // // Otherwise we'll just use Java Serialization, bulky as it is 1094 // public void writeExternal(ObjectOutput out1) throws IOException { 1095 // DataOutputCompressor sc = new DataOutputCompressor(out1); 1096 // // if all objects are strings 1097 // Collection<T> availableVals = getAvailableValues(); 1098 // boolean allStrings = allAreString(availableVals); 1099 // sc.writeBoolean(allStrings); 1100 // Map object_index = new LinkedHashMap(); 1101 // if (allAreString(availableVals)) { 1102 // sc.writeStringSet(new TreeSet(availableVals), object_index); 1103 // } else { 1104 // sc.writeCollection(availableVals, object_index); 1105 // } 1106 // sc.writeUInt(length); 1107 // int lastTransition = -1; 1108 // int lastValueNumber = 0; 1109 // if (DEBUG_WRITE) System.out.println("Trans count: " + length); 1110 // for (int i = 0; i < length; ++i) { 1111 // int valueNumber = ((Integer)object_index.get(values[i])).intValue(); 1112 // if (DEBUG_WRITE) System.out.println("Trans: " + transitions[i] + ",\t" + valueNumber); 1113 // 1114 // int deltaTransition = transitions[i] - lastTransition; 1115 // lastTransition = transitions[i]; 1116 // int deltaValueNumber = valueNumber - lastValueNumber; 1117 // lastValueNumber = valueNumber; 1118 // 1119 // deltaValueNumber <<= 1; // make room for one bit 1120 // boolean canCombine = deltaTransition == 1; 1121 // if (canCombine) deltaValueNumber |= 1; 1122 // sc.writeInt(deltaValueNumber); 1123 // if (DEBUG_WRITE) System.out.println("deltaValueNumber: " + deltaValueNumber); 1124 // if (!canCombine) { 1125 // sc.writeUInt(deltaTransition); 1126 // if (DEBUG_WRITE) System.out.println("deltaTransition: " + deltaTransition); 1127 // } 1128 // } 1129 // sc.flush(); 1130 // } 1131 // 1132 // /** 1133 // * 1134 // */ 1135 // private boolean allAreString(Collection<T> availableValues2) { 1136 // //if (true) return false; 1137 // for (Iterator<T> it = availableValues2.iterator(); it.hasNext();) { 1138 // if (!(it.next() instanceof String)) return false; 1139 // } 1140 // return true; 1141 // } 1142 // 1143 // public void readExternal(ObjectInput in1) throws IOException, ClassNotFoundException { 1144 // DataInputCompressor sc = new DataInputCompressor(in1); 1145 // boolean allStrings = sc.readBoolean(); 1146 // T[] valuesList; 1147 // availableValues = new LinkedHashSet(); 1148 // if (allStrings) { 1149 // valuesList = sc.readStringSet(availableValues); 1150 // } else { 1151 // valuesList = sc.readCollection(availableValues); 1152 // } 1153 // length = sc.readUInt(); 1154 // transitions = new int[length]; 1155 // if (DEBUG_WRITE) System.out.println("Trans count: " + length); 1156 // values = (T[]) new Object[length]; 1157 // int currentTransition = -1; 1158 // int currentValue = 0; 1159 // int deltaTransition; 1160 // for (int i = 0; i < length; ++i) { 1161 // int temp = sc.readInt(); 1162 // if (DEBUG_WRITE) System.out.println("deltaValueNumber: " + temp); 1163 // boolean combined = (temp & 1) != 0; 1164 // temp >>= 1; 1165 // values[i] = valuesList[currentValue += temp]; 1166 // if (!combined) { 1167 // deltaTransition = sc.readUInt(); 1168 // if (DEBUG_WRITE) System.out.println("deltaTransition: " + deltaTransition); 1169 // } else { 1170 // deltaTransition = 1; 1171 // } 1172 // transitions[i] = currentTransition += deltaTransition; // delta value 1173 // if (DEBUG_WRITE) System.out.println("Trans: " + transitions[i] + ",\t" + currentValue); 1174 // } 1175 // } 1176 1177 public final UnicodeMap<T> removeAll(UnicodeSet set) { 1178 return putAll(set, null); 1179 } 1180 1181 public final UnicodeMap<T> removeAll(UnicodeMap<T> reference) { 1182 return removeRetainAll(reference, true); 1183 } 1184 1185 public final UnicodeMap<T> retainAll(UnicodeSet set) { 1186 UnicodeSet toNuke = new UnicodeSet(); 1187 // TODO Optimize 1188 for (EntryRange<T> ae : entryRanges()) { 1189 if (ae.string != null) { 1190 if (!set.contains(ae.string)) { 1191 toNuke.add(ae.string); 1192 } 1193 } else { 1194 for (int i = ae.codepoint; i <= ae.codepointEnd; ++i) { 1195 if (!set.contains(i)) { 1196 toNuke.add(i); 1197 } 1198 } 1199 } 1200 } 1201 return putAll(toNuke, null); 1202 } 1203 1204 public final UnicodeMap<T> retainAll(UnicodeMap<T> reference) { 1205 return removeRetainAll(reference, false); 1206 } 1207 1208 private final UnicodeMap<T> removeRetainAll(UnicodeMap<T> reference, boolean remove) { 1209 UnicodeSet toNuke = new UnicodeSet(); 1210 // TODO Optimize 1211 for (EntryRange<T> ae : entryRanges()) { 1212 if (ae.string != null) { 1213 if (ae.value.equals(reference.get(ae.string)) == remove) { 1214 toNuke.add(ae.string); 1215 } 1216 } else { 1217 for (int i = ae.codepoint; i <= ae.codepointEnd; ++i) { 1218 if (ae.value.equals(reference.get(i)) == remove) { 1219 toNuke.add(i); 1220 } 1221 } 1222 } 1223 } 1224 return putAll(toNuke, null); 1225 } 1226 1227 /** 1228 * Returns the keys that consist of multiple code points. 1229 * @return 1230 */ 1231 public final Set<String> stringKeys() { 1232 return getNonRangeStrings(); 1233 } 1234 1235 /** 1236 * Gets the inverse of this map, adding to the target. Like putAllIn 1237 * @return 1238 */ 1239 public <U extends Map<T,UnicodeSet>> U addInverseTo(U target) { 1240 for (T value : values()) { 1241 UnicodeSet uset = getSet(value); 1242 target.put(value, uset); 1243 } 1244 return target; 1245 } 1246 1247 /** 1248 * Freeze an inverse map. 1249 * @param target 1250 * @return 1251 */ 1252 public static <T> Map<T,UnicodeSet> freeze(Map<T,UnicodeSet> target) { 1253 for (UnicodeSet entry : target.values()) { 1254 entry.freeze(); 1255 } 1256 return Collections.unmodifiableMap(target); 1257 } 1258 1259 /** 1260 * @param target 1261 * @return 1262 */ 1263 public UnicodeMap<T> putAllInverse(Map<T, UnicodeSet> source) { 1264 for (Entry<T, UnicodeSet> entry : source.entrySet()) { 1265 putAll(entry.getValue(), entry.getKey()); 1266 } 1267 return this; 1268 } 1269 } 1270