1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4  *******************************************************************************
5  * Copyright (C) 1996-2016, International Business Machines Corporation and    *
6  * others. All Rights Reserved.                                                *
7  *******************************************************************************
8  */
9 package com.ibm.icu.dev.util;
10 
11 import java.util.Collection;
12 import java.util.Collections;
13 import java.util.Comparator;
14 import java.util.HashSet;
15 import java.util.Iterator;
16 import java.util.LinkedHashSet;
17 import java.util.Map;
18 import java.util.Map.Entry;
19 import java.util.Set;
20 import java.util.TreeMap;
21 import java.util.TreeSet;
22 
23 import com.ibm.icu.impl.Utility;
24 import com.ibm.icu.text.StringTransform;
25 import com.ibm.icu.text.UTF16;
26 import com.ibm.icu.text.UnicodeSet;
27 import com.ibm.icu.text.UnicodeSetIterator;
28 import com.ibm.icu.util.Freezable;
29 
30 /**
31  * Class for mapping Unicode characters and strings to values, optimized for single code points,
32  * where ranges of code points have the same value.
33  * Much smaller storage than using HashMap, and much faster and more compact than
34  * a list of UnicodeSets. The API design mimics Map<String,T> but can't extend it due to some
35  * necessary changes (much as UnicodeSet mimics Set<String>). Note that nulls are not permitted as values;
36  * that is, a put(x,null) is the same as remove(x).<br>
37  * At this point "" is also not allowed as a key, although that may change.
38  * @author markdavis
39  *
40  * @internal CLDR
41  */
42 
43 public final class UnicodeMap<T> implements Cloneable, Freezable<UnicodeMap<T>>, StringTransform, Iterable<String> {
44     /**
45      * For serialization
46      */
47     //private static final long serialVersionUID = -6540936876295804105L;
48     static final boolean ASSERTIONS = false;
49     static final long GROWTH_PERCENT = 200; // 100 is no growth!
50     static final long GROWTH_GAP = 10; // extra bump!
51 
52     private int length;
53     // two parallel arrays to save memory. Wish Java had structs.
54     private int[] transitions;
55     /* package private */ T[] values;
56 
57     private LinkedHashSet<T> availableValues = new LinkedHashSet<T>();
58     private transient boolean staleAvailableValues;
59 
60     private transient boolean errorOnReset;
61     private volatile transient boolean locked;
62     private int lastIndex;
63     private TreeMap<String,T> stringMap;
64 
clear()65     { clear(); }
66 
UnicodeMap()67     public UnicodeMap() {
68     }
69 
UnicodeMap(UnicodeMap other)70     public UnicodeMap(UnicodeMap other) {
71         this.putAll(other);
72     }
73 
clear()74     public UnicodeMap<T> clear() {
75         if (locked) {
76             throw new UnsupportedOperationException("Attempt to modify locked object");
77         }
78         length = 2;
79         transitions = new int[] {0,0x110000,0,0,0,0,0,0,0,0};
80         values = (T[]) new Object[10];
81 
82         availableValues.clear();
83         staleAvailableValues = false;
84 
85         errorOnReset = false;
86         lastIndex = 0;
87         stringMap = null;
88         return this;
89     }
90 
91     /* Boilerplate */
equals(Object other)92     public boolean equals(Object other) {
93         if (other == null) return false;
94         try {
95             UnicodeMap that = (UnicodeMap) other;
96             if (length != that.length) return false;
97             for (int i = 0; i < length-1; ++i) {
98                 if (transitions[i] != that.transitions[i]) return false;
99                 if (!areEqual(values[i], that.values[i])) return false;
100             }
101             return true;
102         } catch (ClassCastException e) {
103             return false;
104         }
105     }
106 
areEqual(Object a , Object b)107     public static boolean areEqual(Object a , Object b) {
108         if (a == b) return true;
109         if (a == null || b == null) return false;
110         return a.equals(b);
111     }
112 
hashCode()113     public int hashCode() {
114         int result = length;
115         // TODO might want to abbreviate this for speed.
116         for (int i = 0; i < length-1; ++i) {
117             result = 37*result + transitions[i];
118             result = 37*result;
119             if (values[i] != null) {
120                  result += values[i].hashCode();
121             }
122         }
123         if (stringMap != null) {
124             result = 37*result + stringMap.hashCode();
125         }
126         return result;
127     }
128 
129     /**
130      * Standard clone. Warning, as with Collections, does not do deep clone.
131      */
cloneAsThawed()132     public UnicodeMap<T> cloneAsThawed() {
133         UnicodeMap<T> that = new UnicodeMap<T>();
134         that.length = length;
135         that.transitions = (int[]) transitions.clone();
136         that.values = (T[]) values.clone();
137         that.availableValues = new LinkedHashSet<T>(availableValues);
138         that.locked = false;
139         that.stringMap = stringMap == null ? null : (TreeMap<String, T>) stringMap.clone();
140         return that;
141     }
142 
143     /* for internal consistency checking */
144 
_checkInvariants()145     void _checkInvariants() {
146         if (length < 2
147                 || length > transitions.length
148                 || transitions.length != values.length) {
149             throw new IllegalArgumentException("Invariant failed: Lengths bad");
150         }
151         for (int i = 1; i < length-1; ++i) {
152             if (areEqual(values[i-1], values[i])) {
153                 throw new IllegalArgumentException("Invariant failed: values shared at "
154                         + "\t" + Utility.hex(i-1) + ": <" + values[i-1] + ">"
155                         + "\t" + Utility.hex(i) + ": <" + values[i] + ">"
156                 );
157             }
158         }
159         if (transitions[0] != 0 || transitions[length-1] != 0x110000) {
160             throw new IllegalArgumentException("Invariant failed: bounds set wrong");
161         }
162         for (int i = 1; i < length-1; ++i) {
163             if (transitions[i-1] >= transitions[i]) {
164                 throw new IllegalArgumentException("Invariant failed: not monotonic"
165                         + "\t" + Utility.hex(i-1) + ": " + transitions[i-1]
166                                                                        + "\t" + Utility.hex(i) + ": " + transitions[i]
167                 );
168             }
169         }
170     }
171 
172     /**
173      * Finds an index such that inversionList[i] <= codepoint < inversionList[i+1]
174      * Assumes that 0 <= codepoint <= 0x10FFFF
175      * @param codepoint
176      * @return the index
177      */
_findIndex(int c)178     private int _findIndex(int c) {
179         int lo = 0;
180         int hi = length - 1;
181         int i = (lo + hi) >>> 1;
182         // invariant: c >= list[lo]
183         // invariant: c < list[hi]
184         while (i != lo) {
185             if (c < transitions[i]) {
186                 hi = i;
187             } else {
188                 lo = i;
189             }
190             i = (lo + hi) >>> 1;
191         }
192         if (ASSERTIONS) _checkFind(c, lo);
193         return lo;
194     }
195 
_checkFind(int codepoint, int value)196     private void _checkFind(int codepoint, int value) {
197         int other = __findIndex(codepoint);
198         if (other != value) {
199             throw new IllegalArgumentException("Invariant failed: binary search"
200                     + "\t" + Utility.hex(codepoint) + ": " + value
201                     + "\tshould be: " + other);
202         }
203     }
204 
__findIndex(int codepoint)205     private int __findIndex(int codepoint) {
206         for (int i = length-1; i > 0; --i) {
207             if (transitions[i] <= codepoint) return i;
208         }
209         return 0;
210     }
211 
212     /*
213      * Try indexed lookup
214 
215     static final int SHIFT = 8;
216     int[] starts = new int[0x10FFFF>>SHIFT]; // lowest transition index where codepoint>>x can be found
217     boolean startsValid = false;
218     private int findIndex(int codepoint) {
219         if (!startsValid) {
220             int start = 0;
221             for (int i = 1; i < length; ++i) {
222 
223             }
224         }
225         for (int i = length-1; i > 0; --i) {
226            if (transitions[i] <= codepoint) return i;
227        }
228        return 0;
229    }
230      */
231 
232     /**
233      * Remove the items from index through index+count-1.
234      * Logically reduces the size of the internal arrays.
235      * @param index
236      * @param count
237      */
_removeAt(int index, int count)238     private void _removeAt(int index, int count) {
239         for (int i = index + count; i < length; ++i) {
240             transitions[i-count] = transitions[i];
241             values[i-count] = values[i];
242         }
243         length -= count;
244     }
245     /**
246      * Add a gap from index to index+count-1.
247      * The values there are undefined, and must be set.
248      * Logically grows arrays to accomodate. Actual growth is limited
249      * @param index
250      * @param count
251      */
_insertGapAt(int index, int count)252     private void _insertGapAt(int index, int count) {
253         int newLength = length + count;
254         int[] oldtransitions = transitions;
255         T[] oldvalues = values;
256         if (newLength > transitions.length) {
257             int allocation = (int) (GROWTH_GAP + (newLength * GROWTH_PERCENT) / 100);
258             transitions = new int[allocation];
259             values = (T[]) new Object[allocation];
260             for (int i = 0; i < index; ++i) {
261                 transitions[i] = oldtransitions[i];
262                 values[i] = oldvalues[i];
263             }
264         }
265         for (int i = length - 1; i >= index; --i) {
266             transitions[i+count] = oldtransitions[i];
267             values[i+count] = oldvalues[i];
268         }
269         length = newLength;
270     }
271 
272     /**
273      * Associates code point with value. Removes any previous association.
274      * All code that calls this MUST check for frozen first!
275      * @param codepoint
276      * @param value
277      * @return this, for chaining
278      */
_put(int codepoint, T value)279     private UnicodeMap _put(int codepoint, T value) {
280         // Warning: baseIndex is an invariant; must
281         // be defined such that transitions[baseIndex] < codepoint
282         // at end of this routine.
283         int baseIndex;
284         if (transitions[lastIndex] <= codepoint
285                 && codepoint < transitions[lastIndex+1]) {
286             baseIndex = lastIndex;
287         } else {
288             baseIndex = _findIndex(codepoint);
289         }
290         int limitIndex = baseIndex + 1;
291         // cases are (a) value is already set
292         if (areEqual(values[baseIndex], value)) return this;
293         if (locked) {
294             throw new UnsupportedOperationException("Attempt to modify locked object");
295         }
296         if (errorOnReset && values[baseIndex] != null) {
297             throw new UnsupportedOperationException("Attempt to reset value for " + Utility.hex(codepoint)
298                     + " when that is disallowed. Old: " + values[baseIndex] + "; New: " + value);
299         }
300 
301         // adjust the available values
302         staleAvailableValues = true;
303         availableValues.add(value); // add if not there already
304 
305         int baseCP = transitions[baseIndex];
306         int limitCP = transitions[limitIndex];
307         // we now start walking through the difference case,
308         // based on whether we are at the start or end of range
309         // and whether the range is a single character or multiple
310 
311         if (baseCP == codepoint) {
312             // CASE: At very start of range
313             boolean connectsWithPrevious =
314                 baseIndex != 0 && areEqual(value, values[baseIndex-1]);
315 
316             if (limitCP == codepoint + 1) {
317                 // CASE: Single codepoint range
318                 boolean connectsWithFollowing =
319                     baseIndex < length - 2 && areEqual(value, values[limitIndex]); // was -1
320 
321                 if (connectsWithPrevious) {
322                     // A1a connects with previous & following, so remove index
323                     if (connectsWithFollowing) {
324                         _removeAt(baseIndex, 2);
325                     } else {
326                         _removeAt(baseIndex, 1); // extend previous
327                     }
328                     --baseIndex; // fix up
329                 } else if (connectsWithFollowing) {
330                     _removeAt(baseIndex, 1); // extend following backwards
331                     transitions[baseIndex] = codepoint;
332                 } else {
333                     // doesn't connect on either side, just reset
334                     values[baseIndex] = value;
335                 }
336             } else if (connectsWithPrevious) {
337                 // A.1: start of multi codepoint range
338                 // if connects
339                 ++transitions[baseIndex]; // extend previous
340             } else {
341                 // otherwise insert new transition
342                 transitions[baseIndex] = codepoint+1; // fix following range
343                 _insertGapAt(baseIndex, 1);
344                 values[baseIndex] = value;
345                 transitions[baseIndex] = codepoint;
346             }
347         } else if (limitCP == codepoint + 1) {
348             // CASE: at end of range
349             // if connects, just back up range
350             boolean connectsWithFollowing =
351                 baseIndex < length - 2 && areEqual(value, values[limitIndex]); // was -1
352 
353             if (connectsWithFollowing) {
354                 --transitions[limitIndex];
355                 return this;
356             } else {
357                 _insertGapAt(limitIndex, 1);
358                 transitions[limitIndex] = codepoint;
359                 values[limitIndex] = value;
360             }
361         } else {
362             // CASE: in middle of range
363             // insert gap, then set the new range
364             _insertGapAt(++baseIndex,2);
365             transitions[baseIndex] = codepoint;
366             values[baseIndex] = value;
367             transitions[baseIndex+1] = codepoint + 1;
368             values[baseIndex+1] = values[baseIndex-1]; // copy lower range values
369         }
370         lastIndex = baseIndex; // store for next time
371         return this;
372     }
373 
374     private UnicodeMap _putAll(int startCodePoint, int endCodePoint, T value) {
375         // TODO optimize
376         for (int i = startCodePoint; i <= endCodePoint; ++i) {
377             _put(i, value);
378             if (ASSERTIONS) _checkInvariants();
379         }
380         return this;
381     }
382 
383     /**
384      * Sets the codepoint value.
385      * @param codepoint
386      * @param value
387      * @return this (for chaining)
388      */
389     public UnicodeMap<T> put(int codepoint, T value) {
390         if (codepoint < 0 || codepoint > 0x10FFFF) {
391             throw new IllegalArgumentException("Codepoint out of range: " + codepoint);
392         }
393         _put(codepoint, value);
394         if (ASSERTIONS) _checkInvariants();
395         return this;
396     }
397 
398     /**
399      * Sets the codepoint value.
400      * @param codepoint
401      * @param value
402      * @return this (for chaining)
403      */
404     public UnicodeMap<T> put(String string, T value) {
405         int v = UnicodeSet.getSingleCodePoint(string);
406         if (v == Integer.MAX_VALUE) {
407             if (locked) {
408                 throw new UnsupportedOperationException("Attempt to modify locked object");
409             }
410             if (value != null) {
411                 if (stringMap == null) {
412                     stringMap = new TreeMap<String,T>();
413                 }
414                 stringMap.put(string, value);
415                 staleAvailableValues = true;
416             } else if (stringMap != null) {
417                 if (stringMap.remove(string) != null) {
418                     staleAvailableValues = true;
419                 }
420             }
421             return this;
422         }
423         return put(v, value);
424     }
425 
426     /**
427      * Adds bunch o' codepoints; otherwise like put.
428      * @param codepoints
429      * @param value
430      * @return this (for chaining)
431      */
432     public UnicodeMap<T> putAll(UnicodeSet codepoints, T value) {
433         UnicodeSetIterator it = new UnicodeSetIterator(codepoints);
434         while (it.nextRange()) {
435             if (it.string == null) {
436                 _putAll(it.codepoint, it.codepointEnd, value);
437             } else {
438                 put(it.string, value);
439             }
440         }
441         return this;
442     }
443 
444     /**
445      * Adds bunch o' codepoints; otherwise like add.
446      * @param startCodePoint
447      * @param endCodePoint
448      * @param value
449      * @return this (for chaining)
450      */
451     public UnicodeMap<T> putAll(int startCodePoint, int endCodePoint, T value) {
452         if (locked) {
453             throw new UnsupportedOperationException("Attempt to modify locked object");
454         }
455         if (startCodePoint < 0 || endCodePoint > 0x10FFFF) {
456             throw new IllegalArgumentException("Codepoint out of range: "
457                     + Utility.hex(startCodePoint) + ".." + Utility.hex(endCodePoint));
458         }
459         return _putAll(startCodePoint, endCodePoint, value);
460     }
461 
462     /**
463      * Add all the (main) values from a UnicodeMap
464      * @param unicodeMap the property to add to the map
465      * @return this (for chaining)
466      */
467     public UnicodeMap<T> putAll(UnicodeMap<T> unicodeMap) {
468         for (int i = 0; i < unicodeMap.length; ++i) {
469             T value = unicodeMap.values[i];
470             if (value != null) {
471                 _putAll(unicodeMap.transitions[i], unicodeMap.transitions[i+1]-1, value);
472             }
473             if (ASSERTIONS) _checkInvariants();
474         }
475         if (unicodeMap.stringMap != null && !unicodeMap.stringMap.isEmpty()) {
476             if (stringMap == null) {
477                 stringMap = new TreeMap<String,T>();
478             }
479             stringMap.putAll(unicodeMap.stringMap);
480         }
481         return this;
482     }
483 
484     /**
485      * Add all the (main) values from a Unicode property
486      * @param prop the property to add to the map
487      * @return this (for chaining)
488      */
489     public UnicodeMap<T> putAllFiltered(UnicodeMap<T> prop, UnicodeSet filter) {
490         // TODO optimize
491         for (UnicodeSetIterator it = new UnicodeSetIterator(filter); it.next();) {
492             if (it.codepoint != UnicodeSetIterator.IS_STRING) {
493                 T value = prop.getValue(it.codepoint);
494                 if (value != null) {
495                     _put(it.codepoint, value);
496                 }
497             }
498         }
499         // now do the strings
500         for (String key : filter.strings()) {
501             T value = prop.get(key);
502             if (value != null) {
503                 put(key, value);
504             }
505         }
506         return this;
507     }
508 
509     /**
510      * Set the currently unmapped Unicode code points to the given value.
511      * @param value the value to set
512      * @return this (for chaining)
513      */
514     public UnicodeMap<T> setMissing(T value) {
515         // fast path, if value not yet present
516         if (!getAvailableValues().contains(value)) {
517             staleAvailableValues = true;
518             availableValues.add(value);
519             for (int i = 0; i < length; ++i) {
520                 if (values[i] == null) values[i] = value;
521             }
522             return this;
523         } else {
524             return putAll(keySet(null), value);
525         }
526     }
527     /**
528      * Returns the keyset consisting of all the keys that would produce the given value. Deposits into
529      * result if it is not null. Remember to clear if you just want
530      * the new values.
531      */
532     public UnicodeSet keySet(T value, UnicodeSet result) {
533         if (result == null) result = new UnicodeSet();
534         for (int i = 0; i < length - 1; ++i) {
535             if (areEqual(value, values[i])) {
536                 result.add(transitions[i], transitions[i+1]-1);
537             }
538         }
539         if (value != null && stringMap != null) {
540             for (String key : stringMap.keySet()) {
541                 T newValue = stringMap.get(key);
542                 if (value.equals(newValue)) {
543                     result.add((String)key);
544                 }
545             }
546         }
547         return result;
548     }
549 
550     /**
551      * Returns the keyset consisting of all the keys that would produce the given value.
552      * the new values.
553      */
554     public UnicodeSet keySet(T value) {
555         return keySet(value,null);
556     }
557 
558     /**
559      * Returns the keyset consisting of all the keys that would produce (non-null) values.
560      */
561     public UnicodeSet keySet() {
562         UnicodeSet result = new UnicodeSet();
563         for (int i = 0; i < length - 1; ++i) {
564             if (values[i] != null) {
565                 result.add(transitions[i], transitions[i+1]-1);
566             }
567         }
568         if (stringMap != null) {
569             result.addAll(stringMap.keySet());
570         }
571         return result;
572     }
573 
574     /**
575      * Returns the list of possible values. Deposits each non-null value into
576      * result. Creates result if it is null. Remember to clear result if
577      * you are not appending to existing collection.
578      * @param result
579      * @return result
580      */
581     public <U extends Collection<T>> U values(U result) {
582         if (staleAvailableValues) {
583             // collect all the current values
584             // retain them in the availableValues
585             Set<T> temp = new HashSet<T>();
586             for (int i = 0; i < length - 1; ++i) {
587                 if (values[i] != null) temp.add(values[i]);
588             }
589             availableValues.retainAll(temp);
590             if (stringMap != null) {
591                 availableValues.addAll(stringMap.values());
592             }
593             staleAvailableValues = false;
594         }
595         if (result == null) {
596             result = (U) new LinkedHashSet<T>(availableValues.size());
597         }
598         result.addAll(availableValues);
599         return result;
600     }
601 
602     /**
603      * Convenience method
604      */
605     public Set<T> values() {
606         return getAvailableValues(null);
607     }
608     /**
609      * Gets the value associated with a given code point.
610      * Returns null, if there is no such value.
611      * @param codepoint
612      * @return the value
613      */
614     public T get(int codepoint) {
615         if (codepoint < 0 || codepoint > 0x10FFFF) {
616             throw new IllegalArgumentException("Codepoint out of range: " + codepoint);
617         }
618         return values[_findIndex(codepoint)];
619     }
620 
621     /**
622      * Gets the value associated with a given code point.
623      * Returns null, if there is no such value.
624      * @param codepoint
625      * @return the value
626      */
627     public T get(String value) {
628         if (UTF16.hasMoreCodePointsThan(value, 1)) {
629             if (stringMap == null) {
630                 return null;
631             }
632             return stringMap.get(value);
633         }
634         return getValue(UTF16.charAt(value, 0));
635     }
636 
637 
638     /**
639      * Change a new string from the source string according to the mappings.
640      * For each code point cp, if getValue(cp) is null, append the character, otherwise append getValue(cp).toString()
641      * TODO: extend to strings
642      * @param source
643      * @return
644      */
645     public String transform(String source) {
646         StringBuffer result = new StringBuffer();
647         int cp;
648         for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) {
649             cp = UTF16.charAt(source, i);
650             T mResult = getValue(cp);
651             if (mResult != null) {
652                 result.append(mResult);
653             } else {
654                 UTF16.append(result, cp);
655             }
656         }
657         return result.toString();
658     }
659 
660     /**
661      * Used to add complex values, where the value isn't replaced but in some sense composed
662      * @author markdavis
663      */
664     public abstract static class Composer<T> {
665         /**
666          * This will be called with either a string or a code point. The result is the new value for that item.
667          * If the codepoint is used, the string is null; if the string is used, the codepoint is -1.
668          * @param a
669          * @param b
670          */
671         public abstract T compose(int codePoint, String string, T a, T b);
672     }
673 
674     public UnicodeMap<T> composeWith(UnicodeMap<T> other, Composer<T> composer) {
675         for (T value : other.getAvailableValues()) {
676             UnicodeSet set = other.keySet(value);
677             composeWith(set, value, composer);
678         }
679         return this;
680     }
681 
682     public UnicodeMap<T> composeWith(UnicodeSet set, T value, Composer<T> composer) {
683         for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {
684             int i = it.codepoint;
685             if (i == UnicodeSetIterator.IS_STRING) {
686                 String s = it.string;
687                 T v1 = getValue(s);
688                 T v3 = composer.compose(-1, s, v1, value);
689                 if (v1 != v3 && (v1 == null || !v1.equals(v3))) {
690                     put(s, v3);
691                 }
692             } else {
693                 T v1 = getValue(i);
694                 T v3 = composer.compose(i, null, v1, value);
695                 if (v1 != v3 && (v1 == null || !v1.equals(v3))) {
696                     put(i, v3);
697                 }
698             }
699         }
700         return this;
701     }
702 
703     public String toString() {
704         return toString(null);
705     }
706 
707     public String toString(Comparator<T> collected) {
708         StringBuffer result = new StringBuffer();
709         if (collected == null) {
710             for (int i = 0; i < length-1; ++i) {
711                 T value = values[i];
712                 if (value == null) continue;
713                 int start = transitions[i];
714                 int end = transitions[i+1]-1;
715                 result.append(Utility.hex(start));
716                 if (start != end) result.append("-").append(Utility.hex(end));
717                 result.append("=").append(value.toString()).append("\n");
718             }
719             if (stringMap != null) {
720                 for (String s : stringMap.keySet()) {
721                     result.append(Utility.hex(s)).append("=").append(stringMap.get(s).toString()).append("\n");
722                 }
723             }
724         } else {
725             Set<T> set = values(new TreeSet<T>(collected));
726             for (Iterator<T> it = set.iterator(); it.hasNext();) {
727                 T value = it.next();
728                 UnicodeSet s = keySet(value);
729                 result.append(value).append("=").append(s.toString()).append("\n");
730             }
731         }
732         return result.toString();
733     }
734     /**
735      * @return Returns the errorOnReset value.
736      */
737     public boolean getErrorOnReset() {
738         return errorOnReset;
739     }
740     /**
741      * Puts the UnicodeMap into a state whereby new mappings are accepted, but changes to old mappings cause an exception.
742      * @param errorOnReset The errorOnReset to set.
743      */
744     public UnicodeMap<T> setErrorOnReset(boolean errorOnReset) {
745         this.errorOnReset = errorOnReset;
746         return this;
747     }
748 
749     /* (non-Javadoc)
750      * @see com.ibm.icu.dev.test.util.Freezable#isFrozen()
751      */
752     public boolean isFrozen() {
753         // TODO Auto-generated method stub
754         return locked;
755     }
756 
757     /* (non-Javadoc)
758      * @see com.ibm.icu.dev.test.util.Freezable#lock()
759      */
760     public UnicodeMap<T> freeze() {
761         locked = true;
762         return this;
763     }
764 
765     /**
766      * Utility to find the maximal common prefix of two strings.
767      * TODO: fix supplemental support
768      */
769     static public int findCommonPrefix(String last, String s) {
770         int minLen = Math.min(last.length(), s.length());
771         for (int i = 0; i < minLen; ++i) {
772             if (last.charAt(i) != s.charAt(i)) return i;
773         }
774         return minLen;
775     }
776 
777     /**
778      * Get the number of ranges; used for getRangeStart/End. The ranges together cover all of the single-codepoint keys in the UnicodeMap. Other keys can be gotten with getStrings().
779      */
780     public int getRangeCount() {
781         return length-1;
782     }
783 
784     /**
785      * Get the start of a range. All code points between start and end are in the UnicodeMap's keyset.
786      */
787     public int getRangeStart(int range) {
788         return transitions[range];
789     }
790 
791     /**
792      * Get the start of a range. All code points between start and end are in the UnicodeMap's keyset.
793      */
794     public int getRangeEnd(int range) {
795         return transitions[range+1] - 1;
796     }
797 
798     /**
799      * Get the value for the range.
800      */
801     public T getRangeValue(int range) {
802         return values[range];
803     }
804 
805     /**
806      * Get the strings that are not in the ranges. Returns null if there are none.
807      * @return
808      */
809     public Set<String> getNonRangeStrings() {
810         if (stringMap == null || stringMap.isEmpty()) {
811             return null;
812         }
813         return Collections.unmodifiableSet(stringMap.keySet());
814     }
815 
816     static final boolean DEBUG_WRITE = false;
817 
818     /* (non-Javadoc)
819      * @see java.util.Map#containsKey(java.lang.Object)
820      */
821     public boolean containsKey(String key) {
822         return getValue(key) != null;
823     }
824 
825     /* (non-Javadoc)
826      * @see java.util.Map#containsKey(java.lang.Object)
827      */
828     public boolean containsKey(int key) {
829         return getValue(key) != null;
830     }
831 
832     /* (non-Javadoc)
833      * @see java.util.Map#containsValue(java.lang.Object)
834      */
835     public boolean containsValue(T value) {
836         // TODO Optimize
837         return getAvailableValues().contains(value);
838     }
839 
840     /* (non-Javadoc)
841      * @see java.util.Map#isEmpty()
842      */
843     public boolean isEmpty() {
844         return size() == 0;
845     }
846 
847     /* (non-Javadoc)
848      * @see java.util.Map#putAll(java.util.Map)
849      */
850     public UnicodeMap<T> putAll(Map<? extends String, ? extends T> map) {
851         for (String key : map.keySet()) {
852             put(key,map.get(key));
853         }
854         return this;
855     }
856 
857     /**
858      * Utility for extracting map
859      * @deprecated
860      */
861     public UnicodeMap<T> putAllIn(Map<? super String, ? super T> map) {
862         for (String key : keySet()) {
863             map.put(key, get(key));
864         }
865         return this;
866     }
867 
868     /**
869      * Utility for extracting map
870      */
871     public <U extends Map<String, T>> U putAllInto(U map) {
872         for (EntryRange<T> entry : entryRanges()) {
873             if (entry.string != null) {
874                 break;
875             }
876             for (int cp = entry.codepoint; cp <= entry.codepointEnd; ++cp) {
877                 map.put(UTF16.valueOf(cp), entry.value);
878             }
879         }
880         map.putAll(stringMap);
881         return map;
882     }
883 
884     /**
885      * Utility for extracting map
886      */
887     public <U extends Map<Integer, T>> U putAllCodepointsInto(U map) {
888         for (EntryRange<T> entry : entryRanges()) {
889             if (entry.string != null) {
890                 break;
891             }
892             for (int cp = entry.codepoint; cp <= entry.codepointEnd; ++cp) {
893                 map.put(cp, entry.value);
894             }
895         }
896         return map;
897     }
898 
899     /* (non-Javadoc)
900      * @see java.util.Map#remove(java.lang.Object)
901      */
902     public UnicodeMap<T> remove(String key) {
903         return put(key, null);
904     }
905 
906     /* (non-Javadoc)
907      * @see java.util.Map#remove(java.lang.Object)
908      */
909     public UnicodeMap<T> remove(int key) {
910         return put(key, null);
911     }
912 
913     /* (non-Javadoc)
914      * @see java.util.Map#size()
915      */
916     public int size() {
917         int result = stringMap == null ? 0 : stringMap.size();
918         for (int i = 0; i < length-1; ++i) {
919             T value = values[i];
920             if (value == null) continue;
921             result += transitions[i+1] - transitions[i];
922         }
923         return result;
924     }
925 
926     /* (non-Javadoc)
927      * @see java.util.Map#entrySet()
928      */
929     public Iterable<Entry<String,T>> entrySet() {
930         return new EntrySetX();
931     }
932 
933     private class EntrySetX implements Iterable<Entry<String, T>> {
934         public Iterator<Entry<String, T>> iterator() {
935             return new IteratorX();
936         }
937         public String toString() {
938             StringBuffer b = new StringBuffer();
939             for (Iterator it = iterator(); it.hasNext();) {
940                 Object item = it.next();
941                 b.append(item.toString()).append(' ');
942             }
943             return b.toString();
944         }
945     }
946 
947     private class IteratorX implements Iterator<Entry<String, T>> {
948         Iterator<String> iterator = keySet().iterator();
949 
950         /* (non-Javadoc)
951          * @see java.util.Iterator#hasNext()
952          */
953         public boolean hasNext() {
954             return iterator.hasNext();
955         }
956 
957         /* (non-Javadoc)
958          * @see java.util.Iterator#next()
959          */
960         public Entry<String, T> next() {
961             String key = iterator.next();
962             return new ImmutableEntry(key, get(key));
963         }
964 
965         /* (non-Javadoc)
966          * @see java.util.Iterator#remove()
967          */
968         public void remove() {
969             throw new UnsupportedOperationException();
970         }
971 
972     }
973 
974     /**
975      * Struct-like class used to iterate over a UnicodeMap in a for loop.
976      * If the value is a string, then codepoint == codepointEnd == -1. Otherwise the string is null;
977      * Caution: The contents may change during the iteration!
978      */
979     public static class EntryRange<T> {
980         public int codepoint;
981         public int codepointEnd;
982         public String string;
983         public T value;
984         @Override
985         public String toString() {
986             return (string != null ? Utility.hex(string)
987                     : Utility.hex(codepoint) + (codepoint == codepointEnd ? "" : ".." + Utility.hex(codepointEnd)))
988                     + "=" + value;
989         }
990     }
991 
992     /**
993      * Returns an Iterable over EntryRange, designed for efficient for loops over UnicodeMaps.
994      * Caution: For efficiency, the EntryRange may be reused, so the EntryRange may change on each iteration!
995      * The value is guaranteed never to be null. The entryRange.string values (non-null) are after all the ranges.
996      * @return entry range, for for loops
997      */
998     public Iterable<EntryRange<T>> entryRanges() {
999         return new EntryRanges();
1000     }
1001 
1002     private class EntryRanges implements Iterable<EntryRange<T>>, Iterator<EntryRange<T>> {
1003         private int pos;
1004         private EntryRange<T> result = new EntryRange<T>();
1005         private int lastRealRange = values[length-2] == null ? length - 2 : length - 1;
1006         private Iterator<Entry<String, T>> stringIterator = stringMap == null ? null : stringMap.entrySet().iterator();
1007 
1008         public Iterator<EntryRange<T>> iterator() {
1009             return this;
1010         }
1011         public boolean hasNext() {
1012             return pos < lastRealRange || (stringIterator != null && stringIterator.hasNext());
1013         }
1014         public EntryRange<T> next() {
1015             // a range may be null, but then the next one must not be (except the final range)
1016             if (pos < lastRealRange) {
1017                 T temp = values[pos];
1018                 if (temp == null) {
1019                     temp = values[++pos];
1020                 }
1021                 result.codepoint = transitions[pos];
1022                 result.codepointEnd = transitions[pos+1]-1;
1023                 result.string = null;
1024                 result.value = temp;
1025                 ++pos;
1026             } else {
1027                 Entry<String, T> entry = stringIterator.next();
1028                 result.codepoint = result.codepointEnd = -1;
1029                 result.string = entry.getKey();
1030                 result.value = entry.getValue();
1031             }
1032             return result;
1033         }
1034         public void remove() {
1035             throw new UnsupportedOperationException();
1036         }
1037     }
1038 
1039     /* (non-Javadoc)
1040      * @see java.lang.Iterable#iterator()
1041      */
1042     public Iterator<String> iterator() {
1043         return keySet().iterator();
1044     }
1045 
1046     /**
1047      * Old form for compatibility
1048      */
1049     public T getValue(String key) {
1050         return get(key);
1051     }
1052 
1053     /**
1054      * Old form for compatibility
1055      */
1056     public T getValue(int key) {
1057         // TODO Auto-generated method stub
1058         return get(key);
1059     }
1060 
1061     /**
1062      * Old form for compatibility
1063      */
1064     public Collection<T> getAvailableValues() {
1065         return values();
1066     }
1067 
1068     /**
1069      * Old form for compatibility
1070      */
1071     public <U extends Collection<T>> U getAvailableValues(U result) {
1072         return values(result);
1073     }
1074 
1075     /**
1076      * Old form for compatibility
1077      */
1078     public UnicodeSet getSet(T value) {
1079         return keySet(value);
1080     }
1081 
1082     /**
1083      * Old form for compatibility
1084      */
1085     public UnicodeSet getSet(T value, UnicodeSet result) {
1086         return keySet(value, result);
1087     }
1088 
1089     // This is to support compressed serialization. It works; just commented out for now as we shift to Generics
1090     // TODO Fix once generics are cleaned up.
1091     //    // TODO Fix to serialize more than just strings.
1092     //    // Only if all the items are strings will we do the following compression
1093     //    // Otherwise we'll just use Java Serialization, bulky as it is
1094     //    public void writeExternal(ObjectOutput out1) throws IOException {
1095     //        DataOutputCompressor sc = new DataOutputCompressor(out1);
1096     //        // if all objects are strings
1097     //        Collection<T> availableVals = getAvailableValues();
1098     //        boolean allStrings = allAreString(availableVals);
1099     //        sc.writeBoolean(allStrings);
1100     //        Map object_index = new LinkedHashMap();
1101     //        if (allAreString(availableVals)) {
1102     //            sc.writeStringSet(new TreeSet(availableVals), object_index);
1103     //        } else {
1104     //            sc.writeCollection(availableVals, object_index);
1105     //        }
1106     //        sc.writeUInt(length);
1107     //        int lastTransition = -1;
1108     //        int lastValueNumber = 0;
1109     //        if (DEBUG_WRITE) System.out.println("Trans count: " + length);
1110     //        for (int i = 0; i < length; ++i) {
1111     //            int valueNumber = ((Integer)object_index.get(values[i])).intValue();
1112     //            if (DEBUG_WRITE) System.out.println("Trans: " + transitions[i] + ",\t" + valueNumber);
1113     //
1114     //            int deltaTransition = transitions[i] - lastTransition;
1115     //            lastTransition = transitions[i];
1116     //            int deltaValueNumber = valueNumber - lastValueNumber;
1117     //            lastValueNumber = valueNumber;
1118     //
1119     //            deltaValueNumber <<= 1; // make room for one bit
1120     //            boolean canCombine = deltaTransition == 1;
1121     //            if (canCombine) deltaValueNumber |= 1;
1122     //            sc.writeInt(deltaValueNumber);
1123     //            if (DEBUG_WRITE) System.out.println("deltaValueNumber: " + deltaValueNumber);
1124     //            if (!canCombine) {
1125     //                sc.writeUInt(deltaTransition);
1126     //                if (DEBUG_WRITE) System.out.println("deltaTransition: " + deltaTransition);
1127     //            }
1128     //        }
1129     //        sc.flush();
1130     //    }
1131     //
1132     //    /**
1133     //     *
1134     //     */
1135     //    private boolean allAreString(Collection<T> availableValues2) {
1136     //        //if (true) return false;
1137     //        for (Iterator<T> it = availableValues2.iterator(); it.hasNext();) {
1138     //            if (!(it.next() instanceof String)) return false;
1139     //        }
1140     //        return true;
1141     //    }
1142     //
1143     //    public void readExternal(ObjectInput in1) throws IOException, ClassNotFoundException {
1144     //        DataInputCompressor sc = new DataInputCompressor(in1);
1145     //        boolean allStrings = sc.readBoolean();
1146     //        T[] valuesList;
1147     //        availableValues = new LinkedHashSet();
1148     //        if (allStrings) {
1149     //            valuesList = sc.readStringSet(availableValues);
1150     //        } else {
1151     //            valuesList = sc.readCollection(availableValues);
1152     //        }
1153     //        length = sc.readUInt();
1154     //        transitions = new int[length];
1155     //        if (DEBUG_WRITE) System.out.println("Trans count: " + length);
1156     //        values = (T[]) new Object[length];
1157     //        int currentTransition = -1;
1158     //        int currentValue = 0;
1159     //        int deltaTransition;
1160     //        for (int i = 0; i < length; ++i) {
1161     //            int temp = sc.readInt();
1162     //            if (DEBUG_WRITE) System.out.println("deltaValueNumber: " + temp);
1163     //            boolean combined = (temp & 1) != 0;
1164     //            temp >>= 1;
1165     //        values[i] = valuesList[currentValue += temp];
1166     //        if (!combined) {
1167     //            deltaTransition = sc.readUInt();
1168     //            if (DEBUG_WRITE) System.out.println("deltaTransition: " + deltaTransition);
1169     //        } else {
1170     //            deltaTransition = 1;
1171     //        }
1172     //        transitions[i] = currentTransition += deltaTransition; // delta value
1173     //        if (DEBUG_WRITE) System.out.println("Trans: " + transitions[i] + ",\t" + currentValue);
1174     //        }
1175     //    }
1176 
1177     public final UnicodeMap<T> removeAll(UnicodeSet set) {
1178         return putAll(set, null);
1179     }
1180 
1181     public final UnicodeMap<T> removeAll(UnicodeMap<T> reference) {
1182         return removeRetainAll(reference, true);
1183     }
1184 
1185     public final UnicodeMap<T> retainAll(UnicodeSet set) {
1186         UnicodeSet toNuke = new UnicodeSet();
1187         // TODO Optimize
1188         for (EntryRange<T> ae : entryRanges()) {
1189             if (ae.string != null) {
1190                 if (!set.contains(ae.string)) {
1191                     toNuke.add(ae.string);
1192                 }
1193             } else {
1194                 for (int i = ae.codepoint; i <= ae.codepointEnd; ++i) {
1195                     if (!set.contains(i)) {
1196                         toNuke.add(i);
1197                     }
1198                 }
1199             }
1200         }
1201         return putAll(toNuke, null);
1202     }
1203 
1204     public final UnicodeMap<T> retainAll(UnicodeMap<T> reference) {
1205         return removeRetainAll(reference, false);
1206     }
1207 
1208     private final UnicodeMap<T> removeRetainAll(UnicodeMap<T> reference, boolean remove) {
1209         UnicodeSet toNuke = new UnicodeSet();
1210         // TODO Optimize
1211         for (EntryRange<T> ae : entryRanges()) {
1212             if (ae.string != null) {
1213                 if (ae.value.equals(reference.get(ae.string)) == remove) {
1214                     toNuke.add(ae.string);
1215                 }
1216             } else {
1217                 for (int i = ae.codepoint; i <= ae.codepointEnd; ++i) {
1218                     if (ae.value.equals(reference.get(i)) == remove) {
1219                         toNuke.add(i);
1220                     }
1221                 }
1222             }
1223         }
1224         return putAll(toNuke, null);
1225     }
1226 
1227     /**
1228      * Returns the keys that consist of multiple code points.
1229      * @return
1230      */
1231     public final Set<String> stringKeys() {
1232         return getNonRangeStrings();
1233     }
1234 
1235     /**
1236      * Gets the inverse of this map, adding to the target. Like putAllIn
1237      * @return
1238      */
1239     public <U extends Map<T,UnicodeSet>> U addInverseTo(U target) {
1240         for (T value : values()) {
1241             UnicodeSet uset = getSet(value);
1242             target.put(value, uset);
1243         }
1244         return target;
1245     }
1246 
1247     /**
1248      * Freeze an inverse map.
1249      * @param target
1250      * @return
1251      */
1252     public static <T> Map<T,UnicodeSet> freeze(Map<T,UnicodeSet> target) {
1253         for (UnicodeSet entry : target.values()) {
1254             entry.freeze();
1255         }
1256         return Collections.unmodifiableMap(target);
1257     }
1258 
1259     /**
1260      * @param target
1261      * @return
1262      */
1263     public UnicodeMap<T> putAllInverse(Map<T, UnicodeSet> source) {
1264         for (Entry<T, UnicodeSet> entry : source.entrySet()) {
1265             putAll(entry.getValue(), entry.getKey());
1266         }
1267         return this;
1268     }
1269 }
1270