1 /*
2  *******************************************************************************
3  * Copyright (C) 1996-2014, International Business Machines Corporation and    *
4  * others. All Rights Reserved.                                                *
5  *******************************************************************************
6  */
7 package com.ibm.icu.dev.util;
8 
9 import java.util.ArrayList;
10 import java.util.Collection;
11 import java.util.Collections;
12 import java.util.Comparator;
13 import java.util.HashSet;
14 import java.util.Iterator;
15 import java.util.LinkedHashSet;
16 import java.util.Map;
17 import java.util.Map.Entry;
18 import java.util.Set;
19 import java.util.TreeMap;
20 import java.util.TreeSet;
21 
22 import com.ibm.icu.impl.Utility;
23 import com.ibm.icu.text.StringTransform;
24 import com.ibm.icu.text.UTF16;
25 import com.ibm.icu.text.UnicodeSet;
26 import com.ibm.icu.text.UnicodeSetIterator;
27 import com.ibm.icu.util.Freezable;
28 
29 /**
30  * Class for mapping Unicode characters and strings to values, optimized for single code points,
31  * where ranges of code points have the same value.
32  * Much smaller storage than using HashMap, and much faster and more compact than
33  * a list of UnicodeSets. The API design mimics Map<String,T> but can't extend it due to some
34  * necessary changes (much as UnicodeSet mimics Set<String>). Note that nulls are not permitted as values;
35  * that is, a put(x,null) is the same as remove(x).<br>
36  * At this point "" is also not allowed as a key, although that may change.
37  * @author markdavis
38  */
39 
40 public final class UnicodeMap<T> implements Cloneable, Freezable, StringTransform, Iterable<String> {
41     /**
42      * For serialization
43      */
44     //private static final long serialVersionUID = -6540936876295804105L;
45     static final boolean ASSERTIONS = false;
46     static final long GROWTH_PERCENT = 200; // 100 is no growth!
47     static final long GROWTH_GAP = 10; // extra bump!
48 
49     private int length;
50     // two parallel arrays to save memory. Wish Java had structs.
51     private int[] transitions;
52     /* package private */ T[] values;
53 
54     private LinkedHashSet<T> availableValues = new LinkedHashSet<T>();
55     private transient boolean staleAvailableValues;
56 
57     private transient boolean errorOnReset;
58     private volatile transient boolean locked;
59     private int lastIndex;
60     private Map<String,T> stringMap;
61 
clear()62     { clear(); }
63 
clear()64     public UnicodeMap<T> clear() {
65         if (locked) {
66             throw new UnsupportedOperationException("Attempt to modify locked object");
67         }
68         length = 2;
69         transitions = new int[] {0,0x110000,0,0,0,0,0,0,0,0};
70         values = (T[]) new Object[10];
71 
72         availableValues.clear();
73         staleAvailableValues = false;
74 
75         errorOnReset = false;
76         lastIndex = 0;
77         stringMap = null;
78         return this;
79     }
80 
81     /* Boilerplate */
equals(Object other)82     public boolean equals(Object other) {
83         if (other == null) return false;
84         try {
85             UnicodeMap that = (UnicodeMap) other;
86             if (length != that.length) return false;
87             for (int i = 0; i < length-1; ++i) {
88                 if (transitions[i] != that.transitions[i]) return false;
89                 if (!areEqual(values[i], that.values[i])) return false;
90             }
91             return true;
92         } catch (ClassCastException e) {
93             return false;
94         }
95     }
96 
areEqual(Object a , Object b)97     public static boolean areEqual(Object a , Object b) {
98         if (a == b) return true;
99         if (a == null || b == null) return false;
100         return a.equals(b);
101     }
102 
hashCode()103     public int hashCode() {
104         int result = length;
105         // TODO might want to abbreviate this for speed.
106         for (int i = 0; i < length-1; ++i) {
107             result = 37*result + transitions[i];
108             result = 37*result + values[i].hashCode();
109         }
110         return result;
111     }
112 
113     /**
114      * Standard clone. Warning, as with Collections, does not do deep clone.
115      */
cloneAsThawed()116     public UnicodeMap<T> cloneAsThawed() {
117         UnicodeMap<T> that = new UnicodeMap<T>();
118         that.length = length;
119         that.transitions = (int[]) transitions.clone();
120         that.values = (T[]) values.clone();
121         that.availableValues = new LinkedHashSet<T>(availableValues);
122         that.locked = false;
123         return that;
124     }
125 
126     /* for internal consistency checking */
127 
_checkInvariants()128     void _checkInvariants() {
129         if (length < 2
130                 || length > transitions.length
131                 || transitions.length != values.length) {
132             throw new IllegalArgumentException("Invariant failed: Lengths bad");
133         }
134         for (int i = 1; i < length-1; ++i) {
135             if (areEqual(values[i-1], values[i])) {
136                 throw new IllegalArgumentException("Invariant failed: values shared at "
137                         + "\t" + Utility.hex(i-1) + ": <" + values[i-1] + ">"
138                         + "\t" + Utility.hex(i) + ": <" + values[i] + ">"
139                 );
140             }
141         }
142         if (transitions[0] != 0 || transitions[length-1] != 0x110000) {
143             throw new IllegalArgumentException("Invariant failed: bounds set wrong");
144         }
145         for (int i = 1; i < length-1; ++i) {
146             if (transitions[i-1] >= transitions[i]) {
147                 throw new IllegalArgumentException("Invariant failed: not monotonic"
148                         + "\t" + Utility.hex(i-1) + ": " + transitions[i-1]
149                                                                        + "\t" + Utility.hex(i) + ": " + transitions[i]
150                 );
151             }
152         }
153     }
154 
155     /**
156      * Finds an index such that inversionList[i] <= codepoint < inversionList[i+1]
157      * Assumes that 0 <= codepoint <= 0x10FFFF
158      * @param codepoint
159      * @return the index
160      */
_findIndex(int c)161     private int _findIndex(int c) {
162         int lo = 0;
163         int hi = length - 1;
164         int i = (lo + hi) >>> 1;
165         // invariant: c >= list[lo]
166         // invariant: c < list[hi]
167         while (i != lo) {
168             if (c < transitions[i]) {
169                 hi = i;
170             } else {
171                 lo = i;
172             }
173             i = (lo + hi) >>> 1;
174         }
175         if (ASSERTIONS) _checkFind(c, lo);
176         return lo;
177     }
178 
_checkFind(int codepoint, int value)179     private void _checkFind(int codepoint, int value) {
180         int other = __findIndex(codepoint);
181         if (other != value) {
182             throw new IllegalArgumentException("Invariant failed: binary search"
183                     + "\t" + Utility.hex(codepoint) + ": " + value
184                     + "\tshould be: " + other);
185         }
186     }
187 
__findIndex(int codepoint)188     private int __findIndex(int codepoint) {
189         for (int i = length-1; i > 0; --i) {
190             if (transitions[i] <= codepoint) return i;
191         }
192         return 0;
193     }
194 
195     /*
196      * Try indexed lookup
197 
198     static final int SHIFT = 8;
199     int[] starts = new int[0x10FFFF>>SHIFT]; // lowest transition index where codepoint>>x can be found
200     boolean startsValid = false;
201     private int findIndex(int codepoint) {
202         if (!startsValid) {
203             int start = 0;
204             for (int i = 1; i < length; ++i) {
205 
206             }
207         }
208         for (int i = length-1; i > 0; --i) {
209            if (transitions[i] <= codepoint) return i;
210        }
211        return 0;
212    }
213      */
214 
215     /**
216      * Remove the items from index through index+count-1.
217      * Logically reduces the size of the internal arrays.
218      * @param index
219      * @param count
220      */
_removeAt(int index, int count)221     private void _removeAt(int index, int count) {
222         for (int i = index + count; i < length; ++i) {
223             transitions[i-count] = transitions[i];
224             values[i-count] = values[i];
225         }
226         length -= count;
227     }
228     /**
229      * Add a gap from index to index+count-1.
230      * The values there are undefined, and must be set.
231      * Logically grows arrays to accomodate. Actual growth is limited
232      * @param index
233      * @param count
234      */
_insertGapAt(int index, int count)235     private void _insertGapAt(int index, int count) {
236         int newLength = length + count;
237         int[] oldtransitions = transitions;
238         T[] oldvalues = values;
239         if (newLength > transitions.length) {
240             int allocation = (int) (GROWTH_GAP + (newLength * GROWTH_PERCENT) / 100);
241             transitions = new int[allocation];
242             values = (T[]) new Object[allocation];
243             for (int i = 0; i < index; ++i) {
244                 transitions[i] = oldtransitions[i];
245                 values[i] = oldvalues[i];
246             }
247         }
248         for (int i = length - 1; i >= index; --i) {
249             transitions[i+count] = oldtransitions[i];
250             values[i+count] = oldvalues[i];
251         }
252         length = newLength;
253     }
254 
255     /**
256      * Associates code point with value. Removes any previous association.
257      * All code that calls this MUST check for frozen first!
258      * @param codepoint
259      * @param value
260      * @return this, for chaining
261      */
_put(int codepoint, T value)262     private UnicodeMap _put(int codepoint, T value) {
263         // Warning: baseIndex is an invariant; must
264         // be defined such that transitions[baseIndex] < codepoint
265         // at end of this routine.
266         int baseIndex;
267         if (transitions[lastIndex] <= codepoint
268                 && codepoint < transitions[lastIndex+1]) {
269             baseIndex = lastIndex;
270         } else {
271             baseIndex = _findIndex(codepoint);
272         }
273         int limitIndex = baseIndex + 1;
274         // cases are (a) value is already set
275         if (areEqual(values[baseIndex], value)) return this;
276         if (locked) {
277             throw new UnsupportedOperationException("Attempt to modify locked object");
278         }
279         if (errorOnReset && values[baseIndex] != null) {
280             throw new UnsupportedOperationException("Attempt to reset value for " + Utility.hex(codepoint)
281                     + " when that is disallowed. Old: " + values[baseIndex] + "; New: " + value);
282         }
283 
284         // adjust the available values
285         staleAvailableValues = true;
286         availableValues.add(value); // add if not there already
287 
288         int baseCP = transitions[baseIndex];
289         int limitCP = transitions[limitIndex];
290         // we now start walking through the difference case,
291         // based on whether we are at the start or end of range
292         // and whether the range is a single character or multiple
293 
294         if (baseCP == codepoint) {
295             // CASE: At very start of range
296             boolean connectsWithPrevious =
297                 baseIndex != 0 && areEqual(value, values[baseIndex-1]);
298 
299             if (limitCP == codepoint + 1) {
300                 // CASE: Single codepoint range
301                 boolean connectsWithFollowing =
302                     baseIndex < length - 2 && areEqual(value, values[limitIndex]); // was -1
303 
304                 if (connectsWithPrevious) {
305                     // A1a connects with previous & following, so remove index
306                     if (connectsWithFollowing) {
307                         _removeAt(baseIndex, 2);
308                     } else {
309                         _removeAt(baseIndex, 1); // extend previous
310                     }
311                     --baseIndex; // fix up
312                 } else if (connectsWithFollowing) {
313                     _removeAt(baseIndex, 1); // extend following backwards
314                     transitions[baseIndex] = codepoint;
315                 } else {
316                     // doesn't connect on either side, just reset
317                     values[baseIndex] = value;
318                 }
319             } else if (connectsWithPrevious) {
320                 // A.1: start of multi codepoint range
321                 // if connects
322                 ++transitions[baseIndex]; // extend previous
323             } else {
324                 // otherwise insert new transition
325                 transitions[baseIndex] = codepoint+1; // fix following range
326                 _insertGapAt(baseIndex, 1);
327                 values[baseIndex] = value;
328                 transitions[baseIndex] = codepoint;
329             }
330         } else if (limitCP == codepoint + 1) {
331             // CASE: at end of range
332             // if connects, just back up range
333             boolean connectsWithFollowing =
334                 baseIndex < length - 2 && areEqual(value, values[limitIndex]); // was -1
335 
336             if (connectsWithFollowing) {
337                 --transitions[limitIndex];
338                 return this;
339             } else {
340                 _insertGapAt(limitIndex, 1);
341                 transitions[limitIndex] = codepoint;
342                 values[limitIndex] = value;
343             }
344         } else {
345             // CASE: in middle of range
346             // insert gap, then set the new range
347             _insertGapAt(++baseIndex,2);
348             transitions[baseIndex] = codepoint;
349             values[baseIndex] = value;
350             transitions[baseIndex+1] = codepoint + 1;
351             values[baseIndex+1] = values[baseIndex-1]; // copy lower range values
352         }
353         lastIndex = baseIndex; // store for next time
354         return this;
355     }
356 
357     private UnicodeMap _putAll(int startCodePoint, int endCodePoint, T value) {
358         // TODO optimize
359         for (int i = startCodePoint; i <= endCodePoint; ++i) {
360             _put(i, value);
361             if (ASSERTIONS) _checkInvariants();
362         }
363         return this;
364     }
365 
366     /**
367      * Sets the codepoint value.
368      * @param codepoint
369      * @param value
370      * @return this (for chaining)
371      */
372     public UnicodeMap<T> put(int codepoint, T value) {
373         if (codepoint < 0 || codepoint > 0x10FFFF) {
374             throw new IllegalArgumentException("Codepoint out of range: " + codepoint);
375         }
376         _put(codepoint, value);
377         if (ASSERTIONS) _checkInvariants();
378         return this;
379     }
380 
381     /**
382      * Sets the codepoint value.
383      * @param codepoint
384      * @param value
385      * @return this (for chaining)
386      */
387     public UnicodeMap<T> put(String string, T value) {
388         int v = UnicodeSet.getSingleCodePoint(string);
389         if (v == Integer.MAX_VALUE) {
390             if (locked) {
391                 throw new UnsupportedOperationException("Attempt to modify locked object");
392             }
393             if (value != null) {
394                 if (stringMap == null) {
395                     stringMap = new TreeMap<String,T>();
396                 }
397                 stringMap.put(string, value);
398                 staleAvailableValues = true;
399             } else if (stringMap != null) {
400                 if (stringMap.remove(string) != null) {
401                     staleAvailableValues = true;
402                 }
403             }
404             return this;
405         }
406         return put(v, value);
407     }
408 
409     /**
410      * Adds bunch o' codepoints; otherwise like put.
411      * @param codepoints
412      * @param value
413      * @return this (for chaining)
414      */
415     public UnicodeMap<T> putAll(UnicodeSet codepoints, T value) {
416         UnicodeSetIterator it = new UnicodeSetIterator(codepoints);
417         while (it.nextRange()) {
418             if (it.string == null) {
419                 _putAll(it.codepoint, it.codepointEnd, value);
420             } else {
421                 put(it.string, value);
422             }
423         }
424         return this;
425     }
426 
427     /**
428      * Adds bunch o' codepoints; otherwise like add.
429      * @param startCodePoint
430      * @param endCodePoint
431      * @param value
432      * @return this (for chaining)
433      */
434     public UnicodeMap<T> putAll(int startCodePoint, int endCodePoint, T value) {
435         if (locked) {
436             throw new UnsupportedOperationException("Attempt to modify locked object");
437         }
438         if (startCodePoint < 0 || endCodePoint > 0x10FFFF) {
439             throw new IllegalArgumentException("Codepoint out of range: "
440                     + Utility.hex(startCodePoint) + ".." + Utility.hex(endCodePoint));
441         }
442         return _putAll(startCodePoint, endCodePoint, value);
443     }
444 
445     /**
446      * Add all the (main) values from a UnicodeMap
447      * @param unicodeMap the property to add to the map
448      * @return this (for chaining)
449      */
450     public UnicodeMap<T> putAll(UnicodeMap<T> unicodeMap) {
451         for (int i = 0; i < unicodeMap.length; ++i) {
452             T value = unicodeMap.values[i];
453             if (value != null) {
454                 _putAll(unicodeMap.transitions[i], unicodeMap.transitions[i+1]-1, value);
455             }
456             if (ASSERTIONS) _checkInvariants();
457         }
458         if (unicodeMap.stringMap != null && !unicodeMap.stringMap.isEmpty()) {
459             if (stringMap == null) {
460                 stringMap = new TreeMap<String,T>();
461             }
462             stringMap.putAll(unicodeMap.stringMap);
463         }
464         return this;
465     }
466 
467     /**
468      * Add all the (main) values from a Unicode property
469      * @param prop the property to add to the map
470      * @return this (for chaining)
471      */
472     public UnicodeMap<T> putAllFiltered(UnicodeMap<T> prop, UnicodeSet filter) {
473         // TODO optimize
474         for (UnicodeSetIterator it = new UnicodeSetIterator(filter); it.next();) {
475             if (it.codepoint != UnicodeSetIterator.IS_STRING) {
476                 T value = prop.getValue(it.codepoint);
477                 if (value != null) {
478                     _put(it.codepoint, value);
479                 }
480             }
481         }
482         // now do the strings
483         for (String key : filter.strings()) {
484             T value = prop.get(key);
485             if (value != null) {
486                 put(key, value);
487             }
488         }
489         return this;
490     }
491 
492     /**
493      * Set the currently unmapped Unicode code points to the given value.
494      * @param value the value to set
495      * @return this (for chaining)
496      */
497     public UnicodeMap<T> setMissing(T value) {
498         // fast path, if value not yet present
499         if (!getAvailableValues().contains(value)) {
500             staleAvailableValues = true;
501             availableValues.add(value);
502             for (int i = 0; i < length; ++i) {
503                 if (values[i] == null) values[i] = value;
504             }
505             return this;
506         } else {
507             return putAll(keySet(null), value);
508         }
509     }
510     /**
511      * Returns the keyset consisting of all the keys that would produce the given value. Deposits into
512      * result if it is not null. Remember to clear if you just want
513      * the new values.
514      */
515     public UnicodeSet keySet(T value, UnicodeSet result) {
516         if (result == null) result = new UnicodeSet();
517         for (int i = 0; i < length - 1; ++i) {
518             if (areEqual(value, values[i])) {
519                 result.add(transitions[i], transitions[i+1]-1);
520             }
521         }
522         if (value != null && stringMap != null) {
523             for (String key : stringMap.keySet()) {
524                 T newValue = stringMap.get(key);
525                 if (value.equals(newValue)) {
526                     result.add((String)key);
527                 }
528             }
529         }
530         return result;
531     }
532 
533     /**
534      * Returns the keyset consisting of all the keys that would produce the given value.
535      * the new values.
536      */
537     public UnicodeSet keySet(T value) {
538         return keySet(value,null);
539     }
540 
541     /**
542      * Returns the keyset consisting of all the keys that would produce (non-null) values.
543      */
544     public UnicodeSet keySet() {
545         UnicodeSet result = new UnicodeSet();
546         for (int i = 0; i < length - 1; ++i) {
547             if (values[i] != null) {
548                 result.add(transitions[i], transitions[i+1]-1);
549             }
550         }
551         if (stringMap != null) {
552             result.addAll(stringMap.keySet());
553         }
554         return result;
555     }
556 
557     /**
558      * Returns the list of possible values. Deposits each non-null value into
559      * result. Creates result if it is null. Remember to clear result if
560      * you are not appending to existing collection.
561      * @param result
562      * @return result
563      */
564     public <U extends Collection<T>> U values(U result) {
565         if (staleAvailableValues) {
566             // collect all the current values
567             // retain them in the availableValues
568             Set<T> temp = new HashSet<T>();
569             for (int i = 0; i < length - 1; ++i) {
570                 if (values[i] != null) temp.add(values[i]);
571             }
572             availableValues.retainAll(temp);
573             if (stringMap != null) {
574                 availableValues.addAll(stringMap.values());
575             }
576             staleAvailableValues = false;
577         }
578         if (result == null) {
579             result = (U) new ArrayList<T>(availableValues.size());
580         }
581         result.addAll(availableValues);
582         return result;
583     }
584 
585     /**
586      * Convenience method
587      */
588     public Collection<T> values() {
589         return getAvailableValues(null);
590     }
591     /**
592      * Gets the value associated with a given code point.
593      * Returns null, if there is no such value.
594      * @param codepoint
595      * @return the value
596      */
597     public T get(int codepoint) {
598         if (codepoint < 0 || codepoint > 0x10FFFF) {
599             throw new IllegalArgumentException("Codepoint out of range: " + codepoint);
600         }
601         return values[_findIndex(codepoint)];
602     }
603 
604     /**
605      * Gets the value associated with a given code point.
606      * Returns null, if there is no such value.
607      * @param codepoint
608      * @return the value
609      */
610     public T get(String value) {
611         if (UTF16.hasMoreCodePointsThan(value, 1)) {
612             if (stringMap == null) {
613                 return null;
614             }
615             return stringMap.get(value);
616         }
617         return getValue(UTF16.charAt(value, 0));
618     }
619 
620 
621     /**
622      * Change a new string from the source string according to the mappings.
623      * For each code point cp, if getValue(cp) is null, append the character, otherwise append getValue(cp).toString()
624      * TODO: extend to strings
625      * @param source
626      * @return
627      */
628     public String transform(String source) {
629         StringBuffer result = new StringBuffer();
630         int cp;
631         for (int i = 0; i < source.length(); i += UTF16.getCharCount(cp)) {
632             cp = UTF16.charAt(source, i);
633             T mResult = getValue(cp);
634             if (mResult != null) {
635                 result.append(mResult);
636             } else {
637                 UTF16.append(result, cp);
638             }
639         }
640         return result.toString();
641     }
642 
643     /**
644      * Used to add complex values, where the value isn't replaced but in some sense composed
645      * @author markdavis
646      */
647     public abstract static class Composer<T> {
648         /**
649          * This will be called with either a string or a code point. The result is the new value for that item.
650          * If the codepoint is used, the string is null; if the string is used, the codepoint is -1.
651          * @param a
652          * @param b
653          */
654         public abstract T compose(int codePoint, String string, T a, T b);
655     }
656 
657     public UnicodeMap<T> composeWith(UnicodeMap<T> other, Composer<T> composer) {
658         for (T value : other.getAvailableValues()) {
659             UnicodeSet set = other.keySet(value);
660             composeWith(set, value, composer);
661         }
662         return this;
663     }
664 
665     public UnicodeMap<T> composeWith(UnicodeSet set, T value, Composer<T> composer) {
666         for (UnicodeSetIterator it = new UnicodeSetIterator(set); it.next();) {
667             int i = it.codepoint;
668             if (i == UnicodeSetIterator.IS_STRING) {
669                 String s = it.string;
670                 T v1 = getValue(s);
671                 T v3 = composer.compose(-1, s, v1, value);
672                 if (v1 != v3 && (v1 == null || !v1.equals(v3))) {
673                     put(s, v3);
674                 }
675             } else {
676                 T v1 = getValue(i);
677                 T v3 = composer.compose(i, null, v1, value);
678                 if (v1 != v3 && (v1 == null || !v1.equals(v3))) {
679                     put(i, v3);
680                 }
681             }
682         }
683         return this;
684     }
685 
686     public String toString() {
687         return toString(null);
688     }
689 
690     public String toString(Comparator<T> collected) {
691         StringBuffer result = new StringBuffer();
692         if (collected == null) {
693             for (int i = 0; i < length-1; ++i) {
694                 T value = values[i];
695                 if (value == null) continue;
696                 int start = transitions[i];
697                 int end = transitions[i+1]-1;
698                 result.append(Utility.hex(start));
699                 if (start != end) result.append("-").append(Utility.hex(end));
700                 result.append("=").append(value.toString()).append("\n");
701             }
702             if (stringMap != null) {
703                 for (String s : stringMap.keySet()) {
704                     result.append(Utility.hex(s)).append("=").append(stringMap.get(s).toString()).append("\n");
705                 }
706             }
707         } else {
708             Set<T> set = values(new TreeSet<T>(collected));
709             for (Iterator<T> it = set.iterator(); it.hasNext();) {
710                 T value = it.next();
711                 UnicodeSet s = keySet(value);
712                 result.append(value).append("=").append(s.toString()).append("\n");
713             }
714         }
715         return result.toString();
716     }
717     /**
718      * @return Returns the errorOnReset value.
719      */
720     public boolean getErrorOnReset() {
721         return errorOnReset;
722     }
723     /**
724      * Puts the UnicodeMap into a state whereby new mappings are accepted, but changes to old mappings cause an exception.
725      * @param errorOnReset The errorOnReset to set.
726      */
727     public UnicodeMap<T> setErrorOnReset(boolean errorOnReset) {
728         this.errorOnReset = errorOnReset;
729         return this;
730     }
731 
732     /* (non-Javadoc)
733      * @see com.ibm.icu.dev.test.util.Freezable#isFrozen()
734      */
735     public boolean isFrozen() {
736         // TODO Auto-generated method stub
737         return locked;
738     }
739 
740     /* (non-Javadoc)
741      * @see com.ibm.icu.dev.test.util.Freezable#lock()
742      */
743     public UnicodeMap<T> freeze() {
744         locked = true;
745         return this;
746     }
747 
748     /**
749      * Utility to find the maximal common prefix of two strings.
750      * TODO: fix supplemental support
751      */
752     static public int findCommonPrefix(String last, String s) {
753         int minLen = Math.min(last.length(), s.length());
754         for (int i = 0; i < minLen; ++i) {
755             if (last.charAt(i) != s.charAt(i)) return i;
756         }
757         return minLen;
758     }
759 
760     /**
761      * Get the number of ranges; used for getRangeStart/End. The ranges together cover all of the single-codepoint keys in the UnicodeMap. Other keys can be gotten with getStrings().
762      */
763     public int getRangeCount() {
764         return length-1;
765     }
766 
767     /**
768      * Get the start of a range. All code points between start and end are in the UnicodeMap's keyset.
769      */
770     public int getRangeStart(int range) {
771         return transitions[range];
772     }
773 
774     /**
775      * Get the start of a range. All code points between start and end are in the UnicodeMap's keyset.
776      */
777     public int getRangeEnd(int range) {
778         return transitions[range+1] - 1;
779     }
780 
781     /**
782      * Get the value for the range.
783      */
784     public T getRangeValue(int range) {
785         return values[range];
786     }
787 
788     /**
789      * Get the strings that are not in the ranges. Returns null if there are none.
790      * @return
791      */
792     public Set<String> getNonRangeStrings() {
793         if (stringMap == null || stringMap.isEmpty()) {
794             return null;
795         }
796         return Collections.unmodifiableSet(stringMap.keySet());
797     }
798 
799     static final boolean DEBUG_WRITE = false;
800 
801     /* (non-Javadoc)
802      * @see java.util.Map#containsKey(java.lang.Object)
803      */
804     public boolean containsKey(String key) {
805         return getValue(key) != null;
806     }
807 
808     /* (non-Javadoc)
809      * @see java.util.Map#containsKey(java.lang.Object)
810      */
811     public boolean containsKey(int key) {
812         return getValue(key) != null;
813     }
814 
815     /* (non-Javadoc)
816      * @see java.util.Map#containsValue(java.lang.Object)
817      */
818     public boolean containsValue(T value) {
819         // TODO Optimize
820         return getAvailableValues().contains(value);
821     }
822 
823     /* (non-Javadoc)
824      * @see java.util.Map#isEmpty()
825      */
826     public boolean isEmpty() {
827         return size() == 0;
828     }
829 
830     /* (non-Javadoc)
831      * @see java.util.Map#putAll(java.util.Map)
832      */
833     public UnicodeMap<T> putAll(Map<? extends String, ? extends T> map) {
834         for (String key : map.keySet()) {
835             put(key,map.get(key));
836         }
837         return this;
838     }
839 
840     /**
841      * Utility for extracting map
842      */
843     public UnicodeMap<T> putAllIn(Map<? super String, ? super T> map) {
844         for (String key : keySet()) {
845             map.put(key, get(key));
846         }
847         return this;
848     }
849 
850     /* (non-Javadoc)
851      * @see java.util.Map#remove(java.lang.Object)
852      */
853     public UnicodeMap<T> remove(String key) {
854         return put(key, null);
855     }
856 
857     /* (non-Javadoc)
858      * @see java.util.Map#remove(java.lang.Object)
859      */
860     public UnicodeMap<T> remove(int key) {
861         return put(key, null);
862     }
863 
864     /* (non-Javadoc)
865      * @see java.util.Map#size()
866      */
867     public int size() {
868         int result = stringMap == null ? 0 : stringMap.size();
869         for (int i = 0; i < length-1; ++i) {
870             T value = values[i];
871             if (value == null) continue;
872             result += transitions[i+1] - transitions[i];
873         }
874         return result;
875     }
876 
877     /* (non-Javadoc)
878      * @see java.util.Map#entrySet()
879      */
880     public Iterable<Entry<String,T>> entrySet() {
881         return new EntrySetX();
882     }
883 
884     private class EntrySetX implements Iterable<Entry<String, T>> {
885         public Iterator<Entry<String, T>> iterator() {
886             return new IteratorX();
887         }
888         public String toString() {
889             StringBuffer b = new StringBuffer();
890             for (Iterator it = iterator(); it.hasNext();) {
891                 Object item = it.next();
892                 b.append(item.toString()).append(' ');
893             }
894             return b.toString();
895         }
896     }
897 
898     private class IteratorX implements Iterator<Entry<String, T>> {
899         Iterator<String> iterator = keySet().iterator();
900 
901         /* (non-Javadoc)
902          * @see java.util.Iterator#hasNext()
903          */
904         public boolean hasNext() {
905             return iterator.hasNext();
906         }
907 
908         /* (non-Javadoc)
909          * @see java.util.Iterator#next()
910          */
911         public Entry<String, T> next() {
912             String key = iterator.next();
913             return new ImmutableEntry(key, get(key));
914         }
915 
916         /* (non-Javadoc)
917          * @see java.util.Iterator#remove()
918          */
919         public void remove() {
920             throw new UnsupportedOperationException();
921         }
922 
923     }
924 
925     /**
926      * Struct-like class used to iterate over a UnicodeMap in a for loop.
927      * If the value is a string, then codepoint == codepointEnd == -1. Otherwise the string is null;
928      * Caution: The contents may change during the iteration!
929      */
930     public static class EntryRange<T> {
931         public int codepoint;
932         public int codepointEnd;
933         public String string;
934         public T value;
935         @Override
936         public String toString() {
937             return (string != null ? Utility.hex(string)
938                     : Utility.hex(codepoint) + (codepoint == codepointEnd ? "" : ".." + Utility.hex(codepointEnd)))
939                     + "=" + value;
940         }
941     }
942 
943     /**
944      * Returns an Iterable over EntryRange, designed for efficient for loops over UnicodeMaps.
945      * Caution: For efficiency, the EntryRange may be reused, so the EntryRange may change on each iteration!
946      * The value is guaranteed never to be null.
947      * @return entry range, for for loops
948      */
949     public Iterable<EntryRange<T>> entryRanges() {
950         return new EntryRanges();
951     }
952 
953     private class EntryRanges implements Iterable<EntryRange<T>>, Iterator<EntryRange<T>> {
954         private int pos;
955         private EntryRange<T> result = new EntryRange<T>();
956         private int lastRealRange = values[length-2] == null ? length - 2 : length - 1;
957         private Iterator<Entry<String, T>> stringIterator = stringMap == null ? null : stringMap.entrySet().iterator();
958 
959         public Iterator<EntryRange<T>> iterator() {
960             return this;
961         }
962         public boolean hasNext() {
963             return pos < lastRealRange || (stringIterator != null && stringIterator.hasNext());
964         }
965         public EntryRange<T> next() {
966             // a range may be null, but then the next one must not be (except the final range)
967             if (pos < lastRealRange) {
968                 T temp = values[pos];
969                 if (temp == null) {
970                     temp = values[++pos];
971                 }
972                 result.codepoint = transitions[pos];
973                 result.codepointEnd = transitions[pos+1]-1;
974                 result.string = null;
975                 result.value = temp;
976                 ++pos;
977             } else {
978                 Entry<String, T> entry = stringIterator.next();
979                 result.codepoint = result.codepointEnd = -1;
980                 result.string = entry.getKey();
981                 result.value = entry.getValue();
982             }
983             return result;
984         }
985         public void remove() {
986             throw new UnsupportedOperationException();
987         }
988     }
989 
990     /* (non-Javadoc)
991      * @see java.lang.Iterable#iterator()
992      */
993     public Iterator<String> iterator() {
994         return keySet().iterator();
995     }
996 
997     /**
998      * Old form for compatibility
999      */
1000     public T getValue(String key) {
1001         return get(key);
1002     }
1003 
1004     /**
1005      * Old form for compatibility
1006      */
1007     public T getValue(int key) {
1008         // TODO Auto-generated method stub
1009         return get(key);
1010     }
1011 
1012     /**
1013      * Old form for compatibility
1014      */
1015     public Collection<T> getAvailableValues() {
1016         return values();
1017     }
1018 
1019     /**
1020      * Old form for compatibility
1021      */
1022     public <U extends Collection<T>> U getAvailableValues(U result) {
1023         return values(result);
1024     }
1025 
1026     /**
1027      * Old form for compatibility
1028      */
1029     public UnicodeSet getSet(T value) {
1030         return keySet(value);
1031     }
1032 
1033     /**
1034      * Old form for compatibility
1035      */
1036     public UnicodeSet getSet(T value, UnicodeSet result) {
1037         return keySet(value, result);
1038     }
1039 
1040     // This is to support compressed serialization. It works; just commented out for now as we shift to Generics
1041     // TODO Fix once generics are cleaned up.
1042     //    // TODO Fix to serialize more than just strings.
1043     //    // Only if all the items are strings will we do the following compression
1044     //    // Otherwise we'll just use Java Serialization, bulky as it is
1045     //    public void writeExternal(ObjectOutput out1) throws IOException {
1046     //        DataOutputCompressor sc = new DataOutputCompressor(out1);
1047     //        // if all objects are strings
1048     //        Collection<T> availableVals = getAvailableValues();
1049     //        boolean allStrings = allAreString(availableVals);
1050     //        sc.writeBoolean(allStrings);
1051     //        Map object_index = new LinkedHashMap();
1052     //        if (allAreString(availableVals)) {
1053     //            sc.writeStringSet(new TreeSet(availableVals), object_index);
1054     //        } else {
1055     //            sc.writeCollection(availableVals, object_index);
1056     //        }
1057     //        sc.writeUInt(length);
1058     //        int lastTransition = -1;
1059     //        int lastValueNumber = 0;
1060     //        if (DEBUG_WRITE) System.out.println("Trans count: " + length);
1061     //        for (int i = 0; i < length; ++i) {
1062     //            int valueNumber = ((Integer)object_index.get(values[i])).intValue();
1063     //            if (DEBUG_WRITE) System.out.println("Trans: " + transitions[i] + ",\t" + valueNumber);
1064     //
1065     //            int deltaTransition = transitions[i] - lastTransition;
1066     //            lastTransition = transitions[i];
1067     //            int deltaValueNumber = valueNumber - lastValueNumber;
1068     //            lastValueNumber = valueNumber;
1069     //
1070     //            deltaValueNumber <<= 1; // make room for one bit
1071     //            boolean canCombine = deltaTransition == 1;
1072     //            if (canCombine) deltaValueNumber |= 1;
1073     //            sc.writeInt(deltaValueNumber);
1074     //            if (DEBUG_WRITE) System.out.println("deltaValueNumber: " + deltaValueNumber);
1075     //            if (!canCombine) {
1076     //                sc.writeUInt(deltaTransition);
1077     //                if (DEBUG_WRITE) System.out.println("deltaTransition: " + deltaTransition);
1078     //            }
1079     //        }
1080     //        sc.flush();
1081     //    }
1082     //
1083     //    /**
1084     //     *
1085     //     */
1086     //    private boolean allAreString(Collection<T> availableValues2) {
1087     //        //if (true) return false;
1088     //        for (Iterator<T> it = availableValues2.iterator(); it.hasNext();) {
1089     //            if (!(it.next() instanceof String)) return false;
1090     //        }
1091     //        return true;
1092     //    }
1093     //
1094     //    public void readExternal(ObjectInput in1) throws IOException, ClassNotFoundException {
1095     //        DataInputCompressor sc = new DataInputCompressor(in1);
1096     //        boolean allStrings = sc.readBoolean();
1097     //        T[] valuesList;
1098     //        availableValues = new LinkedHashSet();
1099     //        if (allStrings) {
1100     //            valuesList = sc.readStringSet(availableValues);
1101     //        } else {
1102     //            valuesList = sc.readCollection(availableValues);
1103     //        }
1104     //        length = sc.readUInt();
1105     //        transitions = new int[length];
1106     //        if (DEBUG_WRITE) System.out.println("Trans count: " + length);
1107     //        values = (T[]) new Object[length];
1108     //        int currentTransition = -1;
1109     //        int currentValue = 0;
1110     //        int deltaTransition;
1111     //        for (int i = 0; i < length; ++i) {
1112     //            int temp = sc.readInt();
1113     //            if (DEBUG_WRITE) System.out.println("deltaValueNumber: " + temp);
1114     //            boolean combined = (temp & 1) != 0;
1115     //            temp >>= 1;
1116     //        values[i] = valuesList[currentValue += temp];
1117     //        if (!combined) {
1118     //            deltaTransition = sc.readUInt();
1119     //            if (DEBUG_WRITE) System.out.println("deltaTransition: " + deltaTransition);
1120     //        } else {
1121     //            deltaTransition = 1;
1122     //        }
1123     //        transitions[i] = currentTransition += deltaTransition; // delta value
1124     //        if (DEBUG_WRITE) System.out.println("Trans: " + transitions[i] + ",\t" + currentValue);
1125     //        }
1126     //    }
1127 
1128     public final UnicodeMap<T> removeAll(UnicodeSet set) {
1129         return putAll(set, null);
1130     }
1131 
1132     public final UnicodeMap<T> removeAll(UnicodeMap<T> reference) {
1133         return removeRetainAll(reference, true);
1134     }
1135 
1136     public final UnicodeMap<T> retainAll(UnicodeSet set) {
1137         UnicodeSet toNuke = new UnicodeSet();
1138         // TODO Optimize
1139         for (EntryRange<T> ae : entryRanges()) {
1140             if (ae.string != null) {
1141                 if (!set.contains(ae.string)) {
1142                     toNuke.add(ae.string);
1143                 }
1144             } else {
1145                 for (int i = ae.codepoint; i <= ae.codepointEnd; ++i) {
1146                     if (!set.contains(i)) {
1147                         toNuke.add(i);
1148                     }
1149                 }
1150             }
1151         }
1152         return putAll(toNuke, null);
1153     }
1154 
1155     public final UnicodeMap<T> retainAll(UnicodeMap<T> reference) {
1156         return removeRetainAll(reference, false);
1157     }
1158 
1159     private final UnicodeMap<T> removeRetainAll(UnicodeMap<T> reference, boolean remove) {
1160         UnicodeSet toNuke = new UnicodeSet();
1161         // TODO Optimize
1162         for (EntryRange<T> ae : entryRanges()) {
1163             if (ae.string != null) {
1164                 if (ae.value.equals(reference.get(ae.string)) == remove) {
1165                     toNuke.add(ae.string);
1166                 }
1167             } else {
1168                 for (int i = ae.codepoint; i <= ae.codepointEnd; ++i) {
1169                     if (ae.value.equals(reference.get(i)) == remove) {
1170                         toNuke.add(i);
1171                     }
1172                 }
1173             }
1174         }
1175         return putAll(toNuke, null);
1176     }
1177 
1178     /**
1179      * Returns the keys that consist of multiple code points.
1180      * @return
1181      */
1182     public Set<String> stringKeys() {
1183         return stringMap.keySet();
1184     }
1185 }
1186