1 /*
2  **********************************************************************
3  * Copyright (c) 2006-2007, Google and others.  All Rights Reserved.
4  **********************************************************************
5  * Author: Mark Davis
6  **********************************************************************
7  */
8 package org.unicode.cldr.util;
9 
10 import java.util.ArrayList;
11 import java.util.Collection;
12 import java.util.Comparator;
13 import java.util.HashMap;
14 import java.util.HashSet;
15 import java.util.Map;
16 import java.util.Set;
17 import java.util.TreeSet;
18 
19 /**
20  * Provide way to associate each of a set of T objects with an integer, where
21  * the integer can be used to get the object. All objects are immutable, and
22  * created with a corresponding factory object. The integers have no relation to
23  * the order in the original set. The integers may not be compact (eg 0..n).
24  *
25  * @param <T>
26  */
27 public abstract class IntMap<T> {
28 
29     public interface IntMapFactory<T> {
make(Collection<T> values)30         public IntMap<T> make(Collection<T> values);
31     }
32 
get(int index)33     public abstract T get(int index);
34 
getValueMap(Map<T, Integer> output)35     public abstract Map<T, Integer> getValueMap(Map<T, Integer> output);
36 
getValueMap()37     public Map<T, Integer> getValueMap() {
38         return getValueMap(new HashMap<T, Integer>());
39     }
40 
approximateStorage()41     public abstract int approximateStorage();
42 
toString()43     public String toString() {
44         return getValueMap().toString();
45     }
46 
47     // Concrete classes, embedded for simplicity
48 
49     public static class BasicIntMap<T> extends IntMap<T> {
50         private final T[] intToValue;
51 
BasicIntMap(T[] intToValue)52         private BasicIntMap(T[] intToValue) {
53             this.intToValue = intToValue;
54         }
55 
get(int index)56         public T get(int index) {
57             return intToValue[index];
58         }
59 
getValueMap(Map<T, Integer> output)60         public Map<T, Integer> getValueMap(Map<T, Integer> output) {
61             for (int i = 0; i < intToValue.length; ++i) {
62                 output.put(intToValue[i], i);
63             }
64             return output;
65         }
66 
approximateStorage()67         public int approximateStorage() {
68             int size = OBJECT_OVERHEAD;
69             for (T item : intToValue) {
70                 size += 4; // size of int
71                 size += item.toString().length() * 2 + STRING_OVERHEAD;
72             }
73             return size;
74         }
75     }
76 
77     public static class BasicIntMapFactory<T> implements IntMapFactory<T> {
78         @SuppressWarnings("unchecked")
make(Collection<T> values)79         public BasicIntMap<T> make(Collection<T> values) {
80             return new BasicIntMap<T>((T[]) new ArrayList<T>(new HashSet<T>(values)).toArray());
81         }
82     }
83 
84     /**
85      * Stores short strings (255 or less) in compacted fashion. The number of
86      * strings is also limited: in the worst case to 2^24 / total number of UTF-8
87      * bytes
88      *
89      * @author markdavis
90      */
91     public static class CompactStringIntMap extends IntMap<String> {
92         private final String data;
93         private final int[] intToValue;
94 
CompactStringIntMap(String data, int[] intToValue)95         private CompactStringIntMap(String data, int[] intToValue) {
96             this.data = data;
97             this.intToValue = intToValue;
98         }
99 
get(int index)100         public String get(int index) {
101             // the packedIndex stores the string as an index in the top 24 bits, and length in the bottom 8.
102             int packedIndex = intToValue[index];
103             int len = packedIndex & 0xFF;
104             int dataIndex = packedIndex >>> 8;
105             return data.substring(dataIndex, dataIndex + len);
106         }
107 
getValueMap(Map<String, Integer> output)108         public Map<String, Integer> getValueMap(Map<String, Integer> output) {
109             for (int i = 0; i < intToValue.length; ++i) {
110                 output.put(get(i), i);
111             }
112             return output;
113         }
114 
approximateStorage()115         public int approximateStorage() {
116             int size = OBJECT_OVERHEAD + POINTER_OVERHEAD * 2;
117             size += data.length() * 2 + STRING_OVERHEAD;
118             size += 4 * intToValue.length;
119             return size;
120         }
121     }
122 
123     public static final Comparator<String> LONGEST_FIRST_COMPARATOR = new Comparator<String>() {
124         public int compare(String a, String b) {
125             return a.length() > b.length() ? -1
126                 : a.length() < b.length() ? 1
127                     : a.compareTo(b);
128         }
129     };
130 
131     public static class CompactStringIntMapFactory implements IntMapFactory<String> {
make(Collection<String> values)132         public CompactStringIntMap make(Collection<String> values) {
133             // first sort, longest first
134             Set<String> sorted = new TreeSet<String>(LONGEST_FIRST_COMPARATOR);
135             sorted.addAll(values);
136 
137             StringBuilder data = new StringBuilder();
138             int[] intToValue = new int[sorted.size()];
139             int count = 0;
140 
141             // compact the values
142             // the packedIndex stores the string as an index in the top 24 bits, and length in the bottom 8.
143             for (String string : sorted) {
144                 if (string.length() > 255) {
145                     throw new IllegalArgumentException(
146                         "String too large: CompactStringIntMapFactory only handles strings up to 255 in length");
147                 }
148                 int position = data.indexOf(string);
149                 if (position < 0) { // add if not there
150                     position = data.length();
151                     data.append(string);
152                 }
153                 intToValue[count++] = (position << 8) | string.length();
154             }
155             return new CompactStringIntMap(data.toString(), intToValue);
156         }
157     }
158 
159     // for approximateSize
160     private static final int OBJECT_OVERHEAD = 16;
161     private static final int POINTER_OVERHEAD = 16;
162     private static final int STRING_OVERHEAD = 16 + OBJECT_OVERHEAD;
163 
164 }