1 /* 2 ********************************************************************** 3 * Copyright (c) 2006-2007, Google and others. All Rights Reserved. 4 ********************************************************************** 5 * Author: Mark Davis 6 ********************************************************************** 7 */ 8 package org.unicode.cldr.util; 9 10 import java.util.ArrayList; 11 import java.util.Collection; 12 import java.util.Comparator; 13 import java.util.HashMap; 14 import java.util.HashSet; 15 import java.util.Map; 16 import java.util.Set; 17 import java.util.TreeSet; 18 19 /** 20 * Provide way to associate each of a set of T objects with an integer, where 21 * the integer can be used to get the object. All objects are immutable, and 22 * created with a corresponding factory object. The integers have no relation to 23 * the order in the original set. The integers may not be compact (eg 0..n). 24 * 25 * @param <T> 26 */ 27 public abstract class IntMap<T> { 28 29 public interface IntMapFactory<T> { make(Collection<T> values)30 public IntMap<T> make(Collection<T> values); 31 } 32 get(int index)33 public abstract T get(int index); 34 getValueMap(Map<T, Integer> output)35 public abstract Map<T, Integer> getValueMap(Map<T, Integer> output); 36 getValueMap()37 public Map<T, Integer> getValueMap() { 38 return getValueMap(new HashMap<T, Integer>()); 39 } 40 approximateStorage()41 public abstract int approximateStorage(); 42 toString()43 public String toString() { 44 return getValueMap().toString(); 45 } 46 47 // Concrete classes, embedded for simplicity 48 49 public static class BasicIntMap<T> extends IntMap<T> { 50 private final T[] intToValue; 51 BasicIntMap(T[] intToValue)52 private BasicIntMap(T[] intToValue) { 53 this.intToValue = intToValue; 54 } 55 get(int index)56 public T get(int index) { 57 return intToValue[index]; 58 } 59 getValueMap(Map<T, Integer> output)60 public Map<T, Integer> getValueMap(Map<T, Integer> output) { 61 for (int i = 0; i < intToValue.length; ++i) { 62 output.put(intToValue[i], i); 63 } 64 return output; 65 } 66 approximateStorage()67 public int approximateStorage() { 68 int size = OBJECT_OVERHEAD; 69 for (T item : intToValue) { 70 size += 4; // size of int 71 size += item.toString().length() * 2 + STRING_OVERHEAD; 72 } 73 return size; 74 } 75 } 76 77 public static class BasicIntMapFactory<T> implements IntMapFactory<T> { 78 @SuppressWarnings("unchecked") make(Collection<T> values)79 public BasicIntMap<T> make(Collection<T> values) { 80 return new BasicIntMap<T>((T[]) new ArrayList<T>(new HashSet<T>(values)).toArray()); 81 } 82 } 83 84 /** 85 * Stores short strings (255 or less) in compacted fashion. The number of 86 * strings is also limited: in the worst case to 2^24 / total number of UTF-8 87 * bytes 88 * 89 * @author markdavis 90 */ 91 public static class CompactStringIntMap extends IntMap<String> { 92 private final String data; 93 private final int[] intToValue; 94 CompactStringIntMap(String data, int[] intToValue)95 private CompactStringIntMap(String data, int[] intToValue) { 96 this.data = data; 97 this.intToValue = intToValue; 98 } 99 get(int index)100 public String get(int index) { 101 // the packedIndex stores the string as an index in the top 24 bits, and length in the bottom 8. 102 int packedIndex = intToValue[index]; 103 int len = packedIndex & 0xFF; 104 int dataIndex = packedIndex >>> 8; 105 return data.substring(dataIndex, dataIndex + len); 106 } 107 getValueMap(Map<String, Integer> output)108 public Map<String, Integer> getValueMap(Map<String, Integer> output) { 109 for (int i = 0; i < intToValue.length; ++i) { 110 output.put(get(i), i); 111 } 112 return output; 113 } 114 approximateStorage()115 public int approximateStorage() { 116 int size = OBJECT_OVERHEAD + POINTER_OVERHEAD * 2; 117 size += data.length() * 2 + STRING_OVERHEAD; 118 size += 4 * intToValue.length; 119 return size; 120 } 121 } 122 123 public static final Comparator<String> LONGEST_FIRST_COMPARATOR = new Comparator<String>() { 124 public int compare(String a, String b) { 125 return a.length() > b.length() ? -1 126 : a.length() < b.length() ? 1 127 : a.compareTo(b); 128 } 129 }; 130 131 public static class CompactStringIntMapFactory implements IntMapFactory<String> { make(Collection<String> values)132 public CompactStringIntMap make(Collection<String> values) { 133 // first sort, longest first 134 Set<String> sorted = new TreeSet<String>(LONGEST_FIRST_COMPARATOR); 135 sorted.addAll(values); 136 137 StringBuilder data = new StringBuilder(); 138 int[] intToValue = new int[sorted.size()]; 139 int count = 0; 140 141 // compact the values 142 // the packedIndex stores the string as an index in the top 24 bits, and length in the bottom 8. 143 for (String string : sorted) { 144 if (string.length() > 255) { 145 throw new IllegalArgumentException( 146 "String too large: CompactStringIntMapFactory only handles strings up to 255 in length"); 147 } 148 int position = data.indexOf(string); 149 if (position < 0) { // add if not there 150 position = data.length(); 151 data.append(string); 152 } 153 intToValue[count++] = (position << 8) | string.length(); 154 } 155 return new CompactStringIntMap(data.toString(), intToValue); 156 } 157 } 158 159 // for approximateSize 160 private static final int OBJECT_OVERHEAD = 16; 161 private static final int POINTER_OVERHEAD = 16; 162 private static final int STRING_OVERHEAD = 16 + OBJECT_OVERHEAD; 163 164 }