1 package perf; 2 3 import java.nio.charset.Charset; 4 import java.util.Arrays; 5 6 /** 7 * Trie container/wrapper, in this case implements Enum-value lookup. 8 * Sample code to possibly use for streamlined-lookup by dictionary, using 9 * UTF-8 bytes of {@link Enum#name()} as the key. 10 */ 11 public class EnumByBytesLookup<E extends Enum<E>> 12 { 13 private final static Charset UTF8 = Charset.forName("UTF-8"); 14 15 private final Trie<E> _root; 16 private final int _size; 17 EnumByBytesLookup(Trie<E> root, int size)18 private EnumByBytesLookup(Trie<E> root, int size) { 19 _root = root; 20 _size = size; 21 } 22 buildFor(Class<EIN> enumClass)23 public static <EIN extends Enum<EIN>> EnumByBytesLookup<EIN> buildFor(Class<EIN> enumClass) 24 { 25 Trie<EIN> root = new Trie<EIN>(null); 26 int size = 0; 27 for (EIN en : enumClass.getEnumConstants()) { 28 byte[] key = en.name().getBytes(UTF8); 29 root = root.with(en, key); 30 ++size; 31 } 32 return new EnumByBytesLookup<EIN>(root, size); 33 } 34 find(byte[] rawId)35 public E find(byte[] rawId) { 36 return _root.find(rawId); 37 } 38 size()39 public int size() { return _size; } 40 } 41 42 /** 43 * Trie nodes 44 */ 45 class Trie<T> { 46 private final static byte[] NO_BYTES = new byte[0]; 47 48 private final static Trie<?>[] NO_NODES = new Trie<?>[0]; 49 50 /** 51 * For leaves, value matched by sequence 52 */ 53 private final T _match; 54 55 private final byte[] _nextBytes; 56 private final Trie<T>[] nextNodes; 57 58 private final int nextCount; 59 60 @SuppressWarnings("unchecked") Trie(T match)61 Trie(T match) { 62 this(match, NO_BYTES, (Trie<T>[]) NO_NODES); 63 } 64 Trie(T match, byte[] nextBytes, Trie<T>[] nextNodes)65 private Trie(T match, byte[] nextBytes, Trie<T>[] nextNodes) { 66 this._match = match; 67 this._nextBytes = nextBytes; 68 this.nextNodes = nextNodes; 69 nextCount = nextBytes.length; 70 } 71 Trie(Trie<T> base, T match)72 private Trie(Trie<T> base, T match) { 73 // should we allow duplicate calls with same match? For now, let's not 74 if (base._match != null) { 75 throw new IllegalArgumentException("Trying to add same match multiple times"); 76 } 77 this._match = match; 78 _nextBytes = base._nextBytes; 79 nextNodes = base.nextNodes; 80 nextCount = base.nextCount; 81 } 82 Trie(Trie<T> base, byte nextByte, Trie<T> nextNode)83 private Trie(Trie<T> base, byte nextByte, Trie<T> nextNode) { 84 // should we allow duplicate calls with same match? For now, let's not 85 if (base._match != null) { 86 throw new IllegalArgumentException("Trying to add same match multiple times"); 87 } 88 _match = base._match; 89 int size = base._nextBytes.length + 1; 90 _nextBytes = Arrays.copyOf(base._nextBytes, size); 91 _nextBytes[size-1] = nextByte; 92 nextNodes = Arrays.copyOf(base.nextNodes, size); 93 nextNodes[size-1] = nextNode; 94 nextCount = size; 95 } 96 97 /** 98 * Constructor used when an existing branch needs to be replaced due to addition 99 */ Trie(Trie<T> base, int offset, Trie<T> newNode)100 private Trie(Trie<T> base, int offset, Trie<T> newNode) { 101 _match = base._match; 102 // can keep nextBytes, as they don't change 103 _nextBytes = base._nextBytes; 104 // but must create a copy of next nodes, to modify one entry 105 nextNodes = Arrays.copyOf(base.nextNodes, base.nextNodes.length); 106 nextNodes[offset] = newNode; 107 nextCount = base.nextCount; 108 } 109 110 /** 111 * "Mutant factory" method: constructs a modified Trie, with specified raw id 112 * added. 113 */ with(T match, byte[] rawId)114 public Trie<T> with(T match, byte[] rawId) { 115 return with(match, rawId, 0, rawId.length); 116 } 117 with(T match, byte[] rawId, int start, int end)118 private Trie<T> with(T match, byte[] rawId, int start, int end) { 119 if (start == end) { 120 return new Trie<T>(this, match); 121 } 122 // Ok: two choices; either we follow existing branch; or need to create new one 123 final byte b = rawId[start++]; 124 for (int i = 0; i < nextCount; ++i) { 125 if (_nextBytes[i] == b) { 126 // existing branch: good day for delegation... 127 Trie<T> old = nextNodes[i]; 128 // to keep things truly immutable, copy underlying arrays, then 129 return new Trie<T>(this, i, old.with(match, rawId, start, end)); 130 } 131 } 132 // simplest recursively, but for fun let's convert to iteration. Start with tail 133 Trie<T> curr = new Trie<T>(match); 134 135 for (int i = end-1; i >= start; --i) { 136 curr = new Trie<T>(this, rawId[i], curr); 137 } 138 return new Trie<T>(this, b, curr); 139 } 140 find(byte[] id)141 public T find(byte[] id) { 142 return find(id, 0, id.length); 143 } 144 find(byte[] id, int offset, int length)145 public T find(byte[] id, int offset, int length) { 146 Trie<T> t = this; 147 final int end = offset+length; 148 149 for (; offset < end; ++offset) { 150 byte b = id[offset]; 151 t = t.next(b); 152 if (t == null) { 153 // NOTE: if using null-padding, would trim here 154 /* 155 if (b == (byte) 0) { 156 break; 157 } 158 */ 159 return null; 160 } 161 } 162 return t._match; 163 } 164 next(int b)165 private Trie<T> next(int b) { 166 for (int i = 0; i < nextCount; ++i) { 167 if (_nextBytes[i] == b) { 168 return nextNodes[i]; 169 } 170 } 171 return null; 172 } 173 } 174