1 package perf;
2 
3 import java.nio.charset.Charset;
4 import java.util.Arrays;
5 
6 /**
7  * Trie container/wrapper, in this case implements Enum-value lookup.
8  * Sample code to possibly use for streamlined-lookup by dictionary, using
9  * UTF-8 bytes of {@link Enum#name()} as the key.
10  */
11 public class EnumByBytesLookup<E extends Enum<E>>
12 {
13     private final static Charset UTF8 = Charset.forName("UTF-8");
14 
15     private final Trie<E> _root;
16     private final int _size;
17 
EnumByBytesLookup(Trie<E> root, int size)18     private EnumByBytesLookup(Trie<E> root, int size) {
19         _root = root;
20         _size = size;
21     }
22 
buildFor(Class<EIN> enumClass)23     public static <EIN extends Enum<EIN>> EnumByBytesLookup<EIN> buildFor(Class<EIN> enumClass)
24     {
25         Trie<EIN> root = new Trie<EIN>(null);
26         int size = 0;
27         for (EIN en : enumClass.getEnumConstants()) {
28             byte[] key = en.name().getBytes(UTF8);
29             root = root.with(en, key);
30             ++size;
31         }
32         return new EnumByBytesLookup<EIN>(root, size);
33     }
34 
find(byte[] rawId)35     public E find(byte[] rawId) {
36       return _root.find(rawId);
37     }
38 
size()39     public int size() { return _size; }
40 }
41 
42 /**
43  * Trie nodes
44  */
45 class Trie<T> {
46     private final static byte[] NO_BYTES = new byte[0];
47 
48     private final static Trie<?>[] NO_NODES = new Trie<?>[0];
49 
50     /**
51      * For leaves, value matched by sequence
52      */
53     private final T _match;
54 
55     private final byte[] _nextBytes;
56     private final Trie<T>[] nextNodes;
57 
58     private final int nextCount;
59 
60     @SuppressWarnings("unchecked")
Trie(T match)61     Trie(T match) {
62       this(match, NO_BYTES, (Trie<T>[]) NO_NODES);
63     }
64 
Trie(T match, byte[] nextBytes, Trie<T>[] nextNodes)65     private Trie(T match, byte[] nextBytes, Trie<T>[] nextNodes) {
66       this._match = match;
67       this._nextBytes = nextBytes;
68       this.nextNodes = nextNodes;
69       nextCount = nextBytes.length;
70     }
71 
Trie(Trie<T> base, T match)72     private Trie(Trie<T> base, T match) {
73       // should we allow duplicate calls with same match? For now, let's not
74       if (base._match != null) {
75         throw new IllegalArgumentException("Trying to add same match multiple times");
76       }
77       this._match = match;
78       _nextBytes = base._nextBytes;
79       nextNodes = base.nextNodes;
80       nextCount = base.nextCount;
81     }
82 
Trie(Trie<T> base, byte nextByte, Trie<T> nextNode)83     private Trie(Trie<T> base, byte nextByte, Trie<T> nextNode) {
84       // should we allow duplicate calls with same match? For now, let's not
85       if (base._match != null) {
86         throw new IllegalArgumentException("Trying to add same match multiple times");
87       }
88       _match = base._match;
89       int size = base._nextBytes.length + 1;
90       _nextBytes = Arrays.copyOf(base._nextBytes, size);
91       _nextBytes[size-1] = nextByte;
92       nextNodes = Arrays.copyOf(base.nextNodes, size);
93       nextNodes[size-1] = nextNode;
94       nextCount = size;
95     }
96 
97     /**
98      * Constructor used when an existing branch needs to be replaced due to addition
99      */
Trie(Trie<T> base, int offset, Trie<T> newNode)100     private Trie(Trie<T> base, int offset, Trie<T> newNode) {
101       _match = base._match;
102       // can keep nextBytes, as they don't change
103       _nextBytes = base._nextBytes;
104       // but must create a copy of next nodes, to modify one entry
105       nextNodes = Arrays.copyOf(base.nextNodes, base.nextNodes.length);
106       nextNodes[offset] = newNode;
107       nextCount = base.nextCount;
108     }
109 
110     /**
111      * "Mutant factory" method: constructs a modified Trie, with specified raw id
112      * added.
113      */
with(T match, byte[] rawId)114     public Trie<T> with(T match, byte[] rawId) {
115       return with(match, rawId, 0, rawId.length);
116     }
117 
with(T match, byte[] rawId, int start, int end)118     private Trie<T> with(T match, byte[] rawId, int start, int end) {
119       if (start == end) {
120         return new Trie<T>(this, match);
121       }
122       // Ok: two choices; either we follow existing branch; or need to create new one
123       final byte b = rawId[start++];
124       for (int i = 0; i < nextCount; ++i) {
125         if (_nextBytes[i] == b) {
126           // existing branch: good day for delegation...
127           Trie<T> old = nextNodes[i];
128           // to keep things truly immutable, copy underlying arrays, then
129           return new Trie<T>(this, i, old.with(match, rawId, start, end));
130         }
131       }
132       // simplest recursively, but for fun let's convert to iteration. Start with tail
133       Trie<T> curr = new Trie<T>(match);
134 
135       for (int i = end-1; i >= start; --i) {
136         curr = new Trie<T>(this, rawId[i], curr);
137       }
138       return new Trie<T>(this, b, curr);
139     }
140 
find(byte[] id)141     public T find(byte[] id) {
142       return find(id, 0, id.length);
143     }
144 
find(byte[] id, int offset, int length)145     public T find(byte[] id, int offset, int length) {
146       Trie<T> t = this;
147       final int end = offset+length;
148 
149       for (; offset < end; ++offset) {
150         byte b = id[offset];
151         t = t.next(b);
152         if (t == null) {
153             // NOTE: if using null-padding, would trim here
154             /*
155           if (b == (byte) 0) {
156             break;
157           }
158           */
159           return null;
160         }
161       }
162       return t._match;
163     }
164 
next(int b)165     private Trie<T> next(int b) {
166       for (int i = 0; i < nextCount; ++i) {
167         if (_nextBytes[i] == b) {
168           return nextNodes[i];
169         }
170       }
171       return null;
172     }
173 }
174