1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 * ******************************************************************************** 5 * Copyright (C) 2007-2011, International Business Machines Corporation and others. 6 * All Rights Reserved. 7 * ******************************************************************************** 8 */ 9 package com.ibm.icu.impl; 10 11 import java.util.Iterator; 12 import java.util.LinkedList; 13 import java.util.List; 14 import java.util.ListIterator; 15 16 import com.ibm.icu.lang.UCharacter; 17 import com.ibm.icu.text.UnicodeSet; 18 19 /** 20 * TextTrieMap is a trie implementation for supporting 21 * fast prefix match for the key. 22 */ 23 public class TextTrieMap<V> { 24 25 private Node _root = new Node(); 26 boolean _ignoreCase; 27 28 public static class Output { 29 public int matchLength; 30 public boolean partialMatch; 31 } 32 33 /** 34 * Constructs a TextTrieMap object. 35 * 36 * @param ignoreCase true to use simple case insensitive match 37 */ TextTrieMap(boolean ignoreCase)38 public TextTrieMap(boolean ignoreCase) { 39 _ignoreCase = ignoreCase; 40 } 41 42 /** 43 * Adds the text key and its associated object in this object. 44 * 45 * @param text The text. 46 * @param val The value object associated with the text. 47 */ put(CharSequence text, V val)48 public TextTrieMap<V> put(CharSequence text, V val) { 49 CharIterator chitr = new CharIterator(text, 0, _ignoreCase); 50 _root.add(chitr, val); 51 return this; 52 } 53 54 /** 55 * Gets an iterator of the objects associated with the 56 * longest prefix matching string key. 57 * 58 * @param text The text to be matched with prefixes. 59 * @return An iterator of the objects associated with 60 * the longest prefix matching matching key, or null 61 * if no matching entry is found. 62 */ get(String text)63 public Iterator<V> get(String text) { 64 return get(text, 0); 65 } 66 67 /** 68 * Gets an iterator of the objects associated with the 69 * longest prefix matching string key starting at the 70 * specified position. 71 * 72 * @param text The text to be matched with prefixes. 73 * @param start The start index of of the text 74 * @return An iterator of the objects associated with the 75 * longest prefix matching matching key, or null if no 76 * matching entry is found. 77 */ get(CharSequence text, int start)78 public Iterator<V> get(CharSequence text, int start) { 79 return get(text, start, null); 80 } 81 get(CharSequence text, int start, Output output)82 public Iterator<V> get(CharSequence text, int start, Output output) { 83 LongestMatchHandler<V> handler = new LongestMatchHandler<V>(); 84 find(text, start, handler, output); 85 if (output != null) { 86 output.matchLength = handler.getMatchLength(); 87 } 88 return handler.getMatches(); 89 } 90 find(CharSequence text, ResultHandler<V> handler)91 public void find(CharSequence text, ResultHandler<V> handler) { 92 find(text, 0, handler, null); 93 } 94 find(CharSequence text, int offset, ResultHandler<V> handler)95 public void find(CharSequence text, int offset, ResultHandler<V> handler) { 96 find(text, offset, handler, null); 97 } 98 find(CharSequence text, int offset, ResultHandler<V> handler, Output output)99 private void find(CharSequence text, int offset, ResultHandler<V> handler, Output output) { 100 CharIterator chitr = new CharIterator(text, offset, _ignoreCase); 101 find(_root, chitr, handler, output); 102 } 103 find(Node node, CharIterator chitr, ResultHandler<V> handler, Output output)104 private synchronized void find(Node node, CharIterator chitr, ResultHandler<V> handler, Output output) { 105 Iterator<V> values = node.values(); 106 if (values != null) { 107 if (!handler.handlePrefixMatch(chitr.processedLength(), values)) { 108 return; 109 } 110 } 111 112 Node nextMatch = node.findMatch(chitr, output); 113 if (nextMatch != null) { 114 find(nextMatch, chitr, handler, output); 115 } 116 } 117 putLeadCodePoints(UnicodeSet output)118 public void putLeadCodePoints(UnicodeSet output) { 119 _root.putLeadCodePoints(output); 120 } 121 122 public static class CharIterator implements Iterator<Character> { 123 private boolean _ignoreCase; 124 private CharSequence _text; 125 private int _nextIdx; 126 private int _startIdx; 127 128 private Character _remainingChar; 129 CharIterator(CharSequence text, int offset, boolean ignoreCase)130 CharIterator(CharSequence text, int offset, boolean ignoreCase) { 131 _text = text; 132 _nextIdx = _startIdx = offset; 133 _ignoreCase = ignoreCase; 134 } 135 136 /* (non-Javadoc) 137 * @see java.util.Iterator#hasNext() 138 */ 139 @Override hasNext()140 public boolean hasNext() { 141 if (_nextIdx == _text.length() && _remainingChar == null) { 142 return false; 143 } 144 return true; 145 } 146 147 /* (non-Javadoc) 148 * @see java.util.Iterator#next() 149 */ 150 @Override next()151 public Character next() { 152 if (_nextIdx == _text.length() && _remainingChar == null) { 153 return null; 154 } 155 Character next; 156 if (_remainingChar != null) { 157 next = _remainingChar; 158 _remainingChar = null; 159 } else { 160 if (_ignoreCase) { 161 int cp = UCharacter.foldCase(Character.codePointAt(_text, _nextIdx), true); 162 _nextIdx = _nextIdx + Character.charCount(cp); 163 164 char[] chars = Character.toChars(cp); 165 next = chars[0]; 166 if (chars.length == 2) { 167 _remainingChar = chars[1]; 168 } 169 } else { 170 next = _text.charAt(_nextIdx); 171 _nextIdx++; 172 } 173 } 174 return next; 175 } 176 177 /* (non-Javadoc) 178 * @see java.util.Iterator#remove() 179 */ 180 @Override remove()181 public void remove() { 182 throw new UnsupportedOperationException("remove() not supproted"); 183 } 184 nextIndex()185 public int nextIndex() { 186 return _nextIdx; 187 } 188 processedLength()189 public int processedLength() { 190 if (_remainingChar != null) { 191 throw new IllegalStateException("In the middle of surrogate pair"); 192 } 193 return _nextIdx - _startIdx; 194 } 195 } 196 197 /** 198 * Callback handler for processing prefix matches used by 199 * find method. 200 */ 201 public interface ResultHandler<V> { 202 /** 203 * Handles a prefix key match 204 * 205 * @param matchLength Matched key's length 206 * @param values An iterator of the objects associated with the matched key 207 * @return Return true to continue the search in the trie, false to quit. 208 */ handlePrefixMatch(int matchLength, Iterator<V> values)209 public boolean handlePrefixMatch(int matchLength, Iterator<V> values); 210 } 211 212 private static class LongestMatchHandler<V> implements ResultHandler<V> { 213 private Iterator<V> matches = null; 214 private int length = 0; 215 216 @Override handlePrefixMatch(int matchLength, Iterator<V> values)217 public boolean handlePrefixMatch(int matchLength, Iterator<V> values) { 218 if (matchLength > length) { 219 length = matchLength; 220 matches = values; 221 } 222 return true; 223 } 224 getMatches()225 public Iterator<V> getMatches() { 226 return matches; 227 } 228 getMatchLength()229 public int getMatchLength() { 230 return length; 231 } 232 } 233 234 /** 235 * Inner class representing a text node in the trie. 236 */ 237 private class Node { 238 private char[] _text; 239 private List<V> _values; 240 private List<Node> _children; 241 Node()242 private Node() { 243 } 244 Node(char[] text, List<V> values, List<Node> children)245 private Node(char[] text, List<V> values, List<Node> children) { 246 _text = text; 247 _values = values; 248 _children = children; 249 } 250 charCount()251 public int charCount() { 252 return _text == null ? 0 : _text.length; 253 } 254 values()255 public Iterator<V> values() { 256 if (_values == null) { 257 return null; 258 } 259 return _values.iterator(); 260 } 261 add(CharIterator chitr, V value)262 public void add(CharIterator chitr, V value) { 263 StringBuilder buf = new StringBuilder(); 264 while (chitr.hasNext()) { 265 buf.append(chitr.next()); 266 } 267 add(toCharArray(buf), 0, value); 268 } 269 findMatch(CharIterator chitr, Output output)270 public Node findMatch(CharIterator chitr, Output output) { 271 if (_children == null) { 272 return null; 273 } 274 if (!chitr.hasNext()) { 275 if (output != null) { 276 output.partialMatch = true; 277 } 278 return null; 279 } 280 Node match = null; 281 Character ch = chitr.next(); 282 for (Node child : _children) { 283 if (ch < child._text[0]) { 284 break; 285 } 286 if (ch == child._text[0]) { 287 if (child.matchFollowing(chitr, output)) { 288 match = child; 289 } 290 break; 291 } 292 } 293 return match; 294 } 295 putLeadCodePoints(UnicodeSet output)296 public void putLeadCodePoints(UnicodeSet output) { 297 if (_children == null) { 298 return; 299 } 300 for (Node child : _children) { 301 char c0 = child._text[0]; 302 if (!UCharacter.isHighSurrogate(c0)) { 303 output.add(c0); 304 } else if (child.charCount() >= 2) { 305 output.add(Character.codePointAt(child._text, 0)); 306 } else if (child._children != null) { 307 // Construct all possible code points from grandchildren. 308 for (Node grandchild : child._children) { 309 char c1 = grandchild._text[0]; 310 int cp = Character.toCodePoint(c0, c1); 311 output.add(cp); 312 } 313 } 314 } 315 } 316 add(char[] text, int offset, V value)317 private void add(char[] text, int offset, V value) { 318 if (text.length == offset) { 319 _values = addValue(_values, value); 320 return; 321 } 322 323 if (_children == null) { 324 _children = new LinkedList<Node>(); 325 Node child = new Node(subArray(text, offset), addValue(null, value), null); 326 _children.add(child); 327 return; 328 } 329 330 // walk through children 331 ListIterator<Node> litr = _children.listIterator(); 332 while (litr.hasNext()) { 333 Node next = litr.next(); 334 if (text[offset] < next._text[0]) { 335 litr.previous(); 336 break; 337 } 338 if (text[offset] == next._text[0]) { 339 int matchLen = next.lenMatches(text, offset); 340 if (matchLen == next._text.length) { 341 // full match 342 next.add(text, offset + matchLen, value); 343 } else { 344 // partial match, create a branch 345 next.split(matchLen); 346 next.add(text, offset + matchLen, value); 347 } 348 return; 349 } 350 } 351 // add a new child to this node 352 litr.add(new Node(subArray(text, offset), addValue(null, value), null)); 353 } 354 matchFollowing(CharIterator chitr, Output output)355 private boolean matchFollowing(CharIterator chitr, Output output) { 356 boolean matched = true; 357 int idx = 1; 358 while (idx < _text.length) { 359 if(!chitr.hasNext()) { 360 if (output != null) { 361 output.partialMatch = true; 362 } 363 matched = false; 364 break; 365 } 366 Character ch = chitr.next(); 367 if (ch != _text[idx]) { 368 matched = false; 369 break; 370 } 371 idx++; 372 } 373 return matched; 374 } 375 lenMatches(char[] text, int offset)376 private int lenMatches(char[] text, int offset) { 377 int textLen = text.length - offset; 378 int limit = _text.length < textLen ? _text.length : textLen; 379 int len = 0; 380 while (len < limit) { 381 if (_text[len] != text[offset + len]) { 382 break; 383 } 384 len++; 385 } 386 return len; 387 } 388 389 private void split(int offset) { 390 // split the current node at the offset 391 char[] childText = subArray(_text, offset); 392 _text = subArray(_text, 0, offset); 393 394 // add the Node representing after the offset as a child 395 Node child = new Node(childText, _values, _children); 396 _values = null; 397 398 _children = new LinkedList<Node>(); 399 _children.add(child); 400 } 401 402 private List<V> addValue(List<V> list, V value) { 403 if (list == null) { 404 list = new LinkedList<V>(); 405 } 406 list.add(value); 407 return list; 408 } 409 } 410 411 private static char[] toCharArray(CharSequence text) { 412 char[] array = new char[text.length()]; 413 for (int i = 0; i < array.length; i++) { 414 array[i] = text.charAt(i); 415 } 416 return array; 417 } 418 419 private static char[] subArray(char[] array, int start) { 420 if (start == 0) { 421 return array; 422 } 423 char[] sub = new char[array.length - start]; 424 System.arraycopy(array, start, sub, 0, sub.length); 425 return sub; 426 } 427 428 private static char[] subArray(char[] array, int start, int limit) { 429 if (start == 0 && limit == array.length) { 430 return array; 431 } 432 char[] sub = new char[limit - start]; 433 System.arraycopy(array, start, sub, 0, limit - start); 434 return sub; 435 } 436 } 437