1 /* 2 ********************************************************************** 3 * Copyright (c) 2006-2007, Google and others. All Rights Reserved. 4 ********************************************************************** 5 * Author: Mark Davis 6 ********************************************************************** 7 */ 8 package org.unicode.cldr.util; 9 10 import java.util.Collections; 11 import java.util.Iterator; 12 import java.util.Map; 13 import java.util.Map.Entry; 14 import java.util.Set; 15 import java.util.TreeMap; 16 import java.util.TreeSet; 17 18 import org.unicode.cldr.util.Dictionary.Matcher.Status; 19 20 /** 21 * This is a simple dictionary class used for testing. Should be in the package usertest, but it's a pain to rename 22 * files in CVS. 23 * 24 * @author markdavis 25 */ 26 public class SimpleDictionary<T> extends Dictionary<T> { 27 private TreeMap<CharSequence, T> data = new TreeMap<CharSequence, T>(); 28 private Set<CharSequence> possibleMatchesBefore; 29 private Set<CharSequence> possibleMatchesAfter; 30 private Status finalStatus; 31 boolean done; 32 private int matchCount; 33 private CharSequence lastEntry = ""; 34 35 public static class SimpleDictionaryBuilder<T> implements DictionaryBuilder<T> { 36 make(Map<CharSequence, T> source)37 public SimpleDictionary<T> make(Map<CharSequence, T> source) { 38 return new SimpleDictionary(source); 39 } 40 41 } 42 SimpleDictionary(Map<CharSequence, T> source)43 private SimpleDictionary(Map<CharSequence, T> source) { 44 for (CharSequence text : source.keySet()) { 45 addMapping(text, source.get(text)); 46 } 47 } 48 49 // private List<T> results; 50 // 51 // public static <U> Dictionary<U> getInstance(Map<CharSequence, U> source) { 52 // SimpleDictionary<U> result = new SimpleDictionary<U>(); 53 // // if T is not an integer, then get the results, and assign each a number 54 // Map<U,Integer> valueToInt = new HashMap<U,Integer>(); 55 // result.results = new ArrayList<U>(); 56 // int count = 0; 57 // for (U value : source.values()) { 58 // Integer oldValue = valueToInt.get(value); 59 // if (oldValue == null) { 60 // result.results.add(value); 61 // valueToInt.put(value, count++); 62 // } 63 // } 64 // 65 // 66 // for (CharSequence text : source.keySet()) { 67 // result.addMapping(text, valueToInt.get(source.get(text))); 68 // } 69 // return result; 70 // } 71 addMapping(CharSequence text, T result)72 private void addMapping(CharSequence text, T result) { 73 if (CharUtilities.compare(text, lastEntry) <= 0) { 74 throw new IllegalArgumentException("Each string must be greater than the previous one."); 75 } 76 lastEntry = text; 77 data.put(text, result); 78 } 79 getMapping()80 public Iterator<Entry<CharSequence, T>> getMapping() { 81 return Collections.unmodifiableMap(data).entrySet().iterator(); 82 } 83 84 @Override getMatcher()85 public Matcher<T> getMatcher() { 86 return new SimpleMatcher(); 87 } 88 89 private class SimpleMatcher extends Matcher<T> { 90 91 @Override setOffset(int offset)92 public Matcher<T> setOffset(int offset) { 93 possibleMatchesBefore = data.keySet(); 94 done = false; 95 matchValue = null; 96 return super.setOffset(offset); 97 } 98 99 /** 100 * Dumb implementation. 101 * 102 */ 103 @Override next()104 public Status next() { 105 // There are two degenerate cases: our dictionary is empty, or we are called on an empty string. 106 107 // As long as we get matches, we return them. 108 // When we fail, we return one of two statuses 109 // DONE if there were no more matches in the dictionary past the last match 110 // SINGLE if there was a longer match, plus the longest offset that we successfully got to. 111 112 // if we have already narrowed down to the end, just return the status 113 // everything should already be set to make this work. 114 if (done) { 115 if (finalStatus == Status.NONE) { 116 matchValue = null; 117 } 118 return finalStatus; 119 } 120 121 CharSequence firstMatch = null; 122 123 while (text.hasCharAt(matchEnd)) { 124 // get the next probe value 125 ++matchEnd; 126 CharSequence probe = text.subSequence(offset, matchEnd); 127 128 // narrow to the items that start with the probe 129 // this filters Before into After 130 131 firstMatch = filterToStartsWith(probe); 132 133 // if we have a full match, return it 134 135 if (firstMatch != null && firstMatch.length() == probe.length()) { 136 possibleMatchesAfter.remove(firstMatch); 137 possibleMatchesBefore = possibleMatchesAfter; 138 matchValue = data.get(firstMatch); 139 finalStatus = Status.NONE; 140 return Status.MATCH; 141 } 142 143 // See if we've run out 144 // example: probe = "man" 145 // three cases, based on what was in the set 146 // {man}: return DONE (we did a match before) 147 // {man, many}: return SINGLE 148 // {man, many, manner}: return PLURAL 149 // {many}: return SINGLE 150 if (possibleMatchesAfter.size() == 0) { 151 --matchEnd; // backup 152 break; 153 } 154 possibleMatchesBefore = possibleMatchesAfter; 155 } 156 // no more work to be done. 157 done = true; 158 159 if (matchEnd == offset || possibleMatchesBefore.size() == 0) { 160 matchValue = null; 161 return finalStatus = Status.NONE; 162 } 163 if (firstMatch == null) { // just in case we skipped the above loop 164 firstMatch = possibleMatchesBefore.iterator().next(); 165 } 166 matchValue = data.get(firstMatch); 167 matchCount = possibleMatchesBefore.size(); 168 return finalStatus = Status.PARTIAL; 169 } 170 nextUniquePartial()171 public boolean nextUniquePartial() { 172 // we have already set the matchValue, so we don't need to reset here. 173 return matchCount == 1; 174 } 175 176 /** 177 * Returns the first matching item, if there is one, and 178 * filters the rest of the list to those that match the probe. 179 * 180 * @param probe 181 * @return 182 */ filterToStartsWith(CharSequence probe)183 private CharSequence filterToStartsWith(CharSequence probe) { 184 CharSequence result = null; 185 possibleMatchesAfter = new TreeSet<CharSequence>(); 186 for (CharSequence item : possibleMatchesBefore) { 187 if (startsWith(item, probe)) { 188 if (result == null) { 189 result = item; 190 } 191 possibleMatchesAfter.add(item); 192 } 193 } 194 return result; 195 } 196 contains(CharSequence text)197 public boolean contains(CharSequence text) { 198 return data.containsKey(text); 199 } 200 get(CharSequence text)201 public T get(CharSequence text) { 202 return data.get(text); 203 } 204 205 // public static class GeqGetter<K> { 206 // private Set<K> set; 207 // private Iterator<K> iterator; 208 // private Comparator<? super K> comparator; 209 // boolean done; 210 // K lastItem = null; 211 // private int count; 212 // 213 // public GeqGetter(Set<K> set, Comparator<K> comparator) { 214 // this.comparator = comparator; 215 // this.set = set; 216 // reset(); 217 // } 218 // 219 // public GeqGetter reset() { 220 // iterator = set.iterator(); 221 // done = false; 222 // return this; 223 // } 224 // 225 // /** 226 // * Returns least element greater than or equal to probe. 227 // * @param probe 228 // * @return 229 // */ 230 // public K getGeq(K probe) { 231 // if (lastItem != null && comparator.compare(lastItem, probe) >= 0) { 232 // return lastItem; 233 // } 234 // count = 0; 235 // while (iterator.hasNext()) { 236 // lastItem = iterator.next(); 237 // ++count; 238 // if (comparator.compare(lastItem, probe) >= 0) { 239 // return lastItem; 240 // } 241 // } 242 // lastItem = null; 243 // return lastItem; 244 // } 245 // } 246 247 @Override getDictionary()248 public Dictionary<T> getDictionary() { 249 // TODO Auto-generated method stub 250 return SimpleDictionary.this; 251 } 252 253 // public static class CharSequenceComparator implements Comparator<CharSequence> { 254 // 255 // public int compare(CharSequence first, CharSequence second) { 256 // int minLen = first.length(); 257 // if (minLen > second.length()) 258 // minLen = second.length(); 259 // int result; 260 // for (int i = 0; i < minLen; ++i) { 261 // if (0 != (result = first.charAt(i) - second.charAt(i))) 262 // return result; 263 // } 264 // return first.length() - second.length(); 265 // } 266 // 267 // } 268 } 269 startsWith(CharSequence first, CharSequence possiblePrefix)270 public static boolean startsWith(CharSequence first, CharSequence possiblePrefix) { 271 if (first.length() < possiblePrefix.length()) { 272 return false; 273 } 274 for (int i = 0; i < possiblePrefix.length(); ++i) { 275 if (first.charAt(i) != possiblePrefix.charAt(i)) { 276 return false; 277 } 278 } 279 return true; 280 } 281 282 }