1 /*
2  **********************************************************************
3  * Copyright (c) 2006-2007, Google and others.  All Rights Reserved.
4  **********************************************************************
5  * Author: Mark Davis
6  **********************************************************************
7  */
8 package org.unicode.cldr.util;
9 
10 import java.util.Collections;
11 import java.util.Iterator;
12 import java.util.Map;
13 import java.util.Map.Entry;
14 import java.util.Set;
15 import java.util.TreeMap;
16 import java.util.TreeSet;
17 
18 import org.unicode.cldr.util.Dictionary.Matcher.Status;
19 
20 /**
21  * This is a simple dictionary class used for testing. Should be in the package usertest, but it's a pain to rename
22  * files in CVS.
23  *
24  * @author markdavis
25  */
26 public class SimpleDictionary<T> extends Dictionary<T> {
27     private TreeMap<CharSequence, T> data = new TreeMap<CharSequence, T>();
28     private Set<CharSequence> possibleMatchesBefore;
29     private Set<CharSequence> possibleMatchesAfter;
30     private Status finalStatus;
31     boolean done;
32     private int matchCount;
33     private CharSequence lastEntry = "";
34 
35     public static class SimpleDictionaryBuilder<T> implements DictionaryBuilder<T> {
36 
make(Map<CharSequence, T> source)37         public SimpleDictionary<T> make(Map<CharSequence, T> source) {
38             return new SimpleDictionary(source);
39         }
40 
41     }
42 
SimpleDictionary(Map<CharSequence, T> source)43     private SimpleDictionary(Map<CharSequence, T> source) {
44         for (CharSequence text : source.keySet()) {
45             addMapping(text, source.get(text));
46         }
47     }
48 
49     // private List<T> results;
50     //
51     // public static <U> Dictionary<U> getInstance(Map<CharSequence, U> source) {
52     // SimpleDictionary<U> result = new SimpleDictionary<U>();
53     // // if T is not an integer, then get the results, and assign each a number
54     // Map<U,Integer> valueToInt = new HashMap<U,Integer>();
55     // result.results = new ArrayList<U>();
56     // int count = 0;
57     // for (U value : source.values()) {
58     // Integer oldValue = valueToInt.get(value);
59     // if (oldValue == null) {
60     // result.results.add(value);
61     // valueToInt.put(value, count++);
62     // }
63     // }
64     //
65     //
66     // for (CharSequence text : source.keySet()) {
67     // result.addMapping(text, valueToInt.get(source.get(text)));
68     // }
69     // return result;
70     // }
71 
addMapping(CharSequence text, T result)72     private void addMapping(CharSequence text, T result) {
73         if (CharUtilities.compare(text, lastEntry) <= 0) {
74             throw new IllegalArgumentException("Each string must be greater than the previous one.");
75         }
76         lastEntry = text;
77         data.put(text, result);
78     }
79 
getMapping()80     public Iterator<Entry<CharSequence, T>> getMapping() {
81         return Collections.unmodifiableMap(data).entrySet().iterator();
82     }
83 
84     @Override
getMatcher()85     public Matcher<T> getMatcher() {
86         return new SimpleMatcher();
87     }
88 
89     private class SimpleMatcher extends Matcher<T> {
90 
91         @Override
setOffset(int offset)92         public Matcher<T> setOffset(int offset) {
93             possibleMatchesBefore = data.keySet();
94             done = false;
95             matchValue = null;
96             return super.setOffset(offset);
97         }
98 
99         /**
100          * Dumb implementation.
101          *
102          */
103         @Override
next()104         public Status next() {
105             // There are two degenerate cases: our dictionary is empty, or we are called on an empty string.
106 
107             // As long as we get matches, we return them.
108             // When we fail, we return one of two statuses
109             // DONE if there were no more matches in the dictionary past the last match
110             // SINGLE if there was a longer match, plus the longest offset that we successfully got to.
111 
112             // if we have already narrowed down to the end, just return the status
113             // everything should already be set to make this work.
114             if (done) {
115                 if (finalStatus == Status.NONE) {
116                     matchValue = null;
117                 }
118                 return finalStatus;
119             }
120 
121             CharSequence firstMatch = null;
122 
123             while (text.hasCharAt(matchEnd)) {
124                 // get the next probe value
125                 ++matchEnd;
126                 CharSequence probe = text.subSequence(offset, matchEnd);
127 
128                 // narrow to the items that start with the probe
129                 // this filters Before into After
130 
131                 firstMatch = filterToStartsWith(probe);
132 
133                 // if we have a full match, return it
134 
135                 if (firstMatch != null && firstMatch.length() == probe.length()) {
136                     possibleMatchesAfter.remove(firstMatch);
137                     possibleMatchesBefore = possibleMatchesAfter;
138                     matchValue = data.get(firstMatch);
139                     finalStatus = Status.NONE;
140                     return Status.MATCH;
141                 }
142 
143                 // See if we've run out
144                 // example: probe = "man"
145                 // three cases, based on what was in the set
146                 // {man}: return DONE (we did a match before)
147                 // {man, many}: return SINGLE
148                 // {man, many, manner}: return PLURAL
149                 // {many}: return SINGLE
150                 if (possibleMatchesAfter.size() == 0) {
151                     --matchEnd; // backup
152                     break;
153                 }
154                 possibleMatchesBefore = possibleMatchesAfter;
155             }
156             // no more work to be done.
157             done = true;
158 
159             if (matchEnd == offset || possibleMatchesBefore.size() == 0) {
160                 matchValue = null;
161                 return finalStatus = Status.NONE;
162             }
163             if (firstMatch == null) { // just in case we skipped the above loop
164                 firstMatch = possibleMatchesBefore.iterator().next();
165             }
166             matchValue = data.get(firstMatch);
167             matchCount = possibleMatchesBefore.size();
168             return finalStatus = Status.PARTIAL;
169         }
170 
nextUniquePartial()171         public boolean nextUniquePartial() {
172             // we have already set the matchValue, so we don't need to reset here.
173             return matchCount == 1;
174         }
175 
176         /**
177          * Returns the first matching item, if there is one, and
178          * filters the rest of the list to those that match the probe.
179          *
180          * @param probe
181          * @return
182          */
filterToStartsWith(CharSequence probe)183         private CharSequence filterToStartsWith(CharSequence probe) {
184             CharSequence result = null;
185             possibleMatchesAfter = new TreeSet<CharSequence>();
186             for (CharSequence item : possibleMatchesBefore) {
187                 if (startsWith(item, probe)) {
188                     if (result == null) {
189                         result = item;
190                     }
191                     possibleMatchesAfter.add(item);
192                 }
193             }
194             return result;
195         }
196 
contains(CharSequence text)197         public boolean contains(CharSequence text) {
198             return data.containsKey(text);
199         }
200 
get(CharSequence text)201         public T get(CharSequence text) {
202             return data.get(text);
203         }
204 
205         // public static class GeqGetter<K> {
206         // private Set<K> set;
207         // private Iterator<K> iterator;
208         // private Comparator<? super K> comparator;
209         // boolean done;
210         // K lastItem = null;
211         // private int count;
212         //
213         // public GeqGetter(Set<K> set, Comparator<K> comparator) {
214         // this.comparator = comparator;
215         // this.set = set;
216         // reset();
217         // }
218         //
219         // public GeqGetter reset() {
220         // iterator = set.iterator();
221         // done = false;
222         // return this;
223         // }
224         //
225         // /**
226         // * Returns least element greater than or equal to probe.
227         // * @param probe
228         // * @return
229         // */
230         // public K getGeq(K probe) {
231         // if (lastItem != null && comparator.compare(lastItem, probe) >= 0) {
232         // return lastItem;
233         // }
234         // count = 0;
235         // while (iterator.hasNext()) {
236         // lastItem = iterator.next();
237         // ++count;
238         // if (comparator.compare(lastItem, probe) >= 0) {
239         // return lastItem;
240         // }
241         // }
242         // lastItem = null;
243         // return lastItem;
244         // }
245         // }
246 
247         @Override
getDictionary()248         public Dictionary<T> getDictionary() {
249             // TODO Auto-generated method stub
250             return SimpleDictionary.this;
251         }
252 
253         // public static class CharSequenceComparator implements Comparator<CharSequence> {
254         //
255         // public int compare(CharSequence first, CharSequence second) {
256         // int minLen = first.length();
257         // if (minLen > second.length())
258         // minLen = second.length();
259         // int result;
260         // for (int i = 0; i < minLen; ++i) {
261         // if (0 != (result = first.charAt(i) - second.charAt(i)))
262         // return result;
263         // }
264         // return first.length() - second.length();
265         // }
266         //
267         // }
268     }
269 
startsWith(CharSequence first, CharSequence possiblePrefix)270     public static boolean startsWith(CharSequence first, CharSequence possiblePrefix) {
271         if (first.length() < possiblePrefix.length()) {
272             return false;
273         }
274         for (int i = 0; i < possiblePrefix.length(); ++i) {
275             if (first.charAt(i) != possiblePrefix.charAt(i)) {
276                 return false;
277             }
278         }
279         return true;
280     }
281 
282 }