1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4 *******************************************************************************
5 * Copyright (C) 2009-2014, International Business Machines Corporation and    *
6 * others. All Rights Reserved.                                                *
7 *******************************************************************************
8 */
9 
10 package com.ibm.icu.impl.text;
11 
12 import java.util.HashMap;
13 import java.util.Map;
14 
15 import com.ibm.icu.impl.ICUDebug;
16 import com.ibm.icu.text.CollationElementIterator;
17 import com.ibm.icu.text.Collator;
18 import com.ibm.icu.text.RbnfLenientScanner;
19 import com.ibm.icu.text.RbnfLenientScannerProvider;
20 import com.ibm.icu.text.RuleBasedCollator;
21 import com.ibm.icu.util.ULocale;
22 
23 /**
24  * Returns RbnfLenientScanners that use the old RuleBasedNumberFormat
25  * implementation behind setLenientParseMode, which is based on Collator.
26  * @internal
27  * @deprecated This API is ICU internal only.
28  */
29 @Deprecated
30 public class RbnfScannerProviderImpl implements RbnfLenientScannerProvider {
31     private static final boolean DEBUG = ICUDebug.enabled("rbnf");
32     private Map<String, RbnfLenientScanner> cache;
33 
34     /**
35      * @internal
36      * @deprecated This API is ICU internal only.
37      */
38     @Deprecated
RbnfScannerProviderImpl()39     public RbnfScannerProviderImpl() {
40         cache = new HashMap<String, RbnfLenientScanner>();
41     }
42 
43     /**
44      * Returns a collation-based scanner.
45      *
46      * Only primary differences are treated as significant.  This means that case
47      * differences, accent differences, alternate spellings of the same letter
48      * (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in
49      * matching the text.  In many cases, numerals will be accepted in place of words
50      * or phrases as well.
51      *
52      * For example, all of the following will correctly parse as 255 in English in
53      * lenient-parse mode:
54      * <br>"two hundred fifty-five"
55      * <br>"two hundred fifty five"
56      * <br>"TWO HUNDRED FIFTY-FIVE"
57      * <br>"twohundredfiftyfive"
58      * <br>"2 hundred fifty-5"
59      *
60      * The Collator used is determined by the locale that was
61      * passed to this object on construction.  The description passed to this object
62      * on construction may supply additional collation rules that are appended to the
63      * end of the default collator for the locale, enabling additional equivalences
64      * (such as adding more ignorable characters or permitting spelled-out version of
65      * symbols; see the demo program for examples).
66      *
67      * It's important to emphasize that even strict parsing is relatively lenient: it
68      * will accept some text that it won't produce as output.  In English, for example,
69      * it will correctly parse "two hundred zero" and "fifteen hundred".
70      *
71      * @internal
72      * @deprecated This API is ICU internal only.
73      */
74     @Deprecated
get(ULocale locale, String extras)75     public RbnfLenientScanner get(ULocale locale, String extras) {
76         RbnfLenientScanner result = null;
77         String key = locale.toString() + "/" + extras;
78         synchronized(cache) {
79             result = cache.get(key);
80             if (result != null) {
81                 return result;
82             }
83         }
84         result = createScanner(locale, extras);
85         synchronized(cache) {
86             cache.put(key, result);
87         }
88         return result;
89     }
90 
91     /**
92      * @internal
93      * @deprecated This API is ICU internal only.
94      */
95     @Deprecated
createScanner(ULocale locale, String extras)96     protected RbnfLenientScanner createScanner(ULocale locale, String extras) {
97         RuleBasedCollator collator = null;
98         try {
99             // create a default collator based on the locale,
100             // then pull out that collator's rules, append any additional
101             // rules specified in the description, and create a _new_
102             // collator based on the combination of those rules
103             collator = (RuleBasedCollator)Collator.getInstance(locale.toLocale());
104             if (extras != null) {
105                 String rules = collator.getRules() + extras;
106                 collator = new RuleBasedCollator(rules);
107             }
108             collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
109         }
110         catch (Exception e) {
111             // If we get here, it means we have a malformed set of
112             // collation rules, which hopefully won't happen
113             ///CLOVER:OFF
114             if (DEBUG){ // debug hook
115                 e.printStackTrace(); System.out.println("++++");
116             }
117             collator = null;
118             ///CLOVER:ON
119         }
120 
121         return new RbnfLenientScannerImpl(collator);
122     }
123 
124     private static class RbnfLenientScannerImpl implements RbnfLenientScanner {
125         private final RuleBasedCollator collator;
126 
RbnfLenientScannerImpl(RuleBasedCollator rbc)127         private RbnfLenientScannerImpl(RuleBasedCollator rbc) {
128             this.collator = rbc;
129         }
130 
allIgnorable(String s)131         public boolean allIgnorable(String s) {
132             CollationElementIterator iter = collator.getCollationElementIterator(s);
133 
134             int o = iter.next();
135             while (o != CollationElementIterator.NULLORDER
136                    && CollationElementIterator.primaryOrder(o) == 0) {
137                 o = iter.next();
138             }
139             return o == CollationElementIterator.NULLORDER;
140         }
141 
findText(String str, String key, int startingAt)142         public int[] findText(String str, String key, int startingAt) {
143             int p = startingAt;
144             int keyLen = 0;
145 
146             // basically just isolate smaller and smaller substrings of
147             // the target string (each running to the end of the string,
148             // and with the first one running from startingAt to the end)
149             // and then use prefixLength() to see if the search key is at
150             // the beginning of each substring.  This is excruciatingly
151             // slow, but it will locate the key and tell use how long the
152             // matching text was.
153             while (p < str.length() && keyLen == 0) {
154                 keyLen = prefixLength(str.substring(p), key);
155                 if (keyLen != 0) {
156                     return new int[] { p, keyLen };
157                 }
158                 ++p;
159             }
160             // if we make it to here, we didn't find it.  Return -1 for the
161             // location.  The length should be ignored, but set it to 0,
162             // which should be "safe"
163             return new int[] { -1, 0 };
164         }
165 
166         ///CLOVER:OFF
167         // The following method contains the same signature as findText
168         //  and has never been used by anything once.
169         @SuppressWarnings("unused")
findText2(String str, String key, int startingAt)170         public int[] findText2(String str, String key, int startingAt) {
171 
172             CollationElementIterator strIter = collator.getCollationElementIterator(str);
173             CollationElementIterator keyIter = collator.getCollationElementIterator(key);
174 
175             int keyStart = -1;
176 
177             strIter.setOffset(startingAt);
178 
179             int oStr = strIter.next();
180             int oKey = keyIter.next();
181             while (oKey != CollationElementIterator.NULLORDER) {
182                 while (oStr != CollationElementIterator.NULLORDER &&
183                        CollationElementIterator.primaryOrder(oStr) == 0) {
184                     oStr = strIter.next();
185                 }
186 
187                 while (oKey != CollationElementIterator.NULLORDER &&
188                        CollationElementIterator.primaryOrder(oKey) == 0) {
189                     oKey = keyIter.next();
190                 }
191 
192                 if (oStr == CollationElementIterator.NULLORDER) {
193                     return new int[] { -1, 0 };
194                 }
195 
196                 if (oKey == CollationElementIterator.NULLORDER) {
197                     break;
198                 }
199 
200                 if (CollationElementIterator.primaryOrder(oStr) ==
201                     CollationElementIterator.primaryOrder(oKey)) {
202                     keyStart = strIter.getOffset();
203                     oStr = strIter.next();
204                     oKey = keyIter.next();
205                 } else {
206                     if (keyStart != -1) {
207                         keyStart = -1;
208                         keyIter.reset();
209                     } else {
210                         oStr = strIter.next();
211                     }
212                 }
213             }
214 
215             return new int[] { keyStart, strIter.getOffset() - keyStart };
216         }
217         ///CLOVER:ON
218 
prefixLength(String str, String prefix)219         public int prefixLength(String str, String prefix) {
220             // Create two collation element iterators, one over the target string
221             // and another over the prefix.
222             //
223             // Previous code was matching "fifty-" against " fifty" and leaving
224             // the number " fifty-7" to parse as 43 (50 - 7).
225             // Also it seems that if we consume the entire prefix, that's ok even
226             // if we've consumed the entire string, so I switched the logic to
227             // reflect this.
228 
229             CollationElementIterator strIter = collator.getCollationElementIterator(str);
230             CollationElementIterator prefixIter = collator.getCollationElementIterator(prefix);
231 
232             // match collation elements between the strings
233             int oStr = strIter.next();
234             int oPrefix = prefixIter.next();
235 
236             while (oPrefix != CollationElementIterator.NULLORDER) {
237                 // skip over ignorable characters in the target string
238                 while (CollationElementIterator.primaryOrder(oStr) == 0 && oStr !=
239                        CollationElementIterator.NULLORDER) {
240                     oStr = strIter.next();
241                 }
242 
243                 // skip over ignorable characters in the prefix
244                 while (CollationElementIterator.primaryOrder(oPrefix) == 0 && oPrefix !=
245                        CollationElementIterator.NULLORDER) {
246                     oPrefix = prefixIter.next();
247                 }
248 
249                 // if skipping over ignorables brought to the end of
250                 // the prefix, we DID match: drop out of the loop
251                 if (oPrefix == CollationElementIterator.NULLORDER) {
252                     break;
253                 }
254 
255                 // if skipping over ignorables brought us to the end
256                 // of the target string, we didn't match and return 0
257                 if (oStr == CollationElementIterator.NULLORDER) {
258                     return 0;
259                 }
260 
261                 // match collation elements from the two strings
262                 // (considering only primary differences).  If we
263                 // get a mismatch, dump out and return 0
264                 if (CollationElementIterator.primaryOrder(oStr) !=
265                     CollationElementIterator.primaryOrder(oPrefix)) {
266                     return 0;
267                 }
268 
269                 // otherwise, advance to the next character in each string
270                 // and loop (we drop out of the loop when we exhaust
271                 // collation elements in the prefix)
272 
273                 oStr = strIter.next();
274                 oPrefix = prefixIter.next();
275             }
276 
277             int result = strIter.getOffset();
278             if (oStr != CollationElementIterator.NULLORDER) {
279                 --result;
280             }
281             return result;
282         }
283     }
284 }
285