1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4  *******************************************************************************
5  * Copyright (C) 2013-2015, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  *******************************************************************************
8  */
9 package com.ibm.icu.text;
10 
11 import java.util.ArrayList;
12 import java.util.Collection;
13 import java.util.Collections;
14 import java.util.HashMap;
15 import java.util.HashSet;
16 import java.util.LinkedHashSet;
17 import java.util.List;
18 import java.util.Map;
19 import java.util.Map.Entry;
20 import java.util.Set;
21 import java.util.TreeSet;
22 
23 import com.ibm.icu.text.PluralRules.FixedDecimal;
24 import com.ibm.icu.text.PluralRules.KeywordStatus;
25 import com.ibm.icu.util.Output;
26 
27 /**
28  * @author markdavis
29  * Refactor samples as first step to moving into CLDR
30  *
31  * @internal
32  * @deprecated This API is ICU internal only.
33  */
34 @Deprecated
35 public class PluralSamples {
36 
37     private PluralRules pluralRules;
38     private final Map<String, List<Double>> _keySamplesMap;
39 
40     /**
41      * @internal
42      * @deprecated This API is ICU internal only.
43      */
44     @Deprecated
45     public final Map<String, Boolean> _keyLimitedMap;
46     private final Map<String, Set<FixedDecimal>> _keyFractionSamplesMap;
47     private final Set<FixedDecimal> _fractionSamples;
48 
49     /**
50      * @internal
51      * @deprecated This API is ICU internal only.
52      */
53     @Deprecated
PluralSamples(PluralRules pluralRules)54     public PluralSamples(PluralRules pluralRules) {
55         this.pluralRules = pluralRules;
56         Set<String> keywords = pluralRules.getKeywords();
57         // ensure both _keySamplesMap and _keyLimitedMap are initialized.
58         // If this were allowed to vary on a per-call basis, we'd have to recheck and
59         // possibly rebuild the samples cache.  Doesn't seem worth it.
60         // This 'max samples' value only applies to keywords that are unlimited, for
61         // other keywords all the matching values are returned.  This might be a lot.
62         final int MAX_SAMPLES = 3;
63 
64         Map<String, Boolean> temp = new HashMap<String, Boolean>();
65         for (String k : keywords) {
66             temp.put(k, pluralRules.isLimited(k));
67         }
68         _keyLimitedMap = temp;
69 
70         Map<String, List<Double>> sampleMap = new HashMap<String, List<Double>>();
71         int keywordsRemaining = keywords.size();
72 
73         int limit = 128; // Math.max(5, getRepeatLimit() * MAX_SAMPLES) * 2;
74 
75         for (int i = 0; keywordsRemaining > 0 && i < limit; ++i) {
76             keywordsRemaining = addSimpleSamples(pluralRules, MAX_SAMPLES, sampleMap, keywordsRemaining, i / 2.0);
77         }
78         // Hack for Celtic
79         keywordsRemaining = addSimpleSamples(pluralRules, MAX_SAMPLES, sampleMap, keywordsRemaining, 1000000);
80 
81 
82         // collect explicit samples
83         Map<String, Set<FixedDecimal>> sampleFractionMap = new HashMap<String, Set<FixedDecimal>>();
84         Set<FixedDecimal> mentioned = new TreeSet<FixedDecimal>();
85         // make sure that there is at least one 'other' value
86         Map<String, Set<FixedDecimal>> foundKeywords = new HashMap<String, Set<FixedDecimal>>();
87         for (FixedDecimal s : mentioned) {
88             String keyword = pluralRules.select(s);
89             addRelation(foundKeywords, keyword, s);
90         }
91         main:
92             if (foundKeywords.size() != keywords.size()) {
93                 for (int i = 1; i < 1000; ++i) {
94                     boolean done = addIfNotPresent(i, mentioned, foundKeywords);
95                     if (done) break main;
96                 }
97                 // if we are not done, try tenths
98                 for (int i = 10; i < 1000; ++i) {
99                     boolean done = addIfNotPresent(i/10d, mentioned, foundKeywords);
100                     if (done) break main;
101                 }
102                 System.out.println("Failed to find sample for each keyword: " + foundKeywords + "\n\t" + pluralRules + "\n\t" + mentioned);
103             }
104         mentioned.add(new FixedDecimal(0)); // always there
105         mentioned.add(new FixedDecimal(1)); // always there
106         mentioned.add(new FixedDecimal(2)); // always there
107         mentioned.add(new FixedDecimal(0.1,1)); // always there
108         mentioned.add(new FixedDecimal(1.99,2)); // always there
109         mentioned.addAll(fractions(mentioned));
110         for (FixedDecimal s : mentioned) {
111             String keyword = pluralRules.select(s);
112             Set<FixedDecimal> list = sampleFractionMap.get(keyword);
113             if (list == null) {
114                 list = new LinkedHashSet<FixedDecimal>(); // will be sorted because the iteration is
115                 sampleFractionMap.put(keyword, list);
116             }
117             list.add(s);
118         }
119 
120         if (keywordsRemaining > 0) {
121             for (String k : keywords) {
122                 if (!sampleMap.containsKey(k)) {
123                     sampleMap.put(k, Collections.<Double>emptyList());
124                 }
125                 if (!sampleFractionMap.containsKey(k)) {
126                     sampleFractionMap.put(k, Collections.<FixedDecimal>emptySet());
127                 }
128             }
129         }
130 
131         // Make lists immutable so we can return them directly
132         for (Entry<String, List<Double>> entry : sampleMap.entrySet()) {
133             sampleMap.put(entry.getKey(), Collections.unmodifiableList(entry.getValue()));
134         }
135         for (Entry<String, Set<FixedDecimal>> entry : sampleFractionMap.entrySet()) {
136             sampleFractionMap.put(entry.getKey(), Collections.unmodifiableSet(entry.getValue()));
137         }
138         _keySamplesMap = sampleMap;
139         _keyFractionSamplesMap = sampleFractionMap;
140         _fractionSamples = Collections.unmodifiableSet(mentioned);
141     }
142 
addSimpleSamples(PluralRules pluralRules, final int MAX_SAMPLES, Map<String, List<Double>> sampleMap, int keywordsRemaining, double val)143     private int addSimpleSamples(PluralRules pluralRules, final int MAX_SAMPLES, Map<String, List<Double>> sampleMap,
144             int keywordsRemaining, double val) {
145         String keyword = pluralRules.select(val);
146         boolean keyIsLimited = _keyLimitedMap.get(keyword);
147 
148         List<Double> list = sampleMap.get(keyword);
149         if (list == null) {
150             list = new ArrayList<Double>(MAX_SAMPLES);
151             sampleMap.put(keyword, list);
152         } else if (!keyIsLimited && list.size() == MAX_SAMPLES) {
153             return keywordsRemaining;
154         }
155         list.add(Double.valueOf(val));
156 
157         if (!keyIsLimited && list.size() == MAX_SAMPLES) {
158             --keywordsRemaining;
159         }
160         return keywordsRemaining;
161     }
162 
addRelation(Map<String, Set<FixedDecimal>> foundKeywords, String keyword, FixedDecimal s)163     private void addRelation(Map<String, Set<FixedDecimal>> foundKeywords, String keyword, FixedDecimal s) {
164         Set<FixedDecimal> set = foundKeywords.get(keyword);
165         if (set == null) {
166             foundKeywords.put(keyword, set = new HashSet<FixedDecimal>());
167         }
168         set.add(s);
169     }
170 
addIfNotPresent(double d, Set<FixedDecimal> mentioned, Map<String, Set<FixedDecimal>> foundKeywords)171     private boolean addIfNotPresent(double d, Set<FixedDecimal> mentioned, Map<String, Set<FixedDecimal>> foundKeywords) {
172         FixedDecimal numberInfo = new FixedDecimal(d);
173         String keyword = pluralRules.select(numberInfo);
174         if (!foundKeywords.containsKey(keyword) || keyword.equals("other")) {
175             addRelation(foundKeywords, keyword, numberInfo);
176             mentioned.add(numberInfo);
177             if (keyword.equals("other")) {
178                 if (foundKeywords.get("other").size() > 1) {
179                     return true;
180                 }
181             }
182         }
183         return false;
184     }
185 
186     private static final int[] TENS = {1, 10, 100, 1000, 10000, 100000, 1000000};
187 
188     private static final int LIMIT_FRACTION_SAMPLES = 3;
189 
190 
fractions(Set<FixedDecimal> original)191     private Set<FixedDecimal> fractions(Set<FixedDecimal> original) {
192         Set<FixedDecimal> toAddTo = new HashSet<FixedDecimal>();
193 
194         Set<Integer> result = new HashSet<Integer>();
195         for (FixedDecimal base1 : original) {
196             result.add((int)base1.integerValue);
197         }
198         List<Integer> ints = new ArrayList<Integer>(result);
199         Set<String> keywords = new HashSet<String>();
200 
201         for (int j = 0; j < ints.size(); ++j) {
202             Integer base = ints.get(j);
203             String keyword = pluralRules.select(base);
204             if (keywords.contains(keyword)) {
205                 continue;
206             }
207             keywords.add(keyword);
208             toAddTo.add(new FixedDecimal(base,1)); // add .0
209             toAddTo.add(new FixedDecimal(base,2)); // add .00
210             Integer fract = getDifferentCategory(ints, keyword);
211             if (fract >= TENS[LIMIT_FRACTION_SAMPLES-1]) { // make sure that we always get the value
212                 toAddTo.add(new FixedDecimal(base + "." + fract));
213             } else {
214                 for (int visibleFractions = 1; visibleFractions < LIMIT_FRACTION_SAMPLES; ++visibleFractions) {
215                     for (int i = 1; i <= visibleFractions; ++i) {
216                         // with visible fractions = 3, and fract = 1, then we should get x.10, 0.01
217                         // with visible fractions = 3, and fract = 15, then we should get x.15, x.15
218                         if (fract >= TENS[i]) {
219                             continue;
220                         }
221                         toAddTo.add(new FixedDecimal(base + fract/(double)TENS[i], visibleFractions));
222                     }
223                 }
224             }
225         }
226         return toAddTo;
227     }
228 
getDifferentCategory(List<Integer> ints, String keyword)229     private Integer getDifferentCategory(List<Integer> ints, String keyword) {
230         for (int i = ints.size() - 1; i >= 0; --i) {
231             Integer other = ints.get(i);
232             String keywordOther = pluralRules.select(other);
233             if (!keywordOther.equals(keyword)) {
234                 return other;
235             }
236         }
237         return 37;
238     }
239 
240     /**
241      * @internal
242      * @deprecated This API is ICU internal only.
243      */
244     @Deprecated
getStatus(String keyword, int offset, Set<Double> explicits, Output<Double> uniqueValue)245     public KeywordStatus getStatus(String keyword, int offset, Set<Double> explicits, Output<Double> uniqueValue) {
246         if (uniqueValue != null) {
247             uniqueValue.value = null;
248         }
249 
250         if (!pluralRules.getKeywords().contains(keyword)) {
251             return KeywordStatus.INVALID;
252         }
253         Collection<Double> values = pluralRules.getAllKeywordValues(keyword);
254         if (values == null) {
255             return KeywordStatus.UNBOUNDED;
256         }
257         int originalSize = values.size();
258 
259         if (explicits == null) {
260             explicits = Collections.emptySet();
261         }
262 
263         // Quick check on whether there are multiple elements
264 
265         if (originalSize > explicits.size()) {
266             if (originalSize == 1) {
267                 if (uniqueValue != null) {
268                     uniqueValue.value = values.iterator().next();
269                 }
270                 return KeywordStatus.UNIQUE;
271             }
272             return KeywordStatus.BOUNDED;
273         }
274 
275         // Compute if the quick test is insufficient.
276 
277         HashSet<Double> subtractedSet = new HashSet<Double>(values);
278         for (Double explicit : explicits) {
279             subtractedSet.remove(explicit - offset);
280         }
281         if (subtractedSet.size() == 0) {
282             return KeywordStatus.SUPPRESSED;
283         }
284 
285         if (uniqueValue != null && subtractedSet.size() == 1) {
286             uniqueValue.value = subtractedSet.iterator().next();
287         }
288 
289         return originalSize == 1 ? KeywordStatus.UNIQUE : KeywordStatus.BOUNDED;
290     }
291 
getKeySamplesMap()292     Map<String, List<Double>> getKeySamplesMap() {
293         return _keySamplesMap;
294     }
295 
getKeyFractionSamplesMap()296     Map<String, Set<FixedDecimal>> getKeyFractionSamplesMap() {
297         return _keyFractionSamplesMap;
298     }
299 
getFractionSamples()300     Set<FixedDecimal> getFractionSamples() {
301         return _fractionSamples;
302     }
303 
304     /**
305      * Returns all the values that trigger this keyword, or null if the number of such
306      * values is unlimited.
307      *
308      * @param keyword the keyword
309      * @return the values that trigger this keyword, or null.  The returned collection
310      * is immutable. It will be empty if the keyword is not defined.
311      * @stable ICU 4.8
312      */
313 
getAllKeywordValues(String keyword)314     Collection<Double> getAllKeywordValues(String keyword) {
315         // HACK for now
316         if (!pluralRules.getKeywords().contains(keyword)) {
317             return Collections.<Double>emptyList();
318         }
319         Collection<Double> result = getKeySamplesMap().get(keyword);
320 
321         // We depend on MAX_SAMPLES here.  It's possible for a conjunction
322         // of unlimited rules that 'looks' unlimited to return a limited
323         // number of values.  There's no bounds to this limited number, in
324         // general, because you can construct arbitrarily complex rules.  Since
325         // we always generate 3 samples if a rule is really unlimited, that's
326         // where we put the cutoff.
327         if (result.size() > 2 && !_keyLimitedMap.get(keyword)) {
328             return null;
329         }
330         return result;
331     }
332 }
333