1 package org.unicode.cldr.test;
2 
3 import java.io.DataInputStream;
4 import java.io.File;
5 import java.io.FileNotFoundException;
6 import java.io.IOException;
7 import java.io.InputStream;
8 import java.lang.ref.Reference;
9 import java.lang.ref.SoftReference;
10 import java.util.Collections;
11 import java.util.HashMap;
12 import java.util.HashSet;
13 import java.util.Map;
14 import java.util.Set;
15 
16 import org.unicode.cldr.util.CLDRConfig;
17 import org.unicode.cldr.util.CLDRFile;
18 import org.unicode.cldr.util.CldrUtility;
19 import org.unicode.cldr.util.Factory;
20 import org.unicode.cldr.util.InputStreamFactory;
21 import org.unicode.cldr.util.PathHeader;
22 import org.unicode.cldr.util.RegexLookup;
23 import org.unicode.cldr.util.StringId;
24 
25 import com.ibm.icu.util.ICUUncheckedIOException;
26 
/**
 * This class should be used to detect when a path should be included in the set
 * of outdated items, because the value in the locale has not changed since the
 * last time the English changed. For efficiency, it only keeps a record of
 * those values in trunk that are out of date.
 * <p>
 * That is, to get the set of outdated values, the caller should do the following:
 * <ol>
 * <li>Test to see if the user has voted for a value for the path. If so, don't include.
 * <li>Test to see if the winning value for the path is different from the trunk value. If so, don't include.
 * <li>Test with isOutdated(locale, path) to see if the trunk value was outdated. If not, don't include.
 * <li>Otherwise, include this path in the set of outdated items.
 * </ol>
 * <p>
 * To update the data file, use GenerateBirth.java.
 */
43 public class OutdatedPaths {
44 
45     public static final String OUTDATED_DIR = "births/";
46     public static final String OUTDATED_ENGLISH_DATA = "outdatedEnglish.data";
47     public static final String OUTDATED_DATA = "outdated.data";
48 
49     private static final boolean DEBUG = CldrUtility.getProperty("OutdatedPathsDebug", false);
50 
51     private final HashMap<String, Set<Long>> localeToData = new HashMap<String, Set<Long>>();
52     private final HashMap<Long, String> pathToPrevious = new HashMap<Long, String>();
53 
54     /**
55      * Creates a new OutdatedPaths, using the data file "outdated.data" in the same directory as this class.
56      *
57      * @param version
58      */
OutdatedPaths()59     public OutdatedPaths() {
60         this(null);
61     }
62 
63     /**
64      * Loads the data from the specified directory, using the data file "outdated.data".
65      *
66      * @param directory
67      */
OutdatedPaths(String directory)68     public OutdatedPaths(String directory) {
69         try {
70             DataInputStream dataIn = openDataInput(directory, OUTDATED_DATA);
71             Map<Long, PathHeader> id2header = new HashMap<Long, PathHeader>();
72             if (DEBUG) {
73                 Factory factory = CLDRConfig.getInstance().getMainAndAnnotationsFactory();
74                 id2header = getIdToPath(factory);
75             }
76             while (true) {
77                 String locale = dataIn.readUTF();
78                 if (locale.equals("$END$")) {
79                     break;
80                 }
81                 if (DEBUG) {
82                     System.out.println("OutdatedPaths: Locale: " + locale);
83                 }
84                 final HashSet<Long> data = new HashSet<Long>();
85                 int size = dataIn.readInt();
86                 for (int i = 0; i < size; ++i) {
87                     long item = dataIn.readLong();
88                     data.add(item);
89                     if (DEBUG) {
90                         System.out.println(locale + "\t" + id2header.get(item));
91                     }
92                 }
93                 localeToData.put(locale, Collections.unmodifiableSet(data));
94             }
95             dataIn.close();
96 
97             // now previous English
98 
99             dataIn = openDataInput(directory, OUTDATED_ENGLISH_DATA);
100             int size = dataIn.readInt();
101             if (DEBUG) {
102                 System.out.println("English Data");
103             }
104             for (int i = 0; i < size; ++i) {
105                 long pathId = dataIn.readLong();
106                 String previous = dataIn.readUTF();
107                 if (DEBUG) {
108                     System.out.println("en\t(" + previous + ")\t" + id2header.get(pathId));
109                 }
110                 pathToPrevious.put(pathId, previous);
111             }
112             String finalCheck = dataIn.readUTF();
113             if (!finalCheck.equals("$END$")) {
114                 throw new IllegalArgumentException("Corrupted " + OUTDATED_ENGLISH_DATA);
115             }
116             dataIn.close();
117 
118         } catch (IOException e) {
119             throw new ICUUncheckedIOException("Data Not Available", e);
120         }
121     }
122 
getIdToPath(Factory factory)123     public Map<Long, PathHeader> getIdToPath(Factory factory) {
124         Map<Long, PathHeader> result = new HashMap<Long, PathHeader>();
125         CLDRFile english = factory.make("en", true);
126         PathHeader.Factory pathHeaders = PathHeader.getFactory(english);
127         for (String s : english) {
128             long id = StringId.getId(s);
129             PathHeader pathHeader = pathHeaders.fromPath(s);
130             result.put(id, pathHeader);
131         }
132         return result;
133     }
134 
135     @SuppressWarnings("resource")
openDataInput(String directory, String filename)136     private DataInputStream openDataInput(String directory, String filename) throws FileNotFoundException {
137         String dataFileName = filename;
138         InputStream fileInputStream = directory == null
139             ? CldrUtility.getInputStream(OUTDATED_DIR + dataFileName) :
140             //: new FileInputStream(new File(directory, dataFileName));
141             InputStreamFactory.createInputStream(new File(directory, dataFileName));
142         DataInputStream dataIn = new DataInputStream(fileInputStream);
143         return dataIn;
144     }
145 
146     /**
147      * Returns true if the value for the path is outdated in trunk. See class
148      * description for more info.
149      *
150      * @param distinguishedPath
151      * @return true if the string is outdated
152      */
isOutdated(String locale, String distinguishedPath)153     public boolean isOutdated(String locale, String distinguishedPath) {
154         Set<Long> data = localeToData.get(locale);
155         if (data == null) {
156             return false;
157         }
158         long id = StringId.getId(distinguishedPath);
159         boolean result = data.contains(id);
160         if (result == false) {
161             return false;
162         }
163         Boolean toSkip = SKIP_PATHS.get(distinguishedPath);
164         if (toSkip != null) {
165             return false;
166         }
167         return result;
168     }
169 
170     /**
171      * The same as isOutdated, but also returns paths that aren't skipped.
172      *
173      * @param locale
174      * @param distinguishedPath
175      * @return
176      */
isRawOutdated(String locale, String distinguishedPath)177     public boolean isRawOutdated(String locale, String distinguishedPath) {
178         Set<Long> data = localeToData.get(locale);
179         if (data == null) {
180             return false;
181         }
182         long id = StringId.getId(distinguishedPath);
183         return data.contains(id);
184     }
185 
186     /**
187      * Is this path to be skipped? (because the English is normally irrelevant).
188      *
189      * @param distinguishedPath
190      * @return
191      */
isSkipped(String distinguishedPath)192     public boolean isSkipped(String distinguishedPath) {
193         return SKIP_PATHS.get(distinguishedPath) != null;
194     }
195 
196     /**
197      * Returns true if the value for the path is outdated in trunk. See class
198      * description for more info.
199      *
200      * @param distinguishedPath
201      * @return true if the string is outdated
202      */
getPreviousEnglish(String distinguishedPath)203     public String getPreviousEnglish(String distinguishedPath) {
204         long id = StringId.getId(distinguishedPath);
205         return pathToPrevious.get(id);
206     }
207 
208     static RegexLookup<Boolean> SKIP_PATHS = new RegexLookup<Boolean>()
209         .add("/exemplarCharacters", true)
210         .add("/references", true)
211         .add("/delimiters/[^/]*uotation", true)
212         .add("/posix", true)
213         .add("/pattern", true)
214         .add("/fields/field[^/]*/displayName", true)
215         .add("/dateFormatItem", true)
216         .add("/numbers/symbols", true)
217         .add("/fallback", true)
218         .add("/quarters", true)
219         .add("/months", true);
220 
221     /**
222      * Returns the number of outdated paths.
223      *
224      * @param locale
225      * @return number of outdated paths.
226      */
countOutdated(String locale)227     public int countOutdated(String locale) {
228         Set<Long> data = localeToData.get(locale);
229         return data == null ? 0 : data.size();
230     }
231 
getInstance()232     public static OutdatedPaths getInstance() {
233         OutdatedPaths outdatedPaths = SINGLETON.get();
234         if (outdatedPaths == null) {
235             outdatedPaths = new OutdatedPaths();
236             SINGLETON = new SoftReference<OutdatedPaths>(outdatedPaths);
237         }
238         return outdatedPaths;
239     }
240 
241     private static Reference<OutdatedPaths> SINGLETON = new SoftReference<OutdatedPaths>(null);
242 }
243