1 package org.unicode.cldr.test;
2 
3 import java.io.DataInputStream;
4 import java.io.File;
5 import java.io.FileNotFoundException;
6 import java.io.IOException;
7 import java.io.InputStream;
8 import java.lang.ref.Reference;
9 import java.lang.ref.SoftReference;
10 import java.util.Collections;
11 import java.util.HashMap;
12 import java.util.HashSet;
13 import java.util.Map;
14 import java.util.Set;
15 
16 import org.unicode.cldr.tool.CldrVersion;
17 import org.unicode.cldr.util.CLDRConfig;
18 import org.unicode.cldr.util.CLDRFile;
19 import org.unicode.cldr.util.CldrUtility;
20 import org.unicode.cldr.util.Factory;
21 import org.unicode.cldr.util.InputStreamFactory;
22 import org.unicode.cldr.util.Pair;
23 import org.unicode.cldr.util.PathHeader;
24 import org.unicode.cldr.util.RegexLookup;
25 import org.unicode.cldr.util.StringId;
26 
27 import com.ibm.icu.util.ICUUncheckedIOException;
28 
29 /**
30  * This class should be used to detect when a path should be included in the set
31  * of outdated items, because the value in the locale has not changed since the
32  * last time the English changed. For efficiency, it only keeps a record of
33  * those values in trunk that are out of date.
34  * <p>
35  * That is, to get the set of outdated values, the caller should do the following:
36  * <ol>
37  * <li>Test to see if the user has voted for a value for the path. If so, don't include.
38  * <li>Test to see if the winning value for the path is different from the trunk value. If so, don't include.
39  * <li>Test with isOutdated(path) to see if the trunk value was outdated. If not, don't include.
40  * <li>Otherwise, include this path in the set of outdated items.
41  * </ol>
42  * <p>
43  * To update the data file, use GenerateBirth.java.
44  */
45 public class OutdatedPaths {
46     public static String FORMAT_KEY = "odp-1";
47     public static final String NO_VALUE = "�";
48 
49     public static final String OUTDATED_DIR = "births/";
50     public static final String OUTDATED_ENGLISH_DATA = "outdatedEnglish.data";
51     public static final String OUTDATED_DATA = "outdated.data";
52 
53     private static final boolean DEBUG = CldrUtility.getProperty("OutdatedPathsDebug", false);
54 
55     private final Map<String, Set<Long>> localeToData = new HashMap<>();
56     private final Map<Long, Pair<CldrVersion,String>> pathToBirthNPrevious = new HashMap<>();
57 
58     /**
59      * Creates a new OutdatedPaths, using the data file "outdated.data" in the same directory as this class.
60      *
61      * @param version
62      */
OutdatedPaths()63     public OutdatedPaths() {
64         this(null);
65     }
66 
67     /**
68      * Loads the data from the specified directory, using the data file "outdated.data".
69      *
70      * @param directory
71      */
OutdatedPaths(String directory)72     public OutdatedPaths(String directory) {
73         Map<Long, PathHeader> id2header = new HashMap<>(); // for debugging
74 
75         readLocaleToPaths(directory, id2header);
76 
77         // now previous English
78 
79         readBirthValues(directory, id2header, pathToBirthNPrevious);
80     }
81 
readLocaleToPaths(String directory, Map<Long, PathHeader> id2header)82     private void readLocaleToPaths(String directory, Map<Long, PathHeader> id2header) {
83         try {
84             DataInputStream dataIn = openDataInput(directory, OUTDATED_DATA);
85             String key = dataIn.readUTF();
86             if (!OutdatedPaths.FORMAT_KEY.equals(key)) {
87                 throw new IllegalArgumentException("Mismatch in FORMAT_KEY: expected=" + OutdatedPaths.FORMAT_KEY + ", read=" + key);
88             }
89             if (DEBUG) {
90                 Factory factory = CLDRConfig.getInstance().getMainAndAnnotationsFactory();
91                 id2header = getIdToPath(factory);
92             }
93             while (true) {
94                 String locale = dataIn.readUTF();
95                 if (locale.equals("$END$")) {
96                     break;
97                 }
98                 if (DEBUG) {
99                     System.out.println("OutdatedPaths: Locale: " + locale);
100                 }
101                 final HashSet<Long> data = new HashSet<>();
102                 int size = dataIn.readInt();
103                 for (int i = 0; i < size; ++i) {
104                     long item = dataIn.readLong();
105                     data.add(item);
106                     if (DEBUG) {
107                         System.out.println(locale + "\t" + id2header.get(item));
108                     }
109                 }
110                 localeToData.put(locale, Collections.unmodifiableSet(data));
111             }
112             dataIn.close();
113         } catch (IOException e) {
114             throw new ICUUncheckedIOException("Data Not Available", e);
115         }
116     }
117 
readBirthValues(String outdatedDirectory, Map<Long, PathHeader> id2header, Map<Long, Pair<CldrVersion, String>> pathToBirthNPrevious2)118     public static void readBirthValues(String outdatedDirectory, Map<Long, PathHeader> id2header,
119         Map<Long, Pair<CldrVersion, String>> pathToBirthNPrevious2) {
120         try {
121             DataInputStream dataIn = openDataInput(outdatedDirectory, OUTDATED_ENGLISH_DATA);
122             String key = dataIn.readUTF();
123             if (!OutdatedPaths.FORMAT_KEY.equals(key)) {
124                 throw new IllegalArgumentException("Mismatch in FORMAT_KEY: expected=" + OutdatedPaths.FORMAT_KEY + ", read=" + key);
125             }
126 
127             int size = dataIn.readInt();
128             if (DEBUG) {
129                 System.out.println("English Data");
130             }
131             for (int i = 0; i < size; ++i) {
132                 long pathId = dataIn.readLong();
133                 String previous = dataIn.readUTF();
134                 CldrVersion birth = CldrVersion.from(dataIn.readUTF());
135 
136                 if (DEBUG) {
137                     System.out.println("en\t(" + previous + ")"
138                         + (id2header == null ? "" : "\t" + id2header.get(pathId)));
139                 }
140                 pathToBirthNPrevious2.put(pathId, Pair.of(birth,previous).freeze());
141             }
142             String finalCheck = dataIn.readUTF();
143             if (!finalCheck.equals("$END$")) {
144                 throw new IllegalArgumentException("Corrupted " + OUTDATED_ENGLISH_DATA);
145             }
146             dataIn.close();
147         } catch (IOException e) {
148             throw new ICUUncheckedIOException("Data Not Available", e);
149         }
150     }
151 
getIdToPath(Factory factory)152     public Map<Long, PathHeader> getIdToPath(Factory factory) {
153         Map<Long, PathHeader> result = new HashMap<>();
154         CLDRFile english = factory.make("en", true);
155         PathHeader.Factory pathHeaders = PathHeader.getFactory(english);
156         for (String s : english) {
157             long id = StringId.getId(s);
158             PathHeader pathHeader = pathHeaders.fromPath(s);
159             result.put(id, pathHeader);
160         }
161         return result;
162     }
163 
openDataInput(String directory, String filename)164     private static DataInputStream openDataInput(String directory, String filename) throws FileNotFoundException {
165         String dataFileName = filename;
166         InputStream fileInputStream = directory == null
167             ? CldrUtility.getInputStream(OUTDATED_DIR + dataFileName) :
168                 //: new FileInputStream(new File(directory, dataFileName));
169                 InputStreamFactory.createInputStream(new File(directory, dataFileName));
170             DataInputStream dataIn = new DataInputStream(fileInputStream);
171             return dataIn;
172     }
173 
174     /**
175      * Returns true if the value for the path is outdated in trunk. See class
176      * description for more info.
177      *
178      * @param distinguishedPath
179      * @return true if the string is outdated
180      */
isOutdated(String locale, String distinguishedPath)181     public boolean isOutdated(String locale, String distinguishedPath) {
182         Set<Long> data = localeToData.get(locale);
183         if (data == null) {
184             return false;
185         }
186         long id = StringId.getId(distinguishedPath);
187         boolean result = data.contains(id);
188         if (result == false) {
189             return false;
190         }
191         Boolean toSkip = SKIP_PATHS.get(distinguishedPath);
192         if (toSkip != null) {
193             return false;
194         }
195         return result;
196     }
197 
198     /**
199      * The same as isOutdated, but also returns paths that aren't skipped.
200      *
201      * @param locale
202      * @param distinguishedPath
203      * @return
204      */
isRawOutdated(String locale, String distinguishedPath)205     public boolean isRawOutdated(String locale, String distinguishedPath) {
206         Set<Long> data = localeToData.get(locale);
207         if (data == null) {
208             return false;
209         }
210         long id = StringId.getId(distinguishedPath);
211         return data.contains(id);
212     }
213 
214     /**
215      * Is this path to be skipped? (because the English is normally irrelevant).
216      *
217      * @param distinguishedPath
218      * @return
219      */
isSkipped(String distinguishedPath)220     public boolean isSkipped(String distinguishedPath) {
221         return SKIP_PATHS.get(distinguishedPath) != null;
222     }
223 
224     /**
225      * Returns true if the value for the path is outdated in trunk. See class
226      * description for more info.
227      *
228      * @param distinguishedPath
229      * @return true if the string is outdated
230      */
getPreviousEnglish(String distinguishedPath)231     public String getPreviousEnglish(String distinguishedPath) {
232         long id = StringId.getId(distinguishedPath);
233         Pair<CldrVersion, String> value = pathToBirthNPrevious.get(id);
234         return value == null ? null : value.getSecond();
235     }
236 
getEnglishBirth(String distinguishedPath)237     public CldrVersion getEnglishBirth(String distinguishedPath) {
238         long id = StringId.getId(distinguishedPath);
239         Pair<CldrVersion, String> value = pathToBirthNPrevious.get(id);
240         return value == null ? null : value.getFirst();
241     }
242 
243     static RegexLookup<Boolean> SKIP_PATHS = new RegexLookup<Boolean>()
244         .add("/exemplarCharacters", true)
245         .add("/references", true)
246         .add("/delimiters/[^/]*uotation", true)
247         .add("/posix", true)
248         .add("/pattern", true)
249         .add("/fields/field[^/]*/displayName", true)
250         .add("/dateFormatItem", true)
251         .add("/numbers/symbols", true)
252         .add("/fallback", true)
253         .add("/quarters", true)
254         .add("/months", true);
255 
256     /**
257      * Returns the number of outdated paths.
258      *
259      * @param locale
260      * @return number of outdated paths.
261      */
countOutdated(String locale)262     public int countOutdated(String locale) {
263         Set<Long> data = localeToData.get(locale);
264         return data == null ? 0 : data.size();
265     }
266 
getInstance()267     public static OutdatedPaths getInstance() {
268         OutdatedPaths outdatedPaths = SINGLETON.get();
269         if (outdatedPaths == null) {
270             outdatedPaths = new OutdatedPaths();
271             SINGLETON = new SoftReference<>(outdatedPaths);
272         }
273         return outdatedPaths;
274     }
275 
276     private static Reference<OutdatedPaths> SINGLETON = new SoftReference<>(null);
277 }
278