1 package org.unicode.cldr.tool;
2 
3 import java.io.DataOutputStream;
4 import java.io.File;
5 import java.io.FileOutputStream;
6 import java.io.IOException;
7 import java.io.PrintWriter;
8 import java.util.ArrayList;
9 import java.util.Collections;
10 import java.util.HashMap;
11 import java.util.HashSet;
12 import java.util.List;
13 import java.util.Map;
14 import java.util.Map.Entry;
15 import java.util.Set;
16 import java.util.TreeMap;
17 import java.util.TreeSet;
18 import java.util.regex.Matcher;
19 import java.util.regex.Pattern;
20 
21 import org.unicode.cldr.draft.FileUtilities;
22 import org.unicode.cldr.test.OutdatedPaths;
23 import org.unicode.cldr.tool.Option.Options;
24 import org.unicode.cldr.util.CLDRConfig;
25 import org.unicode.cldr.util.CLDRFile;
26 import org.unicode.cldr.util.CLDRPaths;
27 import org.unicode.cldr.util.CldrUtility;
28 import org.unicode.cldr.util.Factory;
29 import org.unicode.cldr.util.LanguageTagParser;
30 import org.unicode.cldr.util.Pair;
31 import org.unicode.cldr.util.PathUtilities;
32 import org.unicode.cldr.util.PatternCache;
33 import org.unicode.cldr.util.SimpleFactory;
34 import org.unicode.cldr.util.StringId;
35 
36 import com.google.common.base.Objects;
37 import com.ibm.icu.impl.Relation;
38 import com.ibm.icu.impl.Row;
39 import com.ibm.icu.impl.Row.R3;
40 import com.ibm.icu.lang.CharSequences;
41 import com.ibm.icu.util.ICUException;
42 
43 public class GenerateBirth {
44     private static boolean DEBUG = false;
45 
46     private static final List<CldrVersion> VERSIONS_WITH_TRUNK_DESCENDING = CldrVersion.CLDR_VERSIONS_DESCENDING;
47 
48     static final CldrVersion[] VERSIONS = VERSIONS_WITH_TRUNK_DESCENDING.toArray(
49         new CldrVersion[VERSIONS_WITH_TRUNK_DESCENDING.size()]); // hack for now; should change to list
50 
51     static final Factory[] factories = new Factory[VERSIONS.length - 1]; // hack for now; should change to list
52 
53     final static Options myOptions = new Options()
54         .add("target", ".*", CLDRPaths.BIRTH_DATA_DIR,
55             "The target directory for building the text files that show the results.")
56         .add("log", ".*", CLDRPaths.AUX_DIRECTORY + "births/" + CldrVersion.trunk.getVersionInfo().getVersionString(2, 4),
57             "The target directory for building the text files that show the results.")
58         .add(
59             "file",
60             ".*",
61             ".*",
62             "Filter the information based on file name, using a regex argument. The '.xml' is removed from the file before filtering")
63         .add("previous", "Stop after writing the English previous data.")
64         .add("debug", "Debug");
65 
main(String[] args)66     public static void main(String[] args) throws IOException {
67         System.out.println("Run TestOutdatedPaths.java -v to see a listing of changes.");
68         myOptions.parse(args, true);
69         try {
70             CldrVersion.checkVersions(); // verify versions up to date
71         } catch (Exception e) {
72             throw new ICUException("This tool can only be run if the archive of released versions matching CldrVersion is available.", e);
73         }
74 
75         // set up the CLDR Factories
76 
77         DEBUG = myOptions.get("debug").doesOccur();
78 
79         final CLDRConfig config = CLDRConfig.getInstance();
80 
81         String filePattern = myOptions.get("file").getValue();
82 
83         ArrayList<Factory> list = new ArrayList<>();
84         for (CldrVersion version : VERSIONS) {
85             if (version == CldrVersion.unknown) {
86                 continue;
87             }
88             List<File> paths = version.getPathsForFactory();
89 //            String base = version.getBaseDirectory();
90 //            File[] paths = version.compareTo(CldrVersion.v27_0) > 0 ? // warning, order is reversed
91 //                new File[] { new File(base + "common/main/") } :
92 //                    new File[] { new File(base + "common/main/"), new File(base + "common/annotations/") };
93 
94             System.out.println(version + ", " + paths);
95             Factory aFactory = SimpleFactory.make(paths.toArray(new File[paths.size()]), filePattern);
96             list.add(aFactory);
97         }
98         list.toArray(factories);
99 
100         final String dataDirectory = myOptions.get("target").getValue();
101         File dataDir = new File(dataDirectory);
102         if (!dataDir.isDirectory()) {
103             throw new IllegalArgumentException("-t value is not directory: " + dataDir);
104         }
105 
106         // load and process English
107 
108         String logDirectory = myOptions.get("log").getValue();
109 
110         System.out.println("en");
111         Births english = new Births("en");
112         english.writeBirth(logDirectory, "en", null);
113         english.writeBirthValues(dataDirectory + "/" + OutdatedPaths.OUTDATED_ENGLISH_DATA);
114 
115         Map<Long, Pair<CldrVersion, String>> pathToPrevious = new HashMap<>();
116 
117         // Verify that the write of English worked
118 
119         OutdatedPaths.readBirthValues(dataDirectory, null, pathToPrevious);
120         for (Entry<String, R3<CldrVersion, String, String>> entry : english.pathToBirthCurrentPrevious.entrySet()) {
121             String path = entry.getKey();
122             String previous = entry.getValue().get2();
123             CldrVersion birth = entry.getValue().get0();
124             if (previous == null) {
125                 previous = OutdatedPaths.NO_VALUE;
126             }
127             long id = StringId.getId(path);
128             Pair<CldrVersion, String> readValue = pathToPrevious.get(id);
129             CldrVersion birthRead = readValue == null ? null : readValue.getFirst();
130             String previousRead = readValue == null ? null : readValue.getSecond();
131             if (!Objects.equal(previous, previousRead) || !Objects.equal(birth, birthRead)) {
132                 throw new IllegalArgumentException("path: " + path
133                     + "\tprevious: " + previous + "\tread: " + readValue
134                     + "\tbirth: " + birth + "\tread: " + birthRead);
135             }
136         }
137 
138         // Set up the binary data files for all others
139 
140         File file = new File(dataDirectory + "/" + OutdatedPaths.OUTDATED_DATA);
141         final String outputDataFile = PathUtilities.getNormalizedPathString(file);
142         System.out.println("Writing data: " + outputDataFile);
143         DataOutputStream dataOut = new DataOutputStream(new FileOutputStream(file));
144         dataOut.writeUTF(OutdatedPaths.FORMAT_KEY);
145 
146         // Load and process all the locales
147 
148         TreeMap<String, Set<String>> localeToNewer = new TreeMap<>();
149         LanguageTagParser ltp = new LanguageTagParser();
150         for (String fileName : factories[0].getAvailable()) {
151             if (fileName.equals("en")) {
152                 continue;
153             }
154             if (!ltp.set(fileName).getRegion().isEmpty()) {
155                 continue; // skip region locales
156             }
157             // TODO skip default content locales
158             System.out.println(fileName);
159             Births other = new Births(fileName);
160             Set<String> newer = other.writeBirth(logDirectory, fileName, english);
161 
162             dataOut.writeUTF(fileName);
163             dataOut.writeInt(newer.size());
164             for (String item : newer) {
165                 long id = StringId.getId(item);
166                 dataOut.writeLong(id);
167                 if (DEBUG) {
168                     System.out.println(id + "\t" + item);
169                 }
170             }
171             localeToNewer.put(fileName, newer);
172         }
173         dataOut.writeUTF("$END$");
174         dataOut.close();
175 
176         // Doublecheck the data
177 
178         OutdatedPaths outdatedPaths = new OutdatedPaths(dataDirectory);
179         Set<String> needPrevious = new TreeSet<>();
180         int errorCount = 0;
181         for (Entry<String, Set<String>> localeAndNewer : localeToNewer.entrySet()) {
182             String locale = localeAndNewer.getKey();
183             System.out.println("Checking " + locale);
184             Set<String> newer = localeAndNewer.getValue();
185             if (newer.size() != outdatedPaths.countOutdated(locale)) {
186                 throw new IllegalArgumentException("broken: " + locale);
187             }
188             for (String xpath : newer) {
189                 boolean isOutdated = outdatedPaths.isRawOutdated(locale, xpath);
190                 if (!isOutdated) {
191                     System.out.println("Error, broken locale: " + locale + "\t" + StringId.getId(xpath) + "\t" + xpath);
192                     ++errorCount;
193                 }
194                 if (outdatedPaths.isSkipped(xpath)) {
195                     continue;
196                 }
197                 String previous = outdatedPaths.getPreviousEnglish(xpath);
198                 if (previous.isEmpty() != english.emptyPrevious.contains(xpath)) {
199                     System.out.println("previous.isEmpty() != original " + locale + "\t" + StringId.getId(xpath) + "\t"
200                         + xpath);
201                     needPrevious.add(xpath);
202                     ++errorCount;
203                 }
204             }
205         }
206         if (errorCount != 0) {
207             throw new IllegalArgumentException("Done, but " + errorCount + " errors");
208         } else {
209             System.out.println("Done, no errors");
210         }
211     }
212 
213     static class Births {
214         final Relation<CldrVersion, String> birthToPaths;
215         final Map<String, Row.R3<CldrVersion, String, String>> pathToBirthCurrentPrevious;
216         final String locale;
217         static final Pattern TYPE = PatternCache.get("\\[@type=\"([^\"]*)\"");
218         final Matcher typeMatcher = TYPE.matcher("");
219         Set<String> emptyPrevious = new HashSet<>();
220 
Births(String file)221         Births(String file) {
222             locale = file;
223             CLDRFile[] files = new CLDRFile[factories.length];
224             for (int i = 0; i < factories.length; ++i) {
225                 try {
226                     files[i] = factories[i].make(file, false);
227                 } catch (Exception e) {
228                     // stop when we fail to find
229                     System.out.println("Stopped at " + file + ", " + CldrVersion.CLDR_VERSIONS_DESCENDING.get(i));
230                     //e.printStackTrace();
231                     break;
232                 }
233             }
234             birthToPaths = Relation.of(new TreeMap<CldrVersion, Set<String>>(), TreeSet.class);
235             pathToBirthCurrentPrevious = new HashMap<>();
236             for (String xpath : files[0]) {
237                 xpath = xpath.intern();
238                 if (xpath.contains("[@type=\"ar\"]")) {
239                     int debug = 0;
240                 }
241                 String base = files[0].getStringValue(xpath);
242                 String previousValue = null;
243                 int i;
244                 CLDRFile lastFile = files[0];
245                 for (i = 1; i < files.length && files[i] != null; ++i) {
246                     String previous = files[i].getStringValue(xpath);
247                     if (previous == null) {
248                         previous = OutdatedPaths.NO_VALUE; // fixNullPrevious(xpath);
249                     }
250                     if (!CharSequences.equals(base, previous)) {
251                         if (previous != null) {
252                             previousValue = previous;
253                         }
254                         break;
255                     }
256                     lastFile = files[i];
257                 }
258                 CldrVersion version = CldrVersion.from(lastFile.getDtdVersionInfo());
259                 birthToPaths.put(version, xpath);
260                 pathToBirthCurrentPrevious.put(xpath, Row.of(version, base, previousValue));
261             }
262         }
263 
fixNullPrevious(String xpath)264         private String fixNullPrevious(String xpath) {
265             if (typeMatcher.reset(xpath).find()) {
266                 String type = typeMatcher.group(1);
267                 if (xpath.contains("metazone")) {
268                     return type.replace("_", " ");
269                 } else if (xpath.contains("zone")) {
270                     String[] splits = type.split("/");
271                     return splits[splits.length - 1].replace("_", " ");
272                 }
273                 return type;
274             }
275             return null;
276         }
277 
writeBirthValues(String file)278         public void writeBirthValues(String file) throws IOException {
279             DataOutputStream dataOut = new DataOutputStream(new FileOutputStream(file));
280             dataOut.writeUTF(OutdatedPaths.FORMAT_KEY);
281             System.out.println("Writing data: " + PathUtilities.getNormalizedPathString(file));
282             dataOut.writeInt(pathToBirthCurrentPrevious.size());
283 
284             // Load and process all the locales
285 
286             //TreeMap<String, Set<String>> localeToNewer = new TreeMap<String, Set<String>>();
287             for (Entry<String, R3<CldrVersion, String, String>> entry : pathToBirthCurrentPrevious.entrySet()) {
288                 String path = entry.getKey();
289                 R3<CldrVersion, String, String> birthCurrentPrevious = entry.getValue();
290                 CldrVersion birth = birthCurrentPrevious.get0();
291                 String current = birthCurrentPrevious.get1();
292                 String previous = birthCurrentPrevious.get2();
293                 long id = StringId.getId(path);
294                 dataOut.writeLong(id);
295                 final String previousString = previous == null ? OutdatedPaths.NO_VALUE : previous;
296                 dataOut.writeUTF(previousString);
297                 if (previous == null) {
298                     emptyPrevious.add(path);
299                 }
300                 dataOut.writeUTF(birth.toString());
301                 if (true) {
302                     System.out.println(id + "\t" + birth + "\t«" + current + "⇐" + previous + "»");
303                 }
304             }
305             dataOut.writeUTF("$END$");
306             dataOut.close();
307             emptyPrevious = Collections.unmodifiableSet(emptyPrevious);
308         }
309 
writeBirth(PrintWriter out, Births onlyNewer)310         Set<String> writeBirth(PrintWriter out, Births onlyNewer) {
311 
312             out.println("Loc\tVersion\tValue\tPrevValue\tEVersion\tEValue\tEPrevValue\tPath");
313 
314             Set<String> newer = new HashSet<>();
315             HashMap<Long, String> sanityCheck = new HashMap<>();
316             CldrVersion onlyNewerVersion = null;
317             String otherValue = "n/a";
318             String olderOtherValue = "n/a";
319             for (Entry<CldrVersion, Set<String>> entry2 : birthToPaths.keyValuesSet()) {
320                 CldrVersion version = entry2.getKey();
321                 for (String xpath : entry2.getValue()) {
322                     long id = StringId.getId(xpath);
323                     String old = sanityCheck.get(id);
324                     if (old != null) {
325                         throw new IllegalArgumentException("Path Collision " + xpath + ", old:" + old + ", id: " + id);
326                     } else {
327                         sanityCheck.put(id, xpath);
328                     }
329                     R3<CldrVersion, String, String> info = pathToBirthCurrentPrevious.get(xpath);
330                     if (onlyNewer != null) {
331 
332                         R3<CldrVersion, String, String> otherInfo = onlyNewer.pathToBirthCurrentPrevious.get(xpath);
333                         if (otherInfo == null) {
334                             continue;
335                         }
336                         // skip if not older than "comparison version"
337                         onlyNewerVersion = otherInfo.get0();
338                         if (!version.isOlderThan(onlyNewerVersion)) {
339                             continue;
340                         }
341                         otherValue = fixNull(otherInfo.get1());
342                         olderOtherValue = fixNull(otherInfo.get2());
343                         newer.add(xpath);
344                     }
345                     String value = fixNull(info.get1());
346                     String olderValue = fixNull(info.get2());
347 
348                     out.println(locale
349                         + "\t" + version
350                         + "\t" + value
351                         + "\t" + olderValue
352                         + "\t" + CldrUtility.ifNull(onlyNewerVersion, "n/a")
353                         + "\t" + otherValue
354                         + "\t" + olderOtherValue
355                         + "\t" + xpath);
356 
357                 }
358             }
359             return newer;
360         }
361 
fixNull(String value)362         private String fixNull(String value) {
363             if (value == null) {
364                 value = OutdatedPaths.NO_VALUE;
365             }
366             return value;
367         }
368 
writeBirth(String directory, String filename, Births onlyNewer)369         Set<String> writeBirth(String directory, String filename, Births onlyNewer) throws IOException {
370             PrintWriter out = FileUtilities.openUTF8Writer(directory, filename + ".txt");
371             Set<String> newer = writeBirth(out, onlyNewer);
372             out.close();
373             return newer;
374         }
375     }
376 }
377