1 package org.unicode.cldr.tool;
2 
3 import java.io.DataOutputStream;
4 import java.io.File;
5 import java.io.FileOutputStream;
6 import java.io.IOException;
7 import java.io.PrintWriter;
8 import java.util.ArrayList;
9 import java.util.Arrays;
10 import java.util.Collections;
11 import java.util.HashMap;
12 import java.util.HashSet;
13 import java.util.Map;
14 import java.util.Map.Entry;
15 import java.util.Set;
16 import java.util.TreeMap;
17 import java.util.TreeSet;
18 import java.util.regex.Matcher;
19 import java.util.regex.Pattern;
20 
21 import org.unicode.cldr.draft.FileUtilities;
22 import org.unicode.cldr.test.OutdatedPaths;
23 import org.unicode.cldr.tool.Option.Options;
24 import org.unicode.cldr.util.CLDRConfig;
25 import org.unicode.cldr.util.CLDRFile;
26 import org.unicode.cldr.util.CLDRPaths;
27 import org.unicode.cldr.util.Factory;
28 import org.unicode.cldr.util.LanguageTagParser;
29 import org.unicode.cldr.util.PatternCache;
30 import org.unicode.cldr.util.SimpleFactory;
31 import org.unicode.cldr.util.StringId;
32 
33 import com.ibm.icu.impl.Relation;
34 import com.ibm.icu.impl.Row;
35 import com.ibm.icu.impl.Row.R3;
36 import com.ibm.icu.lang.CharSequences;
37 
38 public class GenerateBirth {
    /** When true, extra per-path diagnostics are printed to stdout; set in main from the "debug" option. */
    private static boolean DEBUG = false;
40 
41     public enum Versions {
42         trunk, v31_0, v30_0, v29_0, v28_0, v27_0, v26_0, v25_0, v24_0, v23_1, v22_1, v21_0, v2_0_1, v1_9_1, v1_8_1, v1_7_2, v1_6_1, v1_5_1, v1_4_1, v1_3_0, v1_2_0, v1_1_1;
toString()43         public String toString() {
44             return this == Versions.trunk ? name() : name().substring(1).replace('_', '.');
45         };
46     }
47 
    /** All version constants, in declaration order (trunk first). */
    static final Versions[] VERSIONS = Versions.values();
    /** One factory per entry of VERSIONS, at the same index; populated by main before any Births is built. */
    static final Factory[] factories = new Factory[VERSIONS.length];
50 
51     final static Options myOptions = new Options()
52         .add("target", ".*", CLDRPaths.BIRTH_DATA_DIR,
53             "The target directory for building the text files that show the results.")
54         .add("log", ".*", CLDRPaths.TMP_DIRECTORY + "dropbox/births/",
55             "The target directory for building the text files that show the results.")
56         .add(
57             "file",
58             ".*",
59             ".*",
60             "Filter the information based on file name, using a regex argument. The '.xml' is removed from the file before filtering")
61         .add("previous", "Stop after writing the English previous data.")
62         .add("debug", "Debug");
63 
main(String[] args)64     public static void main(String[] args) throws IOException {
65         myOptions.parse(args, true);
66 
67         // set up the CLDR Factories
68 
69         DEBUG = myOptions.get("debug").doesOccur();
70 
71         final CLDRConfig config = CLDRConfig.getInstance();
72 
73         String filePattern = myOptions.get("file").getValue();
74 
75         ArrayList<Factory> list = new ArrayList<Factory>();
76         for (Versions version : VERSIONS) {
77             String base = version == Versions.trunk
78                 ? CLDRPaths.BASE_DIRECTORY
79                 : CLDRPaths.ARCHIVE_DIRECTORY + "cldr-" + version + "/";
80             File[] paths = version.compareTo(Versions.v27_0) > 0 // warning, order is reversed
81                 ? new File[] { new File(base + "common/main/") }
82                 : new File[] { new File(base + "common/main/"), new File(base + "common/annotations/") };
83             System.out.println(version + ", " + Arrays.asList(paths));
84             Factory aFactory = SimpleFactory.make(paths, filePattern);
85             list.add(aFactory);
86         }
87         list.toArray(factories);
88 
89         final String dataDirectory = myOptions.get("target").getValue();
90 
91         // load and process English
92 
93         String outputDirectory = myOptions.get("log").getValue();
94 
95         System.out.println("en");
96         Births english = new Births("en");
97         english.writeBirth(outputDirectory, "en", null);
98         english.writeBirthValues(dataDirectory + "/" + OutdatedPaths.OUTDATED_ENGLISH_DATA);
99 
100         // if (!myOptions.get("file").doesOccur()) {
101         // OutdatedPaths outdatedPaths = new OutdatedPaths(dataDirectory);
102         //
103         // return;
104         // }
105         // Set up the binary data file
106 
107         File file = new File(dataDirectory + "/" + OutdatedPaths.OUTDATED_DATA);
108         final String outputDataFile = file.getCanonicalPath();
109         System.out.println("Writing data: " + outputDataFile);
110         DataOutputStream dataOut = new DataOutputStream(new FileOutputStream(file));
111 
112         // Load and process all the locales
113 
114         TreeMap<String, Set<String>> localeToNewer = new TreeMap<String, Set<String>>();
115         LanguageTagParser ltp = new LanguageTagParser();
116         for (String fileName : factories[0].getAvailable()) {
117             if (fileName.equals("en")) {
118                 continue;
119             }
120             if (!ltp.set(fileName).getRegion().isEmpty()) {
121                 continue; // skip region locales
122             }
123             // TODO skip default content locales
124             System.out.println(fileName);
125             Births other = new Births(fileName);
126             Set<String> newer = other.writeBirth(outputDirectory, fileName, english);
127 
128             dataOut.writeUTF(fileName);
129             dataOut.writeInt(newer.size());
130             for (String item : newer) {
131                 long id = StringId.getId(item);
132                 dataOut.writeLong(id);
133                 if (DEBUG) {
134                     System.out.println(id + "\t" + item);
135                 }
136             }
137             localeToNewer.put(fileName, newer);
138         }
139         dataOut.writeUTF("$END$");
140         dataOut.close();
141 
142         // Doublecheck the data
143 
144         OutdatedPaths outdatedPaths = new OutdatedPaths(dataDirectory);
145         Set<String> needPrevious = new TreeSet<String>();
146         int errorCount = 0;
147         for (Entry<String, Set<String>> localeAndNewer : localeToNewer.entrySet()) {
148             String locale = localeAndNewer.getKey();
149             System.out.println("Checking " + locale);
150             Set<String> newer = localeAndNewer.getValue();
151             if (newer.size() != outdatedPaths.countOutdated(locale)) {
152                 throw new IllegalArgumentException("broken: " + locale);
153             }
154             for (String xpath : newer) {
155                 boolean isOutdated = outdatedPaths.isRawOutdated(locale, xpath);
156                 if (!isOutdated) {
157                     System.out.println("Error, broken locale: " + locale + "\t" + StringId.getId(xpath) + "\t" + xpath);
158                     ++errorCount;
159                 }
160                 if (outdatedPaths.isSkipped(xpath)) {
161                     continue;
162                 }
163                 String previous = outdatedPaths.getPreviousEnglish(xpath);
164                 if (previous.isEmpty() != english.emptyPrevious.contains(xpath)) {
165                     System.out.println("previous.isEmpty() != original" + locale + "\t" + StringId.getId(xpath) + "\t"
166                         + xpath);
167                     needPrevious.add(xpath);
168                     ++errorCount;
169                 }
170             }
171         }
172         if (errorCount != 0) {
173             throw new IllegalArgumentException("Done, but " + errorCount + " errors");
174         } else {
175             System.out.println("Done, no errors");
176         }
177     }
178 
179     static class Births {
180         final Relation<Versions, String> birthToPaths;
181         final Map<String, Row.R3<Versions, String, String>> pathToBirthCurrentPrevious;
182         final String locale;
183         static final Pattern TYPE = PatternCache.get("\\[@type=\"([^\"]*)\"");
184         final Matcher typeMatcher = TYPE.matcher("");
185         Set<String> emptyPrevious = new HashSet<String>();
186 
Births(String file)187         Births(String file) {
188             locale = file;
189             CLDRFile[] files = new CLDRFile[factories.length];
190             for (int i = 0; i < factories.length; ++i) {
191                 try {
192                     files[i] = factories[i].make(file, false);
193                 } catch (Exception e) {
194                     //e.printStackTrace();
195                     break;
196                 }
197             }
198             birthToPaths = Relation.of(new TreeMap<Versions, Set<String>>(), TreeSet.class);
199             pathToBirthCurrentPrevious = new HashMap<String, Row.R3<Versions, String, String>>();
200             for (String xpath : files[0]) {
201 
202                 xpath = xpath.intern();
203                 String base = files[0].getStringValue(xpath);
204                 String previousValue = null;
205                 int i;
206                 for (i = 1; i < files.length && files[i] != null; ++i) {
207                     String previous = files[i].getStringValue(xpath);
208                     if (previous == null) {
209                         previous = fixNullPrevious(xpath);
210                     }
211                     if (!CharSequences.equals(base, previous)) {
212                         if (previous != null) {
213                             previousValue = previous;
214                         }
215                         break;
216                     }
217                 }
218                 Versions version = VERSIONS[i - 1];
219                 birthToPaths.put(version, xpath);
220                 pathToBirthCurrentPrevious.put(xpath, Row.of(version, base, previousValue));
221             }
222         }
223 
fixNullPrevious(String xpath)224         private String fixNullPrevious(String xpath) {
225             if (typeMatcher.reset(xpath).find()) {
226                 String type = typeMatcher.group(1);
227                 if (xpath.contains("metazone")) {
228                     return type.replace("_", " ");
229                 } else if (xpath.contains("zone")) {
230                     String[] splits = type.split("/");
231                     return splits[splits.length - 1].replace("_", " ");
232                 }
233                 return type;
234             }
235             return null;
236         }
237 
writeBirthValues(String file)238         public void writeBirthValues(String file) throws IOException {
239             DataOutputStream dataOut = new DataOutputStream(new FileOutputStream(file));
240             System.out.println("Writing data: " + new File(file).getCanonicalPath());
241             dataOut.writeInt(pathToBirthCurrentPrevious.size());
242 
243             // Load and process all the locales
244 
245             //TreeMap<String, Set<String>> localeToNewer = new TreeMap<String, Set<String>>();
246             for (Entry<String, R3<Versions, String, String>> entry : pathToBirthCurrentPrevious.entrySet()) {
247                 String path = entry.getKey();
248                 R3<Versions, String, String> birthCurrentPrevious = entry.getValue();
249                 String previous = birthCurrentPrevious.get2();
250                 long id = StringId.getId(path);
251                 dataOut.writeLong(id);
252                 final String previousString = previous == null ? "" : previous;
253                 dataOut.writeUTF(previousString);
254                 if (previousString.isEmpty()) {
255                     emptyPrevious.add(path);
256                 }
257                 if (DEBUG) {
258                     System.out.println(id + "\t" + previous);
259                 }
260             }
261             dataOut.writeUTF("$END$");
262             dataOut.close();
263             emptyPrevious = Collections.unmodifiableSet(emptyPrevious);
264         }
265 
writeBirth(PrintWriter out, Births onlyNewer)266         Set<String> writeBirth(PrintWriter out, Births onlyNewer) {
267             Set<String> newer = new HashSet<String>();
268             HashMap<Long, String> sanityCheck = new HashMap<Long, String>();
269             Versions onlyNewerVersion = Versions.trunk;
270             String otherValue = "";
271             String olderOtherValue = "";
272             for (Entry<Versions, Set<String>> entry2 : birthToPaths.keyValuesSet()) {
273                 Versions version = entry2.getKey();
274                 for (String xpath : entry2.getValue()) {
275                     long id = StringId.getId(xpath);
276                     String old = sanityCheck.get(id);
277                     if (old != null) {
278                         throw new IllegalArgumentException("Path Collision " + xpath + ", old:" + old + ", id: " + id);
279                     } else {
280                         sanityCheck.put(id, xpath);
281                     }
282                     R3<Versions, String, String> info = pathToBirthCurrentPrevious.get(xpath);
283                     if (onlyNewer != null) {
284 
285                         R3<Versions, String, String> otherInfo = onlyNewer.pathToBirthCurrentPrevious.get(xpath);
286                         if (otherInfo == null) {
287                             continue;
288                         }
289                         // skip if older or same
290                         onlyNewerVersion = otherInfo.get0();
291                         if (version.compareTo(onlyNewerVersion) <= 0) {
292                             continue;
293                         }
294                         otherValue = fixNull(otherInfo.get1());
295                         olderOtherValue = fixNull(otherInfo.get2());
296                         newer.add(xpath);
297                     }
298                     String value = fixNull(info.get1());
299                     String olderValue = fixNull(info.get2());
300 
301                     out.println(locale
302                         + "\t" + version
303                         + "\t" + value
304                         + "\t" + olderValue
305                         + "\t" + onlyNewerVersion
306                         + "\t" + otherValue
307                         + "\t" + olderOtherValue
308                         + "\t" + xpath);
309 
310                 }
311             }
312             return newer;
313         }
314 
fixNull(String value)315         private String fixNull(String value) {
316             if (value == null) {
317                 value = "∅";
318             }
319             return value;
320         }
321 
writeBirth(String directory, String filename, Births onlyNewer)322         Set<String> writeBirth(String directory, String filename, Births onlyNewer) throws IOException {
323             PrintWriter out = FileUtilities.openUTF8Writer(directory, filename + ".txt");
324             Set<String> newer = writeBirth(out, onlyNewer);
325             out.close();
326             return newer;
327         }
328     }
329 }
330