1 package org.unicode.cldr.icu;
2 
3 import java.io.BufferedReader;
4 import java.io.File;
5 import java.io.IOException;
6 import java.util.ArrayList;
7 import java.util.Arrays;
8 import java.util.HashMap;
9 import java.util.HashSet;
10 import java.util.Iterator;
11 import java.util.List;
12 import java.util.Map;
13 import java.util.Map.Entry;
14 import java.util.Set;
15 import java.util.regex.Pattern;
16 
17 import org.unicode.cldr.ant.CLDRConverterTool;
18 import org.unicode.cldr.icu.ResourceSplitter.SplitInfo;
19 import org.unicode.cldr.tool.Option;
20 import org.unicode.cldr.tool.Option.Options;
21 import org.unicode.cldr.util.CLDRFile.DraftStatus;
22 import org.unicode.cldr.util.Factory;
23 import org.unicode.cldr.util.FileReaders;
24 import org.unicode.cldr.util.PatternCache;
25 import org.unicode.cldr.util.SupplementalDataInfo;
26 
27 /**
28  * Simpler mechanism for converting CLDR data to ICU Resource Bundles, intended
29  * to replace LDML2ICUConverter. The format is almost entirely data-driven
30  * instead of having lots of special-case code.
31  *
32  * The flags used to specify the data to be generated are copied directly from
33  * LDML2ICUConverter.
34  *
 * Unlike the instructions in CLDRConverterTool, this converter does not invoke
 * computeConvertibleXPaths to check if each xpath is convertible because the
 * convertible xpaths have already been selected by the regex lookups.
38  * It may make more sense down the road to refactor CLDRConverterTool such that
39  * this class doesn't inherit unnecessary functionality.
40  *
41  * A rough overview of the new converter is available at
42  * https://sites.google.com/site/cldr/development/coding-cldr-tools/newldml2icuconverter
43  *
44  * @author jchye
45  */
46 public class NewLdml2IcuConverter extends CLDRConverterTool {
    /** Resource bundle path under which writeAlias stores the target of a plain locale alias. */
    private static final String ALIAS_PATH = "/\"%%ALIAS\"";

    /** Debug flag; it is not read anywhere inside this class. */
    static final boolean DEBUG = true;

    /** Matches a semicolon with optional surrounding whitespace; used by loadMapFromFile to split lines. */
    static final Pattern SEMI = PatternCache.get("\\s*+;\\s*+");
52 
53     /*
54      * The type of file to be converted.
55      */
56     enum Type {
57         locales, dayPeriods, genderList, likelySubtags, metadata, metaZones, numberingSystems, plurals, pluralRanges, postalCodeData, rgScope, supplementalData, windowsZones, keyTypeData, brkitr, collation, rbnf;
58     }
59 
    /**
     * Command-line options understood by this tool. They are parsed in processArgs
     * and looked up by name there, in processSupplemental and in convert.
     * NOTE(review): options added without an explicit flag character presumably get
     * one derived from their name - confirm against Option.Options.
     */
    private static final Options options = new Options(
        "Usage: LDML2ICUConverter [OPTIONS] [FILES]\n" +
            "This program is used to convert LDML files to ICU data text files.\n" +
            "Please refer to the following options. Options are not case sensitive.\n" +
            "\texample: org.unicode.cldr.icu.Ldml2IcuConverter -s xxx -d yyy en")
                .add("sourcedir", ".*", "Source directory for CLDR files")
                .add("destdir", ".*", ".", "Destination directory for output files, defaults to the current directory")
                .add("specialsdir", 'p', ".*", null, "Source directory for files containing special data, if any")
                .add("supplementaldir", 'm', ".*", null, "The supplemental data directory")
                .add("keeptogether", 'k', null, null,
                    "Write locale data to one file instead of splitting into separate directories. For debugging")
                .add("type", 't', "\\w+", null, "The type of file to be generated")
                .add("xpath", 'x', ".*", null, "An optional xpath to debug the regexes with")
                .add("filter", 'f', null, null, "Perform filtering on the locale data to be converted.")
                .add("organization", 'o', ".*", null, "The organization to filter the data for")
                .add("makefile", 'g', ".*", null, "If set, generates makefiles and alias files for the specified type. " +
                    "The value to set should be the name of the makefile.")
                .add("verbose", 'v', null, null, "Debugging aids");
78 
    /** Directory name that getDirMapping() always adds to allDirs. */
    private static final String LOCALES_DIR = "locales";

    /** True when the "keeptogether" option was given; set in processArgs. */
    private boolean keepTogether = false;
    /** Lazily populated by getDirMapping() from ldml2icu_dir_mapping.txt. */
    private Map<String, String> dirMapping;
    /** Values of dirMapping without "*", plus LOCALES_DIR; populated by getDirMapping(). */
    private Set<String> allDirs;
    /** Value of the "sourcedir" option. */
    private String sourceDir;
    /** Value of the "destdir" option. */
    private String destinationDir;
    /** Value of the "supplementaldir" option; null when the option was not given. */
    private String supplementalDir;
    /** Created in processArgs when output is split across directories; null otherwise. */
    private IcuDataSplitter splitter;
    /** Filter handed to the mapper's iterator in convert(); built in processArgs. */
    private Filter filter;
    /** True when the "verbose" option was given; it is not read anywhere inside this class. */
    private boolean verbose = false;
90 
91     /**
92      * Maps ICU paths to the directories they should end up in.
93      */
getDirMapping()94     private Map<String, String> getDirMapping() {
95         if (dirMapping == null) {
96             dirMapping = loadMapFromFile("ldml2icu_dir_mapping.txt");
97             allDirs = new HashSet<String>(dirMapping.values());
98             allDirs.remove("*");
99             allDirs.add(LOCALES_DIR);
100         }
101         return dirMapping;
102     }
103 
loadMapFromFile(String filename)104     private static Map<String, String> loadMapFromFile(String filename) {
105         Map<String, String> map = new HashMap<String, String>();
106         BufferedReader reader = FileReaders.openFile(NewLdml2IcuConverter.class, filename);
107         String line;
108         try {
109             int lineNum = 1;
110             while ((line = reader.readLine()) != null) {
111                 if (line.length() == 0 || line.startsWith("#")) continue;
112                 String[] content = line.split(SEMI.toString());
113                 if (content.length != 2) {
114                     throw new IllegalArgumentException("Invalid syntax of " + filename + " at line " + lineNum);
115                 }
116                 map.put(content[0], content[1]);
117                 lineNum++;
118             }
119         } catch (IOException e) {
120             System.err.println("Failed to read fallback file.");
121             e.printStackTrace();
122         }
123         return map;
124     }
125 
loadSplitInfoFromFile()126     private List<SplitInfo> loadSplitInfoFromFile() {
127         Map<String, String> dirMapping = getDirMapping();
128         List<SplitInfo> splitInfos = new ArrayList<SplitInfo>();
129         for (Entry<String, String> entry : dirMapping.entrySet()) {
130             SplitInfo splitInfo = new SplitInfo(entry.getKey(), entry.getValue());
131             splitInfos.add(splitInfo);
132         }
133         return splitInfos;
134     }
135 
136     @Override
processArgs(String[] args)137     public void processArgs(String[] args) {
138         Set<String> extraArgs = options.parse(args, true);
139         // For supplemental output files, the supplemental directory is specified
140         // as the source directory and the supplemental directory argument is
141         // not required.
142         if (!options.get("sourcedir").doesOccur()) {
143             throw new IllegalArgumentException("Source directory must be specified.");
144         }
145         sourceDir = options.get("sourcedir").getValue();
146         supplementalDir = options.get("supplementaldir").getValue();
147 
148         destinationDir = options.get("destdir").getValue();
149         if (!options.get("type").doesOccur()) {
150             throw new IllegalArgumentException("Type not specified: " + Arrays.asList(Type.values()));
151         }
152         Type type = Type.valueOf(options.get("type").getValue());
153         keepTogether = options.get("keeptogether").doesOccur();
154         if (!keepTogether && type == Type.supplementalData || type == Type.locales) {
155             if (splitInfos == null) {
156                 splitInfos = loadSplitInfoFromFile();
157             }
158             splitter = IcuDataSplitter.make(destinationDir, splitInfos);
159         }
160 
161         verbose = options.get("verbose").doesOccur();
162 
163         String debugXPath = options.get("xpath").getValue();
164         // Quotes are stripped out at the command line so add them back in.
165         if (debugXPath != null) {
166             debugXPath = debugXPath.replaceAll("=([^\\]\"]++)\\]", "=\"$1\"\\]");
167         }
168 
169         Factory specialFactory = null;
170         File specialsDir = null;
171         Option option = options.get("specialsdir");
172         if (option.doesOccur()) {
173             if (type == Type.rbnf) {
174                 specialsDir = new File(option.getValue());
175             } else {
176                 specialFactory = Factory.make(option.getValue(), ".*");
177             }
178         } else if (type == Type.brkitr) {
179             specialFactory = Factory.make(options.get("specialsdir").getValue(), ".*");
180         }
181 
182         // Get list of locales if defined.
183         Set<String> includedLocales = getIncludedLocales();
184         Map<String, String> localesMap = getLocalesMap();
185         if (includedLocales != null && includedLocales.size() > 0) {
186             final Set<String> locales = new HashSet<String>();
187             for (String locale : includedLocales) {
188                 if (localesMap.containsKey(locale + ".xml")) {
189                     locales.add(locale);
190                 }
191             }
192 
193             filter = new Filter() {
194                 @Override
195                 public boolean includes(String value) {
196                     return locales.contains(value);
197                 }
198             };
199         } else if (extraArgs.size() > 0) {
200             final String regex = extraArgs.iterator().next();
201             filter = new Filter() {
202                 @Override
203                 public boolean includes(String value) {
204                     return value.matches(regex);
205                 }
206             };
207         } else if (type == Type.locales || type == Type.collation) {
208             throw new IllegalArgumentException(
209                 "Missing locale list. Please provide a list of locales or a regex.");
210         } else {
211             filter = new Filter() {
212                 @Override
213                 public boolean includes(String value) {
214                     return true;
215                 }
216             };
217         }
218 
219         // Process files.
220         Mapper mapper = null;
221         switch (type) {
222         case locales:
223             // Generate locale data.
224             SupplementalDataInfo supplementalDataInfo = null;
225             option = options.get("supplementaldir");
226             if (option.doesOccur()) {
227                 supplementalDataInfo = SupplementalDataInfo.getInstance(supplementalDir);
228             } else {
229                 throw new IllegalArgumentException("Supplemental directory must be specified with -s");
230             }
231 
232             Factory factory = Factory.make(sourceDir, ".*", DraftStatus.contributed);
233             String organization = options.get("organization").getValue();
234             LocaleMapper localeMapper = new LocaleMapper(factory, specialFactory,
235                 supplementalDataInfo, options.get("filter").doesOccur(), organization);
236             localeMapper.setDebugXPath(debugXPath);
237             mapper = localeMapper;
238             break;
239         case keyTypeData:
240             processBcp47Data();
241             break;
242         case brkitr:
243             mapper = new BreakIteratorMapper(sourceDir, specialFactory);
244             break;
245         case collation:
246             mapper = new CollationMapper(sourceDir, specialFactory);
247             break;
248         case rbnf:
249             mapper = new RbnfMapper(new File(sourceDir), specialsDir);
250             break;
251         default: // supplemental data
252             processSupplemental(type, debugXPath);
253         }
254 
255         if (mapper != null) {
256             convert(mapper);
257             option = options.get("makefile");
258             if (option.doesOccur()) {
259                 generateMakefile(mapper, option.getValue());
260             }
261         }
262     }
263 
processBcp47Data()264     private void processBcp47Data() {
265         Bcp47Mapper mapper = new Bcp47Mapper(sourceDir);
266         IcuData[] icuData = mapper.fillFromCldr();
267         for (IcuData data : icuData) {
268             writeIcuData(data, destinationDir);
269         }
270     }
271 
processSupplemental(Type type, String debugXPath)272     private void processSupplemental(Type type, String debugXPath) {
273         IcuData icuData;
274         // Use the supplementaldir if explicitly specified , otherwise the source dir.
275         String dir = options.get("supplementaldir").doesOccur() ? supplementalDir : sourceDir;
276         switch (type) {
277         case plurals: {
278             PluralsMapper mapper = new PluralsMapper(dir);
279             icuData = mapper.fillFromCldr();
280             break;
281         }
282         case pluralRanges: {
283             PluralRangesMapper mapper = new PluralRangesMapper(dir);
284             icuData = mapper.fillFromCldr();
285             break;
286         }
287         case dayPeriods: {
288             DayPeriodsMapper mapper = new DayPeriodsMapper(dir);
289             icuData = mapper.fillFromCldr();
290             break;
291         }
292         default: {
293             SupplementalMapper mapper = SupplementalMapper.create(dir);
294             if (debugXPath != null) {
295                 mapper.setDebugXPath(debugXPath);
296             }
297             icuData = mapper.fillFromCldr(type.toString());
298         }
299         }
300         writeIcuData(icuData, destinationDir);
301     }
302 
303     /**
304      * Writes the given IcuData object to file.
305      *
306      * @param icuData
307      *            the IcuData object to be written
308      * @param outputDir
309      *            the destination directory of the output file
310      */
writeIcuData(IcuData icuData, String outputDir)311     private void writeIcuData(IcuData icuData, String outputDir) {
312         if (icuData.keySet().size() == 0) {
313             throw new RuntimeException(icuData.getName() + " was not written because no data was generated.");
314         }
315         try {
316             // Split data into different directories if necessary.
317             // splitInfos is filled from the <remap> element in ICU's build.xml.
318             if (splitter == null) {
319                 IcuTextWriter.writeToFile(icuData, outputDir);
320             } else {
321                 String fallbackDir = new File(outputDir).getName();
322                 Map<String, IcuData> splitData = splitter.split(icuData, fallbackDir);
323                 for (String dir : splitData.keySet()) {
324                     IcuTextWriter.writeToFile(splitData.get(dir), outputDir + "/../" + dir);
325                 }
326             }
327         } catch (IOException e) {
328             System.err.println("Error while converting " + icuData.getSourceFile());
329             e.printStackTrace();
330         }
331     }
332 
333     /**
334      * Converts CLDR XML files using the specified mapper.
335      */
convert(Mapper mapper)336     private void convert(Mapper mapper) {
337         IcuData icuData;
338         Iterator<IcuData> iterator = mapper.iterator(filter);
339         final Type type = Type.valueOf(options.get("type").getValue());
340         while (iterator.hasNext()) {
341             long time = System.currentTimeMillis();
342             icuData = iterator.next();
343             writeIcuData(icuData, destinationDir);
344             System.out.println("Converted " + type + ": " + icuData.getName() + ".xml in " +
345                 (System.currentTimeMillis() - time) + "ms");
346         }
347     }
348 
349     /**
350      * Generates makefiles for files generated from the specified mapper.
351      * @param mapper
352      * @param makefileName
353      */
generateMakefile(Mapper mapper, String makefileName)354     private void generateMakefile(Mapper mapper, String makefileName) {
355         // Generate aliases and makefiles for main directory.
356         Set<String> aliases = writeSyntheticFiles(mapper.getGenerated(), destinationDir);
357         Makefile makefile = mapper.generateMakefile(aliases);
358         writeMakefile(makefile, destinationDir, makefileName);
359         if (splitter == null) return;
360 
361         // Generate aliases and locales for remaining directories if a splitter was used.
362         for (String dir : splitter.getTargetDirs()) {
363             File outputDir = new File(destinationDir, "../" + dir);
364             aliases = writeSyntheticFiles(splitter.getDirSources(dir), outputDir.getAbsolutePath());
365             makefile = splitter.generateMakefile(aliases, outputDir.getName());
366             writeMakefile(makefile, outputDir.getAbsolutePath(), makefileName);
367         }
368     }
369 
370     /**
371      * Creates all synthetic files needed by the makefile in the specified output directory.
372      * @param sources the set of source files that have already been generated
373      * @param outputDir
374      * @return
375      */
writeSyntheticFiles(Set<String> sources, String outputDir)376     private Set<String> writeSyntheticFiles(Set<String> sources, String outputDir) {
377         Set<String> targets = new HashSet<String>();
378         if (aliasDeprecates != null) {
379             if (aliasDeprecates.emptyLocaleList != null) {
380                 for (String locale : aliasDeprecates.emptyLocaleList) {
381                     IcuData icuData = createEmptyFile(locale);
382                     System.out.println("Empty locale created: " + locale);
383                     targets.add(locale);
384                     writeIcuData(icuData, outputDir);
385                 }
386             }
387             if (aliasDeprecates.aliasList != null) {
388                 for (Alias alias : aliasDeprecates.aliasList) {
389                     try {
390                         writeAlias(alias, outputDir, sources, targets);
391                     } catch (IOException e) {
392                         System.err.println("Error writing alias " + alias.from + "-" + alias.to);
393                         System.exit(-1);
394                     }
395                 }
396             }
397         }
398         return targets;
399     }
400 
401     /**
402      * Writes a makefile to the specified directory and filename.
403      */
writeMakefile(Makefile makefile, String outputDir, String makefileName)404     private void writeMakefile(Makefile makefile, String outputDir, String makefileName) {
405         try {
406             new File(outputDir + File.separator + makefileName).createNewFile();
407             makefile.print(outputDir, makefileName);
408         } catch (IOException e) {
409             System.err.println("Error while writing makefile for " + outputDir + "/" + makefileName);
410         }
411     }
412 
413     /**
414      * Creates an empty IcuData object to act as a placeholder for the specified alias target locale.
415      */
createEmptyFile(String locale)416     public IcuData createEmptyFile(String locale) {
417         IcuData icuData = new IcuData("icu-locale-deprecates.xml & build.xml", locale, true);
418         icuData.setFileComment("generated alias target");
419         icuData.add("/___", "");
420         return icuData;
421     }
422 
423     /**
424      * Creates any synthetic files required for the specified alias.
425      * @param alias
426      * @param outputDir
427      * @param sources the set of sources in the output directory
428      * @param aliasTargets the alias targets already created in the output directory
429      * @throws IOException
430      */
writeAlias(Alias alias, String outputDir, Set<String> sources, Set<String> aliasTargets)431     private void writeAlias(Alias alias, String outputDir,
432         Set<String> sources, Set<String> aliasTargets) throws IOException {
433         String from = alias.from;
434         String to = alias.to;
435         // Add synthetic destination file for alias if necessary.
436         if (!sources.contains(to) && !aliasTargets.contains(to) && new File(outputDir + File.separator + alias.to + ".txt").createNewFile()) {
437             System.out.println(to + " not found, creating empty file in " + outputDir);
438             IcuTextWriter.writeToFile(createEmptyFile(alias.to), outputDir);
439             aliasTargets.add(to);
440         }
441 
442         if (from == null || to == null) {
443             throw new IllegalArgumentException("Malformed alias - no 'from' or 'to': from=\"" +
444                 from + "\" to=\"" + to + "\"");
445         }
446 
447         if (sources.contains(from)) {
448             throw new IllegalArgumentException(
449                 "Can't be both a synthetic alias locale and a real xml file - "
450                     + "consider using <aliasLocale locale=\"" + from + "\"/> instead. ");
451         }
452 
453         String rbPath = alias.rbPath;
454         String value = alias.value;
455         if ((rbPath == null) != (value == null)) {
456             throw new IllegalArgumentException("Incomplete alias specification for " +
457                 from + "-" + to + ": both rbPath (" +
458                 rbPath + ") and value (" + value + ") must be specified");
459         }
460 
461         IcuData icuData = new IcuData("icu-locale-deprecates.xml & build.xml", from, true);
462         if (rbPath == null) {
463             icuData.add(ALIAS_PATH, to);
464         } else {
465             icuData.add(rbPath, value);
466         }
467 
468         if (new File(outputDir + File.separator + from + ".txt").createNewFile()) {
469             IcuTextWriter.writeToFile(icuData, outputDir);
470             aliasTargets.add(alias.from);
471             System.out.println("Created alias from " + from + " to " + to + " in " + outputDir + ".");
472         }
473     }
474 
main(String[] args)475     public static void main(String[] args) throws IOException {
476         long totalTime = System.currentTimeMillis();
477         NewLdml2IcuConverter converter = new NewLdml2IcuConverter();
478         converter.processArgs(args);
479         System.out.println("Total time taken: " + (System.currentTimeMillis() - totalTime) + "ms");
480     }
481 }
482