1 package org.unicode.cldr.test;
2 
3 import java.io.File;
4 import java.io.IOException;
5 import java.io.PrintWriter;
6 import java.util.ArrayList;
7 import java.util.EnumMap;
8 import java.util.HashMap;
9 import java.util.LinkedHashSet;
10 import java.util.List;
11 import java.util.Map;
12 import java.util.Set;
13 import java.util.regex.Matcher;
14 import java.util.regex.Pattern;
15 
16 import org.unicode.cldr.test.CheckConsistentCasing.CasingType;
17 import org.unicode.cldr.test.CheckConsistentCasing.CasingTypeAndErrFlag;
18 import org.unicode.cldr.test.CheckConsistentCasing.Category;
19 import org.unicode.cldr.tool.Option.Options;
20 import org.unicode.cldr.util.CLDRFile;
21 import org.unicode.cldr.util.CLDRFile.WinningChoice;
22 import org.unicode.cldr.util.CLDRPaths;
23 import org.unicode.cldr.util.CldrUtility;
24 import org.unicode.cldr.util.Factory;
25 import org.unicode.cldr.util.LocaleIDParser;
26 import org.unicode.cldr.util.PatternCache;
27 import org.unicode.cldr.util.SimpleXMLSource;
28 import org.unicode.cldr.util.SupplementalDataInfo;
29 import org.unicode.cldr.util.XMLFileReader;
30 import org.unicode.cldr.util.XMLSource;
31 import org.unicode.cldr.util.XPathParts;
32 
33 import com.ibm.icu.text.MessageFormat;
34 import com.ibm.icu.text.UnicodeSet;
35 
36 /**
37  * Calculates, reads, writes and returns casing information about locales for
38  * CheckConsistentCasing.
39  * Run main() to generate the casing information files which will be stored in common/casing.
40  *
41  * @author jchye
42  */
43 public class CasingInfo {
44     private static final Options options = new Options(
45         "This program is used to generate casing files for locales.")
46             .add("locales", ".*", ".*", "A regex of the locales to generate casing information for")
47             .add("summary", null,
48                 "generates a summary of the casing for all locales that had casing generated for this run");
49     private Map<String, Map<Category, CasingTypeAndErrFlag>> casing;
50     private List<File> casingDirs;
51 
CasingInfo(Factory factory)52     public CasingInfo(Factory factory) {
53         casingDirs = new ArrayList<>();
54         for (File f : factory.getSourceDirectories()) {
55             this.casingDirs.add(new File(f.getAbsolutePath() + "/../casing"));
56         }
57         casing = CldrUtility.newConcurrentHashMap();
58     }
59 
60     /**
61      * ONLY usable in command line tests.
62      */
CasingInfo()63     public CasingInfo() {
64         casingDirs = new ArrayList<>();
65         this.casingDirs.add(new File(CLDRPaths.CASING_DIRECTORY));
66         casing = CldrUtility.newConcurrentHashMap();
67     }
68 
69     /**
70      * Returns casing information to be used for a specified locale.
71      *
72      * @param localeID
73      * @return
74      */
getLocaleCasing(String localeID)75     public Map<Category, CasingTypeAndErrFlag> getLocaleCasing(String localeID) {
76         // Check if the localeID contains casing first.
77         // If there isn't a casing file available for the locale,
78         // recurse over the locale's parents until something is found.
79         if (!casing.containsKey(localeID)) {
80             // Synchronize writes to casing map in an attempt to avoid NPEs (cldrbug 5051).
81             synchronized (casing) {
82                 CasingHandler handler = loadFromXml(localeID);
83                 if (handler != null) {
84                     handler.addParsedResult(casing);
85                 }
86                 if (!casing.containsKey(localeID)) {
87                     String parentID = LocaleIDParser.getSimpleParent(localeID);
88                     if (!parentID.equals("root")) {
89                         casing.put(localeID, getLocaleCasing(parentID));
90                     }
91                 }
92             }
93         }
94 
95         return casing.get(localeID);
96     }
97 
98     /**
99      * Loads casing information about a specified locale from the casing XML,
100      * if it exists.
101      *
102      * @param localeID
103      */
loadFromXml(String localeID)104     private CasingHandler loadFromXml(String localeID) {
105         for (File casingDir : casingDirs) {
106             File casingFile = new File(casingDir, localeID + ".xml");
107             if (casingFile.isFile()) {
108                 CasingHandler handler = new CasingHandler();
109                 XMLFileReader xfr = new XMLFileReader().setHandler(handler);
110                 xfr.read(casingFile.toString(), -1, true);
111                 return handler;
112             }
113         } // Fail silently if file not found.
114         return null;
115     }
116 
117     /**
118      * Calculates casing information about all languages from the locale data.
119      */
generateCasingInformation(String localePattern)120     private Map<String, Boolean> generateCasingInformation(String localePattern) {
121         SupplementalDataInfo supplementalDataInfo = SupplementalDataInfo.getInstance();
122         Set<String> defaultContentLocales = supplementalDataInfo.getDefaultContentLocales();
123         String sourceDirectory = CldrUtility.checkValidDirectory(CLDRPaths.MAIN_DIRECTORY);
124         Factory cldrFactory = Factory.make(sourceDirectory, localePattern);
125         Set<String> locales = new LinkedHashSet<>(cldrFactory.getAvailable());
126         locales.removeAll(defaultContentLocales); // Skip all default content locales
127         UnicodeSet allCaps = new UnicodeSet("[:Lu:]");
128         Map<String, Boolean> localeUsesCasing = new HashMap<>();
129         LocaleIDParser parser = new LocaleIDParser();
130 
131         for (String localeID : locales) {
132             if (CLDRFile.isSupplementalName(localeID)) continue;
133 
134             // We want country/script differences but not region differences
135             // (unless it's pt_PT, which we do want).
136             // Keep regional locales only if there isn't already a locale for its script,
137             // e.g. keep zh_Hans_HK because zh_Hans is a default locale.
138             parser.set(localeID);
139             if (parser.getRegion().length() > 0 && !localeID.equals("pt_PT")) {
140                 System.out.println("Skipping regional locale " + localeID);
141                 continue;
142             }
143 
144             // Save casing information about the locale.
145             CLDRFile file = cldrFactory.make(localeID, true);
146             UnicodeSet examplars = file.getExemplarSet("", WinningChoice.NORMAL);
147             localeUsesCasing.put(localeID, examplars.containsSome(allCaps));
148             createCasingXml(localeID, CheckConsistentCasing.getSamples(file));
149         }
150         return localeUsesCasing;
151     }
152 
153     /**
154      * Creates a CSV summary of casing information over all locales for verification.
155      *
156      * @param outputFile
157      */
createCasingSummary(String outputFile, Map<String, Boolean> localeUsesCasing)158     private void createCasingSummary(String outputFile, Map<String, Boolean> localeUsesCasing) {
159         PrintWriter out;
160         try {
161             out = new PrintWriter(outputFile);
162         } catch (IOException e) {
163             e.printStackTrace();
164             return;
165         }
166 
167         // Header
168         out.print(",");
169         for (Category category : Category.values()) {
170             out.print("," + category.toString().replace('_', '-'));
171         }
172         out.println();
173         out.print("Locale ID,Case");
174         for (int i = 0; i < Category.values().length; i++) {
175             out.print("," + i);
176         }
177         out.println();
178 
179         Set<String> locales = casing.keySet();
180         for (String localeID : locales) {
181             // Write casing information about the locale to file.
182             out.print(localeID);
183             out.print(",");
184             out.print(localeUsesCasing.get(localeID) ? "Y" : "N");
185             Map<Category, CasingTypeAndErrFlag> types = casing.get(localeID);
186             for (Category category : Category.values()) {
187                 CasingTypeAndErrFlag value = types.get(category);
188                 out.print("," + value == null ? null : value.type().toString().charAt(0));
189             }
190             out.println();
191             out.flush();
192         }
193         out.close();
194     }
195 
196     /**
197      * Writes casing information for the specified locale to XML format.
198      */
createCasingXml(String localeID, Map<Category, CasingType> localeCasing)199     private void createCasingXml(String localeID, Map<Category, CasingType> localeCasing) {
200         // Load any existing overrides over casing info.
201         CasingHandler handler = loadFromXml(localeID);
202         Map<Category, CasingType> overrides = handler == null ? new EnumMap<>(Category.class) : handler.getOverrides();
203         localeCasing.putAll(overrides);
204 
205         XMLSource source = new SimpleXMLSource(localeID);
206         for (Category category : Category.values()) {
207             if (category == Category.NOT_USED) continue;
208             CasingType type = localeCasing.get(category);
209             if (overrides.containsKey(category)) {
210                 String path = MessageFormat.format("//ldml/metadata/casingData/casingItem[@type=\"{0}\"][@override=\"true\"]", category);
211                 source.putValueAtPath(path, type.toString());
212             } else if (type != CasingType.other) {
213                 String path = "//ldml/metadata/casingData/casingItem[@type=\"" + category + "\"]";
214                 source.putValueAtPath(path, type.toString());
215             }
216         }
217         CLDRFile cldrFile = new CLDRFile(source);
218         File casingFile = new File(CLDRPaths.GEN_DIRECTORY + "/casing", localeID + ".xml");
219 
220         try {
221             PrintWriter out = new PrintWriter(casingFile);
222             cldrFile.write(out);
223             out.close();
224         } catch (IOException e) {
225             e.printStackTrace();
226         }
227     }
228 
229     /**
230      * Generates all the casing information and writes it to XML.
231      * A CSV summary of casing information is written to file if a filename argument is provided.
232      *
233      * @param args
234      */
main(String[] args)235     public static void main(String[] args) {
236         CasingInfo casingInfo = new CasingInfo();
237         options.parse(args, true);
238         Map<String, Boolean> localeUsesCasing = casingInfo.generateCasingInformation(options.get("locales").getValue());
239         if (options.get("summary").doesOccur()) {
240             casingInfo.createCasingSummary(args[0], localeUsesCasing);
241         }
242     }
243 
244     /**
245      * XML handler for parsing casing files.
246      */
247     private class CasingHandler extends XMLFileReader.SimpleHandler {
248         private Pattern localePattern = PatternCache.get("//ldml/identity/language\\[@type=\"(\\w+)\"\\]");
249         private String localeID;
250         private Map<Category, CasingTypeAndErrFlag> caseMap = new EnumMap<>(Category.class);
251         private Map<Category, CasingType> overrideMap = new EnumMap<>(Category.class);
252 
253         @Override
handlePathValue(String path, String value)254         public void handlePathValue(String path, String value) {
255             // Parse casing info.
256             if (path.contains("casingItem")) {
257                 XPathParts parts = XPathParts.getFrozenInstance(path);
258                 Category category = Category.valueOf(parts.getAttributeValue(-1, "type").replace('-', '_'));
259                 CasingType casingType = CasingType.valueOf(value);
260                 boolean errFlag = Boolean.parseBoolean(parts.getAttributeValue(-1, "forceError"));
261                 for (CasingTypeAndErrFlag typeAndFlag : CasingTypeAndErrFlag.values()) {
262                     if (casingType == typeAndFlag.type() && errFlag == typeAndFlag.flag()) {
263                         caseMap.put(category, typeAndFlag);
264                         break;
265                     }
266                 }
267                 if (Boolean.valueOf(parts.getAttributeValue(-1, "override"))) {
268                     overrideMap.put(category, casingType);
269                 }
270             } else {
271                 // Parse the locale that the casing is for.
272                 Matcher matcher = localePattern.matcher(path);
273                 if (matcher.matches()) {
274                     localeID = matcher.group(1);
275                 }
276             }
277         }
278 
addParsedResult(Map<String, Map<Category, CasingTypeAndErrFlag>> map)279         public void addParsedResult(Map<String, Map<Category, CasingTypeAndErrFlag>> map) {
280             map.put(localeID, caseMap);
281         }
282 
getOverrides()283         public Map<Category, CasingType> getOverrides() {
284             return overrideMap;
285         }
286     }
287 }
288