1 package org.unicode.cldr.tool;
2 
3 import java.io.File;
4 import java.io.PrintWriter;
5 import java.util.ArrayList;
6 import java.util.HashMap;
7 import java.util.Iterator;
8 import java.util.List;
9 import java.util.Map;
10 import java.util.Set;
11 import java.util.regex.Matcher;
12 import java.util.regex.Pattern;
13 
14 import org.unicode.cldr.draft.FileUtilities;
15 import org.unicode.cldr.tool.Option.Options;
16 import org.unicode.cldr.util.CLDRConfig;
17 import org.unicode.cldr.util.CLDRFile;
18 import org.unicode.cldr.util.CLDRFile.DraftStatus;
19 import org.unicode.cldr.util.CLDRPaths;
20 import org.unicode.cldr.util.CoverageInfo;
21 import org.unicode.cldr.util.Factory;
22 import org.unicode.cldr.util.LocaleIDParser;
23 import org.unicode.cldr.util.Organization;
24 import org.unicode.cldr.util.PatternCache;
25 import org.unicode.cldr.util.RegexFileParser;
26 import org.unicode.cldr.util.RegexFileParser.RegexLineParser;
27 import org.unicode.cldr.util.RegexLookup;
28 import org.unicode.cldr.util.StandardCodes;
29 import org.unicode.cldr.util.XMLSource;
30 import org.unicode.cldr.util.XPathParts;
31 
32 import com.ibm.icu.util.Output;
33 
34 /**
35  * Factory for filtering CLDRFiles by organization and replacing certain values.
36  * Organization coverage data is in org/unicode/cldr/util/data/Locales.txt.
37  *
38  * @author jchye
39  */
40 public class FilterFactory extends Factory {
41     /**
42      * Types of data modification supported.
43      */
44     private enum ModificationType {
45         xpath, value;
46     }
47 
48     private Factory rawFactory;
49     private String organization;
50     private boolean modifyValues;
51 
52     private List<Modifier> modifiers = new ArrayList<>();
53 
54     /**
55      * Creates a new Factory for filtering CLDRFiles.
56      *
57      * @param rawFactory
58      *            the factory to be filtered
59      * @param organization
60      *            the organization that the filtering is catered towards
61      * @param modifyValues
62      *            true if certain values in the data should be modified or replaced
63      */
FilterFactory(Factory rawFactory, String organization, boolean modifyValues)64     private FilterFactory(Factory rawFactory, String organization, boolean modifyValues) {
65         this.rawFactory = rawFactory;
66         this.organization = organization;
67         setSupplementalDirectory(rawFactory.getSupplementalDirectory());
68         this.modifyValues = modifyValues;
69     }
70 
load(Factory rawFactory, String organization, boolean usesAltValue)71     public static FilterFactory load(Factory rawFactory, String organization, boolean usesAltValue) {
72         FilterFactory filterFactory = new FilterFactory(rawFactory, organization, usesAltValue);
73         filterFactory.loadModifiers("dataModifiers.txt");
74         return filterFactory;
75     }
76 
    /**
     * Delegates to the wrapped raw factory.
     *
     * @return the source directories reported by the raw factory
     */
    @Override
    public File[] getSourceDirectories() {
        return rawFactory.getSourceDirectories();
    }
81 
    /**
     * Delegates to the wrapped raw factory.
     *
     * @param localeID
     *            the locale to look up
     * @return the source directories the raw factory reports for {@code localeID}
     */
    @Override
    public List<File> getSourceDirectoriesForLocale(String localeID) {
        return rawFactory.getSourceDirectoriesForLocale(localeID);
    }
86 
87     @Override
handleMake(String localeID, boolean resolved, DraftStatus minimalDraftStatus)88     protected CLDRFile handleMake(String localeID, boolean resolved, DraftStatus minimalDraftStatus) {
89         if (resolved) {
90             return new CLDRFile(makeResolvingSource(localeID, minimalDraftStatus));
91         } else {
92             return filterCldrFile(localeID, minimalDraftStatus);
93         }
94     }
95 
96     /**
97      * @return a filtered CLDRFile.
98      */
filterCldrFile(String localeID, DraftStatus minimalDraftStatus)99     private CLDRFile filterCldrFile(String localeID, DraftStatus minimalDraftStatus) {
100         CLDRFile rawFile = rawFactory.make(localeID, false, minimalDraftStatus).cloneAsThawed();
101 
102         filterAltValues(rawFile);
103         filterCoverage(rawFile);
104         removeRedundantPaths(rawFile);
105         return rawFile;
106     }
107 
108     /**
109      * Replaces the value for certain XPaths with their alternate value.
110      *
111      * @param rawFile
112      */
filterAltValues(CLDRFile rawFile)113     private void filterAltValues(CLDRFile rawFile) {
114         if (!modifyValues) return;
115 
116         for (Modifier modifier : modifiers) {
117             modifier = modifier.filterLocale(rawFile.getLocaleID());
118             if (!modifier.isEmpty()) {
119                 modifier.modifyFile(rawFile);
120             }
121         }
122     }
123 
124     /**
125      * Filters a CLDRFile according to the specified organization's coverage level.
126      *
127      * @param rawFile
128      */
filterCoverage(CLDRFile rawFile)129     private void filterCoverage(CLDRFile rawFile) {
130         if (organization == null) return;
131 
132         int minLevel = StandardCodes.make()
133             .getLocaleCoverageLevel(organization, rawFile.getLocaleID())
134             .getLevel();
135         CoverageInfo covInfo = CLDRConfig.getInstance().getCoverageInfo();
136         for (String xpath : rawFile) {
137             // Locale metadata shouldn't be stripped.
138             int level = covInfo.getCoverageValue(xpath, rawFile.getLocaleID());
139             if (level > minLevel) {
140                 rawFile.remove(xpath);
141             }
142         }
143     }
144 
145     /**
146      * Removes paths with duplicate values that can be found elsewhere in the file.
147      * @param rawFile
148      */
removeRedundantPaths(CLDRFile rawFile)149     private void removeRedundantPaths(CLDRFile rawFile) {
150         if (organization == null || rawFile.getLocaleID().equals("root")) return;
151 
152         String parent = LocaleIDParser.getParent(rawFile.getLocaleID());
153         CLDRFile resolvedParent = rawFactory.make(parent, true);
154         List<String> duplicatePaths = new ArrayList<>();
155         for (String xpath : rawFile) {
156             if (xpath.startsWith("//ldml/identity")) {
157                 continue;
158             }
159             String value = rawFile.getStringValue(xpath);
160             // Remove count="x" if the value is equivalent to count="other".
161             if (xpath.contains("[@count=")) {
162                 XPathParts parts = XPathParts.getFrozenInstance(xpath);
163                 String count = parts.getAttributeValue(-1, "count");
164                 if (!count.equals("other")) {
165                     parts = parts.cloneAsThawed();
166                     parts.setAttribute(-1, "count", "other");
167                     String otherPath = parts.toString();
168                     if (value.equals(rawFile.getStringValue(otherPath))) {
169                         duplicatePaths.add(xpath);
170                         continue;
171                     }
172                 }
173             }
174             // Remove xpaths with values also found in the parent.
175             String sourceLocale = resolvedParent.getSourceLocaleID(xpath, null);
176             if (!sourceLocale.equals(XMLSource.CODE_FALLBACK_ID)) {
177                 String parentValue = resolvedParent.getStringValue(xpath);
178                 if (value.equals(parentValue)) {
179                     duplicatePaths.add(xpath);
180                 }
181             }
182         }
183         for (String xpath : duplicatePaths) {
184             rawFile.remove(xpath);
185         }
186     }
187 
    /**
     * Delegates to the wrapped raw factory.
     *
     * @return the minimal draft status reported by the raw factory
     */
    @Override
    public DraftStatus getMinimalDraftStatus() {
        return rawFactory.getMinimalDraftStatus();
    }
192 
    /**
     * Delegates to the wrapped raw factory; filtering does not change which
     * locales are available.
     *
     * @return the locales available from the raw factory
     */
    @Override
    protected Set<String> handleGetAvailable() {
        return rawFactory.getAvailable();
    }
197 
198     /**
199      * Wrapper class for holding information about a value modification entry.
200      */
201     private class ModifierEntry {
202         String oldValue;
203         String newValue;
204         Map<String, String> options;
205 
ModifierEntry(String oldValue, String newValue, Map<String, String> options)206         public ModifierEntry(String oldValue, String newValue, Map<String, String> options) {
207             this.oldValue = oldValue;
208             this.newValue = newValue;
209             this.options = options;
210         }
211 
212         /**
213          * @param locale
214          *            the locale to be matched
215          * @return true if the locale matches the locale filter in this entry.
216          */
localeMatches(String locale)217         public boolean localeMatches(String locale) {
218             String pattern = options.get("locale");
219             return pattern == null ? true : locale.matches(pattern);
220         }
221     }
222 
223     /**
224      * Class for performing a specific type of data modification on a CLDRFile.
225      */
226     private abstract class Modifier {
227         protected List<ModifierEntry> entries = new ArrayList<>();
228 
modifyFile(CLDRFile file)229         public abstract void modifyFile(CLDRFile file);
230 
filterLocale(String locale)231         public abstract Modifier filterLocale(String locale);
232 
233         /**
234          * @return the list of modifiers meant for the specified locale.
235          */
getModifiersForLocale(String locale)236         protected List<ModifierEntry> getModifiersForLocale(String locale) {
237             List<ModifierEntry> newFilters = new ArrayList<>();
238             for (ModifierEntry filter : entries) {
239                 if (filter.localeMatches(locale)) {
240                     newFilters.add(filter);
241                 }
242             }
243             return newFilters;
244         }
245 
246         /**
247          *
248          * @param filter
249          */
addModifierEntry(ModifierEntry entry)250         public void addModifierEntry(ModifierEntry entry) {
251             entries.add(entry);
252         }
253 
isEmpty()254         public boolean isEmpty() {
255             return entries.size() == 0;
256         }
257     }
258 
259     /**
260      * Maps the value of an XPath onto another XPath.
261      */
262     private class PathModifier extends Modifier {
263         @Override
modifyFile(CLDRFile file)264         public void modifyFile(CLDRFile file) {
265             // For certain alternate values, use them as the main values.
266             for (ModifierEntry entry : entries) {
267                 String oldPath = entry.oldValue;
268                 String value = file.getStringValue(oldPath);
269                 if (value != null) {
270                     String newPath = entry.newValue;
271                     file.add(newPath, value);
272                     file.remove(oldPath);
273                 }
274             }
275         }
276 
277         @Override
filterLocale(String locale)278         public Modifier filterLocale(String locale) {
279             PathModifier newModifier = new PathModifier();
280             newModifier.entries = getModifiersForLocale(locale);
281             return newModifier;
282         }
283     }
284 
285     /**
286      * Replaces certain values with other values.
287      */
288     private class ValueModifier extends Modifier {
289         @Override
modifyFile(CLDRFile file)290         public void modifyFile(CLDRFile file) {
291             // Replace values.
292             for (ModifierEntry entry : entries) {
293                 String filteringPath = entry.options.get("xpath");
294                 if (filteringPath != null && isValidXPath(filteringPath)) {
295                     // For non-regex XPaths, look them up directly.
296                     String value = file.getStringValue(filteringPath);
297                     if (value != null) {
298                         value = value.replaceAll(entry.oldValue, entry.newValue);
299                         file.add(filteringPath, value);
300                     }
301                 } else {
302                     Iterator<String> iterator = file.iterator();
303                     if (filteringPath != null) {
304                         Matcher matcher = PatternCache.get(filteringPath).matcher("");
305                         iterator = file.iterator(matcher);
306                     }
307                     while (iterator.hasNext()) {
308                         String xpath = iterator.next();
309                         String originalValue = file.getStringValue(xpath);
310                         String value = originalValue.replaceAll(entry.oldValue, entry.newValue);
311                         if (!value.equals(originalValue)) {
312                             file.add(xpath, value);
313                         }
314                     }
315                 }
316             }
317         }
318 
319         @Override
filterLocale(String locale)320         public Modifier filterLocale(String locale) {
321             ValueModifier newModifier = new ValueModifier();
322             newModifier.entries = getModifiersForLocale(locale);
323             return newModifier;
324         }
325     }
326 
327     /**
328      * Maps the value of XPaths onto other XPaths using regexes.
329      */
330     private class PathRegexModifier extends Modifier {
331         private RegexLookup<String> xpathLookup = new RegexLookup<>();
332 
333         @Override
addModifierEntry(ModifierEntry entry)334         public void addModifierEntry(ModifierEntry entry) {
335             super.addModifierEntry(entry);
336             xpathLookup.add(entry.oldValue, entry.newValue);
337         }
338 
339         @Override
modifyFile(CLDRFile file)340         public void modifyFile(CLDRFile file) {
341             if (xpathLookup.size() > 0) {
342                 Output<String[]> arguments = new Output<>();
343                 for (String xpath : file) {
344                     String newValue = xpathLookup.get(xpath, null, arguments, null, null);
345                     if (newValue != null) {
346                         String newPath = RegexLookup.replace(newValue, arguments.value);
347                         String value = file.getStringValue(xpath);
348                         file.add(newPath, value);
349                         file.remove(xpath);
350                     }
351                 }
352             }
353         }
354 
355         @Override
filterLocale(String locale)356         public Modifier filterLocale(String locale) {
357             PathRegexModifier newModifier = new PathRegexModifier();
358             newModifier.entries = getModifiersForLocale(locale);
359             for (ModifierEntry entry : newModifier.entries) {
360                 newModifier.xpathLookup.add(entry.oldValue, entry.newValue);
361             }
362             return newModifier;
363         }
364     }
365 
366     /**
367      * Loads modifiers from a specified file.
368      */
loadModifiers(String filename)369     private void loadModifiers(String filename) {
370         if (!modifyValues) return;
371         final Modifier pathModifier = new PathModifier();
372         final Modifier pathRegexModifier = new PathRegexModifier();
373         final Modifier valueModifier = new ValueModifier();
374         RegexFileParser fileParser = new RegexFileParser();
375         fileParser.setLineParser(new RegexLineParser() {
376             @Override
377             public void parse(String line) {
378                 String[] contents = line.split("\\s*+;\\s*+");
379                 ModificationType filterType = ModificationType.valueOf(contents[0]);
380                 String oldValue = contents[1];
381                 String newValue = contents[2];
382                 // Process remaining options.
383                 Map<String, String> options = new HashMap<>();
384                 for (int i = 3; i < contents.length; i++) {
385                     String rawLine = contents[i];
386                     int pos = rawLine.indexOf('=');
387                     if (pos < 0) {
388                         throw new IllegalArgumentException("Invalid option: " + rawLine);
389                     }
390                     String optionType = rawLine.substring(0, pos).trim();
391                     options.put(optionType, rawLine.substring(pos + 1).trim());
392                 }
393 
394                 switch (filterType) {
395                 case xpath:
396                     if (isValidXPath(oldValue)) {
397                         pathModifier.addModifierEntry(new ModifierEntry(oldValue, newValue, options));
398                     } else {
399                         pathRegexModifier.addModifierEntry(new ModifierEntry(fixXPathRegex(oldValue),
400                             newValue, options));
401                     }
402                     break;
403                 case value:
404                     String xpath = options.get("xpath");
405                     if (xpath != null && !isValidXPath(xpath)) {
406                         options.put("xpath", fixXPathRegex(xpath));
407                     }
408                     valueModifier.addModifierEntry(new ModifierEntry(oldValue, newValue, options));
409                     break;
410                 }
411             }
412         });
413         fileParser.parse(FilterFactory.class, filename);
414         modifiers.add(pathModifier);
415         modifiers.add(pathRegexModifier);
416         modifiers.add(valueModifier);
417     }
418 
419     private Pattern XPATH_PATTERN = PatternCache.get("/(/\\w++(\\[@\\w++=\"[^\"()%\\\\]+\"])*)++");
420 
421     /**
422      * @param path
423      * @return true if path is a valid XPath and not a regex.
424      */
isValidXPath(String path)425     private boolean isValidXPath(String path) {
426         return XPATH_PATTERN.matcher(path).matches();
427     }
428 
429     /**
430      * Converts an xpath into a proper regex pattern.
431      *
432      * @param path
433      * @return
434      */
fixXPathRegex(String path)435     private String fixXPathRegex(String path) {
436         return '^' + path.replace("[@", "\\[@");
437     }
438 
439     private static final Options options = new Options(
440         "Filters CLDR XML files according to orgnizational coverage levels and an " +
441             "input file of replacement values/xpaths.")
442                 //        .add("org", 'o', ".*", "google", "The organization that the filtering is for. If set, also removes duplicate paths.")
443                 .add("org", 'o', ".*", Organization.cldr.name(), "The organization that the filtering is for. If set, also removes duplicate paths.")
444                 .add("locales", 'l', ".*", ".*", "A regular expression indicating the locales to be filtered");
445 
446     /**
447      * Run FilterFactory for a specific organization.
448      *
449      * @param args
450      * @throws Exception
451      */
main(String[] args)452     public static void main(String[] args) throws Exception {
453         options.parse(args, true);
454         Factory rawFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, options.get("locales").getValue());
455         String org = options.get("org").getValue();
456         FilterFactory filterFactory = FilterFactory.load(rawFactory, org, true);
457         String outputDir = CLDRPaths.GEN_DIRECTORY + "/filter";
458         for (String locale : rawFactory.getAvailable()) {
459             try (PrintWriter out = FileUtilities.openUTF8Writer(outputDir, locale + ".xml");) {
460                 filterFactory.make(locale, false).write(out);
461             }
462 //            out.close();
463         }
464     }
465 }
466