1 package org.unicode.cldr.tool; 2 3 import java.io.File; 4 import java.io.IOException; 5 import java.io.PrintWriter; 6 import java.util.HashSet; 7 import java.util.LinkedHashMap; 8 import java.util.List; 9 import java.util.Map; 10 import java.util.Set; 11 import java.util.TreeMap; 12 import java.util.TreeSet; 13 import java.util.regex.Matcher; 14 import java.util.regex.Pattern; 15 16 import org.unicode.cldr.draft.FileUtilities; 17 import org.unicode.cldr.test.DisplayAndInputProcessor; 18 import org.unicode.cldr.tool.Option.Options; 19 import org.unicode.cldr.tool.Option.Params; 20 import org.unicode.cldr.util.Annotations; 21 import org.unicode.cldr.util.Annotations.AnnotationSet; 22 import org.unicode.cldr.util.CLDRConfig; 23 import org.unicode.cldr.util.CLDRFile; 24 import org.unicode.cldr.util.CLDRPaths; 25 import org.unicode.cldr.util.CldrUtility; 26 import org.unicode.cldr.util.Emoji; 27 import org.unicode.cldr.util.Factory; 28 import org.unicode.cldr.util.Level; 29 import org.unicode.cldr.util.Organization; 30 import org.unicode.cldr.util.SimpleXMLSource; 31 import org.unicode.cldr.util.XPathParts.Comments.CommentType; 32 33 import com.google.common.base.Joiner; 34 import com.google.common.base.Splitter; 35 import com.google.common.collect.ImmutableSortedSet; 36 import com.ibm.icu.impl.Utility; 37 import com.ibm.icu.impl.locale.XCldrStub.ImmutableMap; 38 import com.ibm.icu.text.UnicodeSet; 39 40 public class GenerateDerivedAnnotations { 41 // Use EmojiData.getDerivableNames() to update this for each version of Unicode. 42 43 private static final CLDRConfig CLDR_CONFIG = CLDRConfig.getInstance(); 44 45 static final UnicodeSet SKIP = new UnicodeSet() 46 .add(Annotations.ENGLISH_MARKER) 47 .add(Annotations.BAD_MARKER) 48 .add(Annotations.MISSING_MARKER) 49 .freeze(); 50 51 static Map<String,String> codepointToIsoCurrencyCode; 52 static { 53 final Splitter tabSplitter = Splitter.on('\t').trimResults(); 54 Map<String,String> _codepointToIsoCurrencyCode = new TreeMap<>(); 55 for (String line : FileUtilities.in(CldrUtility.class, "data/codepointToIsoCurrencyCode.tsv")) { 56 if (line.startsWith("#")) { 57 continue; 58 } 59 List<String> parts = tabSplitter.splitToList(line); 60 _codepointToIsoCurrencyCode.put(parts.get(0), parts.get(1)); 61 } 62 codepointToIsoCurrencyCode = ImmutableMap.copyOf(_codepointToIsoCurrencyCode); 63 } 64 65 private enum MyOptions { 66 fileFilter(new Params().setHelp("filter files by dir/locale, eg: ^main/en$ or .*/en").setMatch(".*").setDefault(".*")), 67 missing(new Params().setHelp("only missing").setMatch("")), 68 ; 69 70 // BOILERPLATE TO COPY 71 final Option option; 72 MyOptions(Params params)73 private MyOptions(Params params) { 74 option = new Option(this, params); 75 } 76 77 private static Options myOptions = new Options(); 78 static { 79 for (MyOptions option : MyOptions.values()) { myOptions.add(option, option.option)80 myOptions.add(option, option.option); 81 } 82 } 83 parse(String[] args)84 private static Set<String> parse(String[] args) { 85 return myOptions.parse(MyOptions.values()[0], args, true); 86 } 87 } 88 main(String[] args)89 public static void main(String[] args) throws IOException { 90 MyOptions.parse(args); 91 92 boolean missingOnly = MyOptions.missing.option.doesOccur(); 93 if (missingOnly) { 94 System.out.println("With the 'missing' argument files will not be written, only the missing items will be written to the console"); 95 } 96 97 Matcher localeMatcher = Pattern.compile(MyOptions.fileFilter.option.getValue()).matcher(""); 98 Joiner BAR = Joiner.on(" | "); 99 AnnotationSet enAnnotations = Annotations.getDataSet("en"); 100 CLDRFile english = CLDR_CONFIG.getEnglish(); 101 102 UnicodeSet derivables = new UnicodeSet(Emoji.getAllRgiNoES()) 103 .addAll(codepointToIsoCurrencyCode.keySet()) 104 .removeAll(enAnnotations.keySet()) 105 .freeze(); 106 107 for (String d : derivables) { 108 if (d.contains("")) { 109 System.out.println(d + "\t" + Utility.hex(d)); 110 } 111 } 112 113 Map<String, UnicodeSet> localeToFailures = new LinkedHashMap<>(); 114 Set<String> locales = ImmutableSortedSet.copyOf(Annotations.getAvailable()); 115 final Factory cldrFactory = CLDRConfig.getInstance().getCldrFactory(); 116 117 for (String locale : locales) { 118 if ("root".equals(locale)) { 119 continue; 120 } 121 if (!localeMatcher.reset(locale).matches()) { 122 continue; 123 } 124 UnicodeSet failures = new UnicodeSet(Emoji.getAllRgiNoES()); 125 localeToFailures.put(locale, failures); 126 127 AnnotationSet annotations; 128 try { 129 annotations = Annotations.getDataSet(locale); 130 failures.removeAll(annotations.getExplicitValues()); 131 } catch (Exception e) { 132 System.out.println("Can't create annotations for: " + locale + "\n\t" + e.getMessage()); 133 annotations = Annotations.getDataSet(locale); 134 continue; 135 } 136 CLDRFile target = new CLDRFile(new SimpleXMLSource(locale)); 137 CLDRFile main = null; 138 DisplayAndInputProcessor DAIP = new DisplayAndInputProcessor(target); 139 Exception[] internalException = new Exception[1]; 140 141 target.addComment("//ldml", "Derived short names and annotations, using GenerateDerivedAnnotations.java. See warnings in /annotations/ file.", 142 CommentType.PREBLOCK); 143 for (String derivable : derivables) { 144 String shortName = null; 145 try { 146 shortName = annotations.getShortName(derivable); 147 } catch (Exception e) { 148 } 149 150 if (shortName == null) { 151 String currencyCode = codepointToIsoCurrencyCode.get(derivable); 152 if (currencyCode != null) { 153 if (main == null) { 154 main = cldrFactory.make(locale, true); 155 } 156 shortName = main.getName(CLDRFile.CURRENCY_NAME, currencyCode); 157 if (shortName.contentEquals(currencyCode)) { 158 shortName = null; // don't want fallback raw code 159 } 160 } 161 } 162 163 if (shortName == null || SKIP.containsSome(shortName)) { 164 continue; // missing 165 } 166 Set<String> keywords = annotations.getKeywordsMinus(derivable); 167 String path = "//ldml/annotations/annotation[@cp=\"" + derivable + "\"]"; 168 if (!keywords.isEmpty()) { 169 Set<String> keywordsFixed = new HashSet<>(); 170 for (String keyword : keywords) { 171 if (!SKIP.containsSome(keyword)) { 172 keywordsFixed.add(keyword); 173 } 174 } 175 if (!keywordsFixed.isEmpty()) { 176 String value = BAR.join(keywordsFixed); 177 String newValue = DAIP.processInput(path, value, internalException); 178 target.add(path, newValue); 179 } 180 } 181 failures.remove(derivable); 182 String ttsPath = path + "[@type=\"tts\"]"; 183 String shortName2 = DAIP.processInput(path, shortName, internalException); 184 target.add(ttsPath, shortName2); 185 } 186 failures.freeze(); 187 if (!failures.isEmpty()) { 188 Level level = CLDR_CONFIG.getStandardCodes().getLocaleCoverageLevel(Organization.cldr, locale); 189 System.out.println("Failures\t" + locale 190 + "\t" + level 191 + "\t" + english.getName(locale) 192 + "\t" + failures.size() 193 + "\t" + failures.toPattern(false)); 194 } 195 if (missingOnly) { 196 continue; 197 } 198 try (PrintWriter pw = FileUtilities.openUTF8Writer(CLDRPaths.COMMON_DIRECTORY + "annotationsDerived", locale + ".xml")) { 199 target.write(pw); 200 } 201 } 202 Factory factory = Factory.make(CLDRPaths.COMMON_DIRECTORY + "annotationsDerived", ".*"); 203 for (String locale : locales) { 204 if ("root".equals(locale)) { 205 continue; 206 } 207 if (!localeMatcher.reset(locale).matches()) { 208 continue; 209 } 210 CLDRFile cldrFileUnresolved = factory.make(locale, false); 211 CLDRFile cldrFileResolved = factory.make(locale, true); 212 Set<String> toRemove = new TreeSet<>(); // TreeSet just makes debugging easier 213 boolean gotOne = false; 214 for (String xpath : cldrFileUnresolved) { 215 if (xpath.startsWith("//ldml/identity")) { 216 continue; 217 } 218 219 String value = cldrFileUnresolved.getStringValue(xpath); 220 221 // remove items that are the same as their bailey values. This also catches Inheritance Marker 222 223 String bailey = cldrFileResolved.getConstructedBaileyValue(xpath, null, null); 224 if (value.equals(bailey)) { 225 toRemove.add(xpath); 226 continue; 227 } 228 gotOne = true; 229 } 230 if (!gotOne) { 231 if (locale.equals("sr_Cyrl")) { 232 System.err.println("TODO: keep from deleting files with non-empty children"); 233 } else { 234 System.out.println("Removing empty " + locale); 235 new File(CLDRPaths.COMMON_DIRECTORY + "annotationsDerived", locale + ".xml").deleteOnExit(); 236 } 237 } else if (!toRemove.isEmpty()) { 238 System.out.println("Removing " + toRemove.size() + " items from " + locale); 239 CLDRFile fileToWrite = cldrFileUnresolved.cloneAsThawed(); 240 fileToWrite.removeAll(toRemove, false); 241 File file = new File(CLDRPaths.COMMON_DIRECTORY + "annotationsDerived", locale + ".xml"); 242 try (PrintWriter pw = new PrintWriter(file)) { 243 fileToWrite.write(pw); 244 } 245 } 246 } 247 System.out.println("Be sure to run CLDRModify passes afterwards, and generate transformed locales (like de-CH)."); 248 } 249 250 } 251