1 package org.unicode.cldr.tool; 2 3 import java.io.IOException; 4 import java.io.PrintWriter; 5 import java.util.ArrayList; 6 import java.util.Arrays; 7 import java.util.HashSet; 8 import java.util.LinkedHashMap; 9 import java.util.LinkedHashSet; 10 import java.util.Map.Entry; 11 import java.util.Set; 12 import java.util.TreeMap; 13 import java.util.TreeSet; 14 15 import org.unicode.cldr.draft.FileUtilities; 16 import org.unicode.cldr.tool.Option.Options; 17 import org.unicode.cldr.util.CLDRConfig; 18 import org.unicode.cldr.util.CLDRFile; 19 import org.unicode.cldr.util.CLDRFile.DraftStatus; 20 import org.unicode.cldr.util.CLDRLocale; 21 import org.unicode.cldr.util.CLDRPaths; 22 import org.unicode.cldr.util.ChainedMap; 23 import org.unicode.cldr.util.ChainedMap.M3; 24 import org.unicode.cldr.util.ChainedMap.M4; 25 import org.unicode.cldr.util.Counter; 26 import org.unicode.cldr.util.Factory; 27 import org.unicode.cldr.util.LanguageTagParser; 28 import org.unicode.cldr.util.PathHeader; 29 import org.unicode.cldr.util.PathHeader.SectionId; 30 import org.unicode.cldr.util.SupplementalDataInfo; 31 32 import com.google.common.base.Objects; 33 import com.ibm.icu.impl.Relation; 34 import com.ibm.icu.text.UnicodeSet; 35 36 public class ShowRegionalVariants { 37 private static String MY_DIR; 38 39 private static final boolean SKIP_SUPPRESSED_PATHS = true; 40 41 private static final CLDRConfig CONFIG = CLDRConfig.getInstance(); 42 private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO = CONFIG.getSupplementalDataInfo(); 43 private static final Factory FACTORY = CONFIG.getCldrFactory(); 44 private static final CLDRFile ENGLISH = CONFIG.getEnglish(); 45 private static final CLDRLocale ROOT = CLDRLocale.getInstance("root"); 46 //private static final CLDRLocale en_US_POSIX = CLDRLocale.getInstance("en_US_POSIX"); 47 private static final CLDRLocale SWISS_HIGH_GERMAN = CLDRLocale.getInstance("de_CH"); 48 49 final static Options myOptions = new Options(); 50 51 enum MyOptions { 52 targetDir(".*", CLDRPaths.GEN_DIRECTORY + "/regional/", "target output file."),; 53 // boilderplate 54 final Option option; 55 MyOptions(String argumentPattern, String defaultArgument, String helpText)56 MyOptions(String argumentPattern, String defaultArgument, String helpText) { 57 option = myOptions.add(this, argumentPattern, defaultArgument, helpText); 58 } 59 } 60 main(String[] args)61 public static void main(String[] args) throws IOException { 62 myOptions.parse(MyOptions.targetDir, args, true); 63 64 MY_DIR = MyOptions.targetDir.option.getValue(); 65 66 Set<String> coverageLocales = CONFIG.getStandardCodes().getLocaleCoverageLocales("cldr"); 67 Set<String> dc = new HashSet<>(SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales()); 68 Set<String> skipLocales = new HashSet<>(Arrays.asList("root", "en_US_POSIX", "sr_Latn")); 69 70 Relation<CLDRLocale, CLDRLocale> parentToChildren = Relation.of(new TreeMap<CLDRLocale, Set<CLDRLocale>>(), TreeSet.class); 71 // first, collect all locales for lookup by parents. 72 73 for (String locale : FACTORY.getAvailable()) { 74 if (skipLocales.contains(locale.toString()) 75 || dc.contains(locale.toString())) { 76 continue; 77 } 78 CLDRLocale loc = CLDRLocale.getInstance(locale); 79 80 if (!coverageLocales.contains(loc.getLanguage())) { 81 continue; 82 } 83 CLDRLocale parent = null; 84 for (CLDRLocale current = loc;; current = parent) { 85 parent = current.getParent(); 86 if (!dc.contains(parent.toString())) { // skip over default content 87 break; 88 } 89 } 90 if (ROOT.equals(parent)) { 91 continue; 92 } else if ("root".equals(parent.toString())) { 93 throw new IllegalArgumentException("CLDRLocale failure"); 94 } 95 parentToChildren.put(parent, loc); 96 } 97 98 // show inheritance 99 System.out.println("Locale Name\tCode\tRegion\tInherits from\tCode"); 100 showInheritance(parentToChildren); 101 102 // next find out the unique items in children 103 Relation<String, String> valueToAncestors = Relation.of(new LinkedHashMap<String, Set<String>>(), LinkedHashSet.class); 104 105 int count = 0; 106 107 try ( 108 PrintWriter grandSummary = FileUtilities.openUTF8Writer(MY_DIR, "GrandSummary.txt"); 109 PrintWriter summary = FileUtilities.openUTF8Writer(MY_DIR, "Summary.txt"); 110 PrintWriter detailFile = FileUtilities.openUTF8Writer(MY_DIR, "details.txt");) { 111 grandSummary.println("Parent\tName\tTotal Diff Count\tChildren"); 112 summary.println("Parent\tName\tDiff Count\tChild\tChild Name"); 113 detailFile 114 .println( 115 "№\tBase\tParent Locales I\tParent Locales II\tChild Locales\tEnglish value\tParent value I\tParent value II\tChild value\tCorrected Child value\tComments\tFix Parent value?\tSection\tPage\tHeader\tCode"); 116 PathHeader.Factory phf = PathHeader.getFactory(ENGLISH); 117 String lastBase = ""; 118 for (Entry<CLDRLocale, Set<CLDRLocale>> item : parentToChildren.keyValuesSet()) { 119 CLDRLocale parent = item.getKey(); 120 String base = parent.getLanguage(); 121 122 CLDRFile parentFile = FACTORY.make(parent.toString(), true, DraftStatus.contributed); 123 M4<PathHeader, String, CLDRLocale, Boolean> pathToValuesToLocales = ChainedMap.of( 124 new TreeMap<PathHeader, Object>(), 125 new TreeMap<String, Object>(), 126 new TreeMap<CLDRLocale, Object>(), 127 Boolean.class); 128 129 Counter<CLDRLocale> childDiffs = new Counter<>(); 130 131 for (CLDRLocale child : item.getValue()) { 132 //childDiffs.add(child, 0); // make sure it shows up 133 String childString = child.toString(); 134 CLDRFile childFile = FACTORY.make(childString, false, DraftStatus.contributed); 135 for (String path : childFile) { 136 if (SKIP_SUPPRESSED_PATHS) { 137 if (path.contains("/currency") && path.contains("/symbol")) { 138 continue; 139 } 140 } 141 String childValue = childFile.getStringValue(path); 142 if (childValue == null) { 143 continue; 144 } 145 String parentValue = parentFile.getStringValue(path); 146 if (parentValue == null) { 147 parentValue = "∅∅∅"; 148 } 149 if (!Objects.equal(childValue, parentValue)) { 150 if (SKIP_SUPPRESSED_PATHS) { 151 if ("∅∅∅".equals(childValue) || "∅∅∅".equals(parentValue)) { 152 continue; // skip suppressed paths 153 } 154 } 155 if (parentValue != null) { 156 if (child.equals(SWISS_HIGH_GERMAN)) { 157 String norm = parentValue.replace("ß", "ss"); 158 if (childValue.equals(norm)) { 159 continue; 160 } 161 } else if (base.equals("en")) { 162 if (sameExceptEnd(childValue, "re", parentValue, "er") 163 || sameExceptEnd(childValue, "res", parentValue, "ers")) { 164 continue; 165 } 166 } 167 } 168 PathHeader pheader = phf.fromPath(path); 169 if (SectionId.Special == pheader.getSectionId()) { 170 continue; 171 } 172 pathToValuesToLocales.put(pheader, childValue, child, Boolean.TRUE); 173 childDiffs.add(child, 1); 174 } 175 } 176 } 177 178 long totalChildDiffs = childDiffs.getTotal(); 179 if (totalChildDiffs == 0) { 180 continue; 181 } 182 183 if (!base.equals(lastBase)) { 184 detailFile.println(); 185 // if (detailFile != null) { 186 // detailFile.close(); 187 // } 188 // detailFile = FileUtilities.openUTF8Writer(MY_DIR, "detail-" + base + ".txt"); 189 // detailFile.println("Section\tPage\tHeader\tCode\tLocales\tvalue\tParent Locales\tvalue\tParent Locales\tvalue"); 190 // lastBase = base; 191 } 192 193 grandSummary.println(parent + "\t" + ENGLISH.getName(parent.toString()) + "\t" + totalChildDiffs + "\t" + item.getValue()); 194 for (CLDRLocale s : childDiffs.getKeysetSortedByKey()) { 195 long childDiffValue = childDiffs.get(s); 196 if (childDiffValue == 0) { 197 continue; 198 } 199 summary.println(parent + "\t" + ENGLISH.getName(parent.toString()) + "\t" + childDiffValue + "\t" + s + "\t" 200 + ENGLISH.getName(s.toString())); 201 } 202 203 ArrayList<CLDRFile> parentChain = new ArrayList<CLDRFile>(); 204 for (CLDRLocale current = parent;;) { 205 parentChain.add(FACTORY.make(current.toString(), true)); 206 CLDRLocale grand = current.getParent(); 207 if (ROOT.equals(grand)) { 208 break; 209 } 210 current = grand; 211 } 212 213 for (PathHeader ph : pathToValuesToLocales.keySet()) { 214 M3<String, CLDRLocale, Boolean> values = pathToValuesToLocales.get(ph); 215 valueToAncestors.clear(); 216 for (String value : values.keySet()) { 217 Set<CLDRLocale> childLocales = values.get(value).keySet(); 218 String englishValue = ENGLISH.getStringValue(ph.getOriginalPath()); 219 String originalPath = ph.getOriginalPath(); 220 for (CLDRFile grand : parentChain) { 221 valueToAncestors.put(quote(grand.getStringValue(originalPath)), grand.getLocaleID()); 222 } 223 Set<Entry<String, Set<String>>> keyValuesSet = valueToAncestors.keyValuesSet(); 224 final int countParents = keyValuesSet.size(); 225 if (countParents < 1 || countParents > 2) { 226 throw new IllegalArgumentException("Too few/many parents"); 227 } 228 229 // // № Base Parent Locales I Parent Locales II Child Locales English value Parent value I Parent value II Child value 230 // Corrected Child value Comments Fix Parent value? Section Page Header Code 231 232 detailFile.print( 233 ++count 234 + "\t" + base); 235 236 for (Entry<String, Set<String>> entry : keyValuesSet) { 237 detailFile.print("\t" + entry.getValue()); 238 } 239 if (countParents == 1) { 240 detailFile.print("\t"); 241 } 242 detailFile.print("" 243 + "\t" + childLocales 244 + "\t" + quote(englishValue)); 245 for (Entry<String, Set<String>> entry : keyValuesSet) { 246 detailFile.print("\t" + entry.getKey()); 247 } 248 if (countParents == 1) { 249 detailFile.print("\t"); 250 } 251 detailFile.print("" 252 + "\t" + quote(value) 253 + "\t" + "" 254 + "\t" + "" 255 + "\t" + "" 256 + "\t" + ph); 257 detailFile.println(); 258 } 259 } 260 261 } 262 } 263 System.out.println("DONE"); 264 // if (detailFile != null) { 265 // detailFile.close(); 266 // } 267 } 268 showInheritance(Relation<CLDRLocale, CLDRLocale> parentToChildren)269 private static void showInheritance(Relation<CLDRLocale, CLDRLocale> parentToChildren) { 270 Set<CLDRLocale> values = parentToChildren.values(); 271 Set<CLDRLocale> topParents = new TreeSet<>(parentToChildren.keySet()); 272 topParents.removeAll(values); 273 showInheritance(topParents, "", parentToChildren); 274 } 275 showInheritance(Set<CLDRLocale> topParents, String prefix, Relation<CLDRLocale, CLDRLocale> parentToChildren)276 private static void showInheritance(Set<CLDRLocale> topParents, String prefix, Relation<CLDRLocale, CLDRLocale> parentToChildren) { 277 for (CLDRLocale locale : topParents) { 278 String current = nameForLocale(locale) + "\t" + prefix; 279 System.out.println(current); 280 Set<CLDRLocale> newChildren = parentToChildren.get(locale); 281 if (newChildren == null) { 282 continue; 283 } 284 showInheritance(newChildren, current, parentToChildren); 285 } 286 } 287 288 static final LikelySubtags LS = new LikelySubtags(); 289 nameForLocale(CLDRLocale key)290 private static String nameForLocale(CLDRLocale key) { 291 String country = key.getCountry(); 292 if (country.isEmpty()) { 293 String max = LS.maximize(key.toString()); 294 LanguageTagParser ltp = new LanguageTagParser().set(max); 295 country = "(" + ltp.getRegion() + ")"; 296 } 297 return ENGLISH.getName(key.toString(), false, CLDRFile.SHORT_ALTS) + "\t" + key + "\t" + country; 298 } 299 sameExceptEnd(String childValue, String childEnding, String parentValue, String parentEnding)300 private static boolean sameExceptEnd(String childValue, String childEnding, String parentValue, String parentEnding) { 301 if (childValue.endsWith(childEnding) 302 && parentValue.endsWith(parentEnding) 303 && childValue.substring(0, childValue.length() - childEnding.length()).equals( 304 parentValue.substring(0, parentValue.length() - parentEnding.length()))) { 305 return true; 306 } 307 return false; 308 } 309 310 static final UnicodeSet SPREAD_SHEET_SENSITIVE = new UnicodeSet().add('=').add('+').add('0', '9'); 311 quote(String value)312 private static String quote(String value) { 313 if (value == null || value.isEmpty()) { 314 return "∅∅∅"; 315 } 316 int first = value.codePointAt(0); 317 return SPREAD_SHEET_SENSITIVE.contains(first) ? "'" + value : value; 318 } 319 } 320