1 package org.unicode.cldr.test; 2 3 import java.util.Collection; 4 import java.util.List; 5 import java.util.regex.Pattern; 6 7 import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype; 8 import org.unicode.cldr.util.CLDRConfig; 9 import org.unicode.cldr.util.CLDRFile; 10 import org.unicode.cldr.util.GrammarInfo; 11 import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature; 12 import org.unicode.cldr.util.GrammarInfo.GrammaticalScope; 13 import org.unicode.cldr.util.GrammarInfo.GrammaticalTarget; 14 import org.unicode.cldr.util.PatternCache; 15 import org.unicode.cldr.util.UnitConverter; 16 import org.unicode.cldr.util.UnitConverter.UnitId; 17 import org.unicode.cldr.util.UnitPathType; 18 import org.unicode.cldr.util.XPathParts; 19 20 import com.ibm.icu.text.SimpleFormatter; 21 import com.ibm.icu.text.UnicodeSet; 22 23 public class CheckUnits extends CheckCLDR { 24 private static final Pattern HOUR_SYMBOL = PatternCache.get("h{1,2}"); 25 private static final Pattern MINUTE_SYMBOL = PatternCache.get("m{1,2}"); 26 private static final Pattern SECONDS_SYMBOL = PatternCache.get("ss"); 27 private static final UnicodeSet DISALLOW_LONG_POWER = new UnicodeSet("[²³]").freeze(); 28 29 static final UnitConverter unitConverter = CLDRConfig.getInstance().getSupplementalDataInfo().getUnitConverter(); 30 31 private Collection<String> genders = null; 32 33 @Override setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors)34 public CheckCLDR setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors) { 35 super.setCldrFileToCheck(cldrFileToCheck, options, possibleErrors); 36 37 GrammarInfo grammarInfo = CLDRConfig.getInstance().getSupplementalDataInfo().getGrammarInfo(cldrFileToCheck.getLocaleID()); 38 genders = grammarInfo == null ? null : grammarInfo.get(GrammaticalTarget.nominal, GrammaticalFeature.grammaticalGender, GrammaticalScope.units); 39 40 return this; 41 } 42 43 @Override handleCheck(String path, String fullPath, String value, Options options, List<CheckStatus> result)44 public CheckCLDR handleCheck(String path, String fullPath, String value, Options options, 45 List<CheckStatus> result) { 46 47 if (value == null || !path.startsWith("//ldml/units")) { 48 return this; 49 } 50 final XPathParts parts = XPathParts.getFrozenInstance(path); 51 String finalElement = parts.getElement(-1); 52 53 if (genders != null && !genders.isEmpty() && finalElement.equals("gender")) { 54 if (!genders.contains(value)) { 55 result.add(new CheckStatus().setCause(this) 56 .setMainType(CheckStatus.errorType) 57 .setSubtype(Subtype.invalidGenderCode) 58 .setMessage("The gender value for this locale must be one of: {0}", genders)); 59 } 60 } 61 62 // Note, the following test has some overlaps with the checkAndReplacePlaceholders 63 // test in CheckForExamplars (why there?). That is probably OK, they check in 64 // different ways, but some errors will produce two somewhat different error messages. 65 UnitPathType pathType = UnitPathType.getPathType(parts); 66 if (pathType != null) { 67 int min = 0; 68 int max = 0; 69 switch(pathType) { 70 case power: case prefix: 71 min = 1; 72 max = 1; 73 break; 74 case times: case per: 75 min = 2; 76 max = 2; 77 break; 78 case perUnit: case coordinate: // coordinateUnitPattern 79 min = 1; 80 max = 1; 81 break; 82 case unit: 83 min = 0; 84 max = 1; 85 break; 86 default: // 0, 0 87 } 88 if (max > 0) { 89 try { 90 SimpleFormatter sf = SimpleFormatter.compileMinMaxArguments(value, min, max); 91 } catch (Exception e) { 92 result.add(new CheckStatus().setCause(this).setMainType(CheckStatus.errorType) 93 .setSubtype(Subtype.invalidPlaceHolder) 94 .setMessage("Invalid unit pattern, must have min " + min + " and max " + max + " distinct placeholders of the form {n}")); 95 } 96 } 97 String idType; 98 switch(pathType) { 99 case power: { 100 final String width = parts.getAttributeValue(-3, "type"); 101 if (value != null && "long".contentEquals(width)) { 102 if (DISALLOW_LONG_POWER.containsSome(fixedValueIfInherited(value, path))) { 103 String unresolvedValue = getCldrFileToCheck().getUnresolved().getStringValue(path); 104 if (unresolvedValue != null) { 105 final String message = genders == null 106 ? "Long value for power can’t use superscripts; it must be spelled out." 107 : "Long value for power can’t use superscripts; it must be spelled out. [NOTE: values can vary by gender.]"; 108 result.add(new CheckStatus().setCause(this).setMainType(CheckStatus.errorType) 109 .setSubtype(Subtype.longPowerWithSubscripts) 110 .setMessage(message)); 111 } 112 } 113 } 114 } 115 // fall through 116 case prefix: 117 idType = parts.getAttributeValue(-2, "type"); 118 for (String shortUnitId : pathType.sampleComposedShortUnitIds.get(idType)) { 119 final UnitId unitId = unitConverter.createUnitId(shortUnitId); 120 final String width = parts.getAttributeValue(-3, "type"); 121 String count = parts.getAttributeValue(-1, "count"); 122 String caseVariant = parts.getAttributeValue(-1, "case"); 123 final CLDRFile cldrFile = getCldrFileToCheck(); 124 String explicitPattern = UnitPathType.unit.getTrans(cldrFile, width, shortUnitId, count, caseVariant, null, null); 125 if (explicitPattern != null) { 126 String composedPattern = unitId.toString(cldrFile, width, count, caseVariant, null, false); 127 if (composedPattern != null && !explicitPattern.equals(composedPattern)) { 128 unitId.toString(cldrFile, width, count, caseVariant, null, false); // for debugging 129 result.add(new CheckStatus().setCause(this).setMainType(CheckStatus.warningType) 130 .setSubtype(Subtype.mismatchedUnitComponent) 131 .setMessage("Mismatched component: «{0}» produces «{1}», but the explicit translation is «{2}». See http://cldr.unicode.org/translation/units-1/units#TOC-Compound-Units", value, composedPattern, explicitPattern)); 132 } 133 } 134 } 135 break; 136 default: 137 break; 138 } 139 } 140 141 if (pathType == UnitPathType.duration) { 142 XPathParts xpp = parts; 143 String durationUnitType = xpp.findAttributeValue("durationUnit", "type"); 144 boolean hasHourSymbol = HOUR_SYMBOL.matcher(value).find(); 145 boolean hasMinuteSymbol = MINUTE_SYMBOL.matcher(value).find(); 146 boolean hasSecondsSymbol = SECONDS_SYMBOL.matcher(value).find(); 147 148 if (durationUnitType.contains("h") && !hasHourSymbol) { 149 /* Changed message from "The hour symbol (h or hh) is missing" 150 * to "The hour indicator should be either h or hh for duration" 151 * per http://unicode.org/cldr/trac/ticket/10999 152 */ 153 result.add(new CheckStatus().setCause(this) 154 .setMainType(CheckStatus.errorType) 155 .setSubtype(Subtype.invalidDurationUnitPattern) 156 .setMessage("The hour indicator should be either h or hh for duration.")); 157 } else if (durationUnitType.contains("m") && !hasMinuteSymbol) { 158 result.add(new CheckStatus().setCause(this) 159 .setMainType(CheckStatus.errorType) 160 .setSubtype(Subtype.invalidDurationUnitPattern) 161 .setMessage("The minutes symbol (m or mm) is missing.")); 162 } else if (durationUnitType.contains("s") && !hasSecondsSymbol) { 163 result.add(new CheckStatus().setCause(this) 164 .setMainType(CheckStatus.errorType) 165 .setSubtype(Subtype.invalidDurationUnitPattern) 166 .setMessage("The seconds symbol (ss) is missing.")); 167 } 168 } 169 return this; 170 } 171 } 172