1 package org.unicode.cldr.test;
2 
3 import java.util.Collection;
4 import java.util.List;
5 import java.util.regex.Pattern;
6 
7 import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype;
8 import org.unicode.cldr.util.CLDRConfig;
9 import org.unicode.cldr.util.CLDRFile;
10 import org.unicode.cldr.util.GrammarInfo;
11 import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature;
12 import org.unicode.cldr.util.GrammarInfo.GrammaticalScope;
13 import org.unicode.cldr.util.GrammarInfo.GrammaticalTarget;
14 import org.unicode.cldr.util.PatternCache;
15 import org.unicode.cldr.util.UnitConverter;
16 import org.unicode.cldr.util.UnitConverter.UnitId;
17 import org.unicode.cldr.util.UnitPathType;
18 import org.unicode.cldr.util.XPathParts;
19 
20 import com.ibm.icu.text.SimpleFormatter;
21 import com.ibm.icu.text.UnicodeSet;
22 
23 public class CheckUnits extends CheckCLDR {
24     private static final Pattern HOUR_SYMBOL = PatternCache.get("h{1,2}");
25     private static final Pattern MINUTE_SYMBOL = PatternCache.get("m{1,2}");
26     private static final Pattern SECONDS_SYMBOL = PatternCache.get("ss");
27     private static final UnicodeSet DISALLOW_LONG_POWER = new UnicodeSet("[²³]").freeze();
28 
29     static final UnitConverter unitConverter = CLDRConfig.getInstance().getSupplementalDataInfo().getUnitConverter();
30 
31     private Collection<String> genders = null;
32 
33     @Override
setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors)34     public CheckCLDR setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors) {
35         super.setCldrFileToCheck(cldrFileToCheck, options, possibleErrors);
36 
37         GrammarInfo grammarInfo = CLDRConfig.getInstance().getSupplementalDataInfo().getGrammarInfo(cldrFileToCheck.getLocaleID());
38         genders = grammarInfo == null ? null : grammarInfo.get(GrammaticalTarget.nominal, GrammaticalFeature.grammaticalGender, GrammaticalScope.units);
39 
40         return this;
41     }
42 
43     @Override
handleCheck(String path, String fullPath, String value, Options options, List<CheckStatus> result)44     public CheckCLDR handleCheck(String path, String fullPath, String value, Options options,
45         List<CheckStatus> result) {
46 
47         if (value == null || !path.startsWith("//ldml/units")) {
48             return this;
49         }
50         final XPathParts parts = XPathParts.getFrozenInstance(path);
51         String finalElement = parts.getElement(-1);
52 
53         if (genders != null && !genders.isEmpty() && finalElement.equals("gender")) {
54             if (!genders.contains(value)) {
55                 result.add(new CheckStatus().setCause(this)
56                     .setMainType(CheckStatus.errorType)
57                     .setSubtype(Subtype.invalidGenderCode)
58                     .setMessage("The gender value for this locale must be one of: {0}", genders));
59             }
60         }
61 
62         // Note, the following test has some overlaps with the checkAndReplacePlaceholders
63         // test in CheckForExamplars (why there?). That is probably OK, they check in
64         // different ways, but some errors will produce two somewhat different error messages.
65         UnitPathType pathType = UnitPathType.getPathType(parts);
66         if (pathType != null) {
67             int min = 0;
68             int max = 0;
69             switch(pathType) {
70             case power: case prefix:
71                 min = 1;
72                 max = 1;
73                 break;
74             case times: case per:
75                 min = 2;
76                 max = 2;
77                 break;
78             case perUnit: case coordinate: // coordinateUnitPattern
79                 min = 1;
80                 max = 1;
81                 break;
82             case unit:
83                 min = 0;
84                 max = 1;
85                 break;
86             default: // 0, 0
87             }
88             if (max > 0) {
89                 try {
90                     SimpleFormatter sf = SimpleFormatter.compileMinMaxArguments(value, min, max);
91                 } catch (Exception e) {
92                     result.add(new CheckStatus().setCause(this).setMainType(CheckStatus.errorType)
93                         .setSubtype(Subtype.invalidPlaceHolder)
94                         .setMessage("Invalid unit pattern, must have min " + min + " and max " + max + " distinct placeholders of the form {n}"));
95                 }
96             }
97             String idType;
98             switch(pathType) {
99             case power: {
100                 final String width = parts.getAttributeValue(-3, "type");
101                 if (value != null && "long".contentEquals(width)) {
102                     if (DISALLOW_LONG_POWER.containsSome(fixedValueIfInherited(value, path))) {
103                         String unresolvedValue = getCldrFileToCheck().getUnresolved().getStringValue(path);
104                         if (unresolvedValue != null) {
105                             final String message = genders == null
106                                 ? "Long value for power can’t use superscripts; it must be spelled out."
107                                     : "Long value for power can’t use superscripts; it must be spelled out. [NOTE: values can vary by gender.]";
108                             result.add(new CheckStatus().setCause(this).setMainType(CheckStatus.errorType)
109                                 .setSubtype(Subtype.longPowerWithSubscripts)
110                                 .setMessage(message));
111                         }
112                     }
113                 }
114             }
115             // fall through
116             case prefix:
117                 idType = parts.getAttributeValue(-2, "type");
118                 for (String shortUnitId : pathType.sampleComposedShortUnitIds.get(idType)) {
119                     final UnitId unitId = unitConverter.createUnitId(shortUnitId);
120                     final String width = parts.getAttributeValue(-3, "type");
121                     String count = parts.getAttributeValue(-1, "count");
122                     String caseVariant = parts.getAttributeValue(-1, "case");
123                     final CLDRFile cldrFile = getCldrFileToCheck();
124                     String explicitPattern = UnitPathType.unit.getTrans(cldrFile, width, shortUnitId, count, caseVariant, null, null);
125                     if (explicitPattern != null) {
126                         String composedPattern = unitId.toString(cldrFile, width, count, caseVariant, null, false);
127                         if (composedPattern != null && !explicitPattern.equals(composedPattern)) {
128                             unitId.toString(cldrFile, width, count, caseVariant, null, false); // for debugging
129                             result.add(new CheckStatus().setCause(this).setMainType(CheckStatus.warningType)
130                                 .setSubtype(Subtype.mismatchedUnitComponent)
131                                 .setMessage("Mismatched component: «{0}» produces «{1}», but the explicit translation is «{2}». See http://cldr.unicode.org/translation/units-1/units#TOC-Compound-Units", value, composedPattern, explicitPattern));
132                         }
133                     }
134                 }
135                 break;
136             default:
137                 break;
138             }
139         }
140 
141         if (pathType == UnitPathType.duration) {
142             XPathParts xpp = parts;
143             String durationUnitType = xpp.findAttributeValue("durationUnit", "type");
144             boolean hasHourSymbol = HOUR_SYMBOL.matcher(value).find();
145             boolean hasMinuteSymbol = MINUTE_SYMBOL.matcher(value).find();
146             boolean hasSecondsSymbol = SECONDS_SYMBOL.matcher(value).find();
147 
148             if (durationUnitType.contains("h") && !hasHourSymbol) {
149                 /* Changed message from "The hour symbol (h or hh) is missing"
150                  *  to "The hour indicator should be either h or hh for duration"
151                  *  per http://unicode.org/cldr/trac/ticket/10999
152                  */
153                 result.add(new CheckStatus().setCause(this)
154                     .setMainType(CheckStatus.errorType)
155                     .setSubtype(Subtype.invalidDurationUnitPattern)
156                     .setMessage("The hour indicator should be either h or hh for duration."));
157             } else if (durationUnitType.contains("m") && !hasMinuteSymbol) {
158                 result.add(new CheckStatus().setCause(this)
159                     .setMainType(CheckStatus.errorType)
160                     .setSubtype(Subtype.invalidDurationUnitPattern)
161                     .setMessage("The minutes symbol (m or mm) is missing."));
162             } else if (durationUnitType.contains("s") && !hasSecondsSymbol) {
163                 result.add(new CheckStatus().setCause(this)
164                     .setMainType(CheckStatus.errorType)
165                     .setSubtype(Subtype.invalidDurationUnitPattern)
166                     .setMessage("The seconds symbol (ss) is missing."));
167             }
168         }
169         return this;
170     }
171 }
172