1 package org.unicode.cldr.test;
2 
3 import java.util.LinkedHashSet;
4 import java.util.List;
5 import java.util.Set;
6 import java.util.regex.Pattern;
7 
8 import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype;
9 import org.unicode.cldr.util.ApproximateWidth;
10 import org.unicode.cldr.util.CLDRFile;
11 import org.unicode.cldr.util.Level;
12 import org.unicode.cldr.util.PatternCache;
13 import org.unicode.cldr.util.RegexLookup;
14 import org.unicode.cldr.util.SupplementalDataInfo;
15 
16 public class CheckWidths extends CheckCLDR {
17     // remember to add this class to the list in CheckCLDR.getCheckAll
18     // to run just this test, on just locales starting with 'nl', use CheckCLDR with -fnl.* -t.*CheckWidths.*
19     private static CoverageLevel2 coverageLevel;
20     private Level requiredLevel;
21 
22     public static final int MAX_COMPONENTS_PER_ANNOTATION = 16;
23 
24     SupplementalDataInfo supplementalData;
25 
26     private static final double EM = ApproximateWidth.getWidth("月");
27 
28     private static final boolean DEBUG = true;
29 
30     private enum Measure {
31         CODE_POINTS, DISPLAY_WIDTH, SET_ELEMENTS
32     }
33 
34     private enum LimitType {
35         MINIMUM, MAXIMUM
36     }
37 
38     private enum Special {
39         NONE, QUOTES, PLACEHOLDERS, NUMBERSYMBOLS, NUMBERFORMAT, BARS
40     }
41 
42     private static final Pattern PLACEHOLDER_PATTERN = PatternCache.get("\\{\\d\\}");
43 
44     private static class Limit {
45         final double warningReference;
46         final double errorReference;
47         final LimitType limit;
48         final Measure measure;
49         final Special special;
50         final String message;
51         final Subtype subtype;
52         final boolean debug;
53 
Limit(double warningReference, double errorReference, Measure measure, LimitType limit, Special special, boolean debug)54         public Limit(double warningReference, double errorReference, Measure measure, LimitType limit, Special special, boolean debug) {
55             this.debug = debug;
56             this.warningReference = warningReference;
57             this.errorReference = errorReference;
58             this.limit = limit;
59             this.measure = measure;
60             this.special = special;
61             switch (limit) {
62             case MINIMUM:
63                 this.subtype = Subtype.valueTooNarrow;
64                 switch (measure) {
65                 case CODE_POINTS:
66                     this.message = "Expected no fewer than {0} character(s), but was {1}.";
67                     break;
68                 case DISPLAY_WIDTH:
69                     this.message = "Too narrow by about {2}% (with common fonts).";
70                     break;
71                 default:
72                     throw new IllegalArgumentException();
73                 }
74                 break;
75             case MAXIMUM:
76                 switch (measure) {
77                 case CODE_POINTS:
78                     this.message = "Expected no more than {0} character(s), but was {1}.";
79                     this.subtype = Subtype.valueTooWide;
80                     break;
81                 case DISPLAY_WIDTH:
82                     this.message = "Too wide by about {2}% (with common fonts).";
83                     this.subtype = Subtype.valueTooWide;
84                     break;
85                 case SET_ELEMENTS:
86                     this.message = "Expected no more than {0} items(s), but was {1}.";
87                     this.subtype = Subtype.tooManyValues;
88                     break;
89                 default:
90                     throw new IllegalArgumentException();
91                 }
92                 break;
93             default:
94                 throw new IllegalArgumentException();
95             }
96         }
97 
Limit(double d, double e, Measure displayWidth, LimitType maximum, Special placeholders)98         public Limit(double d, double e, Measure displayWidth, LimitType maximum, Special placeholders) {
99             this(d, e, displayWidth, maximum, placeholders, false);
100         }
101 
hasProblem(String value, List<CheckStatus> result, CheckCLDR cause, Boolean aliasedAndComprenehsive)102         boolean hasProblem(String value, List<CheckStatus> result, CheckCLDR cause, Boolean aliasedAndComprenehsive) {
103             switch (special) {
104             case NUMBERFORMAT:
105                 String[] values = value.split(";", 2);
106                 // If it's a number format with positive and negative subpatterns, just check the longer one.
107                 value = (values.length == 2 && values[1].length() > values[0].length()) ? values[1] : values[0];
108                 value = value.replace("'", "");
109                 break;
110             case QUOTES:
111                 value = value.replace("'", "");
112                 break;
113             case PLACEHOLDERS:
114                 value = PLACEHOLDER_PATTERN.matcher(value).replaceAll("");
115                 break;
116             case NUMBERSYMBOLS:
117                 value = value.replaceAll("[\u200E\u200F\u061C]", ""); // don't include LRM/RLM/ALM when checking length of number symbols
118                 break;
119             case BARS:
120                 value = value.replaceAll("[^|]", "")+"|"; // Check the number of items by counting separators. Bit of a hack...
121                 break;
122             default:
123             }
124             double valueMeasure = measure == Measure.DISPLAY_WIDTH ? ApproximateWidth.getWidth(value)
125                 : value.codePointCount(0, value.length()) ;
126             CheckStatus.Type errorType = CheckStatus.warningType;
127             switch (limit) {
128             case MINIMUM:
129                 if (valueMeasure >= warningReference) {
130                     return false;
131                 }
132                 if (valueMeasure < errorReference && cause.getPhase() != Phase.BUILD && !aliasedAndComprenehsive) {
133                     errorType = CheckStatus.errorType;
134                 }
135                 break;
136             case MAXIMUM:
137                 if (valueMeasure <= warningReference) {
138                     return false;
139                 }
140                 if (valueMeasure > errorReference && cause.getPhase() != Phase.BUILD && !aliasedAndComprenehsive) {
141                     // Workaround for ST submission phase only per TC discussion 2018-05-30
142                     // Make too many keywords be only a warning until we decide policy (JCE)
143                     if (cause.getPhase() == Phase.SUBMISSION && measure.equals(Measure.SET_ELEMENTS)) {
144                         errorType = CheckStatus.warningType;
145                     } else {
146                         errorType = CheckStatus.errorType;
147                     }
148                 }
149                 break;
150             }
151             // the 115 is so that we don't show small percentages
152             // the /10 ...*10 is to round to multiples of 10% percent
153             double percent = (int) (Math.abs(115 * valueMeasure / warningReference - 100.0d) / 10 + 0.49999d) * 10;
154             result.add(new CheckStatus().setCause(cause)
155                 .setMainType(errorType)
156                 .setSubtype(subtype)
157                 .setMessage(message, warningReference, valueMeasure, percent));
158             return true;
159         }
160     }
161 
162     // WARNING: errors must occur before warnings!!
163     // we allow unusual units and English units to be a little longer
164     static final String ALLOW_LONGER = "(area-acre" +
165         "|area-square-foot" +
166         "|area-square-mile" +
167         "|length-foot" +
168         "|length-inch" +
169         "|length-mile" +
170         "|length-light-year" +
171         "|length-yard" +
172         "|mass-ounce" +
173         "|mass-pound" +
174         "|power-horsepower" +
175         "|pressure-inch-hg" +
176         "|pressure-millimeter-of-mercury" +
177         "|speed-mile-per-hour" +
178         "|temperature-fahrenheit" +
179         "|volume-cubic-mile" +
180         "|acceleration-g-force" +
181         "|speed-kilometer-per-hour" +
182         "|speed-meter-per-second" +
183         "|pressure-pound-per-square-inch" +
184         ")";
185 
186     static final String ALLOW_LONGEST = "consumption-liter-per-100kilometers";
187 
188     static RegexLookup<Limit[]> lookup = new RegexLookup<Limit[]>()
189         .setPatternTransform(RegexLookup.RegexFinderTransformPath)
190         .addVariable("%A", "\"[^\"]+\"")
191         .addVariable("%P", "\"[ap]m\"")
192         .addVariable("%Q", "[^ap].*|[ap][^m].*") // Anything but am or pm
193         .add("//ldml/delimiters/(quotation|alternateQuotation)", new Limit[] {
194             new Limit(1, 1, Measure.CODE_POINTS, LimitType.MAXIMUM, Special.NONE)
195         })
196 
197         // Numeric items should be no more than a single character
198 
199         .add("//ldml/numbers/symbols[@numberSystem=%A]/(decimal|group|minus|percent|perMille|plus)", new Limit[] {
200             new Limit(1, 1, Measure.CODE_POINTS, LimitType.MAXIMUM, Special.NUMBERSYMBOLS)
201         })
202 
203         // Now widths
204         // The following are rough measures, just to check strange cases
205 
206         .add("//ldml/characters/ellipsis[@type=\"(final|initial|medial)\"]", new Limit[] {
207             new Limit(2 * EM, 5 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
208         })
209 
210         .add("//ldml/localeDisplayNames/localeDisplayPattern/", new Limit[] { // {0}: {1}, {0} ({1}), ,
211             new Limit(2 * EM, 3 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
212         })
213 
214         .add("//ldml/listPatterns/listPattern/listPatternPart[@type=%A]", new Limit[] { // {0} and {1}
215             new Limit(5 * EM, 10 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
216         })
217 
218         .add("//ldml/dates/timeZoneNames/fallbackFormat", new Limit[] { // {1} ({0})
219             new Limit(2 * EM, 3 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
220         })
221 
222         .add("//ldml/dates/timeZoneNames/(regionFormat|hourFormat)", new Limit[] { // {0} Time,
223             // +HH:mm;-HH:mm
224             new Limit(10 * EM, 20 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
225         })
226 
227         .add("//ldml/dates/timeZoneNames/(gmtFormat|gmtZeroFormat)", new Limit[] { // GMT{0}, GMT
228             new Limit(5 * EM, 10 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
229         })
230 
231         // Era Abbreviations
232 
233         // Allow longer for Japanese calendar eras
234         .add("//ldml/dates/calendars/calendar[@type=\"japanese\"]/.*/eraAbbr/era[@type=%A]", new Limit[] {
235             new Limit(12 * EM, 16 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
236         })
237         // Allow longer for ROC calendar eras
238         .add("//ldml/dates/calendars/calendar[@type=\"roc\"]/.*/eraAbbr/era[@type=%A]", new Limit[] {
239             new Limit(4 * EM, 8 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
240         })
241         .add("//ldml/dates/calendars/calendar.*/eraAbbr/era[@type=%A]", new Limit[] {
242             new Limit(3 * EM, 6 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
243         })
244 
245         // am/pm abbreviated
246         .add("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=%P]", new Limit[] {
247             new Limit(4 * EM, 6 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
248         })
249         // other day periods abbreviated
250         .add("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=%Q]", new Limit[] {
251             new Limit(8 * EM, 12 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
252         })
253         // am/pm wide
254         .add("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=%P]", new Limit[] {
255             new Limit(5 * EM, 10 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
256         })
257         // other day periods wide
258         .add("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=%Q]", new Limit[] {
259             new Limit(10 * EM, 20 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
260         })
261 
262         // Narrow items
263 
264         .add("//ldml/dates/calendars/calendar.*[@type=\"narrow\"](?!/cyclic|/dayPeriod|/monthPattern)", new Limit[] {
265             new Limit(1.5 * EM, 2.25 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
266         })
267         // \"(?!am|pm)[^\"]+\"\\
268 
269         // Compact number formats
270 
271         .add("//ldml/numbers/decimalFormats[@numberSystem=%A]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=%A]/pattern[@type=\"1",
272             new Limit[] {
273                 new Limit(4 * EM, 5 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NUMBERFORMAT)
274         })
275         // Catch -future/past Narrow units  and allow much wider values
276         .add("//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"[^\"]+-(future|past)\"]/unitPattern", new Limit[] {
277             new Limit(10 * EM, 15 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
278         })
279         // Catch widest units and allow a bit wider
280         .add("//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"" + ALLOW_LONGEST + "\"]/unitPattern", new Limit[] {
281             new Limit(5 * EM, 6 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
282         })
283         // Catch special units and allow a bit wider
284         .add("//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"" + ALLOW_LONGER + "\"]/unitPattern", new Limit[] {
285             new Limit(4 * EM, 5 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
286         })
287         // Narrow units
288         .add("//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=%A]/unitPattern", new Limit[] {
289             new Limit(3 * EM, 4 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
290         })
291         // Short units
292         .add("//ldml/units/unitLength[@type=\"short\"]/unit[@type=%A]/unitPattern", new Limit[] {
293             new Limit(5 * EM, 10 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
294         })
295 
296         // Currency Symbols
297         .add("//ldml/numbers/currencies/currency[@type=%A]/symbol", new Limit[] {
298             new Limit(3 * EM, 5 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
299         })
300 
301         // "grinning cat face with smiling eyes" should be normal max ~= 160 em
302         // emoji names (not keywords)
303         .add("//ldml/annotations/annotation[@cp=%A][@type=%A]", new Limit[] {
304             new Limit(20 * EM, 100 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE),
305         })
306         .add("//ldml/annotations/annotation[@cp=%A]", new Limit[] {
307             new Limit(5, MAX_COMPONENTS_PER_ANNOTATION, Measure.SET_ELEMENTS, LimitType.MAXIMUM, Special.BARS) // Allow up to 5 with no warning, up to 7 with no error.
308         })
309         ;
310 
311     static {
312         System.out.println("EMs: " + ApproximateWidth.getWidth("grinning cat face with smiling eyes"));
313     }
314 
315     Set<Limit> found = new LinkedHashSet<Limit>();
316 
317     @Override
handleCheck(String path, String fullPath, String value, Options options, List<CheckStatus> result)318     public CheckCLDR handleCheck(String path, String fullPath, String value, Options options, List<CheckStatus> result) {
319         if (value == null) {
320             return this; // skip
321         }
322         //        String testPrefix = "//ldml/units/unitLength[@type=\"narrow\"]";
323         //        if (path.startsWith(testPrefix)) {
324         //            int i = 0;
325         //        }
326         // Limits item0 =
327         // lookup.get("//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000000000\"][@count=\"other\"]");
328         // item0.check("123456789", result, this);
329 
330         Limit[] items = lookup.get(path);
331         CLDRFile.Status status = new CLDRFile.Status();
332         this.getCldrFileToCheck().getSourceLocaleID(path, status);
333         // This was put in specifically to deal with the fact that we added a bunch of new units in CLDR 26
334         // and didn't put the narrow forms of them into modern coverage.  If/when the narrow forms of all units
335         // are modern coverage, then we can safely remove the aliasedAndComprehensive check.  Right now if an
336         // item is aliased and coverage is comprehensive, then it can't generate anything worse than a warning.
337         Boolean aliasedAndComprenehsive = (coverageLevel.getLevel(path).compareTo(Level.COMPREHENSIVE) == 0)
338             && (status.pathWhereFound.compareTo(path) != 0);
339         if (items != null) {
340             for (Limit item : items) {
341                 if (item.hasProblem(value, result, this, aliasedAndComprenehsive)) {
342                     if (DEBUG && !found.contains(item)) {
343                         found.add(item);
344                     }
345                     break; // only one error per item
346                 }
347             }
348         }
349         return this;
350     }
351 
setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors)352     public CheckCLDR setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options,
353         List<CheckStatus> possibleErrors) {
354         final String localeID = cldrFileToCheck.getLocaleID();
355         supplementalData = SupplementalDataInfo.getInstance(cldrFileToCheck.getSupplementalDirectory());
356         coverageLevel = CoverageLevel2.getInstance(supplementalData, localeID);
357 
358         super.setCldrFileToCheck(cldrFileToCheck, options, possibleErrors);
359         return this;
360     }
361 }
362