1 package org.unicode.cldr.util;
2 
3 import java.util.Collections;
4 import java.util.EnumSet;
5 import java.util.Set;
6 
7 import com.ibm.icu.impl.PatternTokenizer;
8 import com.ibm.icu.text.DateTimePatternGenerator.FormatParser;
9 import com.ibm.icu.text.UnicodeSet;
10 
11 public class DateTimeCanonicalizer {
12 
13     public enum DateTimePatternType {
14         NA, STOCK, AVAILABLE, INTERVAL, GMT;
15 
16         public static final Set<DateTimePatternType> STOCK_AVAILABLE_INTERVAL_PATTERNS = Collections
17             .unmodifiableSet(EnumSet.of(DateTimePatternType.STOCK, DateTimePatternType.AVAILABLE,
18                 DateTimePatternType.INTERVAL));
19 
fromPath(String path)20         public static DateTimePatternType fromPath(String path) {
21             return !path.contains("/dates") ? DateTimePatternType.NA
22                 : path.contains("/pattern") && (path.contains("/dateFormats") || path.contains("/timeFormats") || path.contains("/dateTimeFormatLength"))
23                     ? DateTimePatternType.STOCK
24                     : path.contains("/dateFormatItem") ? DateTimePatternType.AVAILABLE
25                         : path.contains("/intervalFormatItem") ? DateTimePatternType.INTERVAL
26                             : path.contains("/timeZoneNames/hourFormat") ? DateTimePatternType.GMT
27                                 : DateTimePatternType.NA;
28         }
29     }
30 
31     private boolean fixYears = false; // true to fix the years to y
32 
33     private FormatParser formatDateParser = new FormatParser();
34 
35     // TODO make ICU's FormatParser.PatternTokenizer public (and clean up API)
36 
37     private transient PatternTokenizer tokenizer = new PatternTokenizer()
38         .setSyntaxCharacters(new UnicodeSet("[a-zA-Z]"))
39         .setExtraQuotingCharacters(new UnicodeSet("[[[:script=Latn:][:script=Cyrl:]]&[[:L:][:M:]]]"))
40         // .setEscapeCharacters(new UnicodeSet("[^\\u0020-\\u007E]")) // WARNING: DateFormat doesn't accept \\uXXXX
41         .setUsingQuote(true);
42 
DateTimeCanonicalizer(boolean fixYears)43     public DateTimeCanonicalizer(boolean fixYears) {
44         this.fixYears = fixYears;
45     }
46 
getCanonicalDatePattern(String path, String value, DateTimePatternType datetimePatternType)47     public String getCanonicalDatePattern(String path, String value, DateTimePatternType datetimePatternType) {
48         formatDateParser.set(value);
49 
50         // ensure that all y fields are single y, except for the stock short, which can be y or yy.
51         String newValue;
52         if (fixYears) {
53             StringBuilder result = new StringBuilder();
54             for (Object item : formatDateParser.getItems()) {
55                 String itemString = item.toString();
56                 if (item instanceof String) {
57                     result.append(tokenizer.quoteLiteral(itemString));
58                 } else if (!itemString.startsWith("y")
59                     || (datetimePatternType == DateTimePatternType.STOCK
60                         && path.contains("short")
61                         && itemString.equals("yy"))) {
62                     result.append(itemString);
63                 } else {
64                     result.append('y');
65                 }
66             }
67             newValue = result.toString();
68         } else {
69             newValue = formatDateParser.toString();
70         }
71 
72         if (!value.equals(newValue)) {
73             value = newValue;
74         }
75         return value;
76     }
77 }