/*
 **********************************************************************
 * Copyright (c) 2002-2013, International Business Machines
 * Corporation and others. All Rights Reserved.
 **********************************************************************
 * Author: John Emmons
 **********************************************************************
 */
package org.unicode.cldr.posix;

import java.text.StringCharacterIterator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;

import org.unicode.cldr.util.CLDRFile;

import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;

/**
 * Static helpers that turn characters, date/time patterns, grouping patterns and yes/no strings
 * into the textual forms used when writing POSIX locale source.
 *
 * <p>The class keeps two pieces of mutable static state: the target repertoire (see
 * {@link #setRepertoire(UnicodeSet)}) and the character fallback data (see
 * {@link #setCharFallback(CLDRFile)}). Both are consulted by {@link #POSIXCharName(int)}.
 */
public class POSIXUtilities {

    /** Characters considered available in the target codeset; defaults to all of Unicode. */
    private static UnicodeSet repertoire = new UnicodeSet(0x0000, 0x10FFFF);

    /** Character fallback data used to look up substitutes; may be null until set. */
    private static CLDRFile char_fallbk;

    /** Names for U+0000..U+001F and U+007F..U+009F, keyed by code point. */
    private static final Map<Integer, String> controlCodeNames = new HashMap<Integer, String>();

    /**
     * POSIX date / time field descriptors and their corresponding representations in LDML.
     * Column 0 is the LDML field, column 1 the default POSIX descriptor, column 2 the descriptor
     * used when alternate digits are requested, and column 3 the descriptor used for the Solaris
     * variant. Entries are tried in order, so a longer field must precede its own prefixes.
     */
    private static final String[][] FIELD_DESCRIPTORS = {
        { "/d/", "<SOLIDUS>%d<SOLIDUS>", "<SOLIDUS>%d<SOLIDUS>", "<SOLIDUS>%d<SOLIDUS>" },
        { "/", "<SOLIDUS>", "<SOLIDUS>", "<SOLIDUS>" },
        { "DDD", "%j", "%j", "%j" },
        { "EEEE", "%A", "%A", "%A" },
        { "EEE", "%a", "%a", "%a" },
        { "G", "%N", "%N", "%N" },
        { "HH", "%H", "%OH", "%H" },
        { "H", "%H", "%OH", "%k" }, // solaris defines exact mapping for "H"
        { "KK", "%I", "%OI", "%I" },
        { "K", "%I", "%OI", "%l" },
        { "MMMM", "%B", "%B", "%B" },
        { "MMM", "%b", "%b", "%b" },
        { "MM", "%m", "%Om", "%m" },
        { "M", "%m", "%Om", "%m" },
        { "VVVV", "%Z", "%Z", "%Z" },
        { "V", "%Z", "%Z", "%Z" },
        { "a", "%p", "%p", "%p" },
        { "dd", "%d", "%Od", "%d" },
        { "d", "%e", "%Oe", "%e" },
        { "hh", "%I", "%OI", "%I" },
        { "h", "%I", "%OI", "%l" }, // solaris defines exact mapping for "h"
        { "kk", "%H", "%OH", "%H" },
        { "k", "%H", "%OH", "%k" },
        { "mm", "%M", "%OM", "%M" },
        { "m", "%M", "%OM", "%M" },
        { "vvvv", "%Z", "%Z", "%Z" },
        { "v", "%Z", "%Z", "%Z" },
        { "yyyy", "%Y", "%Oy", "%Y" },
        { "yy", "%y", "%Oy", "%y" },
        { "y", "%Y", "%Oy", "%Y" },
        { "zzzz", "%Z", "%Z", "%Z" },
        { "zzz", "%Z", "%Z", "%Z" },
        { "zz", "%Z", "%Z", "%Z" },
        { "z", "%Z", "%Z", "%Z" },
        { "ss", "%S", "%OS", "%S" },
        { "s", "%S", "%OS", "%S" }
    };

    // Populate the control code table once, at class initialization, instead of lazily on the
    // first lookup; this removes the unsynchronized "is it filled yet?" check from the hot path.
    static {
        initControlCodeNames();
    }

    // Since UCharacter.getExtendedName() in ICU doesn't provide the names for control characters
    // we have to force the issue here. Required elements for the POSIX portable character set will be
    // used when necessary (in lower case). Otherwise, the name from the Unicode data file is used.
    private static void initControlCodeNames() {
        controlCodeNames.put(0x0000, "NULL");
        controlCodeNames.put(0x0001, "START_OF_HEADING");
        controlCodeNames.put(0x0002, "START_OF_TEXT");
        controlCodeNames.put(0x0003, "END_OF_TEXT");
        controlCodeNames.put(0x0004, "END_OF_TRANSMISSION");
        controlCodeNames.put(0x0005, "ENQUIRY");
        controlCodeNames.put(0x0006, "ACKNOWLEDGE");
        controlCodeNames.put(0x0007, "ALERT");
        controlCodeNames.put(0x0008, "BACKSPACE");
        controlCodeNames.put(0x0009, "tab"); // Required element for POSIX portable character set
        controlCodeNames.put(0x000A, "newline"); // Required element for POSIX portable character set
        controlCodeNames.put(0x000B, "vertical-tab"); // Required element for POSIX portable character set
        controlCodeNames.put(0x000C, "form-feed"); // Required element for POSIX portable character set
        controlCodeNames.put(0x000D, "carriage-return"); // Required element for POSIX portable character set
        controlCodeNames.put(0x000E, "SHIFT_OUT");
        controlCodeNames.put(0x000F, "SHIFT_IN");
        controlCodeNames.put(0x0010, "DATA_LINK_ESCAPE");
        controlCodeNames.put(0x0011, "DEVICE_CONTROL_ONE");
        controlCodeNames.put(0x0012, "DEVICE_CONTROL_TWO");
        controlCodeNames.put(0x0013, "DEVICE_CONTROL_THREE");
        controlCodeNames.put(0x0014, "DEVICE_CONTROL_FOUR");
        controlCodeNames.put(0x0015, "NEGATIVE_ACKNOWLEDGE");
        controlCodeNames.put(0x0016, "SYNCHRONOUS_IDLE");
        controlCodeNames.put(0x0017, "END_OF_TRANSMISSION_BLOCK");
        controlCodeNames.put(0x0018, "CANCEL");
        controlCodeNames.put(0x0019, "END_OF_MEDIUM");
        controlCodeNames.put(0x001A, "SUBSTITUTE");
        controlCodeNames.put(0x001B, "ESCAPE");
        controlCodeNames.put(0x001C, "INFORMATION_SEPARATOR_FOUR");
        controlCodeNames.put(0x001D, "INFORMATION_SEPARATOR_THREE");
        controlCodeNames.put(0x001E, "INFORMATION_SEPARATOR_TWO");
        controlCodeNames.put(0x001F, "INFORMATION_SEPARATOR_ONE");
        controlCodeNames.put(0x007F, "DELETE");
        controlCodeNames.put(0x0080, "CONTROL-0080");
        controlCodeNames.put(0x0081, "CONTROL-0081");
        controlCodeNames.put(0x0082, "BREAK_PERMITTED_HERE");
        controlCodeNames.put(0x0083, "NO_BREAK_HERE");
        controlCodeNames.put(0x0084, "CONTROL-0084");
        controlCodeNames.put(0x0085, "NEXT_LINE");
        controlCodeNames.put(0x0086, "START_OF_SELECTED_AREA");
        controlCodeNames.put(0x0087, "END_OF_SELECTED_AREA");
        controlCodeNames.put(0x0088, "CHARACTER_TABULATION_SET");
        controlCodeNames.put(0x0089, "CHARACTER_TABULATION_WITH_JUSTIFICATION");
        controlCodeNames.put(0x008A, "LINE_TABULATION_SET");
        controlCodeNames.put(0x008B, "PARTIAL_LINE_FORWARD");
        controlCodeNames.put(0x008C, "PARTIAL_LINE_BACKWARD");
        controlCodeNames.put(0x008D, "REVERSE_LINE_FEED");
        controlCodeNames.put(0x008E, "SINGLE_SHIFT_TWO");
        controlCodeNames.put(0x008F, "SINGLE_SHIFT_THREE");
        controlCodeNames.put(0x0090, "DEVICE_CONTROL_STRING");
        controlCodeNames.put(0x0091, "PRIVATE_USE_ONE");
        controlCodeNames.put(0x0092, "PRIVATE_USE_TWO");
        controlCodeNames.put(0x0093, "SET_TRANSMIT_STATE");
        controlCodeNames.put(0x0094, "CANCEL_CHARACTER");
        controlCodeNames.put(0x0095, "MESSAGE_WAITING");
        controlCodeNames.put(0x0096, "START_OF_GUARDED_AREA");
        controlCodeNames.put(0x0097, "END_OF_GUARDED_AREA");
        controlCodeNames.put(0x0098, "START_OF_STRING");
        controlCodeNames.put(0x0099, "CONTROL-0099");
        controlCodeNames.put(0x009A, "SINGLE_CHARACTER_INTRODUCER");
        controlCodeNames.put(0x009B, "CONTROL_SEQUENCE_INTRODUCER");
        controlCodeNames.put(0x009C, "STRING_TERMINATOR");
        controlCodeNames.put(0x009D, "OPERATING_SYSTEM_COMMAND");
        controlCodeNames.put(0x009E, "PRIVACY_MESSAGE");
        controlCodeNames.put(0x009F, "APPLICATION_PROGRAM_COMMAND");
    }

    /**
     * Replaces the set of characters treated as available in the target codeset.
     *
     * @param rep the new repertoire; stored by reference
     */
    public static void setRepertoire(UnicodeSet rep) {
        repertoire = rep;
    }

    /**
     * Sets the file that {@link #POSIXCharName(int)} searches for character substitutes.
     *
     * @param fallbk the character fallback data; stored by reference
     */
    public static void setCharFallback(CLDRFile fallbk) {
        char_fallbk = fallbk;
    }

    /**
     * Builds a single name for a multi-character sequence: the per-character names from
     * {@link #POSIXCharName(String)} with every {@code "><"} boundary replaced by {@code "-"}.
     *
     * @param s the character sequence
     * @return the joined name
     */
    public static String POSIXContraction(String s) {
        return POSIXCharName(s).replace("><", "-");
    }

    /**
     * Concatenates {@link #POSIXCharName(int)} for every code point of {@code s}.
     *
     * @param s the string to convert
     * @return the concatenated names; empty when {@code s} is empty
     */
    public static String POSIXCharName(String s) {
        StringBuilder result = new StringBuilder();
        int cp;
        for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
            cp = UTF16.charAt(s, i);
            result.append(POSIXCharName(cp));
        }
        return result.toString();
    }

    /**
     * Returns the bracketed name for a single code point.
     *
     * <p>ASCII letters map to themselves, ASCII digits to their lower-cased Unicode name without
     * the {@code DIGIT_} prefix, U+0020 to {@code space}, control codes to the entries of the
     * control code table, and everything else to the upper-cased Unicode extended name with
     * spaces turned into underscores. Anything from {@code "_("} onwards is dropped.
     *
     * <p>If the code point is not in the current repertoire, a warning is printed to
     * {@code System.out} and the fallback data is searched for the first substitute whose
     * characters are all in the repertoire; when one is found its name is returned instead.
     *
     * @param cp the code point
     * @return the name wrapped in {@code <} and {@code >}, or the name of its substitute
     */
    public static String POSIXCharName(int cp) {
        StringBuilder result = new StringBuilder("<");
        if ((cp >= 0x0041 && cp <= 0x005A) || (cp >= 0x0061 && cp <= 0x007A)) { // Latin letters
            result.append((char) cp);
        } else if (cp >= 0x0030 && cp <= 0x0039) { // digits
            String n = UCharacter.getExtendedName(cp);
            // Locale.ROOT: the default locale must not influence the generated identifier
            // (e.g. a Turkish default would turn "FIVE" into "f\u0131ve").
            result.append(n.replace(' ', '_').replace("DIGIT_", "").toLowerCase(Locale.ROOT));
        } else if ((cp >= 0x0000 && cp <= 0x001F) || (cp >= 0x007F && cp <= 0x009F)) { // Controls
            result.append(controlCodeNames.get(cp));
        } else if (cp == 0x0020) {
            result.append("space"); // Required elements for POSIX portable character set
        } else { // everything else
            result.append(extendedNameAsIdentifier(cp));
        }
        truncateAtParenthetical(result);
        result.append(">");

        if (!repertoire.contains(cp)) {
            System.out.println("WARNING: character " + result.toString() + " is not in the target codeset.");

            String substituteString = findSubstitute(cp);
            if (substituteString != null) {
                // Every character of the substitute is in the repertoire, so this call cannot
                // recurse into the warning path again.
                String substituteName = POSIXUtilities.POSIXCharName(substituteString);
                System.out.println(" Substituted: " + substituteName);
                return substituteName;
            }
            System.out.println(" No acceptable substitute found. The resulting locale source may not compile.");
        }

        return result.toString();
    }

    /**
     * Searches the fallback data for the first substitute of {@code cp} that consists only of
     * characters in the repertoire.
     *
     * @return the substitute, or null when none qualifies or no fallback data has been set
     */
    private static String findSubstitute(int cp) {
        if (char_fallbk == null) {
            // setCharFallback was never called: report "no substitute" rather than fail.
            return null;
        }
        String searchLocation = "//supplementalData/characters/character-fallback/character[@value=\""
            + UCharacter.toString(cp) + "\"]/substitute";
        for (Iterator<String> it = char_fallbk.iterator(searchLocation, char_fallbk.getComparator()); it.hasNext();) {
            String candidate = char_fallbk.getStringValue(it.next());
            if (repertoire.containsAll(candidate)) {
                return candidate;
            }
        }
        return null;
    }

    /** Returns the extended name of {@code cp}, upper-cased, with spaces as underscores and angle brackets removed. */
    private static String extendedNameAsIdentifier(int cp) {
        String n = UCharacter.getExtendedName(cp);
        return n.replace(' ', '_').replace("<", "").replace(">", "").toUpperCase(Locale.ROOT);
    }

    /** Cuts {@code name} at the first {@code "_("}, if present. */
    private static void truncateAtParenthetical(StringBuilder name) {
        int i = name.indexOf("_(");
        if (i >= 0) {
            name.setLength(i);
        }
    }

    /**
     * Concatenates {@link #POSIXCharFullName(int)} for every code point of {@code s}.
     *
     * @param s the string to convert
     * @return the concatenated names; empty when {@code s} is empty
     */
    public static String POSIXCharFullName(String s) {
        StringBuilder result = new StringBuilder();
        int cp;
        for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
            cp = UTF16.charAt(s, i);
            result.append(POSIXCharFullName(cp));
        }
        return result.toString();
    }

    /**
     * Returns the bracketed, upper-cased Unicode extended name of {@code cp}, with spaces turned
     * into underscores and anything from {@code "_("} onwards dropped. Unlike
     * {@link #POSIXCharName(int)} there are no special cases and the repertoire is not consulted.
     *
     * @param cp the code point
     * @return the name wrapped in {@code <} and {@code >}
     */
    public static String POSIXCharFullName(int cp) {
        StringBuilder result = new StringBuilder("<");
        result.append(extendedNameAsIdentifier(cp));
        truncateAtParenthetical(result);
        result.append(">");
        return result.toString();
    }

    /**
     * POSIXCharNameNP replaces all non-portable characters with their expanded POSIX character name.
     * Code points up to U+007F are copied through unchanged; all others are replaced by
     * {@link #POSIXCharName(int)}.
     *
     * @param s the string to convert
     * @return the converted string
     */
    public static String POSIXCharNameNP(String s) {
        StringBuilder result = new StringBuilder();
        int cp;
        for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
            cp = UTF16.charAt(s, i);
            if (cp <= 0x007F) {
                result.append((char) cp);
            } else {
                result.append(POSIXCharName(cp));
            }
        }
        return result.toString();
    }

    /**
     * Rewrites an LDML date/time pattern using POSIX field descriptors.
     *
     * <p>Outside of single-quoted sections, each recognized LDML field is replaced by its POSIX
     * descriptor. A doubled single quote yields one literal quote; a lone single quote toggles
     * quoting and is itself dropped. Every other character is copied through.
     *
     * @param s the LDML pattern
     * @param UseAltDigits when true, the alternate-digit descriptors are used
     * @param variant consulted only when {@code UseAltDigits} is false and a field matches; its
     *            platform selects the Solaris descriptors when equal to
     *            {@code POSIXVariant.SOLARIS}
     * @return the converted pattern
     */
    public static String POSIXDateTimeFormat(String s, boolean UseAltDigits, POSIXVariant variant) {
        boolean inQuotes = false;
        StringBuilder result = new StringBuilder();

        int pos = 0;
        while (pos < s.length()) {
            String[] field = inQuotes ? null : fieldDescriptorAt(s, pos);
            if (field != null) {
                if (UseAltDigits) {
                    result.append(field[2]);
                } else if (variant.platform.equals(POSIXVariant.SOLARIS)) {
                    result.append(field[3]);
                } else {
                    result.append(field[1]);
                }
                pos += field[0].length();
                continue;
            }

            char c = s.charAt(pos);
            if (c == '\'') {
                if (pos < (s.length() - 1) && s.charAt(pos + 1) == '\'') {
                    // '' is an escaped quote: emit one and skip its partner.
                    result.append('\'');
                    pos++;
                } else {
                    inQuotes = !inQuotes;
                }
            } else {
                result.append(c);
            }
            pos++;
        }
        return result.toString();
    }

    /** Returns the first table entry whose LDML field starts at {@code pos} in {@code s}, or null. */
    private static String[] fieldDescriptorAt(String s, int pos) {
        for (int i = 0; i < FIELD_DESCRIPTORS.length; i++) {
            if (s.startsWith(FIELD_DESCRIPTORS[i][0], pos)) {
                return FIELD_DESCRIPTORS[i];
            }
        }
        return null;
    }

    /**
     * Parses a decimal pattern to get the number of digits to use in the POSIX style grouping.
     *
     * <p>Working leftwards from the first {@code "."}, the distance between successive commas is
     * emitted, separated by {@code ";"}; for example {@code "#,##0.###"} gives {@code "3"} and
     * {@code "#,##,##0.###"} gives {@code "3;2"}. A comma at index 0 is not counted.
     *
     * @param grouping_pattern the decimal pattern
     * @return the grouping sizes, or {@code "-1"} when the pattern has no {@code "."} or no
     *         grouping separator is found
     */
    public static String POSIXGrouping(String grouping_pattern) {
        int i = grouping_pattern.indexOf(".");
        if (i < 0) {
            return "-1";
        }

        StringBuilder result = new StringBuilder();
        int j;
        while ((j = grouping_pattern.lastIndexOf(",", i - 1)) > 0) {
            if (result.length() > 0) {
                result.append(';');
            }
            result.append(i - j - 1);
            i = j;
        }

        return result.length() == 0 ? "-1" : result.toString();
    }

    /**
     * Tests whether {@code b} lies strictly between {@code a} and {@code c}, in either order.
     *
     * @return true when {@code a < b < c} or {@code c < b < a}
     */
    public static boolean isBetween(int a, int b, int c) {
        return ((a < b && b < c) || (c < b && b < a));
    }

    /**
     * Builds a regular expression from a colon-separated list of yes/no strings.
     *
     * <p>Only non-empty elements that are already entirely lower case are used. Each character
     * that has a distinct upper-case form becomes a {@code [xX]} class; if the character at
     * index 1 is such a character, it and everything after it are wrapped in an optional group.
     * For example {@code "yes:y"} gives {@code "^(([yY]([eE][sS])?)|([yY]))"}. The result is
     * passed through {@link #POSIXCharNameNP(String)}.
     *
     * <p>Note: the closing {@code "))"} is appended unconditionally, so when no element
     * qualifies the result is just {@code "))"}.
     *
     * @param s the colon-separated yes/no strings
     * @return the expression
     */
    public static String POSIXYesNoExpr(String s) {
        StringBuilder result = new StringBuilder();
        String[] yesNoElements = s.split(":");
        for (int i = 0; i < yesNoElements.length; i++) {
            String cur = yesNoElements[i];
            if (cur.length() >= 1 && cur.toLowerCase(Locale.ROOT).equals(cur)) {
                if (result.length() > 0) {
                    result.append(")|(");
                } else {
                    result.append("^((");
                }

                StringCharacterIterator si = new StringCharacterIterator(cur);
                boolean optLastChars = false;
                for (char c = si.first(); c != StringCharacterIterator.DONE; c = si.next()) {
                    if (c != Character.toUpperCase(c)) {
                        if (si.getIndex() == 1) {
                            result.append("(");
                            optLastChars = true;
                        }
                        result.append("[");
                        result.append(c);
                        result.append(Character.toUpperCase(c));
                        result.append("]");
                    } else {
                        result.append(c);
                    }
                }
                if (optLastChars) {
                    result.append(")?");
                }
            }
        }
        result.append("))");
        return (POSIXCharNameNP(result.toString()));
    }
}