1 /*
2  **********************************************************************
3  * Copyright (c) 2002-2013, International Business Machines
4  * Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  * Author: John Emmons
7  **********************************************************************
8  */
9 package org.unicode.cldr.posix;
10 
import java.text.StringCharacterIterator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;

import org.unicode.cldr.util.CLDRFile;

import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
21 
22 public class POSIXUtilities {
23 
24     private static UnicodeSet repertoire = new UnicodeSet(0x0000, 0x10FFFF);
25     private static CLDRFile char_fallbk;
26     private static Map<Integer, String> controlCodeNames = new HashMap<Integer, String>();
27 
28     // Since UCharacter.getExtendedName() in ICU doesn't provide the names for control characters
29     // we have to force the issue here. Required elements for the POSIX portable character set will be
30     // used when necessary (in lower case). Otherwise, the name from the Unicode data file is used.
initControlCodeNames()31     private static void initControlCodeNames() {
32         controlCodeNames.put(0x0000, "NULL");
33         controlCodeNames.put(0x0001, "START_OF_HEADING");
34         controlCodeNames.put(0x0002, "START_OF_TEXT");
35         controlCodeNames.put(0x0003, "END_OF_TEXT");
36         controlCodeNames.put(0x0004, "END_OF_TRANSMISSION");
37         controlCodeNames.put(0x0005, "ENQUIRY");
38         controlCodeNames.put(0x0006, "ACKNOWLEDGE");
39         controlCodeNames.put(0x0007, "ALERT");
40         controlCodeNames.put(0x0008, "BACKSPACE");
41         controlCodeNames.put(0x0009, "tab"); // Required element for POSIX portable character set
42         controlCodeNames.put(0x000A, "newline"); // Required element for POSIX portable character set
43         controlCodeNames.put(0x000B, "vertical-tab"); // Required element for POSIX portable character set
44         controlCodeNames.put(0x000C, "form-feed"); // Required element for POSIX portable character set
45         controlCodeNames.put(0x000D, "carriage-return"); // Required element for POSIX portable character set
46         controlCodeNames.put(0x000E, "SHIFT_OUT");
47         controlCodeNames.put(0x000F, "SHIFT_IN");
48         controlCodeNames.put(0x0010, "DATA_LINK_ESCAPE");
49         controlCodeNames.put(0x0011, "DEVICE_CONTROL_ONE");
50         controlCodeNames.put(0x0012, "DEVICE_CONTROL_TWO");
51         controlCodeNames.put(0x0013, "DEVICE_CONTROL_THREE");
52         controlCodeNames.put(0x0014, "DEVICE_CONTROL_FOUR");
53         controlCodeNames.put(0x0015, "NEGATIVE_ACKNOWLEDGE");
54         controlCodeNames.put(0x0016, "SYNCHRONOUS_IDLE");
55         controlCodeNames.put(0x0017, "END_OF_TRANSMISSION_BLOCK");
56         controlCodeNames.put(0x0018, "CANCEL");
57         controlCodeNames.put(0x0019, "END_OF_MEDIUM");
58         controlCodeNames.put(0x001A, "SUBSTITUTE");
59         controlCodeNames.put(0x001B, "ESCAPE");
60         controlCodeNames.put(0x001C, "INFORMATION_SEPARATOR_FOUR");
61         controlCodeNames.put(0x001D, "INFORMATION_SEPARATOR_THREE");
62         controlCodeNames.put(0x001E, "INFORMATION_SEPARATOR_TWO");
63         controlCodeNames.put(0x001F, "INFORMATION_SEPARATOR_ONE");
64         controlCodeNames.put(0x007F, "DELETE");
65         controlCodeNames.put(0x0080, "CONTROL-0080");
66         controlCodeNames.put(0x0081, "CONTROL-0081");
67         controlCodeNames.put(0x0082, "BREAK_PERMITTED_HERE");
68         controlCodeNames.put(0x0083, "NO_BREAK_HERE");
69         controlCodeNames.put(0x0084, "CONTROL-0084");
70         controlCodeNames.put(0x0085, "NEXT_LINE");
71         controlCodeNames.put(0x0086, "START_OF_SELECTED_AREA");
72         controlCodeNames.put(0x0087, "END_OF_SELECTED_AREA");
73         controlCodeNames.put(0x0088, "CHARACTER_TABULATION_SET");
74         controlCodeNames.put(0x0089, "CHARACTER_TABULATION_WITH_JUSTIFICATION");
75         controlCodeNames.put(0x008A, "LINE_TABULATION_SET");
76         controlCodeNames.put(0x008B, "PARTIAL_LINE_FORWARD");
77         controlCodeNames.put(0x008C, "PARTIAL_LINE_BACKWARD");
78         controlCodeNames.put(0x008D, "REVERSE_LINE_FEED");
79         controlCodeNames.put(0x008E, "SINGLE_SHIFT_TWO");
80         controlCodeNames.put(0x008F, "SINGLE_SHIFT_THREE");
81         controlCodeNames.put(0x0090, "DEVICE_CONTROL_STRING");
82         controlCodeNames.put(0x0091, "PRIVATE_USE_ONE");
83         controlCodeNames.put(0x0092, "PRIVATE_USE_TWO");
84         controlCodeNames.put(0x0093, "SET_TRANSMIT_STATE");
85         controlCodeNames.put(0x0094, "CANCEL_CHARACTER");
86         controlCodeNames.put(0x0095, "MESSAGE_WAITING");
87         controlCodeNames.put(0x0096, "START_OF_GUARDED_AREA");
88         controlCodeNames.put(0x0097, "END_OF_GUARDED_AREA");
89         controlCodeNames.put(0x0098, "START_OF_STRING");
90         controlCodeNames.put(0x0099, "CONTROL-0099");
91         controlCodeNames.put(0x009A, "SINGLE_CHARACTER_INTRODUCER");
92         controlCodeNames.put(0x009B, "CONTROL_SEQUENCE_INTRODUCER");
93         controlCodeNames.put(0x009C, "STRING_TERMINATOR");
94         controlCodeNames.put(0x009D, "OPERATING_SYSTEM_COMMAND");
95         controlCodeNames.put(0x009E, "PRIVACY_MESSAGE");
96         controlCodeNames.put(0x009F, "APPLICATION_PROGRAM_COMMAND");
97     }
98 
setRepertoire(UnicodeSet rep)99     public static void setRepertoire(UnicodeSet rep) {
100         repertoire = rep;
101     }
102 
setCharFallback(CLDRFile fallbk)103     public static void setCharFallback(CLDRFile fallbk) {
104         char_fallbk = fallbk;
105     }
106 
POSIXContraction(String s)107     public static String POSIXContraction(String s) {
108         int cp;
109         StringBuffer result = new StringBuffer();
110         for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
111             cp = UTF16.charAt(s, i);
112             result.append(POSIXCharName(cp));
113         }
114         return result.toString().replaceAll("><", "-");
115     }
116 
POSIXCharName(String s)117     public static String POSIXCharName(String s) {
118         int cp;
119         StringBuffer result = new StringBuffer();
120         for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
121             cp = UTF16.charAt(s, i);
122             result.append(POSIXCharName(cp));
123         }
124         return result.toString();
125     }
126 
POSIXCharName(int cp)127     public static String POSIXCharName(int cp) {
128 
129         StringBuffer result = new StringBuffer();
130         result.append("<");
131         if ((cp >= 0x0041 && cp <= 0x005A) ||
132             (cp >= 0x0061 && cp <= 0x007A)) // Latin letters
133             result.append((char) cp);
134         else if (cp >= 0x0030 && cp <= 0x0039) // digits
135         {
136             String n = UCharacter.getExtendedName(cp);
137             result.append(n.replaceAll(" ", "_").replaceAll("DIGIT_", "").toLowerCase());
138         } else if ((cp >= 0x0000 && cp <= 0x001F) || (cp >= 0x007F && cp <= 0x009F)) { // Controls
139             if (controlCodeNames.isEmpty()) {
140                 initControlCodeNames();
141             }
142             result.append(controlCodeNames.get(cp));
143         } else if (cp == 0x0020)
144             result.append("space"); // Required elements for POSIX portable character set
145         else // everything else
146         {
147             String n = UCharacter.getExtendedName(cp);
148             result.append(n.replaceAll(" ", "_").replaceAll("<", "").replaceAll(">", "").toUpperCase());
149         }
150 
151         int i = result.indexOf("_(");
152         if (i >= 0)
153             result.setLength(i);
154 
155         result.append(">");
156 
157         if (!repertoire.contains(cp)) {
158             System.out.println("WARNING: character " + result.toString() + " is not in the target codeset.");
159 
160             String substituteString = "";
161             boolean SubFound = false;
162             String SearchLocation = "//supplementalData/characters/character-fallback/character[@value=\""
163                 + UCharacter.toString(cp) + "\"]/substitute";
164 
165             for (Iterator<String> it = char_fallbk.iterator(SearchLocation, char_fallbk.getComparator()); it.hasNext()
166                 && !SubFound;) {
167                 String path = it.next();
168                 substituteString = char_fallbk.getStringValue(path);
169                 if (repertoire.containsAll(substituteString))
170                     SubFound = true;
171             }
172 
173             if (SubFound) {
174                 System.out.println("	Substituted: " + POSIXUtilities.POSIXCharName(substituteString));
175                 result = new StringBuffer(POSIXUtilities.POSIXCharName(substituteString));
176             } else
177                 System.out.println("	No acceptable substitute found. The resulting locale source may not compile.");
178         }
179 
180         return result.toString();
181     }
182 
POSIXCharFullName(String s)183     public static String POSIXCharFullName(String s) {
184         int cp;
185         StringBuffer result = new StringBuffer();
186         for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
187             cp = UTF16.charAt(s, i);
188             result.append(POSIXCharFullName(cp));
189         }
190         return result.toString();
191     }
192 
POSIXCharFullName(int cp)193     public static String POSIXCharFullName(int cp) {
194         StringBuffer result = new StringBuffer();
195         result.append("<");
196         String n = UCharacter.getExtendedName(cp);
197         result.append(n.replaceAll(" ", "_").replaceAll("<", "").replaceAll(">", "").toUpperCase());
198 
199         int i = result.indexOf("_(");
200         if (i >= 0)
201             result.setLength(i);
202 
203         result.append(">");
204 
205         return result.toString();
206     }
207 
208     // POSIXCharNameNP replaces all non-portable characters with their expanded POSIX character name.
209 
POSIXCharNameNP(String s)210     public static String POSIXCharNameNP(String s) {
211         int cp;
212         StringBuffer result = new StringBuffer();
213         for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
214             cp = UTF16.charAt(s, i);
215             if (cp <= 0x007F)
216                 result.append((char) cp);
217             else
218                 result.append(POSIXCharName(cp));
219         }
220         return result.toString();
221     }
222 
POSIXDateTimeFormat(String s, boolean UseAltDigits, POSIXVariant variant)223     public static String POSIXDateTimeFormat(String s, boolean UseAltDigits, POSIXVariant variant) {
224 
225         // This is an array of the POSIX date / time field descriptors and their corresponding representations
226         // in LDML. We use these to replace the LDML fields with POSIX field descriptors.
227 
228         String[][] FieldDescriptors = {
229             { "/d/", "<SOLIDUS>%d<SOLIDUS>", "<SOLIDUS>%d<SOLIDUS>", "<SOLIDUS>%d<SOLIDUS>" },
230             { "/", "<SOLIDUS>", "<SOLIDUS>", "<SOLIDUS>" },
231             { "DDD", "%j", "%j", "%j" },
232             { "EEEE", "%A", "%A", "%A" },
233             { "EEE", "%a", "%a", "%a" },
234             { "G", "%N", "%N", "%N" },
235             { "HH", "%H", "%OH", "%H" },
236             { "H", "%H", "%OH", "%k" }, // solaris defines exact mapping for "H""
237             { "KK", "%I", "%OI", "%I" },
238             { "K", "%I", "%OI", "%l" },
239             { "MMMM", "%B", "%B", "%B" },
240             { "MMM", "%b", "%b", "%b" },
241             { "MM", "%m", "%Om", "%m" },
242             { "M", "%m", "%Om", "%m" },
243             { "VVVV", "%Z", "%Z", "%Z" },
244             { "V", "%Z", "%Z", "%Z" },
245             { "a", "%p", "%p", "%p" },
246             { "dd", "%d", "%Od", "%d" },
247             { "d", "%e", "%Oe", "%e" },
248             { "hh", "%I", "%OI", "%I" },
249             { "h", "%I", "%OI", "%l" }, // solaris defines exact mapping for "h"
250             { "kk", "%H", "%OH", "%H" },
251             { "k", "%H", "%OH", "%k" },
252             { "mm", "%M", "%OM", "%M" },
253             { "m", "%M", "%OM", "%M" },
254             { "vvvv", "%Z", "%Z", "%Z" },
255             { "v", "%Z", "%Z", "%Z" },
256             { "yyyy", "%Y", "%Oy", "%Y" },
257             { "yy", "%y", "%Oy", "%y" },
258             { "y", "%Y", "%Oy", "%Y" },
259             { "zzzz", "%Z", "%Z", "%Z" },
260             { "zzz", "%Z", "%Z", "%Z" },
261             { "zz", "%Z", "%Z", "%Z" },
262             { "z", "%Z", "%Z", "%Z" },
263             { "ss", "%S", "%OS", "%S" },
264             { "s", "%S", "%OS", "%S" }
265         };
266 
267         boolean inquotes = false;
268         StringBuffer result = new StringBuffer("");
269 
270         for (int pos = 0; pos < s.length();) {
271             boolean replaced = false;
272             for (int i = 0; i < FieldDescriptors.length && !replaced && !inquotes; i++) {
273                 if (s.indexOf(FieldDescriptors[i][0], pos) == pos) {
274                     if (UseAltDigits)
275                         result.append(FieldDescriptors[i][2]);
276                     else if (variant.platform.equals(POSIXVariant.SOLARIS))
277                         result.append(FieldDescriptors[i][3]);
278                     else
279                         result.append(FieldDescriptors[i][1]);
280                     replaced = true;
281                     pos += FieldDescriptors[i][0].length();
282                 }
283             }
284 
285             if (!replaced) {
286                 if (s.charAt(pos) == '\'') {
287                     if (pos < (s.length() - 1) && s.charAt(pos + 1) == '\'') {
288                         result.append('\'');
289                         pos++;
290                     } else
291                         inquotes = !inquotes;
292                 } else
293                     result.append(s.charAt(pos));
294                 pos++;
295             }
296         }
297         return result.toString();
298 
299     }
300 
POSIXGrouping(String grouping_pattern)301     public static String POSIXGrouping(String grouping_pattern) {
302 
303         // Parse the decimal pattern to get the number of digits to use in the POSIX style pattern.
304 
305         int i = grouping_pattern.indexOf(".");
306         int j;
307         boolean first_grouping = true;
308         String result;
309 
310         if (i < 0)
311             result = "-1";
312         else {
313             result = new String();
314             while ((j = grouping_pattern.lastIndexOf(",", i - 1)) > 0) {
315                 if (!first_grouping)
316                     result = result.concat(";");
317                 Integer num_digits = new Integer(i - j - 1);
318                 result = result.concat(num_digits.toString());
319 
320                 first_grouping = false;
321                 i = j;
322             }
323         }
324 
325         if (result.length() == 0)
326             result = "-1";
327 
328         return result;
329 
330     }
331 
isBetween(int a, int b, int c)332     public static boolean isBetween(int a, int b, int c) {
333         return ((a < b && b < c) || (c < b && b < a));
334     }
335 
POSIXYesNoExpr(String s)336     public static String POSIXYesNoExpr(String s) {
337         StringBuffer result = new StringBuffer();
338         String[] YesNoElements;
339         YesNoElements = s.split(":");
340         for (int i = 0; i < YesNoElements.length; i++) {
341             String cur = YesNoElements[i];
342             if (cur.length() >= 1 && cur.toLowerCase().equals(cur)) {
343                 if (result.length() > 0)
344                     result.append(")|(");
345                 else
346                     result.append("^((");
347 
348                 StringCharacterIterator si = new StringCharacterIterator(cur);
349                 boolean OptLastChars = false;
350                 for (char c = si.first(); c != StringCharacterIterator.DONE; c = si.next()) {
351                     if (c != Character.toUpperCase(c)) {
352                         if (si.getIndex() == 1) {
353                             result.append("(");
354                             OptLastChars = true;
355                         }
356                         result.append("[");
357                         result.append(c);
358                         result.append(Character.toUpperCase(c));
359                         result.append("]");
360                     } else
361                         result.append(c);
362                 }
363                 if (OptLastChars)
364                     result.append(")?");
365             }
366         }
367         result.append("))");
368         return (POSIXCharNameNP(result.toString()));
369     }
370 }
371