1 package org.unicode.cldr.draft;
2 
3 import java.io.BufferedReader;
4 import java.io.BufferedWriter;
5 import java.io.File;
6 import java.io.FileInputStream;
7 import java.io.FileNotFoundException;
8 import java.io.FileOutputStream;
9 import java.io.IOException;
10 import java.io.InputStream;
11 import java.io.InputStreamReader;
12 import java.io.OutputStreamWriter;
13 import java.io.PrintWriter;
14 import java.net.URL;
15 import java.nio.charset.Charset;
16 import java.util.ArrayList;
17 import java.util.List;
18 import java.util.Locale;
19 import java.util.regex.Pattern;
20 
21 import org.unicode.cldr.util.CldrUtility;
22 import org.unicode.cldr.util.PatternCache;
23 import org.unicode.cldr.util.With;
24 import org.unicode.cldr.util.With.SimpleIterator;
25 
26 import com.ibm.icu.util.ICUUncheckedIOException;
27 
28 public final class FileUtilities {
29     public static final boolean SHOW_FILES;
30     static {
31         boolean showFiles = false;
32         try {
33             showFiles = System.getProperty("SHOW_FILES") != null;
34         } catch (SecurityException ignored) {
35         }
36         SHOW_FILES = showFiles;
37     }
38 
    /** Auto-flushing writer over {@code System.out}. */
    public static final PrintWriter CONSOLE = new PrintWriter(System.out, true);

    // Destination of the "Opening File" / "Creating File" trace lines; initially CONSOLE.
    private static PrintWriter log = CONSOLE;
42 
openUTF8Reader(String dir, String filename)43     public static BufferedReader openUTF8Reader(String dir, String filename) throws IOException {
44         return openReader(dir, filename, "UTF-8");
45     }
46 
openReader(String dir, String filename, String encoding)47     public static BufferedReader openReader(String dir, String filename, String encoding) throws IOException {
48         File file = dir.length() == 0 ? new File(filename) : new File(dir, filename);
49         if (SHOW_FILES && log != null) {
50             log.println("Opening File: "
51                 + file.getCanonicalPath());
52         }
53         return new BufferedReader(
54             new InputStreamReader(
55                 new FileInputStream(file),
56                 encoding),
57             4 * 1024);
58     }
59 
openUTF8Writer(String dir, String filename)60     public static PrintWriter openUTF8Writer(String dir, String filename) throws IOException {
61         return openWriter(dir, filename, "UTF-8");
62     }
63 
openWriter(String dir, String filename, String encoding)64     public static PrintWriter openWriter(String dir, String filename, String encoding) throws IOException {
65         File file = new File(dir, filename);
66         if (SHOW_FILES && log != null) {
67             log.println("Creating File: "
68                 + file.getCanonicalPath());
69         }
70         String parentName = file.getParent();
71         if (parentName != null) {
72             File parent = new File(parentName);
73             parent.mkdirs();
74         }
75         return new PrintWriter(
76             new BufferedWriter(
77                 new OutputStreamWriter(
78                     new FileOutputStream(file),
79                     encoding),
80                 4 * 1024));
81     }
82 
83     public static abstract class SemiFileReader extends FileProcessor {
84         public final static Pattern SPLIT = PatternCache.get("\\s*;\\s*");
85 
handleLine(int lineCount, int start, int end, String[] items)86         protected abstract boolean handleLine(int lineCount, int start, int end, String[] items);
87 
handleEnd()88         protected void handleEnd() {
89         }
90 
isCodePoint()91         protected boolean isCodePoint() {
92             return true;
93         }
94 
splitLine(String line)95         protected String[] splitLine(String line) {
96             return SPLIT.split(line);
97         }
98 
99         @Override
handleLine(int lineCount, String line)100         protected boolean handleLine(int lineCount, String line) {
101             String[] parts = splitLine(line);
102             int start, end;
103             if (isCodePoint()) {
104                 String source = parts[0];
105                 int range = source.indexOf("..");
106                 if (range >= 0) {
107                     start = Integer.parseInt(source.substring(0, range), 16);
108                     end = Integer.parseInt(source.substring(range + 2), 16);
109                 } else {
110                     start = end = Integer.parseInt(source, 16);
111                 }
112             } else {
113                 start = end = -1;
114             }
115             return handleLine(lineCount, start, end, parts);
116         }
117     }
118 
119     public static class FileProcessor {
120         private int lineCount;
121 
handleStart()122         protected void handleStart() {
123         }
124 
125         /**
126          * Return false to abort
127          *
128          * @param lineCount
129          * @param line
130          * @return
131          */
handleLine(int lineCount, String line)132         protected boolean handleLine(int lineCount, String line) {
133             return true;
134         }
135 
handleEnd()136         protected void handleEnd() {
137         }
138 
getLineCount()139         public int getLineCount() {
140             return lineCount;
141         }
142 
handleComment(String line, int commentCharPosition)143         public void handleComment(String line, int commentCharPosition) {
144         }
145 
process(Class<?> classLocation, String fileName)146         public FileProcessor process(Class<?> classLocation, String fileName) {
147             try {
148                 BufferedReader in = openFile(classLocation, fileName);
149                 return process(in, fileName);
150             } catch (Exception e) {
151                 throw new ICUUncheckedIOException(lineCount + ":\t" + 0, e);
152             }
153 
154         }
155 
process(String fileName)156         public FileProcessor process(String fileName) {
157             try {
158                 FileInputStream fileStream = new FileInputStream(fileName);
159                 InputStreamReader reader = new InputStreamReader(fileStream, UTF8);
160                 BufferedReader bufferedReader = new BufferedReader(reader, 1024 * 64);
161                 return process(bufferedReader, fileName);
162             } catch (Exception e) {
163                 throw new ICUUncheckedIOException(lineCount + ":\t" + 0, e);
164             }
165         }
166 
process(String directory, String fileName)167         public FileProcessor process(String directory, String fileName) {
168             try {
169                 FileInputStream fileStream = new FileInputStream(directory + File.separator + fileName);
170                 InputStreamReader reader = new InputStreamReader(fileStream, UTF8);
171                 BufferedReader bufferedReader = new BufferedReader(reader, 1024 * 64);
172                 return process(bufferedReader, fileName);
173             } catch (Exception e) {
174                 throw new ICUUncheckedIOException(lineCount + ":\t" + 0, e);
175             }
176         }
177 
process(BufferedReader in, String fileName)178         public FileProcessor process(BufferedReader in, String fileName) {
179             handleStart();
180             String line = null;
181             lineCount = 1;
182             try {
183                 for (;; ++lineCount) {
184                     line = in.readLine();
185                     if (line == null) {
186                         break;
187                     }
188                     int comment = line.indexOf("#");
189                     if (comment >= 0) {
190                         handleComment(line, comment);
191                         line = line.substring(0, comment);
192                     }
193                     if (line.startsWith("\uFEFF")) {
194                         line = line.substring(1);
195                     }
196                     line = line.trim();
197                     if (line.length() == 0) {
198                         continue;
199                     }
200                     if (!handleLine(lineCount, line)) {
201                         break;
202                     }
203                 }
204                 in.close();
205                 handleEnd();
206             } catch (Exception e) {
207                 throw (RuntimeException) new ICUUncheckedIOException(lineCount + ":\t" + line, e);
208             }
209             return this;
210         }
211     }
212 
213     //
214     // public static SemiFileReader fillMapFromSemi(Class classLocation, String fileName, SemiFileReader handler) {
215     // return handler.process(classLocation, fileName);
216     // }
openFile(Class<?> class1, String file)217     public static BufferedReader openFile(Class<?> class1, String file) {
218         return openFile(class1, file, UTF8);
219     }
220 
openFile(Class<?> class1, String file, Charset charset)221     public static BufferedReader openFile(Class<?> class1, String file, Charset charset) {
222         // URL path = null;
223         // String externalForm = null;
224         try {
225             // //System.out.println("Reading:\t" + file1.getCanonicalPath());
226             // path = class1.getResource(file);
227             // externalForm = path.toExternalForm();
228             // if (externalForm.startsWith("file:")) {
229             // externalForm = externalForm.substring(5);
230             // }
231             // File file1 = new File(externalForm);
232             // boolean x = file1.canRead();
233             // final InputStream resourceAsStream = new FileInputStream(file1);
234             final InputStream resourceAsStream = class1.getResourceAsStream(file);
235             // String foo = class1.getResource(".").toString();
236             if (charset == null) {
237                 charset = UTF8;
238             }
239             InputStreamReader reader = new InputStreamReader(resourceAsStream, charset);
240             BufferedReader bufferedReader = new BufferedReader(reader, 1024 * 64);
241             return bufferedReader;
242         } catch (Exception e) {
243             String className = class1 == null ? null : class1.getCanonicalName();
244             String canonicalName = null;
245             try {
246                 String relativeFileName = getRelativeFileName(class1, "../util/");
247                 canonicalName = new File(relativeFileName).getCanonicalPath();
248             } catch (Exception e1) {
249                 throw new ICUUncheckedIOException("Couldn't open file: " + file + "; relative to class: "
250                     + className, e);
251             }
252             throw new ICUUncheckedIOException("Couldn't open file " + file + "; in path " + canonicalName + "; relative to class: "
253                 + className, e);
254         }
255     }
256 
openFile(String directory, String file, Charset charset)257     public static BufferedReader openFile(String directory, String file, Charset charset) {
258         try {
259             return new BufferedReader(new InputStreamReader(new FileInputStream(new File(directory, file)), charset));
260         } catch (FileNotFoundException e) {
261             throw new ICUUncheckedIOException(e); // handle dang'd checked exception
262         }
263     }
264 
openFile(File file, Charset charset)265     public static BufferedReader openFile(File file, Charset charset) {
266         try {
267             return new BufferedReader(new InputStreamReader(new FileInputStream(file), charset));
268         } catch (FileNotFoundException e) {
269             throw new ICUUncheckedIOException(e); // handle dang'd checked exception
270         }
271     }
272 
openFile(File file)273     public static BufferedReader openFile(File file) {
274         return openFile(file, UTF8);
275     }
276 
openFile(String directory, String file)277     public static BufferedReader openFile(String directory, String file) {
278         return openFile(directory, file, UTF8);
279     }
280 
    /** The UTF-8 charset; used by the overloads in this class that take no explicit charset. */
    public static final Charset UTF8 = Charset.forName("utf-8");
282 
splitCommaSeparated(String line)283     public static String[] splitCommaSeparated(String line) {
284         // items are separated by ','
285         // each item is of the form abc...
286         // or "..." (required if a comma or quote is contained)
287         // " in a field is represented by ""
288         List<String> result = new ArrayList<String>();
289         StringBuilder item = new StringBuilder();
290         boolean inQuote = false;
291         for (int i = 0; i < line.length(); ++i) {
292             char ch = line.charAt(i); // don't worry about supplementaries
293             switch (ch) {
294             case '"':
295                 inQuote = !inQuote;
296                 // at start or end, that's enough
297                 // if get a quote when we are not in a quote, and not at start, then add it and return to inQuote
298                 if (inQuote && item.length() != 0) {
299                     item.append('"');
300                     inQuote = true;
301                 }
302                 break;
303             case ',':
304                 if (!inQuote) {
305                     result.add(item.toString());
306                     item.setLength(0);
307                 } else {
308                     item.append(ch);
309                 }
310                 break;
311             default:
312                 item.append(ch);
313                 break;
314             }
315         }
316         result.add(item.toString());
317         return result.toArray(new String[result.size()]);
318     }
319 
appendFile(Class<?> class1, String filename, PrintWriter out)320     public static void appendFile(Class<?> class1, String filename, PrintWriter out) {
321         appendFile(class1, filename, UTF8, null, out);
322     }
323 
appendFile(Class<?> class1, String filename, Charset charset, String[] replacementList, PrintWriter out)324     public static void appendFile(Class<?> class1, String filename, Charset charset, String[] replacementList,
325         PrintWriter out) {
326         BufferedReader br = openFile(class1, filename, charset);
327         try {
328             try {
329                 appendBufferedReader(br, out, replacementList);
330             } finally {
331                 br.close();
332             }
333         } catch (IOException e) {
334             throw new ICUUncheckedIOException(e); // wrap darn'd checked exception
335         }
336     }
337 
appendFile(String filename, String encoding, PrintWriter output)338     public static void appendFile(String filename, String encoding, PrintWriter output) throws IOException {
339         appendFile(filename, encoding, output, null);
340     }
341 
appendFile(String filename, String encoding, PrintWriter output, String[] replacementList)342     public static void appendFile(String filename, String encoding, PrintWriter output, String[] replacementList) throws IOException {
343         BufferedReader br = openReader("", filename, encoding);
344         try {
345             appendBufferedReader(br, output, replacementList);
346         } finally {
347             br.close();
348         }
349     }
350 
appendBufferedReader(BufferedReader br, PrintWriter output, String[] replacementList)351     public static void appendBufferedReader(BufferedReader br,
352         PrintWriter output, String[] replacementList) throws IOException {
353         while (true) {
354             String line = br.readLine();
355             if (line == null) break;
356             if (replacementList != null) {
357                 for (int i = 0; i < replacementList.length; i += 2) {
358                     line = replace(line, replacementList[i], replacementList[i + 1]);
359                 }
360             }
361             output.println(line);
362         }
363         br.close();
364     }
365 
366     /**
367      * Replaces all occurrences of piece with replacement, and returns new String
368      */
replace(String source, String piece, String replacement)369     public static String replace(String source, String piece, String replacement) {
370         if (source == null || source.length() < piece.length()) return source;
371         int pos = 0;
372         while (true) {
373             pos = source.indexOf(piece, pos);
374             if (pos < 0) return source;
375             source = source.substring(0, pos) + replacement + source.substring(pos + piece.length());
376             pos += replacement.length();
377         }
378     }
379 
replace(String source, String[][] replacements)380     public static String replace(String source, String[][] replacements) {
381         return replace(source, replacements, replacements.length);
382     }
383 
replace(String source, String[][] replacements, int count)384     public static String replace(String source, String[][] replacements, int count) {
385         for (int i = 0; i < count; ++i) {
386             source = replace(source, replacements[i][0], replacements[i][1]);
387         }
388         return source;
389     }
390 
replace(String source, String[][] replacements, boolean reverse)391     public static String replace(String source, String[][] replacements, boolean reverse) {
392         if (!reverse) return replace(source, replacements);
393         for (int i = 0; i < replacements.length; ++i) {
394             source = replace(source, replacements[i][1], replacements[i][0]);
395         }
396         return source;
397     }
398 
anchorize(String source)399     public static String anchorize(String source) {
400         String result = source.toLowerCase(Locale.ENGLISH).replaceAll("[^\\p{L}\\p{N}]+", "_");
401         if (result.endsWith("_")) result = result.substring(0, result.length() - 1);
402         if (result.startsWith("_")) result = result.substring(1);
403         return result;
404     }
405 
copyFile(Class<?> class1, String sourceFile, String targetDirectory)406     public static void copyFile(Class<?> class1, String sourceFile, String targetDirectory) {
407         copyFile(class1, sourceFile, targetDirectory, sourceFile, null);
408     }
409 
copyFile(Class<?> class1, String sourceFile, String targetDirectory, String newName)410     public static void copyFile(Class<?> class1, String sourceFile, String targetDirectory, String newName) {
411         copyFile(class1, sourceFile, targetDirectory, newName, null);
412     }
413 
copyFile(Class<?> class1, String sourceFile, String targetDirectory, String newName, String[] replacementList)414     public static void copyFile(Class<?> class1, String sourceFile, String targetDirectory, String newName, String[] replacementList) {
415         try {
416             PrintWriter out = openUTF8Writer(targetDirectory, newName);
417             appendFile(class1, sourceFile, UTF8, replacementList, out);
418             out.close();
419         } catch (IOException e) {
420             throw new ICUUncheckedIOException(e); // dang'd checked exceptions
421         }
422     }
423 
getRelativeFileName(Class<?> class1, String filename)424     public static String getRelativeFileName(Class<?> class1, String filename) {
425         URL resource = class1.getResource(filename);
426         String resourceString = resource.toString();
427         if (resourceString.startsWith("file:")) {
428             return resourceString.substring(5);
429         } else if (resourceString.startsWith("jar:file:")) {
430             return resourceString.substring(9);
431         } else {
432             throw new ICUUncheckedIOException("File not found: " + resourceString);
433         }
434     }
435 
436     /**
437      * Simple API to iterate over file lines. Example:
438      * for (String s : FileUtilities.in(directory,name)) {
439      * ...
440      * }
441      *
442      * @author markdavis
443      *
444      */
in(Class<?> class1, String file)445     public static Iterable<String> in(Class<?> class1, String file) {
446         return With.in(new FileLines(openFile(class1, file, UTF8)));
447     }
448 
449     /**
450      * Simple API to iterate over file lines. Example:
451      * for (String s : FileUtilities.in(directory,name)) {
452      * ...
453      * }
454      *
455      * @author markdavis
456      *
457      */
in(Class<?> class1, String file, Charset charset)458     public static Iterable<String> in(Class<?> class1, String file, Charset charset) {
459         return With.in(new FileLines(openFile(class1, file, charset)));
460     }
461 
462     /**
463      * Simple API to iterate over file lines. Example:
464      * for (String s : FileUtilities.in(directory,name)) {
465      * ...
466      * }
467      *
468      * @author markdavis
469      *
470      */
in(String directory, String file)471     public static Iterable<String> in(String directory, String file) {
472         return With.in(new FileLines(openFile(directory, file, UTF8)));
473     }
474 
475     /**
476      * Simple API to iterate over file lines. Example:
477      * for (String s : FileUtilities.in(directory,name)) {
478      * ...
479      * }
480      *
481      * @author markdavis
482      *
483      */
in(BufferedReader reader)484     public static Iterable<String> in(BufferedReader reader) {
485         return With.in(new FileLines(reader));
486     }
487 
488     /**
489      * Simple API to iterate over file lines. Example:
490      * for (String s : FileUtilities.in(directory,name)) {
491      * ...
492      * }
493      *
494      * @author markdavis
495      *
496      */
in(String directory, String file, Charset charset)497     public static Iterable<String> in(String directory, String file, Charset charset) {
498         return With.in(new FileLines(openFile(directory, file, charset)));
499     }
500 
501     private static class FileLines implements SimpleIterator<String> {
502         private BufferedReader input;
503 
FileLines(BufferedReader input)504         public FileLines(BufferedReader input) {
505             this.input = input;
506         }
507 
508         @Override
next()509         public String next() {
510             try {
511                 String result = input.readLine();
512                 if (result == null) {
513                     input.close();
514                 }
515                 return result;
516             } catch (IOException e) {
517                 throw new ICUUncheckedIOException(e); // handle dang'd checked exception
518             }
519         }
520 
521     }
522 
cleanLine(String line)523     public static String cleanLine(String line) {
524         int comment = line.indexOf("#");
525         if (comment >= 0) {
526             line = line.substring(0, comment);
527         }
528         if (line.startsWith("\uFEFF")) {
529             line = line.substring(1);
530         }
531         return line.trim();
532     }
533 
    /** Field separator: a semicolon together with any whitespace around it. */
    public final static Pattern SEMI_SPLIT = PatternCache.get("\\s*;\\s*");
    // When true, handleFile prints every line for which its handler returns false.
    private static final boolean SHOW_SKIP = false;
536 
cleanSemiFields(String line)537     public static String[] cleanSemiFields(String line) {
538         line = cleanLine(line);
539         return line.isEmpty() ? null : SEMI_SPLIT.split(line);
540     }
541 
    /** Callback that receives the lines of a file one at a time, as read by {@code handleFile}. */
    public interface LineHandler {
        /**
         * Handles one line exactly as read from the file (no comment stripping or trimming
         * has been applied).
         *
         * @param line the line to handle
         * @return false if the line was skipped, true otherwise
         * @throws Exception on any failure; {@code handleFile} wraps it in an
         *         {@code ICUUncheckedIOException} whose message contains the line
         */
        boolean handle(String line) throws Exception;
    }
551 
handleFile(String filename, LineHandler handler)552     public static void handleFile(String filename, LineHandler handler) throws IOException {
553         BufferedReader in = CldrUtility.getUTF8Data(filename);
554         while (true) {
555             String line = in.readLine();
556             if (line == null) {
557                 break;
558             }
559             try {
560                 if (!handler.handle(line)) {
561                     if (SHOW_SKIP) System.out.println("Skipping line: " + line);
562                 }
563             } catch (Exception e) {
564                 throw new ICUUncheckedIOException("Problem with line: " + line, e);
565             }
566         }
567         in.close();
568     }
569 
in(File file)570     public static Iterable<String> in(File file) {
571         return With.in(new FileLines(openFile(file, UTF8)));
572     }
573 }
574