1 /*
2  **********************************************************************
3  * Copyright (c) 2002-2004, International Business Machines
4  * Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  * Author: Mark Davis
7  **********************************************************************
8  */
9 package org.unicode.cldr.util;
10 
11 import java.io.FileInputStream;
12 import java.io.IOException;
13 import java.io.InputStream;
14 import java.io.InputStreamReader;
15 import java.io.Reader;
16 import java.nio.charset.Charset;
17 import java.util.ArrayList;
18 import java.util.List;
19 import java.util.Stack;
20 
21 import org.xml.sax.Attributes;
22 import org.xml.sax.ContentHandler;
23 import org.xml.sax.ErrorHandler;
24 import org.xml.sax.InputSource;
25 import org.xml.sax.Locator;
26 import org.xml.sax.SAXException;
27 import org.xml.sax.SAXParseException;
28 import org.xml.sax.XMLReader;
29 import org.xml.sax.ext.DeclHandler;
30 import org.xml.sax.ext.LexicalHandler;
31 import org.xml.sax.helpers.XMLReaderFactory;
32 
33 import com.google.common.base.Function;
34 import com.ibm.icu.util.ICUException;
35 import com.ibm.icu.util.ICUUncheckedIOException;
36 
37 /**
38  * Convenience class to make reading XML data files easier. The main method is read();
39  * This is meant for XML data files, so the contents of elements must either be all other elements, or
40  * just text. It is thus not suitable for XML files with MIXED content;
41  * all text content in a mixed element is discarded.
42  *
43  * @author davis
44  */
45 public class XMLFileReader {
46     static final boolean SHOW_ALL = false;
47     /**
48      * Handlers to use in read()
49      */
50     public static int CONTENT_HANDLER = 1, ERROR_HANDLER = 2, LEXICAL_HANDLER = 4, DECLARATION_HANDLER = 8;
51 
52     private MyContentHandler DEFAULT_DECLHANDLER = new MyContentHandler();
53     // TODO Add way to skip gathering value contents
54     // private ElementOnlyContentHandler ELEMENT_ONLY_DECLHANDLER = new ElementOnlyContentHandler();
55     private SimpleHandler simpleHandler;
56 
57     public static class SimpleHandler {
handlePathValue(String path, String value)58         public void handlePathValue(String path, String value) {
59         }
60 
handleComment(String path, String comment)61         public void handleComment(String path, String comment) {
62         }
63 
handleElementDecl(String name, String model)64         public void handleElementDecl(String name, String model) {
65         }
66 
handleAttributeDecl(String eName, String aName, String type, String mode, String value)67         public void handleAttributeDecl(String eName, String aName, String type, String mode, String value) {
68         }
69 
handleEndDtd()70         public void handleEndDtd() {
71         }
72 
handleStartDtd(String name, String publicId, String systemId)73         public void handleStartDtd(String name, String publicId, String systemId) {
74         }
75     }
76 
    /**
     * Sets the handler that receives the simplified callbacks produced while reading.
     *
     * @param simpleHandler
     *            the handler to store; it is kept as-is, without any checking
     * @return this reader, so that calls can be chained
     */
    public XMLFileReader setHandler(SimpleHandler simpleHandler) {
        this.simpleHandler = simpleHandler;
        return this;
    }
81 
82     /**
83      * Read an XML file. The order of the elements matches what was in the file.
84      *
85      * @param fileName
86      *            file to open
87      * @param handlers
88      *            a set of values for the handlers to use, eg CONTENT_HANDLER | ERROR_HANDLER
89      * @param validating
90      *            if a validating parse is requested
91      * @return list of alternating values.
92      */
read(String fileName, int handlers, boolean validating)93     public XMLFileReader read(String fileName, int handlers, boolean validating) {
94         try (InputStream fis0 = new FileInputStream(fileName);
95             InputStream fis = new StripUTF8BOMInputStream(fis0);
96             ) {
97             return read(fileName, fis, handlers, validating);
98         } catch (IOException e) {
99             throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + fileName).initCause(e);
100         }
101     }
102 
103     /**
104      * read from a Stream
105      * @param fileName
106      * @param handlers
107      * @param validating
108      * @param fis
109      * @return
110      */
read(String fileName, InputStream fis, int handlers, boolean validating)111     public XMLFileReader read(String fileName, InputStream fis, int handlers, boolean validating) {
112         try (InputStreamReader inputStreamReader = new InputStreamReader(fis, Charset.forName("UTF-8"))) {
113             return read(fileName, inputStreamReader, handlers, validating);
114         } catch (IOException e) {
115             throw new ICUUncheckedIOException(e);
116         }
117     }
118 
119     /**
120      * read from a CLDR resource
121      * @param fileName
122      * @param handlers
123      * @param validating
124      * @param fis
125      * @see CldrUtility#getInputStream(String)
126      * @return
127      */
readCLDRResource(String resName, int handlers, boolean validating)128     public XMLFileReader readCLDRResource(String resName, int handlers, boolean validating) {
129         try (InputStream inputStream = CldrUtility.getInputStream(resName)) {
130             return read(resName, inputStream, handlers, validating);
131         } catch (IOException e) {
132             throw new ICUUncheckedIOException(e);
133         }
134     }
135 
136     /**
137      * read from an arbitrary
138      * @param fileName
139      * @param handlers
140      * @param validating
141      * @param fis
142      * @see CldrUtility#getInputStream(String)
143      * @return
144      */
read(String resName, Class<?> callingClass, int handlers, boolean validating)145     public XMLFileReader read(String resName, Class<?> callingClass, int handlers, boolean validating) {
146         try (InputStream inputStream = CldrUtility.getInputStream(callingClass, resName)) {
147             return read(resName, inputStream, handlers, validating);
148         } catch (IOException e) {
149             throw new ICUUncheckedIOException(e);
150         }
151     }
152 
    /**
     * Reads XML from a Reader, using this reader's own content handler, which is reset first.
     * The parsed content is forwarded to the handler installed with setHandler().
     *
     * @param systemID
     *            system ID to report for the input
     * @param reader
     *            the character source to parse
     * @param handlers
     *            bit set of the handlers to register, eg CONTENT_HANDLER | ERROR_HANDLER
     * @param validating
     *            if a validating parse is requested
     * @return this reader, to allow chaining
     */
    public XMLFileReader read(String systemID, Reader reader, int handlers, boolean validating) {
        read(systemID, reader, handlers, validating, DEFAULT_DECLHANDLER.reset());
        return this;
    }
157 
read(String systemID, Reader reader, int handlers, boolean validating, AllHandler allHandler)158     public static void read(String systemID, Reader reader, int handlers, boolean validating, AllHandler allHandler) {
159         try {
160             XMLReader xmlReader = createXMLReader(validating);
161             if ((handlers & CONTENT_HANDLER) != 0) {
162                 xmlReader.setContentHandler(allHandler);
163             }
164             if ((handlers & ERROR_HANDLER) != 0) {
165                 xmlReader.setErrorHandler(allHandler);
166             }
167             if ((handlers & LEXICAL_HANDLER) != 0) {
168                 xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", allHandler);
169             }
170             if ((handlers & DECLARATION_HANDLER) != 0) {
171                 xmlReader.setProperty("http://xml.org/sax/properties/declaration-handler", allHandler);
172             }
173             InputSource is = new InputSource(reader);
174             is.setSystemId(systemID);
175             try {
176                 xmlReader.parse(is);
177             } catch (AbortException e) {
178             } // ok
179             reader.close();
180         } catch (SAXParseException e) {
181             throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + systemID + "\tline:\t"
182                 + e.getLineNumber()).initCause(e);
183         } catch (SAXException e) {
184             throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + systemID).initCause(e);
185         } catch (IOException e) {
186             throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + systemID).initCause(e);
187         }
188     }
189 
    /**
     * Union of the SAX handler interfaces that the static read() can register on a parser:
     * content, lexical, declaration and error handling. It declares no methods of its own.
     */
    public interface AllHandler extends ContentHandler, LexicalHandler, DeclHandler, ErrorHandler {

    }
193 
194 
195     /** Basis for handlers that provides for logging, with no actions on methods
196      */
197     static public class LoggingHandler implements AllHandler {
198         @Override
startDocument()199         public void startDocument() throws SAXException {
200             if (SHOW_ALL) Log.logln("startDocument");
201         }
202 
203         @Override
characters(char[] ch, int start, int length)204         public void characters(char[] ch, int start, int length) throws SAXException {
205             if (SHOW_ALL) Log.logln("characters");
206         }
207 
208         @Override
startElement(String namespaceURI, String localName, String qName, Attributes atts)209         public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
210             throws SAXException {
211             if (SHOW_ALL) Log.logln("startElement");
212         }
213 
214         @Override
endElement(String namespaceURI, String localName, String qName)215         public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
216             if (SHOW_ALL) Log.logln("endElement");
217         }
218 
219         @Override
startDTD(String name, String publicId, String systemId)220         public void startDTD(String name, String publicId, String systemId) throws SAXException {
221             if (SHOW_ALL) Log.logln("startDTD");
222         }
223 
224         @Override
endDTD()225         public void endDTD() throws SAXException {
226             if (SHOW_ALL) Log.logln("endDTD");
227         }
228 
229         @Override
comment(char[] ch, int start, int length)230         public void comment(char[] ch, int start, int length) throws SAXException {
231             if (SHOW_ALL) Log.logln(" comment " + new String(ch, start, length));
232         }
233 
234         @Override
elementDecl(String name, String model)235         public void elementDecl(String name, String model) throws SAXException {
236             if (SHOW_ALL) Log.logln("elementDecl");
237         }
238 
239         @Override
attributeDecl(String eName, String aName, String type, String mode, String value)240         public void attributeDecl(String eName, String aName, String type, String mode, String value)
241             throws SAXException {
242             if (SHOW_ALL) Log.logln("attributeDecl");
243         }
244 
245         @Override
ignorableWhitespace(char[] ch, int start, int length)246         public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
247             if (SHOW_ALL) Log.logln("ignorableWhitespace length: " + length);
248         }
249 
250         @Override
endDocument()251         public void endDocument() throws SAXException {
252             if (SHOW_ALL) Log.logln("endDocument");
253         }
254 
255         @Override
internalEntityDecl(String name, String value)256         public void internalEntityDecl(String name, String value) throws SAXException {
257             if (SHOW_ALL) Log.logln("Internal Entity\t" + name + "\t" + value);
258         }
259 
260         @Override
externalEntityDecl(String name, String publicId, String systemId)261         public void externalEntityDecl(String name, String publicId, String systemId) throws SAXException {
262             if (SHOW_ALL) Log.logln("Internal Entity\t" + name + "\t" + publicId + "\t" + systemId);
263         }
264 
notationDecl(String name, String publicId, String systemId)265         public void notationDecl(String name, String publicId, String systemId) {
266             if (SHOW_ALL) Log.logln("notationDecl: " + name
267                 + ", " + publicId
268                 + ", " + systemId);
269         }
270 
271         @Override
processingInstruction(String target, String data)272         public void processingInstruction(String target, String data)
273             throws SAXException {
274             if (SHOW_ALL) Log.logln("processingInstruction: " + target + ", " + data);
275         }
276 
277         @Override
skippedEntity(String name)278         public void skippedEntity(String name)
279             throws SAXException {
280             if (SHOW_ALL) Log.logln("skippedEntity: " + name);
281         }
282 
unparsedEntityDecl(String name, String publicId, String systemId, String notationName)283         public void unparsedEntityDecl(String name, String publicId,
284             String systemId, String notationName) {
285             if (SHOW_ALL) Log.logln("unparsedEntityDecl: " + name
286                 + ", " + publicId
287                 + ", " + systemId
288                 + ", " + notationName);
289         }
290 
291         @Override
setDocumentLocator(Locator locator)292         public void setDocumentLocator(Locator locator) {
293             if (SHOW_ALL) Log.logln("setDocumentLocator Locator " + locator);
294         }
295 
296         @Override
startPrefixMapping(String prefix, String uri)297         public void startPrefixMapping(String prefix, String uri) throws SAXException {
298             if (SHOW_ALL) Log.logln("startPrefixMapping prefix: " + prefix +
299                 ", uri: " + uri);
300         }
301 
302         @Override
endPrefixMapping(String prefix)303         public void endPrefixMapping(String prefix) throws SAXException {
304             if (SHOW_ALL) Log.logln("endPrefixMapping prefix: " + prefix);
305         }
306 
307         @Override
startEntity(String name)308         public void startEntity(String name) throws SAXException {
309             if (SHOW_ALL) Log.logln("startEntity name: " + name);
310         }
311 
312         @Override
endEntity(String name)313         public void endEntity(String name) throws SAXException {
314             if (SHOW_ALL) Log.logln("endEntity name: " + name);
315         }
316 
317         @Override
startCDATA()318         public void startCDATA() throws SAXException {
319             if (SHOW_ALL) Log.logln("startCDATA");
320         }
321 
322         @Override
endCDATA()323         public void endCDATA() throws SAXException {
324             if (SHOW_ALL) Log.logln("endCDATA");
325         }
326 
327         /*
328          * (non-Javadoc)
329          *
330          * @see org.xml.sax.ErrorHandler#error(org.xml.sax.SAXParseException)
331          */
332         @Override
error(SAXParseException exception)333         public void error(SAXParseException exception) throws SAXException {
334             if (SHOW_ALL) Log.logln("error: " + showSAX(exception));
335             throw exception;
336         }
337 
338         /*
339          * (non-Javadoc)
340          *
341          * @see org.xml.sax.ErrorHandler#fatalError(org.xml.sax.SAXParseException)
342          */
343         @Override
fatalError(SAXParseException exception)344         public void fatalError(SAXParseException exception) throws SAXException {
345             if (SHOW_ALL) Log.logln("fatalError: " + showSAX(exception));
346             throw exception;
347         }
348 
349         /*
350          * (non-Javadoc)
351          *
352          * @see org.xml.sax.ErrorHandler#warning(org.xml.sax.SAXParseException)
353          */
354         @Override
warning(SAXParseException exception)355         public void warning(SAXParseException exception) throws SAXException {
356             if (SHOW_ALL) Log.logln("warning: " + showSAX(exception));
357             throw exception;
358         }
359 
360     }
361 
362     public class MyContentHandler extends LoggingHandler {
363         StringBuffer chars = new StringBuffer();
364         StringBuffer commentChars = new StringBuffer();
365         Stack<String> startElements = new Stack<>();
366         StringBuffer tempPath = new StringBuffer();
367         boolean lastIsStart = false;
368 
reset()369         public MyContentHandler reset() {
370             chars.setLength(0);
371             tempPath = new StringBuffer("/");
372             startElements.clear();
373             startElements.push("/");
374             return this;
375         }
376 
377         @Override
characters(char[] ch, int start, int length)378         public void characters(char[] ch, int start, int length) throws SAXException {
379             if (lastIsStart) chars.append(ch, start, length);
380         }
381 
382         @Override
startElement(String namespaceURI, String localName, String qName, Attributes atts)383         public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
384             throws SAXException {
385             tempPath.setLength(0);
386             tempPath.append(startElements.peek()).append('/').append(qName);
387             for (int i = 0; i < atts.getLength(); ++i) {
388                 tempPath.append("[@").append(atts.getQName(i)).append("=\"").append(atts.getValue(i).replace('"', '\'')).append("\"]");
389             }
390             startElements.push(tempPath.toString());
391             chars.setLength(0); // clear garbage
392             lastIsStart = true;
393         }
394 
395         @Override
endElement(String namespaceURI, String localName, String qName)396         public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
397             String startElement = startElements.pop();
398             if (lastIsStart) {
399                 // System.out.println(startElement + ":" + chars);
400                 simpleHandler.handlePathValue(startElement, chars.toString());
401             }
402             chars.setLength(0);
403             lastIsStart = false;
404         }
405 
406         @Override
startDTD(String name, String publicId, String systemId)407         public void startDTD(String name, String publicId, String systemId) throws SAXException {
408             if (SHOW_ALL) Log.logln("startDTD name: " + name
409                 + ", publicId: " + publicId
410                 + ", systemId: " + systemId);
411             simpleHandler.handleStartDtd(name, publicId, systemId);
412         }
413 
414         @Override
endDTD()415         public void endDTD() throws SAXException {
416             if (SHOW_ALL) Log.logln("endDTD");
417             simpleHandler.handleEndDtd();
418         }
419 
420         @Override
comment(char[] ch, int start, int length)421         public void comment(char[] ch, int start, int length) throws SAXException {
422             if (SHOW_ALL) Log.logln(" comment " + new String(ch, start, length));
423             commentChars.append(ch, start, length);
424             simpleHandler.handleComment(startElements.peek(), commentChars.toString());
425             commentChars.setLength(0);
426         }
427 
428         @Override
elementDecl(String name, String model)429         public void elementDecl(String name, String model) throws SAXException {
430             simpleHandler.handleElementDecl(name, model);
431         }
432 
433         @Override
attributeDecl(String eName, String aName, String type, String mode, String value)434         public void attributeDecl(String eName, String aName, String type, String mode, String value)
435             throws SAXException {
436             simpleHandler.handleAttributeDecl(eName, aName, type, mode, value);
437         }
438 
439     }
440 
    /**
     * Unchecked exception that the static read() catches around the parse call and ignores.
     * NOTE(review): nothing in this file throws it; presumably a handler throws it to stop
     * a parse early - confirm against callers.
     */
    static final class AbortException extends RuntimeException {
        private static final long serialVersionUID = 1L;
    }
444 
445     /**
446      * Show a SAX exception in a readable form.
447      */
showSAX(SAXParseException exception)448     public static String showSAX(SAXParseException exception) {
449         return exception.getMessage()
450             + ";\t SystemID: " + exception.getSystemId()
451             + ";\t PublicID: " + exception.getPublicId()
452             + ";\t LineNumber: " + exception.getLineNumber()
453             + ";\t ColumnNumber: " + exception.getColumnNumber();
454     }
455 
createXMLReader(boolean validating)456     public static XMLReader createXMLReader(boolean validating) {
457         // weiv 07/20/2007: The laundry list below is somewhat obsolete
458         // I have moved the system's default parser (instantiated when "" is
459         // passed) to the top, so that we will always use that. I have also
460         // removed "org.apache.crimson.parser.XMLReaderImpl" as this one gets
461         // confused regarding UTF-8 encoding name.
462         String[] testList = {
463             System.getProperty("CLDR_DEFAULT_SAX_PARSER", ""), // defaults to "", system default.
464             "org.apache.xerces.parsers.SAXParser",
465             "gnu.xml.aelfred2.XmlReader",
466             "com.bluecast.xml.Piccolo",
467             "oracle.xml.parser.v2.SAXParser"
468         };
469         XMLReader result = null;
470         for (int i = 0; i < testList.length; ++i) {
471             try {
472                 result = (testList[i].length() != 0)
473                     ? XMLReaderFactory.createXMLReader(testList[i])
474                         : XMLReaderFactory.createXMLReader();
475                     result.setFeature("http://xml.org/sax/features/validation", validating);
476                     break;
477             } catch (SAXException e1) {
478             }
479         }
480         if (result == null)
481             throw new NoClassDefFoundError("No SAX parser is available, or unable to set validation correctly");
482         return result;
483     }
484 
485     static final class DebuggingInputStream extends InputStream {
486         InputStream contents;
487 
488         @Override
close()489         public void close() throws IOException {
490             contents.close();
491         }
492 
DebuggingInputStream(InputStream fis)493         public DebuggingInputStream(InputStream fis) {
494             contents = fis;
495         }
496 
497         @Override
read()498         public int read() throws IOException {
499             int x = contents.read();
500             System.out.println(Integer.toHexString(x) + ",");
501             return x;
502         }
503     }
504 
505 // class StripUTF8BOMInputStream does the same thing
506 //    public static final class FilterBomInputStream extends InputStream {
507 //        InputStream contents;
508 //        boolean first = true;
509 //
510 //        @Override
511 //        public void close() throws IOException {
512 //            contents.close();
513 //        }
514 //
515 //        public FilterBomInputStream(InputStream fis) {
516 //            contents = fis;
517 //        }
518 //
519 //        @Override
520 //        public int read() throws IOException {
521 //            int x = contents.read();
522 //            if (first) {
523 //                first = false;
524 //                // 0xEF,0xBB,0xBF
525 //                // SKIP bom
526 //                if (x == 0xEF) {
527 //                    int y = contents.read();
528 //                    if (y == 0xBB) {
529 //                        int z = contents.read();
530 //                        if (z == 0xBF) {
531 //                            x = contents.read();
532 //                        }
533 //                    }
534 //                }
535 //            }
536 //            return x;
537 //        }
538 //    }
539 
    /**
     * Loads the path/value pairs of a file without recording comments.
     * Delegates to the four-argument overload with {@code full} set to false.
     *
     * @param filename
     *            the file to read
     * @param data
     *            the list that receives the pairs
     * @param validating
     *            if a validating parse is requested
     * @return whatever the four-argument overload returns
     */
    public static List<Pair<String, String>> loadPathValues(String filename, List<Pair<String, String>> data, boolean validating) {
        return loadPathValues(filename, data, validating, false);
    }
543 
    /**
     * Loads the path/value pairs of a file without filtering the values.
     * Delegates to the five-argument overload with a null value filter.
     *
     * @param filename
     *            the file to read
     * @param data
     *            the list that receives the pairs
     * @param validating
     *            if a validating parse is requested
     * @param full
     *            passed through to the five-argument overload
     * @return whatever the five-argument overload returns
     */
    public static List<Pair<String, String>> loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full) {
        return loadPathValues(filename, data, validating, full, null);
    }
547 
loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full, Function<String, String> valueFilter)548     public static List<Pair<String, String>> loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full,
549         Function<String, String> valueFilter) {
550         try {
551             new XMLFileReader()
552             .setHandler(new PathValueListHandler(data, full, valueFilter))
553             .read(filename, -1, validating);
554             return data;
555         } catch (Exception e) {
556             throw new ICUException(filename, e);
557         }
558     }
559 
processPathValues(String filename, boolean validating, SimpleHandler simpleHandler)560     public static void processPathValues(String filename, boolean validating, SimpleHandler simpleHandler) {
561         try {
562             new XMLFileReader()
563             .setHandler(simpleHandler)
564             .read(filename, -1, validating);
565         } catch (Exception e) {
566             throw new ICUException(filename, e);
567         }
568     }
569 
570     static final class PathValueListHandler extends SimpleHandler {
571         List<Pair<String, String>> data;
572         boolean full;
573         private Function<String, String> valueFilter;
574 
PathValueListHandler(List<Pair<String, String>> data, boolean full, Function<String, String> valueFilter)575         public PathValueListHandler(List<Pair<String, String>> data, boolean full, Function<String, String> valueFilter) {
576             super();
577             this.data = data != null ? data : new ArrayList<>();
578             this.full = full;
579             this.valueFilter = valueFilter;
580         }
581 
582         @Override
handlePathValue(String path, String value)583         public void handlePathValue(String path, String value) {
584             if (valueFilter == null) {
585                 data.add(Pair.of(path, value));
586             } else {
587                 String filteredValue = valueFilter.apply(value);
588                 if (filteredValue != null) {
589                     data.add(Pair.of(path, filteredValue));
590                 }
591             }
592         }
593 
594         @Override
handleComment(String path, String comment)595         public void handleComment(String path, String comment) {
596             if (!full || path.equals("/")) {
597                 return;
598             }
599             data.add(Pair.of("!", comment));
600         }
601     }
602 }
603