1 /*
2  **********************************************************************
3  * Copyright (c) 2002-2004, International Business Machines
4  * Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  * Author: Mark Davis
7  **********************************************************************
8  */
9 package org.unicode.cldr.util;
10 
11 import java.io.FileInputStream;
12 import java.io.IOException;
13 import java.io.InputStream;
14 import java.io.InputStreamReader;
15 import java.io.Reader;
16 import java.nio.charset.Charset;
17 import java.util.ArrayList;
18 import java.util.List;
19 import java.util.Stack;
20 
21 import org.xml.sax.Attributes;
22 import org.xml.sax.ContentHandler;
23 import org.xml.sax.ErrorHandler;
24 import org.xml.sax.InputSource;
25 import org.xml.sax.Locator;
26 import org.xml.sax.SAXException;
27 import org.xml.sax.SAXParseException;
28 import org.xml.sax.XMLReader;
29 import org.xml.sax.ext.DeclHandler;
30 import org.xml.sax.ext.LexicalHandler;
31 import org.xml.sax.helpers.XMLReaderFactory;
32 
33 import com.google.common.base.Function;
34 
35 /**
 * Convenience class to make reading XML data files easier. The main method is read().
 * It is meant for XML data files, where the content of each element is either only other
 * elements or only text. It is therefore not suitable for XML files with MIXED content:
 * all text content in a mixed element is discarded.
40  *
41  * @author davis
42  */
43 public class XMLFileReader {
    /** Debug switch: when true, the internal SAX handler logs many of the callbacks it receives. */
    static final boolean SHOW_ALL = false;
    /**
     * Handlers to use in read(). Each value is a separate bit, so they can be combined with
     * a bitwise OR (e.g. CONTENT_HANDLER | ERROR_HANDLER); read() tests them with a bitwise AND.
     */
    public static int CONTENT_HANDLER = 1, ERROR_HANDLER = 2, LEXICAL_HANDLER = 4, DECLARATION_HANDLER = 8;

    // One handler object serves every SAX role that read() registers (content, error, lexical
    // and declaration handler); read() resets it before each parse.
    private MyContentHandler DEFAULT_DECLHANDLER = new MyContentHandler();
    // Receiver of the simplified callbacks; assigned through setHandler().
    private SimpleHandler simpleHandler;
52 
53     public static class SimpleHandler {
handlePathValue(String path, String value)54         public void handlePathValue(String path, String value) {
55         };
56 
handleComment(String path, String comment)57         public void handleComment(String path, String comment) {
58         };
59 
handleElementDecl(String name, String model)60         public void handleElementDecl(String name, String model) {
61         };
62 
handleAttributeDecl(String eName, String aName, String type, String mode, String value)63         public void handleAttributeDecl(String eName, String aName, String type, String mode, String value) {
64         };
65 
handleEndDtd()66         public void handleEndDtd() {
67         }
68 
handleStartDtd(String name, String publicId, String systemId)69         public void handleStartDtd(String name, String publicId, String systemId) {
70         };
71     }
72 
setHandler(SimpleHandler simpleHandler)73     public XMLFileReader setHandler(SimpleHandler simpleHandler) {
74         this.simpleHandler = simpleHandler;
75         return this;
76     }
77 
78     /**
79      * Read an XML file. Return a list of alternating items, where the even items are the paths,
80      * and the odd ones are values. The order of the elements matches what was in the file.
81      *
82      * @param fileName
83      *            file to open
84      * @param handlers
85      *            a set of values for the handlers to use, eg CONTENT_HANDLER | ERROR_HANDLER
86      * @param validating
87      *            if a validating parse is requested
88      * @return list of alternating values.
89      */
read(String fileName, int handlers, boolean validating)90     public XMLFileReader read(String fileName, int handlers, boolean validating) {
91         try {
92             InputStream fis = new FileInputStream(fileName);
93             fis = new FilterBomInputStream(fis);
94             return read(fileName, fis, handlers, validating);
95         } catch (IOException e) {
96             throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + fileName).initCause(e);
97         }
98     }
99 
100     /**
101      * read from a Stream
102      * @param fileName
103      * @param handlers
104      * @param validating
105      * @param fis
106      * @return
107      */
read(String fileName, InputStream fis, int handlers, boolean validating)108     public XMLFileReader read(String fileName, InputStream fis, int handlers, boolean validating) {
109         return read(fileName, new InputStreamReader(fis, Charset.forName("UTF-8")), handlers, validating);
110     }
111 
112     /**
113      * read from a CLDR resource
114      * @param fileName
115      * @param handlers
116      * @param validating
117      * @param fis
118      * @see CldrUtility#getInputStream(String)
119      * @return
120      */
readCLDRResource(String resName, int handlers, boolean validating)121     public XMLFileReader readCLDRResource(String resName, int handlers, boolean validating) {
122 
123         return read(resName, CldrUtility.getInputStream(resName), handlers, validating);
124     }
125 
126     /**
127      * read from an arbitrary
128      * @param fileName
129      * @param handlers
130      * @param validating
131      * @param fis
132      * @see CldrUtility#getInputStream(String)
133      * @return
134      */
read(String resName, Class<?> callingClass, int handlers, boolean validating)135     public XMLFileReader read(String resName, Class<?> callingClass, int handlers, boolean validating) {
136 
137         return read(resName, CldrUtility.getInputStream(callingClass, resName), handlers, validating);
138     }
139 
read(String systemID, Reader reader, int handlers, boolean validating)140     public XMLFileReader read(String systemID, Reader reader, int handlers, boolean validating) {
141         try {
142             XMLReader xmlReader = createXMLReader(validating);
143             DEFAULT_DECLHANDLER.reset();
144             if ((handlers & CONTENT_HANDLER) != 0) {
145                 xmlReader.setContentHandler(DEFAULT_DECLHANDLER);
146             }
147             if ((handlers & ERROR_HANDLER) != 0) {
148                 xmlReader.setErrorHandler(DEFAULT_DECLHANDLER);
149             }
150             if ((handlers & LEXICAL_HANDLER) != 0) {
151                 xmlReader.setProperty("http://xml.org/sax/properties/lexical-handler", DEFAULT_DECLHANDLER);
152             }
153             if ((handlers & DECLARATION_HANDLER) != 0) {
154                 xmlReader.setProperty("http://xml.org/sax/properties/declaration-handler", DEFAULT_DECLHANDLER);
155             }
156             InputSource is = new InputSource(reader);
157             is.setSystemId(systemID);
158             try {
159                 xmlReader.parse(is);
160             } catch (AbortException e) {
161             } // ok
162             reader.close();
163             return this;
164         } catch (SAXParseException e) {
165             throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + systemID + "\tline:\t"
166                 + e.getLineNumber()).initCause(e);
167         } catch (SAXException e) {
168             throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + systemID).initCause(e);
169         } catch (IOException e) {
170             throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + systemID).initCause(e);
171         }
172     }
173 
174     private class MyContentHandler implements ContentHandler, LexicalHandler, DeclHandler, ErrorHandler {
175         StringBuffer chars = new StringBuffer();
176         StringBuffer commentChars = new StringBuffer();
177         Stack<String> startElements = new Stack<String>();
178         StringBuffer tempPath = new StringBuffer();
179         boolean lastIsStart = false;
180 
reset()181         public void reset() {
182             chars.setLength(0);
183             tempPath = new StringBuffer("/");
184             startElements.clear();
185             startElements.push("/");
186         }
187 
characters(char[] ch, int start, int length)188         public void characters(char[] ch, int start, int length) throws SAXException {
189             if (lastIsStart) chars.append(ch, start, length);
190         }
191 
startElement(String namespaceURI, String localName, String qName, Attributes atts)192         public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
193             throws SAXException {
194             tempPath.setLength(0);
195             tempPath.append(startElements.peek()).append('/').append(qName);
196             for (int i = 0; i < atts.getLength(); ++i) {
197                 tempPath.append("[@").append(atts.getQName(i)).append("=\"").append(atts.getValue(i).replace('"', '\'')).append("\"]");
198             }
199             startElements.push(tempPath.toString());
200             chars.setLength(0); // clear garbage
201             lastIsStart = true;
202         }
203 
endElement(String namespaceURI, String localName, String qName)204         public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
205             String startElement = (String) startElements.pop();
206             if (lastIsStart) {
207                 // System.out.println(startElement + ":" + chars);
208                 simpleHandler.handlePathValue(startElement, chars.toString());
209             }
210             chars.setLength(0);
211             lastIsStart = false;
212         }
213 
startDTD(String name, String publicId, String systemId)214         public void startDTD(String name, String publicId, String systemId) throws SAXException {
215             if (SHOW_ALL) Log.logln("startDTD name: " + name
216                 + ", publicId: " + publicId
217                 + ", systemId: " + systemId);
218             simpleHandler.handleStartDtd(name, publicId, systemId);
219         }
220 
endDTD()221         public void endDTD() throws SAXException {
222             if (SHOW_ALL) Log.logln("endDTD");
223             simpleHandler.handleEndDtd();
224         }
225 
comment(char[] ch, int start, int length)226         public void comment(char[] ch, int start, int length) throws SAXException {
227             if (SHOW_ALL) Log.logln(" comment " + new String(ch, start, length));
228             commentChars.append(ch, start, length);
229             simpleHandler.handleComment((String) startElements.peek(), commentChars.toString());
230             commentChars.setLength(0);
231         }
232 
elementDecl(String name, String model)233         public void elementDecl(String name, String model) throws SAXException {
234             simpleHandler.handleElementDecl(name, model);
235         }
236 
attributeDecl(String eName, String aName, String type, String mode, String value)237         public void attributeDecl(String eName, String aName, String type, String mode, String value)
238             throws SAXException {
239             simpleHandler.handleAttributeDecl(eName, aName, type, mode, value);
240         }
241 
242         // ==== The following are just for debuggin =====
243 
startDocument()244         public void startDocument() throws SAXException {
245             if (SHOW_ALL) Log.logln("startDocument");
246         }
247 
ignorableWhitespace(char[] ch, int start, int length)248         public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
249             if (SHOW_ALL) Log.logln("ignorableWhitespace length: " + length);
250         }
251 
endDocument()252         public void endDocument() throws SAXException {
253             if (SHOW_ALL) Log.logln("endDocument");
254         }
255 
internalEntityDecl(String name, String value)256         public void internalEntityDecl(String name, String value) throws SAXException {
257             if (SHOW_ALL) Log.logln("Internal Entity\t" + name + "\t" + value);
258         }
259 
externalEntityDecl(String name, String publicId, String systemId)260         public void externalEntityDecl(String name, String publicId, String systemId) throws SAXException {
261             if (SHOW_ALL) Log.logln("Internal Entity\t" + name + "\t" + publicId + "\t" + systemId);
262         }
263 
notationDecl(String name, String publicId, String systemId)264         public void notationDecl(String name, String publicId, String systemId) {
265             if (SHOW_ALL) Log.logln("notationDecl: " + name
266                 + ", " + publicId
267                 + ", " + systemId);
268         }
269 
processingInstruction(String target, String data)270         public void processingInstruction(String target, String data)
271             throws SAXException {
272             if (SHOW_ALL) Log.logln("processingInstruction: " + target + ", " + data);
273         }
274 
skippedEntity(String name)275         public void skippedEntity(String name)
276             throws SAXException {
277             if (SHOW_ALL) Log.logln("skippedEntity: " + name);
278         }
279 
unparsedEntityDecl(String name, String publicId, String systemId, String notationName)280         public void unparsedEntityDecl(String name, String publicId,
281             String systemId, String notationName) {
282             if (SHOW_ALL) Log.logln("unparsedEntityDecl: " + name
283                 + ", " + publicId
284                 + ", " + systemId
285                 + ", " + notationName);
286         }
287 
setDocumentLocator(Locator locator)288         public void setDocumentLocator(Locator locator) {
289             if (SHOW_ALL) Log.logln("setDocumentLocator Locator " + locator);
290         }
291 
startPrefixMapping(String prefix, String uri)292         public void startPrefixMapping(String prefix, String uri) throws SAXException {
293             if (SHOW_ALL) Log.logln("startPrefixMapping prefix: " + prefix +
294                 ", uri: " + uri);
295         }
296 
endPrefixMapping(String prefix)297         public void endPrefixMapping(String prefix) throws SAXException {
298             if (SHOW_ALL) Log.logln("endPrefixMapping prefix: " + prefix);
299         }
300 
startEntity(String name)301         public void startEntity(String name) throws SAXException {
302             if (SHOW_ALL) Log.logln("startEntity name: " + name);
303         }
304 
endEntity(String name)305         public void endEntity(String name) throws SAXException {
306             if (SHOW_ALL) Log.logln("endEntity name: " + name);
307         }
308 
startCDATA()309         public void startCDATA() throws SAXException {
310             if (SHOW_ALL) Log.logln("startCDATA");
311         }
312 
endCDATA()313         public void endCDATA() throws SAXException {
314             if (SHOW_ALL) Log.logln("endCDATA");
315         }
316 
317         /*
318          * (non-Javadoc)
319          *
320          * @see org.xml.sax.ErrorHandler#error(org.xml.sax.SAXParseException)
321          */
error(SAXParseException exception)322         public void error(SAXParseException exception) throws SAXException {
323             if (SHOW_ALL) Log.logln("error: " + showSAX(exception));
324             throw exception;
325         }
326 
327         /*
328          * (non-Javadoc)
329          *
330          * @see org.xml.sax.ErrorHandler#fatalError(org.xml.sax.SAXParseException)
331          */
fatalError(SAXParseException exception)332         public void fatalError(SAXParseException exception) throws SAXException {
333             if (SHOW_ALL) Log.logln("fatalError: " + showSAX(exception));
334             throw exception;
335         }
336 
337         /*
338          * (non-Javadoc)
339          *
340          * @see org.xml.sax.ErrorHandler#warning(org.xml.sax.SAXParseException)
341          */
warning(SAXParseException exception)342         public void warning(SAXParseException exception) throws SAXException {
343             if (SHOW_ALL) Log.logln("warning: " + showSAX(exception));
344             throw exception;
345         }
346     }
347 
348     static final class AbortException extends RuntimeException {
349         private static final long serialVersionUID = 1L;
350     }
351 
352     /**
353      * Show a SAX exception in a readable form.
354      */
showSAX(SAXParseException exception)355     public static String showSAX(SAXParseException exception) {
356         return exception.getMessage()
357             + ";\t SystemID: " + exception.getSystemId()
358             + ";\t PublicID: " + exception.getPublicId()
359             + ";\t LineNumber: " + exception.getLineNumber()
360             + ";\t ColumnNumber: " + exception.getColumnNumber();
361     }
362 
createXMLReader(boolean validating)363     public static XMLReader createXMLReader(boolean validating) {
364         // weiv 07/20/2007: The laundry list below is somewhat obsolete
365         // I have moved the system's default parser (instantiated when "" is
366         // passed) to the top, so that we will always use that. I have also
367         // removed "org.apache.crimson.parser.XMLReaderImpl" as this one gets
368         // confused regarding UTF-8 encoding name.
369         String[] testList = {
370             System.getProperty("CLDR_DEFAULT_SAX_PARSER", ""), // defaults to "", system default.
371             "org.apache.xerces.parsers.SAXParser",
372             "gnu.xml.aelfred2.XmlReader",
373             "com.bluecast.xml.Piccolo",
374             "oracle.xml.parser.v2.SAXParser"
375         };
376         XMLReader result = null;
377         for (int i = 0; i < testList.length; ++i) {
378             try {
379                 result = (testList[i].length() != 0)
380                     ? XMLReaderFactory.createXMLReader(testList[i])
381                     : XMLReaderFactory.createXMLReader();
382                 result.setFeature("http://xml.org/sax/features/validation", validating);
383                 break;
384             } catch (SAXException e1) {
385             }
386         }
387         if (result == null)
388             throw new NoClassDefFoundError("No SAX parser is available, or unable to set validation correctly");
389         try {
390             result.setEntityResolver(new CachingEntityResolver());
391         } catch (Throwable e) {
392             System.err
393                 .println("WARNING: Can't set caching entity resolver  -  error "
394                     + e.toString());
395             e.printStackTrace();
396         }
397         return result;
398     }
399 
400     static final class DebuggingInputStream extends InputStream {
401         InputStream contents;
402 
close()403         public void close() throws IOException {
404             contents.close();
405         }
406 
DebuggingInputStream(InputStream fis)407         public DebuggingInputStream(InputStream fis) {
408             contents = fis;
409         }
410 
read()411         public int read() throws IOException {
412             int x = contents.read();
413             System.out.println(Integer.toHexString(x) + ",");
414             return x;
415         }
416     }
417 
418     static final class FilterBomInputStream extends InputStream {
419         InputStream contents;
420         boolean first = true;
421 
close()422         public void close() throws IOException {
423             contents.close();
424         }
425 
FilterBomInputStream(InputStream fis)426         public FilterBomInputStream(InputStream fis) {
427             contents = fis;
428         }
429 
read()430         public int read() throws IOException {
431             int x = contents.read();
432             if (first) {
433                 first = false;
434                 // 0xEF,0xBB,0xBF
435                 // SKIP bom
436                 if (x == 0xEF) {
437                     int y = contents.read();
438                     if (y == 0xBB) {
439                         int z = contents.read();
440                         if (z == 0xBF) {
441                             x = contents.read();
442                         }
443                     }
444                 }
445             }
446             return x;
447         }
448     }
449 
loadPathValues(String filename, List<Pair<String, String>> data, boolean validating)450     public static List<Pair<String, String>> loadPathValues(String filename, List<Pair<String, String>> data, boolean validating) {
451         return loadPathValues(filename, data, validating, false);
452     }
453 
loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full)454     public static List<Pair<String, String>> loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full) {
455         return loadPathValues(filename, data, validating, full, null);
456     }
457 
loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full, Function<String, String> valueFilter)458     public static List<Pair<String, String>> loadPathValues(String filename, List<Pair<String, String>> data, boolean validating, boolean full,
459         Function<String, String> valueFilter) {
460         try {
461             new XMLFileReader()
462                 .setHandler(new PathValueListHandler(data, full, valueFilter))
463                 .read(filename, -1, validating);
464             return data;
465         } catch (Exception e) {
466             throw new IllegalArgumentException(filename, e);
467         }
468     }
469 
470     static final class PathValueListHandler extends SimpleHandler {
471         List<Pair<String, String>> data;
472         boolean full;
473         private Function<String, String> valueFilter;
474 
PathValueListHandler(List<Pair<String, String>> data, boolean full, Function<String, String> valueFilter)475         public PathValueListHandler(List<Pair<String, String>> data, boolean full, Function<String, String> valueFilter) {
476             super();
477             this.data = data != null ? data : new ArrayList<Pair<String, String>>();
478             this.full = full;
479             this.valueFilter = valueFilter;
480         }
481 
482         @Override
handlePathValue(String path, String value)483         public void handlePathValue(String path, String value) {
484             if (valueFilter == null) {
485                 data.add(Pair.of(path, value));
486             } else {
487                 String filteredValue = valueFilter.apply(value);
488                 if (filteredValue != null) {
489                     data.add(Pair.of(path, filteredValue));
490                 }
491             }
492         }
493 
494         @Override
handleComment(String path, String comment)495         public void handleComment(String path, String comment) {
496             if (!full || path.equals("/")) {
497                 return;
498             }
499             data.add(Pair.of("!", comment));
500         }
501     }
502 }