1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package org.apache.harmony.xml.dom;
18 
19 import java.util.Map;
20 import java.util.TreeMap;
21 import org.w3c.dom.DOMConfiguration;
22 import org.w3c.dom.DOMError;
23 import org.w3c.dom.DOMErrorHandler;
24 import org.w3c.dom.DOMException;
25 import org.w3c.dom.DOMStringList;
26 import org.w3c.dom.NamedNodeMap;
27 import org.w3c.dom.Node;
28 
29 /**
30  * A minimal implementation of DOMConfiguration. This implementation uses inner
31  * parameter instances to centralize each parameter's behavior.
32  */
33 public final class DOMConfigurationImpl implements DOMConfiguration {
34 
35     private static final Map<String, Parameter> PARAMETERS
36             = new TreeMap<String, Parameter>(String.CASE_INSENSITIVE_ORDER);
37 
38     static {
39         /*
40          * True to canonicalize the document (unsupported). This includes
41          * removing DocumentType nodes from the tree and removing unused
42          * namespace declarations. Setting this to true also sets these
43          * parameters:
44          *   entities = false
45          *   normalize-characters = false
46          *   cdata-sections = false
47          *   namespaces = true
48          *   namespace-declarations = true
49          *   well-formed = true
50          *   element-content-whitespace = true
51          * Setting these parameters to another value shall revert the canonical
52          * form to false.
53          */
54         PARAMETERS.put("canonical-form", new FixedParameter(false));
55 
56         /*
57          * True to keep existing CDATA nodes; false to replace them/merge them
58          * into adjacent text nodes.
59          */
60         PARAMETERS.put("cdata-sections", new BooleanParameter() {
61             public Object get(DOMConfigurationImpl config) {
62                 return config.cdataSections;
63             }
64             public void set(DOMConfigurationImpl config, Object value) {
65                 config.cdataSections = (Boolean) value;
66             }
67         });
68 
69         /*
70          * True to check character normalization (unsupported).
71          */
72         PARAMETERS.put("check-character-normalization", new FixedParameter(false));
73 
74         /*
75          * True to keep comments in the document; false to discard them.
76          */
77         PARAMETERS.put("comments", new BooleanParameter() {
78             public Object get(DOMConfigurationImpl config) {
79                 return config.comments;
80             }
81             public void set(DOMConfigurationImpl config, Object value) {
82                 config.comments = (Boolean) value;
83             }
84         });
85 
86         /*
87          * True to expose schema normalized values. Setting this to true sets
88          * the validate parameter to true. Has no effect when validate is false.
89          */
90         PARAMETERS.put("datatype-normalization", new BooleanParameter() {
91             public Object get(DOMConfigurationImpl config) {
92                 return config.datatypeNormalization;
93             }
94             public void set(DOMConfigurationImpl config, Object value) {
95                 if ((Boolean) value) {
96                     config.datatypeNormalization = true;
97                     config.validate = true;
98                 } else {
99                     config.datatypeNormalization = false;
100                 }
101             }
102         });
103 
104         /*
105          * True to keep whitespace elements in the document; false to discard
106          * them (unsupported).
107          */
108         PARAMETERS.put("element-content-whitespace", new FixedParameter(true));
109 
110         /*
111          * True to keep entity references in the document; false to expand them.
112          */
113         PARAMETERS.put("entities", new BooleanParameter() {
114             public Object get(DOMConfigurationImpl config) {
115                 return config.entities;
116             }
117             public void set(DOMConfigurationImpl config, Object value) {
118                 config.entities = (Boolean) value;
119             }
120         });
121 
122         /*
123          * Handler to be invoked when errors are encountered.
124          */
125         PARAMETERS.put("error-handler", new Parameter() {
126             public Object get(DOMConfigurationImpl config) {
127                 return config.errorHandler;
128             }
129             public void set(DOMConfigurationImpl config, Object value) {
130                 config.errorHandler = (DOMErrorHandler) value;
131             }
132             public boolean canSet(DOMConfigurationImpl config, Object value) {
133                 return value == null || value instanceof DOMErrorHandler;
134             }
135         });
136 
137         /*
138          * Bulk alias to set the following parameter values:
139          *   validate-if-schema = false
140          *   entities = false
141          *   datatype-normalization = false
142          *   cdata-sections = false
143          *   namespace-declarations = true
144          *   well-formed = true
145          *   element-content-whitespace = true
146          *   comments = true
147          *   namespaces = true.
148          * Querying this returns true if all of the above parameters have the
149          * listed values; false otherwise.
150          */
151         PARAMETERS.put("infoset", new BooleanParameter() {
152             public Object get(DOMConfigurationImpl config) {
153                 // validate-if-schema is always false
154                 // element-content-whitespace is always true
155                 // namespace-declarations is always true
156                 return !config.entities
157                         && !config.datatypeNormalization
158                         && !config.cdataSections
159                         && config.wellFormed
160                         && config.comments
161                         && config.namespaces;
162             }
163             public void set(DOMConfigurationImpl config, Object value) {
164                 if ((Boolean) value) {
165                     // validate-if-schema is always false
166                     // element-content-whitespace is always true
167                     // namespace-declarations is always true
168                     config.entities = false;
169                     config.datatypeNormalization = false;
170                     config.cdataSections = false;
171                     config.wellFormed = true;
172                     config.comments = true;
173                     config.namespaces = true;
174                 }
175             }
176         });
177 
178         /*
179          * True to perform namespace processing; false for none.
180          */
181         PARAMETERS.put("namespaces", new BooleanParameter() {
182             public Object get(DOMConfigurationImpl config) {
183                 return config.namespaces;
184             }
185             public void set(DOMConfigurationImpl config, Object value) {
186                 config.namespaces = (Boolean) value;
187             }
188         });
189 
190         /**
191          * True to include namespace declarations; false to discard them
192          * (unsupported). Even when namespace declarations are discarded,
193          * prefixes are retained.
194          *
195          * Has no effect if namespaces is false.
196          */
197         PARAMETERS.put("namespace-declarations", new FixedParameter(true));
198 
199         /*
200          * True to fully normalize characters (unsupported).
201          */
202         PARAMETERS.put("normalize-characters", new FixedParameter(false));
203 
204         /*
205          * A list of whitespace-separated URIs representing the schemas to validate
206          * against. Has no effect if schema-type is null.
207          */
208         PARAMETERS.put("schema-location", new Parameter() {
209             public Object get(DOMConfigurationImpl config) {
210                 return config.schemaLocation;
211             }
212             public void set(DOMConfigurationImpl config, Object value) {
213                 config.schemaLocation = (String) value;
214             }
215             public boolean canSet(DOMConfigurationImpl config, Object value) {
216                 return value == null || value instanceof String;
217             }
218         });
219 
220         /*
221          * URI representing the type of schema language, such as
222          * "http://www.w3.org/2001/XMLSchema" or "http://www.w3.org/TR/REC-xml".
223          */
224         PARAMETERS.put("schema-type", new Parameter() {
225             public Object get(DOMConfigurationImpl config) {
226                 return config.schemaType;
227             }
228             public void set(DOMConfigurationImpl config, Object value) {
229                 config.schemaType = (String) value;
230             }
231             public boolean canSet(DOMConfigurationImpl config, Object value) {
232                 return value == null || value instanceof String;
233             }
234         });
235 
236         /*
237          * True to split CDATA sections containing "]]>"; false to signal an
238          * error instead.
239          */
240         PARAMETERS.put("split-cdata-sections", new BooleanParameter() {
241             public Object get(DOMConfigurationImpl config) {
242                 return config.splitCdataSections;
243             }
244             public void set(DOMConfigurationImpl config, Object value) {
245                 config.splitCdataSections = (Boolean) value;
246             }
247         });
248 
249         /*
250          * True to require validation against a schema or DTD. Validation will
251          * recompute element content whitespace, ID and schema type data.
252          *
253          * Setting this unsets validate-if-schema.
254          */
255         PARAMETERS.put("validate", new BooleanParameter() {
256             public Object get(DOMConfigurationImpl config) {
257                 return config.validate;
258             }
259             public void set(DOMConfigurationImpl config, Object value) {
260                 // validate-if-schema is always false
261                 config.validate = (Boolean) value;
262             }
263         });
264 
265         /*
266          * True to validate if a schema was declared (unsupported). Setting this
267          * unsets validate.
268          */
269         PARAMETERS.put("validate-if-schema", new FixedParameter(false));
270 
271         /*
272          * True to report invalid characters in node names, attributes, elements,
273          * comments, text, CDATA sections and processing instructions.
274          */
275         PARAMETERS.put("well-formed", new BooleanParameter() {
276             public Object get(DOMConfigurationImpl config) {
277                 return config.wellFormed;
278             }
279             public void set(DOMConfigurationImpl config, Object value) {
280                 config.wellFormed = (Boolean) value;
281             }
282         });
283 
284         // TODO add "resource-resolver" property for use with LS feature...
285     }
286 
287     private boolean cdataSections = true;
288     private boolean comments = true;
289     private boolean datatypeNormalization = false;
290     private boolean entities = true;
291     private DOMErrorHandler errorHandler;
292     private boolean namespaces = true;
293     private String schemaLocation;
294     private String schemaType;
295     private boolean splitCdataSections = true;
296     private boolean validate = false;
297     private boolean wellFormed = true;
298 
299     interface Parameter {
get(DOMConfigurationImpl config)300         Object get(DOMConfigurationImpl config);
set(DOMConfigurationImpl config, Object value)301         void set(DOMConfigurationImpl config, Object value);
canSet(DOMConfigurationImpl config, Object value)302         boolean canSet(DOMConfigurationImpl config, Object value);
303     }
304 
305     static class FixedParameter implements Parameter {
306         final Object onlyValue;
FixedParameter(Object onlyValue)307         FixedParameter(Object onlyValue) {
308             this.onlyValue = onlyValue;
309         }
get(DOMConfigurationImpl config)310         public Object get(DOMConfigurationImpl config) {
311             return onlyValue;
312         }
set(DOMConfigurationImpl config, Object value)313         public void set(DOMConfigurationImpl config, Object value) {
314             if (!onlyValue.equals(value)) {
315                 throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
316                         "Unsupported value: " + value);
317             }
318         }
canSet(DOMConfigurationImpl config, Object value)319         public boolean canSet(DOMConfigurationImpl config, Object value) {
320             return onlyValue.equals(value);
321         }
322     }
323 
324     static abstract class BooleanParameter implements Parameter {
canSet(DOMConfigurationImpl config, Object value)325         public boolean canSet(DOMConfigurationImpl config, Object value) {
326             return value instanceof Boolean;
327         }
328     }
329 
canSetParameter(String name, Object value)330     public boolean canSetParameter(String name, Object value) {
331         Parameter parameter = PARAMETERS.get(name);
332         return parameter != null && parameter.canSet(this, value);
333     }
334 
setParameter(String name, Object value)335     public void setParameter(String name, Object value) throws DOMException {
336         Parameter parameter = PARAMETERS.get(name);
337         if (parameter == null) {
338             throw new DOMException(DOMException.NOT_FOUND_ERR, "No such parameter: " + name);
339         }
340         try {
341             parameter.set(this, value);
342         } catch (NullPointerException e) {
343             throw new DOMException(DOMException.TYPE_MISMATCH_ERR,
344                     "Null not allowed for " + name);
345         } catch (ClassCastException e) {
346             throw new DOMException(DOMException.TYPE_MISMATCH_ERR,
347                     "Invalid type for " + name + ": " + value.getClass());
348         }
349     }
350 
getParameter(String name)351     public Object getParameter(String name) throws DOMException {
352         Parameter parameter = PARAMETERS.get(name);
353         if (parameter == null) {
354             throw new DOMException(DOMException.NOT_FOUND_ERR, "No such parameter: " + name);
355         }
356         return parameter.get(this);
357     }
358 
getParameterNames()359     public DOMStringList getParameterNames() {
360         return internalGetParameterNames();
361     }
362 
internalGetParameterNames()363     private static DOMStringList internalGetParameterNames() {
364         final String[] result = PARAMETERS.keySet().toArray(new String[PARAMETERS.size()]);
365         return new DOMStringList() {
366             public String item(int index) {
367                 return index < result.length ? result[index] : null;
368             }
369             public int getLength() {
370                 return result.length;
371             }
372             public boolean contains(String str) {
373                 return PARAMETERS.containsKey(str); // case-insensitive.
374             }
375         };
376     }
377 
378     public void normalize(Node node) {
379         /*
380          * Since we don't validate, this code doesn't take into account the
381          * following "supported" parameters: datatype-normalization, entities,
382          * schema-location, schema-type, or validate.
383          *
384          * TODO: normalize namespaces
385          */
386 
387         switch (node.getNodeType()) {
388             case Node.CDATA_SECTION_NODE:
389                 CDATASectionImpl cdata = (CDATASectionImpl) node;
390                 if (cdataSections) {
391                     if (cdata.needsSplitting()) {
392                         if (splitCdataSections) {
393                             cdata.split();
394                             report(DOMError.SEVERITY_WARNING, "cdata-sections-splitted");
395                         } else {
396                             report(DOMError.SEVERITY_ERROR, "wf-invalid-character");
397                         }
398                     }
399                     checkTextValidity(cdata.buffer);
400                     break;
401                 }
402                 node = cdata.replaceWithText();
403                 // fall through
404 
405             case Node.TEXT_NODE:
406                 TextImpl text = (TextImpl) node;
407                 text = text.minimize();
408                 if (text != null) {
409                     checkTextValidity(text.buffer);
410                 }
411                 break;
412 
413             case Node.COMMENT_NODE:
414                 CommentImpl comment = (CommentImpl) node;
415                 if (!comments) {
416                     comment.getParentNode().removeChild(comment);
417                     break;
418                 }
419                 if (comment.containsDashDash()) {
420                     report(DOMError.SEVERITY_ERROR, "wf-invalid-character");
421                 }
422                 checkTextValidity(comment.buffer);
423                 break;
424 
425             case Node.PROCESSING_INSTRUCTION_NODE:
426                 checkTextValidity(((ProcessingInstructionImpl) node).getData());
427                 break;
428 
429             case Node.ATTRIBUTE_NODE:
430                 checkTextValidity(((AttrImpl) node).getValue());
431                 break;
432 
433             case Node.ELEMENT_NODE:
434                 ElementImpl element = (ElementImpl) node;
435                 NamedNodeMap attributes = element.getAttributes();
436                 for (int i = 0; i < attributes.getLength(); i++) {
437                     normalize(attributes.item(i));
438                 }
439                 // fall through
440 
441             case Node.DOCUMENT_NODE:
442             case Node.DOCUMENT_FRAGMENT_NODE:
443                 Node next;
444                 for (Node child = node.getFirstChild(); child != null; child = next) {
445                     // lookup next eagerly because normalize() may remove its subject
446                     next = child.getNextSibling();
447                     normalize(child);
448                 }
449                 break;
450 
451             case Node.NOTATION_NODE:
452             case Node.DOCUMENT_TYPE_NODE:
453             case Node.ENTITY_NODE:
454             case Node.ENTITY_REFERENCE_NODE:
455                 break;
456 
457             default:
458                 throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
459                         "Unsupported node type " + node.getNodeType());
460         }
461     }
462 
463     private void checkTextValidity(CharSequence s) {
464         if (wellFormed && !isValid(s)) {
465             report(DOMError.SEVERITY_ERROR, "wf-invalid-character");
466         }
467     }
468 
469     /**
470      * Returns true if all of the characters in the text are permitted for use
471      * in XML documents.
472      */
473     private boolean isValid(CharSequence text) {
474         for (int i = 0; i < text.length(); i++) {
475             char c = text.charAt(i);
476             // as defined by http://www.w3.org/TR/REC-xml/#charsets.
477             boolean valid = c == 0x9 || c == 0xA || c == 0xD
478                     || (c >= 0x20 && c <= 0xd7ff)
479                     || (c >= 0xe000 && c <= 0xfffd);
480             if (!valid) {
481                 return false;
482             }
483         }
484         return true;
485     }
486 
487     private void report(short severity, String type) {
488         if (errorHandler != null) {
489             // TODO: abort if handleError returns false
490             errorHandler.handleError(new DOMErrorImpl(severity, type));
491         }
492     }
493 }
494