1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package org.apache.harmony.xml.dom;
18 
19 import java.util.Map;
20 import java.util.TreeMap;
21 import org.w3c.dom.DOMConfiguration;
22 import org.w3c.dom.DOMError;
23 import org.w3c.dom.DOMErrorHandler;
24 import org.w3c.dom.DOMException;
25 import org.w3c.dom.DOMStringList;
26 import org.w3c.dom.NamedNodeMap;
27 import org.w3c.dom.Node;
28 
29 /**
30  * A minimal implementation of DOMConfiguration. This implementation uses inner
31  * parameter instances to centralize each parameter's behavior.
32  */
33 public final class DOMConfigurationImpl implements DOMConfiguration {
34 
35     private static final Map<String, Parameter> PARAMETERS
36             = new TreeMap<String, Parameter>(String.CASE_INSENSITIVE_ORDER);
37 
38     static {
39         /*
40          * True to canonicalize the document (unsupported). This includes
41          * removing DocumentType nodes from the tree and removing unused
42          * namespace declarations. Setting this to true also sets these
43          * parameters:
44          *   entities = false
45          *   normalize-characters = false
46          *   cdata-sections = false
47          *   namespaces = true
48          *   namespace-declarations = true
49          *   well-formed = true
50          *   element-content-whitespace = true
51          * Setting these parameters to another value shall revert the canonical
52          * form to false.
53          */
54         PARAMETERS.put("canonical-form", new FixedParameter(false));
55 
56         /*
57          * True to keep existing CDATA nodes; false to replace them/merge them
58          * into adjacent text nodes.
59          */
60         PARAMETERS.put("cdata-sections", new BooleanParameter() {
61             public Object get(DOMConfigurationImpl config) {
62                 return config.cdataSections;
63             }
64             public void set(DOMConfigurationImpl config, Object value) {
65                 config.cdataSections = (Boolean) value;
66             }
67         });
68 
69         /*
70          * True to check character normalization (unsupported).
71          */
72         PARAMETERS.put("check-character-normalization", new FixedParameter(false));
73 
74         /*
75          * True to keep comments in the document; false to discard them.
76          */
77         PARAMETERS.put("comments", new BooleanParameter() {
78             public Object get(DOMConfigurationImpl config) {
79                 return config.comments;
80             }
81             public void set(DOMConfigurationImpl config, Object value) {
82                 config.comments = (Boolean) value;
83             }
84         });
85 
86         /*
87          * True to expose schema normalized values. Setting this to true sets
88          * the validate parameter to true. Has no effect when validate is false.
89          */
90         PARAMETERS.put("datatype-normalization", new BooleanParameter() {
91             public Object get(DOMConfigurationImpl config) {
92                 return config.datatypeNormalization;
93             }
94             public void set(DOMConfigurationImpl config, Object value) {
95                 if ((Boolean) value) {
96                     config.datatypeNormalization = true;
97                     config.validate = true;
98                 } else {
99                     config.datatypeNormalization = false;
100                 }
101             }
102         });
103 
104         /*
105          * True to keep whitespace elements in the document; false to discard
106          * them (unsupported).
107          */
108         PARAMETERS.put("element-content-whitespace", new FixedParameter(true));
109 
110         /*
111          * True to keep entity references in the document; false to expand them.
112          */
113         PARAMETERS.put("entities", new BooleanParameter() {
114             public Object get(DOMConfigurationImpl config) {
115                 return config.entities;
116             }
117             public void set(DOMConfigurationImpl config, Object value) {
118                 config.entities = (Boolean) value;
119             }
120         });
121 
122         /*
123          * Handler to be invoked when errors are encountered.
124          */
125         PARAMETERS.put("error-handler", new Parameter() {
126             public Object get(DOMConfigurationImpl config) {
127                 return config.errorHandler;
128             }
129             public void set(DOMConfigurationImpl config, Object value) {
130                 config.errorHandler = (DOMErrorHandler) value;
131             }
132             public boolean canSet(DOMConfigurationImpl config, Object value) {
133                 return value == null || value instanceof DOMErrorHandler;
134             }
135         });
136 
137         /*
138          * Bulk alias to set the following parameter values:
139          *   validate-if-schema = false
140          *   entities = false
141          *   datatype-normalization = false
142          *   cdata-sections = false
143          *   namespace-declarations = true
144          *   well-formed = true
145          *   element-content-whitespace = true
146          *   comments = true
147          *   namespaces = true.
148          * Querying this returns true if all of the above parameters have the
149          * listed values; false otherwise.
150          */
151         PARAMETERS.put("infoset", new BooleanParameter() {
152             public Object get(DOMConfigurationImpl config) {
153                 // validate-if-schema is always false
154                 // element-content-whitespace is always true
155                 // namespace-declarations is always true
156                 return !config.entities
157                         && !config.datatypeNormalization
158                         && !config.cdataSections
159                         && config.wellFormed
160                         && config.comments
161                         && config.namespaces;
162             }
163             public void set(DOMConfigurationImpl config, Object value) {
164                 if ((Boolean) value) {
165                     // validate-if-schema is always false
166                     // element-content-whitespace is always true
167                     // namespace-declarations is always true
168                     config.entities = false;
169                     config.datatypeNormalization = false;
170                     config.cdataSections = false;
171                     config.wellFormed = true;
172                     config.comments = true;
173                     config.namespaces = true;
174                 }
175             }
176         });
177 
178         /*
179          * True to perform namespace processing; false for none.
180          */
181         PARAMETERS.put("namespaces", new BooleanParameter() {
182             public Object get(DOMConfigurationImpl config) {
183                 return config.namespaces;
184             }
185             public void set(DOMConfigurationImpl config, Object value) {
186                 config.namespaces = (Boolean) value;
187             }
188         });
189 
190         /**
191          * True to include namespace declarations; false to discard them
192          * (unsupported). Even when namespace declarations are discarded,
193          * prefixes are retained.
194          *
195          * Has no effect if namespaces is false.
196          */
197         PARAMETERS.put("namespace-declarations", new FixedParameter(true));
198 
199         /*
200          * True to fully normalize characters (unsupported).
201          */
202         PARAMETERS.put("normalize-characters", new FixedParameter(false));
203 
204         /*
205          * A list of whitespace-separated URIs representing the schemas to validate
206          * against. Has no effect if schema-type is null.
207          */
208         PARAMETERS.put("schema-location", new Parameter() {
209             public Object get(DOMConfigurationImpl config) {
210                 return config.schemaLocation;
211             }
212             public void set(DOMConfigurationImpl config, Object value) {
213                 config.schemaLocation = (String) value;
214             }
215             public boolean canSet(DOMConfigurationImpl config, Object value) {
216                 return value == null || value instanceof String;
217             }
218         });
219 
220         /*
221          * URI representing the type of schema language, such as
222          * "http://www.w3.org/2001/XMLSchema" or "http://www.w3.org/TR/REC-xml".
223          */
224         PARAMETERS.put("schema-type", new Parameter() {
225             public Object get(DOMConfigurationImpl config) {
226                 return config.schemaType;
227             }
228             public void set(DOMConfigurationImpl config, Object value) {
229                 config.schemaType = (String) value;
230             }
231             public boolean canSet(DOMConfigurationImpl config, Object value) {
232                 return value == null || value instanceof String;
233             }
234         });
235 
236         /*
237          * True to split CDATA sections containing "]]>"; false to signal an
238          * error instead.
239          */
240         PARAMETERS.put("split-cdata-sections", new BooleanParameter() {
241             public Object get(DOMConfigurationImpl config) {
242                 return config.splitCdataSections;
243             }
244             public void set(DOMConfigurationImpl config, Object value) {
245                 config.splitCdataSections = (Boolean) value;
246             }
247         });
248 
249         /*
250          * True to require validation against a schema or DTD. Validation will
251          * recompute element content whitespace, ID and schema type data.
252          *
253          * Setting this unsets validate-if-schema.
254          */
255         PARAMETERS.put("validate", new BooleanParameter() {
256             public Object get(DOMConfigurationImpl config) {
257                 return config.validate;
258             }
259             public void set(DOMConfigurationImpl config, Object value) {
260                 // validate-if-schema is always false
261                 config.validate = (Boolean) value;
262             }
263         });
264 
265         /*
266          * True to validate if a schema was declared (unsupported). Setting this
267          * unsets validate.
268          */
269         PARAMETERS.put("validate-if-schema", new FixedParameter(false));
270 
271         /*
272          * True to report invalid characters in node names, attributes, elements,
273          * comments, text, CDATA sections and processing instructions.
274          */
275         PARAMETERS.put("well-formed", new BooleanParameter() {
276             public Object get(DOMConfigurationImpl config) {
277                 return config.wellFormed;
278             }
279             public void set(DOMConfigurationImpl config, Object value) {
280                 config.wellFormed = (Boolean) value;
281             }
282         });
283 
284         // TODO add "resource-resolver" property for use with LS feature...
285     }
286 
287     private boolean cdataSections = true;
288     private boolean comments = true;
289     private boolean datatypeNormalization = false;
290     private boolean entities = true;
291     private DOMErrorHandler errorHandler;
292     private boolean namespaces = true;
293     private String schemaLocation;
294     private String schemaType;
295     private boolean splitCdataSections = true;
296     private boolean validate = false;
297     private boolean wellFormed = true;
298 
299     interface Parameter {
get(DOMConfigurationImpl config)300         Object get(DOMConfigurationImpl config);
set(DOMConfigurationImpl config, Object value)301         void set(DOMConfigurationImpl config, Object value);
canSet(DOMConfigurationImpl config, Object value)302         boolean canSet(DOMConfigurationImpl config, Object value);
303     }
304 
305     static class FixedParameter implements Parameter {
306         final Object onlyValue;
FixedParameter(Object onlyValue)307         FixedParameter(Object onlyValue) {
308             this.onlyValue = onlyValue;
309         }
get(DOMConfigurationImpl config)310         public Object get(DOMConfigurationImpl config) {
311             return onlyValue;
312         }
set(DOMConfigurationImpl config, Object value)313         public void set(DOMConfigurationImpl config, Object value) {
314             if (!onlyValue.equals(value)) {
315                 throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
316                         "Unsupported value: " + value);
317             }
318         }
canSet(DOMConfigurationImpl config, Object value)319         public boolean canSet(DOMConfigurationImpl config, Object value) {
320             return onlyValue.equals(value);
321         }
322     }
323 
324     static abstract class BooleanParameter implements Parameter {
canSet(DOMConfigurationImpl config, Object value)325         public boolean canSet(DOMConfigurationImpl config, Object value) {
326             return value instanceof Boolean;
327         }
328     }
329 
canSetParameter(String name, Object value)330     public boolean canSetParameter(String name, Object value) {
331         Parameter parameter = PARAMETERS.get(name);
332         return parameter != null && parameter.canSet(this, value);
333     }
334 
setParameter(String name, Object value)335     public void setParameter(String name, Object value) throws DOMException {
336         Parameter parameter = PARAMETERS.get(name);
337         if (parameter == null) {
338             throw new DOMException(DOMException.NOT_FOUND_ERR, "No such parameter: " + name);
339         }
340         try {
341             parameter.set(this, value);
342         } catch (NullPointerException e) {
343             throw new DOMException(DOMException.TYPE_MISMATCH_ERR,
344                     "Null not allowed for " + name);
345         } catch (ClassCastException e) {
346             throw new DOMException(DOMException.TYPE_MISMATCH_ERR,
347                     "Invalid type for " + name + ": " + value.getClass());
348         }
349     }
350 
getParameter(String name)351     public Object getParameter(String name) throws DOMException {
352         Parameter parameter = PARAMETERS.get(name);
353         if (parameter == null) {
354             throw new DOMException(DOMException.NOT_FOUND_ERR, "No such parameter: " + name);
355         }
356         return parameter.get(this);
357     }
358 
getParameterNames()359     public DOMStringList getParameterNames() {
360         final String[] result = PARAMETERS.keySet().toArray(new String[PARAMETERS.size()]);
361         return new DOMStringList() {
362             public String item(int index) {
363                 return index < result.length ? result[index] : null;
364             }
365             public int getLength() {
366                 return result.length;
367             }
368             public boolean contains(String str) {
369                 return PARAMETERS.containsKey(str); // case-insensitive.
370             }
371         };
372     }
373 
374     public void normalize(Node node) {
375         /*
376          * Since we don't validate, this code doesn't take into account the
377          * following "supported" parameters: datatype-normalization, entities,
378          * schema-location, schema-type, or validate.
379          *
380          * TODO: normalize namespaces
381          */
382 
383         switch (node.getNodeType()) {
384             case Node.CDATA_SECTION_NODE:
385                 CDATASectionImpl cdata = (CDATASectionImpl) node;
386                 if (cdataSections) {
387                     if (cdata.needsSplitting()) {
388                         if (splitCdataSections) {
389                             cdata.split();
390                             report(DOMError.SEVERITY_WARNING, "cdata-sections-splitted");
391                         } else {
392                             report(DOMError.SEVERITY_ERROR, "wf-invalid-character");
393                         }
394                     }
395                     checkTextValidity(cdata.buffer);
396                     break;
397                 }
398                 node = cdata.replaceWithText();
399                 // fall through
400 
401             case Node.TEXT_NODE:
402                 TextImpl text = (TextImpl) node;
403                 text = text.minimize();
404                 if (text != null) {
405                     checkTextValidity(text.buffer);
406                 }
407                 break;
408 
409             case Node.COMMENT_NODE:
410                 CommentImpl comment = (CommentImpl) node;
411                 if (!comments) {
412                     comment.getParentNode().removeChild(comment);
413                     break;
414                 }
415                 if (comment.containsDashDash()) {
416                     report(DOMError.SEVERITY_ERROR, "wf-invalid-character");
417                 }
418                 checkTextValidity(comment.buffer);
419                 break;
420 
421             case Node.PROCESSING_INSTRUCTION_NODE:
422                 checkTextValidity(((ProcessingInstructionImpl) node).getData());
423                 break;
424 
425             case Node.ATTRIBUTE_NODE:
426                 checkTextValidity(((AttrImpl) node).getValue());
427                 break;
428 
429             case Node.ELEMENT_NODE:
430                 ElementImpl element = (ElementImpl) node;
431                 NamedNodeMap attributes = element.getAttributes();
432                 for (int i = 0; i < attributes.getLength(); i++) {
433                     normalize(attributes.item(i));
434                 }
435                 // fall through
436 
437             case Node.DOCUMENT_NODE:
438             case Node.DOCUMENT_FRAGMENT_NODE:
439                 Node next;
440                 for (Node child = node.getFirstChild(); child != null; child = next) {
441                     // lookup next eagerly because normalize() may remove its subject
442                     next = child.getNextSibling();
443                     normalize(child);
444                 }
445                 break;
446 
447             case Node.NOTATION_NODE:
448             case Node.DOCUMENT_TYPE_NODE:
449             case Node.ENTITY_NODE:
450             case Node.ENTITY_REFERENCE_NODE:
451                 break;
452 
453             default:
454                 throw new DOMException(DOMException.NOT_SUPPORTED_ERR,
455                         "Unsupported node type " + node.getNodeType());
456         }
457     }
458 
459     private void checkTextValidity(CharSequence s) {
460         if (wellFormed && !isValid(s)) {
461             report(DOMError.SEVERITY_ERROR, "wf-invalid-character");
462         }
463     }
464 
465     /**
466      * Returns true if all of the characters in the text are permitted for use
467      * in XML documents.
468      */
469     private boolean isValid(CharSequence text) {
470         for (int i = 0; i < text.length(); i++) {
471             char c = text.charAt(i);
472             // as defined by http://www.w3.org/TR/REC-xml/#charsets.
473             boolean valid = c == 0x9 || c == 0xA || c == 0xD
474                     || (c >= 0x20 && c <= 0xd7ff)
475                     || (c >= 0xe000 && c <= 0xfffd);
476             if (!valid) {
477                 return false;
478             }
479         }
480         return true;
481     }
482 
483     private void report(short severity, String type) {
484         if (errorHandler != null) {
485             // TODO: abort if handleError returns false
486             errorHandler.handleError(new DOMErrorImpl(severity, type));
487         }
488     }
489 }
490