1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package org.apache.harmony.xml.dom; 18 19 import java.util.Map; 20 import java.util.TreeMap; 21 import org.w3c.dom.DOMConfiguration; 22 import org.w3c.dom.DOMError; 23 import org.w3c.dom.DOMErrorHandler; 24 import org.w3c.dom.DOMException; 25 import org.w3c.dom.DOMStringList; 26 import org.w3c.dom.NamedNodeMap; 27 import org.w3c.dom.Node; 28 29 /** 30 * A minimal implementation of DOMConfiguration. This implementation uses inner 31 * parameter instances to centralize each parameter's behavior. 32 */ 33 public final class DOMConfigurationImpl implements DOMConfiguration { 34 35 private static final Map<String, Parameter> PARAMETERS 36 = new TreeMap<String, Parameter>(String.CASE_INSENSITIVE_ORDER); 37 38 static { 39 /* 40 * True to canonicalize the document (unsupported). This includes 41 * removing DocumentType nodes from the tree and removing unused 42 * namespace declarations. Setting this to true also sets these 43 * parameters: 44 * entities = false 45 * normalize-characters = false 46 * cdata-sections = false 47 * namespaces = true 48 * namespace-declarations = true 49 * well-formed = true 50 * element-content-whitespace = true 51 * Setting these parameters to another value shall revert the canonical 52 * form to false. 53 */ 54 PARAMETERS.put("canonical-form", new FixedParameter(false)); 55 56 /* 57 * True to keep existing CDATA nodes; false to replace them/merge them 58 * into adjacent text nodes. 59 */ 60 PARAMETERS.put("cdata-sections", new BooleanParameter() { 61 public Object get(DOMConfigurationImpl config) { 62 return config.cdataSections; 63 } 64 public void set(DOMConfigurationImpl config, Object value) { 65 config.cdataSections = (Boolean) value; 66 } 67 }); 68 69 /* 70 * True to check character normalization (unsupported). 71 */ 72 PARAMETERS.put("check-character-normalization", new FixedParameter(false)); 73 74 /* 75 * True to keep comments in the document; false to discard them. 76 */ 77 PARAMETERS.put("comments", new BooleanParameter() { 78 public Object get(DOMConfigurationImpl config) { 79 return config.comments; 80 } 81 public void set(DOMConfigurationImpl config, Object value) { 82 config.comments = (Boolean) value; 83 } 84 }); 85 86 /* 87 * True to expose schema normalized values. Setting this to true sets 88 * the validate parameter to true. Has no effect when validate is false. 89 */ 90 PARAMETERS.put("datatype-normalization", new BooleanParameter() { 91 public Object get(DOMConfigurationImpl config) { 92 return config.datatypeNormalization; 93 } 94 public void set(DOMConfigurationImpl config, Object value) { 95 if ((Boolean) value) { 96 config.datatypeNormalization = true; 97 config.validate = true; 98 } else { 99 config.datatypeNormalization = false; 100 } 101 } 102 }); 103 104 /* 105 * True to keep whitespace elements in the document; false to discard 106 * them (unsupported). 107 */ 108 PARAMETERS.put("element-content-whitespace", new FixedParameter(true)); 109 110 /* 111 * True to keep entity references in the document; false to expand them. 112 */ 113 PARAMETERS.put("entities", new BooleanParameter() { 114 public Object get(DOMConfigurationImpl config) { 115 return config.entities; 116 } 117 public void set(DOMConfigurationImpl config, Object value) { 118 config.entities = (Boolean) value; 119 } 120 }); 121 122 /* 123 * Handler to be invoked when errors are encountered. 124 */ 125 PARAMETERS.put("error-handler", new Parameter() { 126 public Object get(DOMConfigurationImpl config) { 127 return config.errorHandler; 128 } 129 public void set(DOMConfigurationImpl config, Object value) { 130 config.errorHandler = (DOMErrorHandler) value; 131 } 132 public boolean canSet(DOMConfigurationImpl config, Object value) { 133 return value == null || value instanceof DOMErrorHandler; 134 } 135 }); 136 137 /* 138 * Bulk alias to set the following parameter values: 139 * validate-if-schema = false 140 * entities = false 141 * datatype-normalization = false 142 * cdata-sections = false 143 * namespace-declarations = true 144 * well-formed = true 145 * element-content-whitespace = true 146 * comments = true 147 * namespaces = true. 148 * Querying this returns true if all of the above parameters have the 149 * listed values; false otherwise. 150 */ 151 PARAMETERS.put("infoset", new BooleanParameter() { 152 public Object get(DOMConfigurationImpl config) { 153 // validate-if-schema is always false 154 // element-content-whitespace is always true 155 // namespace-declarations is always true 156 return !config.entities 157 && !config.datatypeNormalization 158 && !config.cdataSections 159 && config.wellFormed 160 && config.comments 161 && config.namespaces; 162 } 163 public void set(DOMConfigurationImpl config, Object value) { 164 if ((Boolean) value) { 165 // validate-if-schema is always false 166 // element-content-whitespace is always true 167 // namespace-declarations is always true 168 config.entities = false; 169 config.datatypeNormalization = false; 170 config.cdataSections = false; 171 config.wellFormed = true; 172 config.comments = true; 173 config.namespaces = true; 174 } 175 } 176 }); 177 178 /* 179 * True to perform namespace processing; false for none. 180 */ 181 PARAMETERS.put("namespaces", new BooleanParameter() { 182 public Object get(DOMConfigurationImpl config) { 183 return config.namespaces; 184 } 185 public void set(DOMConfigurationImpl config, Object value) { 186 config.namespaces = (Boolean) value; 187 } 188 }); 189 190 /** 191 * True to include namespace declarations; false to discard them 192 * (unsupported). Even when namespace declarations are discarded, 193 * prefixes are retained. 194 * 195 * Has no effect if namespaces is false. 196 */ 197 PARAMETERS.put("namespace-declarations", new FixedParameter(true)); 198 199 /* 200 * True to fully normalize characters (unsupported). 201 */ 202 PARAMETERS.put("normalize-characters", new FixedParameter(false)); 203 204 /* 205 * A list of whitespace-separated URIs representing the schemas to validate 206 * against. Has no effect if schema-type is null. 207 */ 208 PARAMETERS.put("schema-location", new Parameter() { 209 public Object get(DOMConfigurationImpl config) { 210 return config.schemaLocation; 211 } 212 public void set(DOMConfigurationImpl config, Object value) { 213 config.schemaLocation = (String) value; 214 } 215 public boolean canSet(DOMConfigurationImpl config, Object value) { 216 return value == null || value instanceof String; 217 } 218 }); 219 220 /* 221 * URI representing the type of schema language, such as 222 * "http://www.w3.org/2001/XMLSchema" or "http://www.w3.org/TR/REC-xml". 223 */ 224 PARAMETERS.put("schema-type", new Parameter() { 225 public Object get(DOMConfigurationImpl config) { 226 return config.schemaType; 227 } 228 public void set(DOMConfigurationImpl config, Object value) { 229 config.schemaType = (String) value; 230 } 231 public boolean canSet(DOMConfigurationImpl config, Object value) { 232 return value == null || value instanceof String; 233 } 234 }); 235 236 /* 237 * True to split CDATA sections containing "]]>"; false to signal an 238 * error instead. 239 */ 240 PARAMETERS.put("split-cdata-sections", new BooleanParameter() { 241 public Object get(DOMConfigurationImpl config) { 242 return config.splitCdataSections; 243 } 244 public void set(DOMConfigurationImpl config, Object value) { 245 config.splitCdataSections = (Boolean) value; 246 } 247 }); 248 249 /* 250 * True to require validation against a schema or DTD. Validation will 251 * recompute element content whitespace, ID and schema type data. 252 * 253 * Setting this unsets validate-if-schema. 254 */ 255 PARAMETERS.put("validate", new BooleanParameter() { 256 public Object get(DOMConfigurationImpl config) { 257 return config.validate; 258 } 259 public void set(DOMConfigurationImpl config, Object value) { 260 // validate-if-schema is always false 261 config.validate = (Boolean) value; 262 } 263 }); 264 265 /* 266 * True to validate if a schema was declared (unsupported). Setting this 267 * unsets validate. 268 */ 269 PARAMETERS.put("validate-if-schema", new FixedParameter(false)); 270 271 /* 272 * True to report invalid characters in node names, attributes, elements, 273 * comments, text, CDATA sections and processing instructions. 274 */ 275 PARAMETERS.put("well-formed", new BooleanParameter() { 276 public Object get(DOMConfigurationImpl config) { 277 return config.wellFormed; 278 } 279 public void set(DOMConfigurationImpl config, Object value) { 280 config.wellFormed = (Boolean) value; 281 } 282 }); 283 284 // TODO add "resource-resolver" property for use with LS feature... 285 } 286 287 private boolean cdataSections = true; 288 private boolean comments = true; 289 private boolean datatypeNormalization = false; 290 private boolean entities = true; 291 private DOMErrorHandler errorHandler; 292 private boolean namespaces = true; 293 private String schemaLocation; 294 private String schemaType; 295 private boolean splitCdataSections = true; 296 private boolean validate = false; 297 private boolean wellFormed = true; 298 299 interface Parameter { get(DOMConfigurationImpl config)300 Object get(DOMConfigurationImpl config); set(DOMConfigurationImpl config, Object value)301 void set(DOMConfigurationImpl config, Object value); canSet(DOMConfigurationImpl config, Object value)302 boolean canSet(DOMConfigurationImpl config, Object value); 303 } 304 305 static class FixedParameter implements Parameter { 306 final Object onlyValue; FixedParameter(Object onlyValue)307 FixedParameter(Object onlyValue) { 308 this.onlyValue = onlyValue; 309 } get(DOMConfigurationImpl config)310 public Object get(DOMConfigurationImpl config) { 311 return onlyValue; 312 } set(DOMConfigurationImpl config, Object value)313 public void set(DOMConfigurationImpl config, Object value) { 314 if (!onlyValue.equals(value)) { 315 throw new DOMException(DOMException.NOT_SUPPORTED_ERR, 316 "Unsupported value: " + value); 317 } 318 } canSet(DOMConfigurationImpl config, Object value)319 public boolean canSet(DOMConfigurationImpl config, Object value) { 320 return onlyValue.equals(value); 321 } 322 } 323 324 static abstract class BooleanParameter implements Parameter { canSet(DOMConfigurationImpl config, Object value)325 public boolean canSet(DOMConfigurationImpl config, Object value) { 326 return value instanceof Boolean; 327 } 328 } 329 canSetParameter(String name, Object value)330 public boolean canSetParameter(String name, Object value) { 331 Parameter parameter = PARAMETERS.get(name); 332 return parameter != null && parameter.canSet(this, value); 333 } 334 setParameter(String name, Object value)335 public void setParameter(String name, Object value) throws DOMException { 336 Parameter parameter = PARAMETERS.get(name); 337 if (parameter == null) { 338 throw new DOMException(DOMException.NOT_FOUND_ERR, "No such parameter: " + name); 339 } 340 try { 341 parameter.set(this, value); 342 } catch (NullPointerException e) { 343 throw new DOMException(DOMException.TYPE_MISMATCH_ERR, 344 "Null not allowed for " + name); 345 } catch (ClassCastException e) { 346 throw new DOMException(DOMException.TYPE_MISMATCH_ERR, 347 "Invalid type for " + name + ": " + value.getClass()); 348 } 349 } 350 getParameter(String name)351 public Object getParameter(String name) throws DOMException { 352 Parameter parameter = PARAMETERS.get(name); 353 if (parameter == null) { 354 throw new DOMException(DOMException.NOT_FOUND_ERR, "No such parameter: " + name); 355 } 356 return parameter.get(this); 357 } 358 getParameterNames()359 public DOMStringList getParameterNames() { 360 return internalGetParameterNames(); 361 } 362 internalGetParameterNames()363 private static DOMStringList internalGetParameterNames() { 364 final String[] result = PARAMETERS.keySet().toArray(new String[PARAMETERS.size()]); 365 return new DOMStringList() { 366 public String item(int index) { 367 return index < result.length ? result[index] : null; 368 } 369 public int getLength() { 370 return result.length; 371 } 372 public boolean contains(String str) { 373 return PARAMETERS.containsKey(str); // case-insensitive. 374 } 375 }; 376 } 377 378 public void normalize(Node node) { 379 /* 380 * Since we don't validate, this code doesn't take into account the 381 * following "supported" parameters: datatype-normalization, entities, 382 * schema-location, schema-type, or validate. 383 * 384 * TODO: normalize namespaces 385 */ 386 387 switch (node.getNodeType()) { 388 case Node.CDATA_SECTION_NODE: 389 CDATASectionImpl cdata = (CDATASectionImpl) node; 390 if (cdataSections) { 391 if (cdata.needsSplitting()) { 392 if (splitCdataSections) { 393 cdata.split(); 394 report(DOMError.SEVERITY_WARNING, "cdata-sections-splitted"); 395 } else { 396 report(DOMError.SEVERITY_ERROR, "wf-invalid-character"); 397 } 398 } 399 checkTextValidity(cdata.buffer); 400 break; 401 } 402 node = cdata.replaceWithText(); 403 // fall through 404 405 case Node.TEXT_NODE: 406 TextImpl text = (TextImpl) node; 407 text = text.minimize(); 408 if (text != null) { 409 checkTextValidity(text.buffer); 410 } 411 break; 412 413 case Node.COMMENT_NODE: 414 CommentImpl comment = (CommentImpl) node; 415 if (!comments) { 416 comment.getParentNode().removeChild(comment); 417 break; 418 } 419 if (comment.containsDashDash()) { 420 report(DOMError.SEVERITY_ERROR, "wf-invalid-character"); 421 } 422 checkTextValidity(comment.buffer); 423 break; 424 425 case Node.PROCESSING_INSTRUCTION_NODE: 426 checkTextValidity(((ProcessingInstructionImpl) node).getData()); 427 break; 428 429 case Node.ATTRIBUTE_NODE: 430 checkTextValidity(((AttrImpl) node).getValue()); 431 break; 432 433 case Node.ELEMENT_NODE: 434 ElementImpl element = (ElementImpl) node; 435 NamedNodeMap attributes = element.getAttributes(); 436 for (int i = 0; i < attributes.getLength(); i++) { 437 normalize(attributes.item(i)); 438 } 439 // fall through 440 441 case Node.DOCUMENT_NODE: 442 case Node.DOCUMENT_FRAGMENT_NODE: 443 Node next; 444 for (Node child = node.getFirstChild(); child != null; child = next) { 445 // lookup next eagerly because normalize() may remove its subject 446 next = child.getNextSibling(); 447 normalize(child); 448 } 449 break; 450 451 case Node.NOTATION_NODE: 452 case Node.DOCUMENT_TYPE_NODE: 453 case Node.ENTITY_NODE: 454 case Node.ENTITY_REFERENCE_NODE: 455 break; 456 457 default: 458 throw new DOMException(DOMException.NOT_SUPPORTED_ERR, 459 "Unsupported node type " + node.getNodeType()); 460 } 461 } 462 463 private void checkTextValidity(CharSequence s) { 464 if (wellFormed && !isValid(s)) { 465 report(DOMError.SEVERITY_ERROR, "wf-invalid-character"); 466 } 467 } 468 469 /** 470 * Returns true if all of the characters in the text are permitted for use 471 * in XML documents. 472 */ 473 private boolean isValid(CharSequence text) { 474 for (int i = 0; i < text.length(); i++) { 475 char c = text.charAt(i); 476 // as defined by http://www.w3.org/TR/REC-xml/#charsets. 477 boolean valid = c == 0x9 || c == 0xA || c == 0xD 478 || (c >= 0x20 && c <= 0xd7ff) 479 || (c >= 0xe000 && c <= 0xfffd); 480 if (!valid) { 481 return false; 482 } 483 } 484 return true; 485 } 486 487 private void report(short severity, String type) { 488 if (errorHandler != null) { 489 // TODO: abort if handleError returns false 490 errorHandler.handleError(new DOMErrorImpl(severity, type)); 491 } 492 } 493 } 494