1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the  "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 /*
19  * $Id: DTM.java 468653 2006-10-28 07:07:05Z minchau $
20  */
21 package org.apache.xml.dtm;
22 
23 import javax.xml.transform.SourceLocator;
24 
25 import org.apache.xml.utils.XMLString;
26 
27 /**
28  * <code>DTM</code> is an XML document model expressed as a table
29  * rather than an object tree. It attempts to provide an interface to
30  * a parse tree that has very little object creation. (DTM
31  * implementations may also support incremental construction of the
32  * model, but that's hidden from the DTM API.)
33  *
34  * <p>Nodes in the DTM are identified by integer "handles".  A handle must
35  * be unique within a process, and carries both node identification and
36  * document identification.  It must be possible to compare two handles
37  * (and thus their nodes) for identity with "==".</p>
38  *
39  * <p>Namespace URLs, local-names, and expanded-names can all be
40  * represented by and tested as integer ID values.  An expanded name
41  * represents (and may or may not directly contain) a combination of
42  * the URL ID, and the local-name ID.  Note that the namespace URL id
43  * can be 0, which should have the meaning that the namespace is null.
44  * For consistency, zero should not be used for a local-name index. </p>
45  *
46  * <p>Text content of a node is represented by an index and length,
47  * permitting efficient storage such as a shared FastStringBuffer.</p>
48  *
49  * <p>The model of the tree, as well as the general navigation model,
50  * is that of XPath 1.0, for the moment.  The model will eventually be
51  * adapted to match the XPath 2.0 data model, XML Schema, and
52  * InfoSet.</p>
53  *
54  * <p>DTM does _not_ directly support the W3C's Document Object
55  * Model. However, it attempts to come close enough that an
56  * implementation of DTM can be created that wraps a DOM and vice
57  * versa.</p>
58  *
59  * <p><strong>Please Note:</strong> The DTM API is still
60  * <strong>Subject To Change.</strong> This wouldn't affect most
61  * users, but might require updating some extensions.</p>
62  *
63  * <p> The largest change being contemplated is a reconsideration of
64  * the Node Handle representation.  We are still not entirely sure
65  * that an integer packed with two numeric subfields is really the
66  * best solution. It has been suggested that we move up to a Long, to
67  * permit more nodes per document without having to reduce the number
68  * of slots in the DTMManager. There's even been a proposal that we
69  * replace these integers with "cursor" objects containing the
70  * internal node id and a pointer to the actual DTM object; this might
71  * reduce the need to continuously consult the DTMManager to retrieve
72  * the latter, and might provide a useful "hook" back into normal Java
73  * heap management.  But changing this datatype would have huge impact
74  * on Xalan's internals -- especially given Java's lack of C-style
75  * typedefs -- so we won't cut over unless we're convinced the new
76  * solution really would be an improvement!</p>
77  * */
78 public interface DTM
79 {
80 
81   /**
82    * Null node handles are represented by this value.
83    */
84   public static final int NULL = -1;
85 
86   // These nodeType mnemonics and values are deliberately the same as those
87   // used by the DOM, for convenient mapping
88   //
89   // %REVIEW% Should we actually define these as initialized to,
90   // eg. org.w3c.dom.Document.ELEMENT_NODE?
91 
92   /**
93    * The node is a <code>Root</code>.
94    */
95   public static final short ROOT_NODE = 0;
96 
97   /**
98    * The node is an <code>Element</code>.
99    */
100   public static final short ELEMENT_NODE = 1;
101 
102   /**
103    * The node is an <code>Attr</code>.
104    */
105   public static final short ATTRIBUTE_NODE = 2;
106 
107   /**
108    * The node is a <code>Text</code> node.
109    */
110   public static final short TEXT_NODE = 3;
111 
112   /**
113    * The node is a <code>CDATASection</code>.
114    */
115   public static final short CDATA_SECTION_NODE = 4;
116 
117   /**
118    * The node is an <code>EntityReference</code>.
119    */
120   public static final short ENTITY_REFERENCE_NODE = 5;
121 
122   /**
123    * The node is an <code>Entity</code>.
124    */
125   public static final short ENTITY_NODE = 6;
126 
127   /**
128    * The node is a <code>ProcessingInstruction</code>.
129    */
130   public static final short PROCESSING_INSTRUCTION_NODE = 7;
131 
132   /**
133    * The node is a <code>Comment</code>.
134    */
135   public static final short COMMENT_NODE = 8;
136 
137   /**
138    * The node is a <code>Document</code>.
139    */
140   public static final short DOCUMENT_NODE = 9;
141 
142   /**
143    * The node is a <code>DocumentType</code>.
144    */
145   public static final short DOCUMENT_TYPE_NODE = 10;
146 
147   /**
148    * The node is a <code>DocumentFragment</code>.
149    */
150   public static final short DOCUMENT_FRAGMENT_NODE = 11;
151 
152   /**
153    * The node is a <code>Notation</code>.
154    */
155   public static final short NOTATION_NODE = 12;
156 
157   /**
158    * The node is a <code>namespace node</code>. Note that this is not
159    * currently a node type defined by the DOM API.
160    */
161   public static final short NAMESPACE_NODE = 13;
162 
163   /**
164    * The number of valid nodetypes.
165    */
166   public static final short  NTYPES = 14;
167 
168   // ========= DTM Implementation Control Functions. ==============
169   // %TBD% RETIRED -- do via setFeature if needed. Remove from impls.
170   // public void setParseBlockSize(int blockSizeSuggestion);
171 
172   /**
173    * Set an implementation dependent feature.
174    * <p>
175    * %REVIEW% Do we really expect to set features on DTMs?
176    *
177    * @param featureId A feature URL.
178    * @param state true if this feature should be on, false otherwise.
179    */
setFeature(String featureId, boolean state)180   public void setFeature(String featureId, boolean state);
181 
182   /**
183    * Set a run time property for this DTM instance.
184    *
185    * @param property a <code>String</code> value
186    * @param value an <code>Object</code> value
187    */
setProperty(String property, Object value)188   public void setProperty(String property, Object value);
189 
190   // ========= Document Navigation Functions =========
191 
192   /**
193    * This returns a stateless "traverser", that can navigate over an
194    * XPath axis, though not in document order.
195    *
196    * @param axis One of Axes.ANCESTORORSELF, etc.
197    *
198    * @return A DTMAxisIterator, or null if the givin axis isn't supported.
199    */
getAxisTraverser(final int axis)200   public DTMAxisTraverser getAxisTraverser(final int axis);
201 
202   /**
203    * This is a shortcut to the iterators that implement
204    * XPath axes.
205    * Returns a bare-bones iterator that must be initialized
206    * with a start node (using iterator.setStartNode()).
207    *
208    * @param axis One of Axes.ANCESTORORSELF, etc.
209    *
210    * @return A DTMAxisIterator, or null if the givin axis isn't supported.
211    */
getAxisIterator(final int axis)212   public DTMAxisIterator getAxisIterator(final int axis);
213 
214   /**
215    * Get an iterator that can navigate over an XPath Axis, predicated by
216    * the extended type ID.
217    *
218    * @param axis
219    * @param type An extended type ID.
220    *
221    * @return A DTMAxisIterator, or null if the givin axis isn't supported.
222    */
getTypedAxisIterator(final int axis, final int type)223   public DTMAxisIterator getTypedAxisIterator(final int axis, final int type);
224 
225   /**
226    * Given a node handle, test if it has child nodes.
227    * <p> %REVIEW% This is obviously useful at the DOM layer, where it
228    * would permit testing this without having to create a proxy
229    * node. It's less useful in the DTM API, where
230    * (dtm.getFirstChild(nodeHandle)!=DTM.NULL) is just as fast and
231    * almost as self-evident. But it's a convenience, and eases porting
232    * of DOM code to DTM.  </p>
233    *
234    * @param nodeHandle int Handle of the node.
235    * @return int true if the given node has child nodes.
236    */
hasChildNodes(int nodeHandle)237   public boolean hasChildNodes(int nodeHandle);
238 
239   /**
240    * Given a node handle, get the handle of the node's first child.
241    *
242    * @param nodeHandle int Handle of the node.
243    * @return int DTM node-number of first child,
244    * or DTM.NULL to indicate none exists.
245    */
getFirstChild(int nodeHandle)246   public int getFirstChild(int nodeHandle);
247 
248   /**
249    * Given a node handle, get the handle of the node's last child.
250    *
251    * @param nodeHandle int Handle of the node.
252    * @return int Node-number of last child,
253    * or DTM.NULL to indicate none exists.
254    */
getLastChild(int nodeHandle)255   public int getLastChild(int nodeHandle);
256 
257   /**
258    * Retrieves an attribute node by local name and namespace URI
259    *
260    * %TBD% Note that we currently have no way to support
261    * the DOM's old getAttribute() call, which accesses only the qname.
262    *
263    * @param elementHandle Handle of the node upon which to look up this attribute.
264    * @param namespaceURI The namespace URI of the attribute to
265    *   retrieve, or null.
266    * @param name The local name of the attribute to
267    *   retrieve.
268    * @return The attribute node handle with the specified name (
269    *   <code>nodeName</code>) or <code>DTM.NULL</code> if there is no such
270    *   attribute.
271    */
getAttributeNode(int elementHandle, String namespaceURI, String name)272   public int getAttributeNode(int elementHandle, String namespaceURI,
273                               String name);
274 
275   /**
276    * Given a node handle, get the index of the node's first attribute.
277    *
278    * @param nodeHandle int Handle of the node.
279    * @return Handle of first attribute, or DTM.NULL to indicate none exists.
280    */
getFirstAttribute(int nodeHandle)281   public int getFirstAttribute(int nodeHandle);
282 
283   /**
284    * Given a node handle, get the index of the node's first namespace node.
285    *
286    * @param nodeHandle handle to node, which should probably be an element
287    *                   node, but need not be.
288    *
289    * @param inScope true if all namespaces in scope should be
290    *                   returned, false if only the node's own
291    *                   namespace declarations should be returned.
292    * @return handle of first namespace,
293    * or DTM.NULL to indicate none exists.
294    */
getFirstNamespaceNode(int nodeHandle, boolean inScope)295   public int getFirstNamespaceNode(int nodeHandle, boolean inScope);
296 
297   /**
298    * Given a node handle, advance to its next sibling.
299    * @param nodeHandle int Handle of the node.
300    * @return int Node-number of next sibling,
301    * or DTM.NULL to indicate none exists.
302    */
getNextSibling(int nodeHandle)303   public int getNextSibling(int nodeHandle);
304 
305   /**
306    * Given a node handle, find its preceeding sibling.
307    * WARNING: DTM implementations may be asymmetric; in some,
308    * this operation has been resolved by search, and is relatively expensive.
309    *
310    * @param nodeHandle the id of the node.
311    * @return int Node-number of the previous sib,
312    * or DTM.NULL to indicate none exists.
313    */
getPreviousSibling(int nodeHandle)314   public int getPreviousSibling(int nodeHandle);
315 
316   /**
317    * Given a node handle, advance to the next attribute. If an
318    * element, we advance to its first attribute; if an attr, we advance to
319    * the next attr of the same element.
320    *
321    * @param nodeHandle int Handle of the node.
322    * @return int DTM node-number of the resolved attr,
323    * or DTM.NULL to indicate none exists.
324    */
getNextAttribute(int nodeHandle)325   public int getNextAttribute(int nodeHandle);
326 
327   /**
328    * Given a namespace handle, advance to the next namespace in the same scope
329    * (local or local-plus-inherited, as selected by getFirstNamespaceNode)
330    *
331    * @param baseHandle handle to original node from where the first child
332    * was relative to (needed to return nodes in document order).
333    * @param namespaceHandle handle to node which must be of type
334    * NAMESPACE_NODE.
335    * NEEDSDOC @param inScope
336    * @return handle of next namespace,
337    * or DTM.NULL to indicate none exists.
338    */
getNextNamespaceNode(int baseHandle, int namespaceHandle, boolean inScope)339   public int getNextNamespaceNode(int baseHandle, int namespaceHandle,
340                                   boolean inScope);
341 
342   /**
343    * Given a node handle, find its parent node.
344    *
345    * @param nodeHandle the id of the node.
346    * @return int Node handle of parent,
347    * or DTM.NULL to indicate none exists.
348    */
getParent(int nodeHandle)349   public int getParent(int nodeHandle);
350 
351   /**
352    * Given a DTM which contains only a single document,
353    * find the Node Handle of the  Document node. Note
354    * that if the DTM is configured so it can contain multiple
355    * documents, this call will return the Document currently
356    * under construction -- but may return null if it's between
357    * documents. Generally, you should use getOwnerDocument(nodeHandle)
358    * or getDocumentRoot(nodeHandle) instead.
359    *
360    * @return int Node handle of document, or DTM.NULL if a shared DTM
361    * can not tell us which Document is currently active.
362    */
getDocument()363   public int getDocument();
364 
365   /**
366    * Given a node handle, find the owning document node. This version mimics
367    * the behavior of the DOM call by the same name.
368    *
369    * @param nodeHandle the id of the node.
370    * @return int Node handle of owning document, or DTM.NULL if the node was
371    * a Document.
372    * @see #getDocumentRoot(int nodeHandle)
373    */
getOwnerDocument(int nodeHandle)374   public int getOwnerDocument(int nodeHandle);
375 
376   /**
377    * Given a node handle, find the owning document node.
378    *
379    * @param nodeHandle the id of the node.
380    * @return int Node handle of owning document, or the node itself if it was
381    * a Document. (Note difference from DOM, where getOwnerDocument returns
382    * null for the Document node.)
383    * @see #getOwnerDocument(int nodeHandle)
384    */
getDocumentRoot(int nodeHandle)385   public int getDocumentRoot(int nodeHandle);
386 
387   /**
388    * Get the string-value of a node as a String object
389    * (see http://www.w3.org/TR/xpath#data-model
390    * for the definition of a node's string-value).
391    *
392    * @param nodeHandle The node ID.
393    *
394    * @return A string object that represents the string-value of the given node.
395    */
getStringValue(int nodeHandle)396   public XMLString getStringValue(int nodeHandle);
397 
398   /**
399    * Get number of character array chunks in
400    * the string-value of a node.
401    * (see http://www.w3.org/TR/xpath#data-model
402    * for the definition of a node's string-value).
403    * Note that a single text node may have multiple text chunks.
404    *
405    * @param nodeHandle The node ID.
406    *
407    * @return number of character array chunks in
408    *         the string-value of a node.
409    */
getStringValueChunkCount(int nodeHandle)410   public int getStringValueChunkCount(int nodeHandle);
411 
412   /**
413    * Get a character array chunk in the string-value of a node.
414    * (see http://www.w3.org/TR/xpath#data-model
415    * for the definition of a node's string-value).
416    * Note that a single text node may have multiple text chunks.
417    *
418    * @param nodeHandle The node ID.
419    * @param chunkIndex Which chunk to get.
420    * @param startAndLen  A two-integer array which, upon return, WILL
421    * BE FILLED with values representing the chunk's start position
422    * within the returned character buffer and the length of the chunk.
423    * @return The character array buffer within which the chunk occurs,
424    * setting startAndLen's contents as a side-effect.
425    */
getStringValueChunk(int nodeHandle, int chunkIndex, int[] startAndLen)426   public char[] getStringValueChunk(int nodeHandle, int chunkIndex,
427                                     int[] startAndLen);
428 
429   /**
430    * Given a node handle, return an ID that represents the node's expanded name.
431    *
432    * @param nodeHandle The handle to the node in question.
433    *
434    * @return the expanded-name id of the node.
435    */
getExpandedTypeID(int nodeHandle)436   public int getExpandedTypeID(int nodeHandle);
437 
438   /**
439    * Given an expanded name, return an ID.  If the expanded-name does not
440    * exist in the internal tables, the entry will be created, and the ID will
441    * be returned.  Any additional nodes that are created that have this
442    * expanded name will use this ID.
443    *
444    * NEEDSDOC @param namespace
445    * NEEDSDOC @param localName
446    * NEEDSDOC @param type
447    *
448    * @return the expanded-name id of the node.
449    */
getExpandedTypeID(String namespace, String localName, int type)450   public int getExpandedTypeID(String namespace, String localName, int type);
451 
452   /**
453    * Given an expanded-name ID, return the local name part.
454    *
455    * @param ExpandedNameID an ID that represents an expanded-name.
456    * @return String Local name of this node.
457    */
getLocalNameFromExpandedNameID(int ExpandedNameID)458   public String getLocalNameFromExpandedNameID(int ExpandedNameID);
459 
460   /**
461    * Given an expanded-name ID, return the namespace URI part.
462    *
463    * @param ExpandedNameID an ID that represents an expanded-name.
464    * @return String URI value of this node's namespace, or null if no
465    * namespace was resolved.
466    */
getNamespaceFromExpandedNameID(int ExpandedNameID)467   public String getNamespaceFromExpandedNameID(int ExpandedNameID);
468 
469   /**
470    * Given a node handle, return its DOM-style node name. This will
471    * include names such as #text or #document.
472    *
473    * @param nodeHandle the id of the node.
474    * @return String Name of this node, which may be an empty string.
475    * %REVIEW% Document when empty string is possible...
476    */
getNodeName(int nodeHandle)477   public String getNodeName(int nodeHandle);
478 
479   /**
480    * Given a node handle, return the XPath node name.  This should be
481    * the name as described by the XPath data model, NOT the DOM-style
482    * name.
483    *
484    * @param nodeHandle the id of the node.
485    * @return String Name of this node.
486    */
getNodeNameX(int nodeHandle)487   public String getNodeNameX(int nodeHandle);
488 
489   /**
490    * Given a node handle, return its DOM-style localname.
491    * (As defined in Namespaces, this is the portion of the name after the
492    * prefix, if present, or the whole node name if no prefix exists)
493    *
494    * @param nodeHandle the id of the node.
495    * @return String Local name of this node.
496    */
getLocalName(int nodeHandle)497   public String getLocalName(int nodeHandle);
498 
499   /**
500    * Given a namespace handle, return the prefix that the namespace decl is
501    * mapping.
502    * Given a node handle, return the prefix used to map to the namespace.
503    * (As defined in Namespaces, this is the portion of the name before any
504    * colon character).
505    *
506    * <p> %REVIEW% Are you sure you want "" for no prefix?  </p>
507    *
508    * @param nodeHandle the id of the node.
509    * @return String prefix of this node's name, or "" if no explicit
510    * namespace prefix was given.
511    */
getPrefix(int nodeHandle)512   public String getPrefix(int nodeHandle);
513 
514   /**
515    * Given a node handle, return its DOM-style namespace URI
516    * (As defined in Namespaces, this is the declared URI which this node's
517    * prefix -- or default in lieu thereof -- was mapped to.)
518    * @param nodeHandle the id of the node.
519    * @return String URI value of this node's namespace, or null if no
520    * namespace was resolved.
521    */
getNamespaceURI(int nodeHandle)522   public String getNamespaceURI(int nodeHandle);
523 
524   /**
525    * Given a node handle, return its node value. This is mostly
526    * as defined by the DOM, but may ignore some conveniences.
527    * <p>
528    * @param nodeHandle The node id.
529    * @return String Value of this node, or null if not
530    * meaningful for this node type.
531    */
getNodeValue(int nodeHandle)532   public String getNodeValue(int nodeHandle);
533 
534   /**
535    * Given a node handle, return its DOM-style node type.
536    *
537    * <p>%REVIEW% Generally, returning short is false economy. Return int?</p>
538    *
539    * @param nodeHandle The node id.
540    * @return int Node type, as per the DOM's Node._NODE constants.
541    */
getNodeType(int nodeHandle)542   public short getNodeType(int nodeHandle);
543 
544   /**
545    * Get the depth level of this node in the tree (equals 1 for
546    * a parentless node).
547    *
548    * @param nodeHandle The node id.
549    * @return the number of ancestors, plus one
550    * @xsl.usage internal
551    */
getLevel(int nodeHandle)552   public short getLevel(int nodeHandle);
553 
554   // ============== Document query functions ==============
555 
556   /**
557    * Tests whether DTM DOM implementation implements a specific feature and
558    * that feature is supported by this node.
559    * @param feature The name of the feature to test.
560    * @param version This is the version number of the feature to test.
561    *   If the version is not
562    *   specified, supporting any version of the feature will cause the
563    *   method to return <code>true</code>.
564    * @return Returns <code>true</code> if the specified feature is
565    *   supported on this node, <code>false</code> otherwise.
566    */
isSupported(String feature, String version)567   public boolean isSupported(String feature, String version);
568 
569   /**
570    * Return the base URI of the document entity. If it is not known
571    * (because the document was parsed from a socket connection or from
572    * standard input, for example), the value of this property is unknown.
573    *
574    * @return the document base URI String object or null if unknown.
575    */
getDocumentBaseURI()576   public String getDocumentBaseURI();
577 
578   /**
579    * Set the base URI of the document entity.
580    *
581    * @param baseURI the document base URI String object or null if unknown.
582    */
setDocumentBaseURI(String baseURI)583   public void setDocumentBaseURI(String baseURI);
584 
585   /**
586    * Return the system identifier of the document entity. If
587    * it is not known, the value of this property is null.
588    *
589    * @param nodeHandle The node id, which can be any valid node handle.
590    * @return the system identifier String object or null if unknown.
591    */
getDocumentSystemIdentifier(int nodeHandle)592   public String getDocumentSystemIdentifier(int nodeHandle);
593 
594   /**
595    * Return the name of the character encoding scheme
596    *        in which the document entity is expressed.
597    *
598    * @param nodeHandle The node id, which can be any valid node handle.
599    * @return the document encoding String object.
600    */
getDocumentEncoding(int nodeHandle)601   public String getDocumentEncoding(int nodeHandle);
602 
603   /**
604    * Return an indication of the standalone status of the document,
605    *        either "yes" or "no". This property is derived from the optional
606    *        standalone document declaration in the XML declaration at the
607    *        beginning of the document entity, and has no value if there is no
608    *        standalone document declaration.
609    *
610    * @param nodeHandle The node id, which can be any valid node handle.
611    * @return the document standalone String object, either "yes", "no", or null.
612    */
getDocumentStandalone(int nodeHandle)613   public String getDocumentStandalone(int nodeHandle);
614 
615   /**
616    * Return a string representing the XML version of the document. This
617    * property is derived from the XML declaration optionally present at the
618    * beginning of the document entity, and has no value if there is no XML
619    * declaration.
620    *
621    * @param documentHandle the document handle
622    * @return the document version String object
623    */
getDocumentVersion(int documentHandle)624   public String getDocumentVersion(int documentHandle);
625 
626   /**
627    * Return an indication of
628    * whether the processor has read the complete DTD. Its value is a
629    * boolean. If it is false, then certain properties (indicated in their
630    * descriptions below) may be unknown. If it is true, those properties
631    * are never unknown.
632    *
633    * @return <code>true</code> if all declarations were processed;
634    *         <code>false</code> otherwise.
635    */
getDocumentAllDeclarationsProcessed()636   public boolean getDocumentAllDeclarationsProcessed();
637 
638   /**
639    *   A document type declaration information item has the following properties:
640    *
641    *     1. [system identifier] The system identifier of the external subset, if
642    *        it exists. Otherwise this property has no value.
643    *
644    * @return the system identifier String object, or null if there is none.
645    */
getDocumentTypeDeclarationSystemIdentifier()646   public String getDocumentTypeDeclarationSystemIdentifier();
647 
648   /**
649    * Return the public identifier of the external subset,
650    * normalized as described in 4.2.2 External Entities [XML]. If there is
651    * no external subset or if it has no public identifier, this property
652    * has no value.
653    *
654    * @return the public identifier String object, or null if there is none.
655    */
getDocumentTypeDeclarationPublicIdentifier()656   public String getDocumentTypeDeclarationPublicIdentifier();
657 
658   /**
659    * Returns the <code>Element</code> whose <code>ID</code> is given by
660    * <code>elementId</code>. If no such element exists, returns
661    * <code>DTM.NULL</code>. Behavior is not defined if more than one element
662    * has this <code>ID</code>. Attributes (including those
663    * with the name "ID") are not of type ID unless so defined by DTD/Schema
664    * information available to the DTM implementation.
665    * Implementations that do not know whether attributes are of type ID or
666    * not are expected to return <code>DTM.NULL</code>.
667    *
668    * <p>%REVIEW% Presumably IDs are still scoped to a single document,
669    * and this operation searches only within a single document, right?
670    * Wouldn't want collisions between DTMs in the same process.</p>
671    *
672    * @param elementId The unique <code>id</code> value for an element.
673    * @return The handle of the matching element.
674    */
getElementById(String elementId)675   public int getElementById(String elementId);
676 
677   /**
678    * The getUnparsedEntityURI function returns the URI of the unparsed
679    * entity with the specified name in the same document as the context
680    * node (see [3.3 Unparsed Entities]). It returns the empty string if
681    * there is no such entity.
682    * <p>
683    * XML processors may choose to use the System Identifier (if one
684    * is provided) to resolve the entity, rather than the URI in the
685    * Public Identifier. The details are dependent on the processor, and
686    * we would have to support some form of plug-in resolver to handle
687    * this properly. Currently, we simply return the System Identifier if
688    * present, and hope that it a usable URI or that our caller can
689    * map it to one.
690    * %REVIEW% Resolve Public Identifiers... or consider changing function name.
691    * <p>
692    * If we find a relative URI
693    * reference, XML expects it to be resolved in terms of the base URI
694    * of the document. The DOM doesn't do that for us, and it isn't
695    * entirely clear whether that should be done here; currently that's
696    * pushed up to a higher level of our application. (Note that DOM Level
697    * 1 didn't store the document's base URI.)
698    * %REVIEW% Consider resolving Relative URIs.
699    * <p>
700    * (The DOM's statement that "An XML processor may choose to
701    * completely expand entities before the structure model is passed
702    * to the DOM" refers only to parsed entities, not unparsed, and hence
703    * doesn't affect this function.)
704    *
705    * @param name A string containing the Entity Name of the unparsed
706    * entity.
707    *
708    * @return String containing the URI of the Unparsed Entity, or an
709    * empty string if no such entity exists.
710    */
getUnparsedEntityURI(String name)711   public String getUnparsedEntityURI(String name);
712 
713   // ============== Boolean methods ================
714 
715   /**
716    * Return true if the xsl:strip-space or xsl:preserve-space was processed
717    * during construction of the document contained in this DTM.
718    *
719    * NEEDSDOC ($objectName$) @return
720    */
supportsPreStripping()721   public boolean supportsPreStripping();
722 
723   /**
724    * Figure out whether nodeHandle2 should be considered as being later
725    * in the document than nodeHandle1, in Document Order as defined
726    * by the XPath model. This may not agree with the ordering defined
727    * by other XML applications.
728    * <p>
729    * There are some cases where ordering isn't defined, and neither are
730    * the results of this function -- though we'll generally return true.
731    * <p>
732    * %REVIEW% Make sure this does the right thing with attribute nodes!!!
733    * <p>
734    * %REVIEW% Consider renaming for clarity. Perhaps isDocumentOrder(a,b)?
735    *
736    * @param firstNodeHandle DOM Node to perform position comparison on.
737    * @param secondNodeHandle DOM Node to perform position comparison on.
738    *
739    * @return false if secondNode comes before firstNode, otherwise return true.
740    * You can think of this as
741    * <code>(firstNode.documentOrderPosition &lt;= secondNode.documentOrderPosition)</code>.
742    */
isNodeAfter(int firstNodeHandle, int secondNodeHandle)743   public boolean isNodeAfter(int firstNodeHandle, int secondNodeHandle);
744 
745   /**
746    * 2. [element content whitespace] A boolean indicating whether a
747    * text node represents white space appearing within element content
748    * (see [XML], 2.10 "White Space Handling").  Note that validating
749    * XML processors are required by XML 1.0 to provide this
750    * information... but that DOM Level 2 did not support it, since it
751    * depends on knowledge of the DTD which DOM2 could not guarantee
752    * would be available.
753    * <p>
754    * If there is no declaration for the containing element, an XML
755    * processor must assume that the whitespace could be meaningful and
756    * return false. If no declaration has been read, but the [all
757    * declarations processed] property of the document information item
758    * is false (so there may be an unread declaration), then the value
759    * of this property is indeterminate for white space characters and
760    * should probably be reported as false. It is always false for text
761    * nodes that contain anything other than (or in addition to) white
762    * space.
763    * <p>
764    * Note too that it always returns false for non-Text nodes.
765    * <p>
766    * %REVIEW% Joe wants to rename this isWhitespaceInElementContent() for clarity
767    *
768    * @param nodeHandle the node ID.
769    * @return <code>true</code> if the node definitely represents whitespace in
770    * element content; <code>false</code> otherwise.
771    */
isCharacterElementContentWhitespace(int nodeHandle)772   public boolean isCharacterElementContentWhitespace(int nodeHandle);
773 
774   /**
775    *    10. [all declarations processed] This property is not strictly speaking
776    *        part of the infoset of the document. Rather it is an indication of
777    *        whether the processor has read the complete DTD. Its value is a
778    *        boolean. If it is false, then certain properties (indicated in their
779    *        descriptions below) may be unknown. If it is true, those properties
780    *        are never unknown.
781    *
782    * @param documentHandle A node handle that must identify a document.
783    * @return <code>true</code> if all declarations were processed;
784    *         <code>false</code> otherwise.
785    */
isDocumentAllDeclarationsProcessed(int documentHandle)786   public boolean isDocumentAllDeclarationsProcessed(int documentHandle);
787 
788   /**
789    *     5. [specified] A flag indicating whether this attribute was actually
790    *        specified in the start-tag of its element, or was defaulted from the
791    *        DTD (or schema).
792    *
793    * @param attributeHandle The attribute handle
794    * @return <code>true</code> if the attribute was specified;
795    *         <code>false</code> if it was defaulted or the handle doesn't
796    *            refer to an attribute node.
797    */
isAttributeSpecified(int attributeHandle)798   public boolean isAttributeSpecified(int attributeHandle);
799 
  // ========== Direct SAX Dispatch, for optimization purposes ========
801 
802   /**
803    * Directly call the
804    * characters method on the passed ContentHandler for the
805    * string-value of the given node (see http://www.w3.org/TR/xpath#data-model
806    * for the definition of a node's string-value). Multiple calls to the
807    * ContentHandler's characters methods may well occur for a single call to
808    * this method.
809    *
810    * @param nodeHandle The node ID.
811    * @param ch A non-null reference to a ContentHandler.
812    * @param normalize true if the content should be normalized according to
813    * the rules for the XPath
814    * <a href="http://www.w3.org/TR/xpath#function-normalize-space">normalize-space</a>
815    * function.
816    *
817    * @throws org.xml.sax.SAXException
818    */
dispatchCharactersEvents( int nodeHandle, org.xml.sax.ContentHandler ch, boolean normalize)819   public void dispatchCharactersEvents(
820     int nodeHandle, org.xml.sax.ContentHandler ch, boolean normalize)
821       throws org.xml.sax.SAXException;
822 
823   /**
824    * Directly create SAX parser events representing the XML content of
825    * a DTM subtree. This is a "serialize" operation.
826    *
827    * @param nodeHandle The node ID.
828    * @param ch A non-null reference to a ContentHandler.
829    *
830    * @throws org.xml.sax.SAXException
831    */
dispatchToEvents(int nodeHandle, org.xml.sax.ContentHandler ch)832   public void dispatchToEvents(int nodeHandle, org.xml.sax.ContentHandler ch)
833     throws org.xml.sax.SAXException;
834 
835   /**
836    * Return an DOM node for the given node.
837    *
838    * @param nodeHandle The node ID.
839    *
840    * @return A node representation of the DTM node.
841    */
getNode(int nodeHandle)842   public org.w3c.dom.Node getNode(int nodeHandle);
843 
  // ==== Construction methods (may not be supported by some implementations!) =====
  // %REVIEW% What response occurs if not supported?
846 
847   /**
848    * @return true iff we're building this model incrementally (eg
849    * we're partnered with a CoroutineParser) and thus require that the
850    * transformation and the parse run simultaneously. Guidance to the
851    * DTMManager.
852    */
needsTwoThreads()853   public boolean needsTwoThreads();
854 
  // %REVIEW% Do these appends make any sense, should we support a
  // wider set of methods (like the "append" methods in the
  // current DTMDocumentImpl draft), or should we just support SAX
  // listener interfaces?  Should it be a separate interface to
  // make that distinction explicit?
860 
861   /**
862    * Return this DTM's content handler, if it has one.
863    *
864    * @return null if this model doesn't respond to SAX events.
865    */
getContentHandler()866   public org.xml.sax.ContentHandler getContentHandler();
867 
868   /**
869    * Return this DTM's lexical handler, if it has one.
870    *
871    * %REVIEW% Should this return null if constrution already done/begun?
872    *
873    * @return null if this model doesn't respond to lexical SAX events.
874    */
getLexicalHandler()875   public org.xml.sax.ext.LexicalHandler getLexicalHandler();
876 
877   /**
878    * Return this DTM's EntityResolver, if it has one.
879    *
880    * @return null if this model doesn't respond to SAX entity ref events.
881    */
getEntityResolver()882   public org.xml.sax.EntityResolver getEntityResolver();
883 
884   /**
885    * Return this DTM's DTDHandler, if it has one.
886    *
887    * @return null if this model doesn't respond to SAX dtd events.
888    */
getDTDHandler()889   public org.xml.sax.DTDHandler getDTDHandler();
890 
891   /**
892    * Return this DTM's ErrorHandler, if it has one.
893    *
894    * @return null if this model doesn't respond to SAX error events.
895    */
getErrorHandler()896   public org.xml.sax.ErrorHandler getErrorHandler();
897 
898   /**
899    * Return this DTM's DeclHandler, if it has one.
900    *
901    * @return null if this model doesn't respond to SAX Decl events.
902    */
getDeclHandler()903   public org.xml.sax.ext.DeclHandler getDeclHandler();
904 
905   /**
906    * Append a child to "the end of the document". Please note that
907    * the node is always cloned in a base DTM, since our basic behavior
908    * is immutable so nodes can't be removed from their previous
909    * location.
910    *
911    * <p> %REVIEW%  DTM maintains an insertion cursor which
912    * performs a depth-first tree walk as nodes come in, and this operation
913    * is really equivalent to:
914    *    insertionCursor.appendChild(document.importNode(newChild)))
915    * where the insert point is the last element that was appended (or
916    * the last one popped back to by an end-element operation).</p>
917    *
918    * @param newChild Must be a valid new node handle.
919    * @param clone true if the child should be cloned into the document.
920    * @param cloneDepth if the clone argument is true, specifies that the
921    *                   clone should include all it's children.
922    */
appendChild(int newChild, boolean clone, boolean cloneDepth)923   public void appendChild(int newChild, boolean clone, boolean cloneDepth);
924 
925   /**
926    * Append a text node child that will be constructed from a string,
927    * to the end of the document. Behavior is otherwise like appendChild().
928    *
929    * @param str Non-null reference to a string.
930    */
appendTextChild(String str)931   public void appendTextChild(String str);
932 
933   /**
934    * Get the location of a node in the source document.
935    *
936    * @param node an <code>int</code> value
937    * @return a <code>SourceLocator</code> value or null if no location
938    * is available
939    */
getSourceLocatorFor(int node)940   public SourceLocator getSourceLocatorFor(int node);
941 
942   /**
943    * As the DTM is registered with the DTMManager, this method
944    * will be called. This will give the DTM implementation a
945    * chance to initialize any subsystems that are required to
946    * build the DTM
947    */
documentRegistration()948   public void documentRegistration();
949 
950   /**
951    * As documents are released from the DTMManager, the DTM implementation
952    * will be notified of the event. This will allow the DTM implementation
953    * to shutdown any subsystem activity that may of been assoiated with
954    * the active DTM Implementation.
955    */
956 
documentRelease()957    public void documentRelease();
958 
959    /**
960     * Migrate a DTM built with an old DTMManager to a new DTMManager.
961     * After the migration, the new DTMManager will treat the DTM as
962     * one that is built by itself.
963     * This is used to support DTM sharing between multiple transformations.
964     * @param manager the DTMManager
965     */
migrateTo(DTMManager manager)966    public void migrateTo(DTMManager manager);
967 }
968