1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the  "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 /*
19  * $Id: DOMHelper.java 468655 2006-10-28 07:12:06Z minchau $
20  */
21 package org.apache.xml.utils;
22 
23 import java.util.Hashtable;
24 import java.util.Vector;
25 
26 import javax.xml.XMLConstants;
27 import javax.xml.parsers.DocumentBuilder;
28 import javax.xml.parsers.DocumentBuilderFactory;
29 import javax.xml.parsers.ParserConfigurationException;
30 
31 import org.apache.xml.dtm.ref.DTMNodeProxy;
32 import org.apache.xml.res.XMLErrorResources;
33 import org.apache.xml.res.XMLMessages;
34 
35 import org.w3c.dom.Attr;
36 import org.w3c.dom.DOMImplementation;
37 import org.w3c.dom.Document;
38 import org.w3c.dom.DocumentType;
39 import org.w3c.dom.Element;
40 import org.w3c.dom.Entity;
41 import org.w3c.dom.NamedNodeMap;
42 import org.w3c.dom.Node;
43 import org.w3c.dom.Text;
44 
45 /**
46  * @deprecated Since the introduction of the DTM, this class will be removed.
47  * This class provides a front-end to DOM implementations, providing
48  * a number of utility functions that either aren't yet standardized
49  * by the DOM spec or that are defined in optional DOM modules and
50  * hence may not be present in all DOMs.
51  */
52 public class DOMHelper
53 {
54 
55   /**
56    * DOM Level 1 did not have a standard mechanism for creating a new
57    * Document object. This function provides a DOM-implementation-independent
58    * abstraction for that for that concept. It's typically used when
59    * outputting a new DOM as the result of an operation.
60    * <p>
61    * TODO: This isn't directly compatable with DOM Level 2.
62    * The Level 2 createDocument call also creates the root
63    * element, and thus requires that you know what that element will be
64    * before creating the Document. We should think about whether we want
65    * to change this code, and the callers, so we can use the DOM's own
66    * method. (It's also possible that DOM Level 3 may relax this
67    * sequence, but you may give up some intelligence in the DOM by
68    * doing so; the intent was that knowing the document type and root
69    * element might let the DOM automatically switch to a specialized
70    * subclass for particular kinds of documents.)
71    *
72    * @param isSecureProcessing state of the secure processing feature.
73    * @return The newly created DOM Document object, with no children, or
74    * null if we can't find a DOM implementation that permits creating
75    * new empty Documents.
76    */
createDocument(boolean isSecureProcessing)77   public static Document createDocument(boolean isSecureProcessing)
78   {
79 
80     try
81     {
82 
83       // Use an implementation of the JAVA API for XML Parsing 1.0 to
84       // create a DOM Document node to contain the result.
85       DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();
86 
87       dfactory.setNamespaceAware(true);
88       // BEGIN android-removed
89       //     If set, DocumentBuilderFactoryImpl.newDocumentBuilder() fails
90       //     because we haven't implemented validation
91       // dfactory.setValidating(true);
92       // BEGIN android-removed
93 
94       // BEGIN android-removed
95       //     We haven't implemented secure processing
96       // if (isSecureProcessing)
97       // {
98       //   try
99       //   {
100       //     dfactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
101       //   }
102       //   catch (ParserConfigurationException pce) {}
103       // }
104       // END android-removed
105 
106       DocumentBuilder docBuilder = dfactory.newDocumentBuilder();
107       Document outNode = docBuilder.newDocument();
108 
109       return outNode;
110     }
111     catch (ParserConfigurationException pce)
112     {
113       throw new RuntimeException(
114         XMLMessages.createXMLMessage(
115           XMLErrorResources.ER_CREATEDOCUMENT_NOT_SUPPORTED, null));  //"createDocument() not supported in XPathContext!");
116 
117       // return null;
118     }
119   }
120 
121   /**
122    * DOM Level 1 did not have a standard mechanism for creating a new
123    * Document object. This function provides a DOM-implementation-independent
124    * abstraction for that for that concept. It's typically used when
125    * outputting a new DOM as the result of an operation.
126    *
127    * @return The newly created DOM Document object, with no children, or
128    * null if we can't find a DOM implementation that permits creating
129    * new empty Documents.
130    */
createDocument()131   public static Document createDocument()
132   {
133     return createDocument(false);
134   }
135 
136   /**
137    * Tells, through the combination of the default-space attribute
138    * on xsl:stylesheet, xsl:strip-space, xsl:preserve-space, and the
139    * xml:space attribute, whether or not extra whitespace should be stripped
140    * from the node.  Literal elements from template elements should
141    * <em>not</em> be tested with this function.
142    * @param textNode A text node from the source tree.
143    * @return true if the text node should be stripped of extra whitespace.
144    *
145    * @throws javax.xml.transform.TransformerException
146    * @xsl.usage advanced
147    */
shouldStripSourceNode(Node textNode)148   public boolean shouldStripSourceNode(Node textNode)
149           throws javax.xml.transform.TransformerException
150   {
151 
152     // return (null == m_envSupport) ? false : m_envSupport.shouldStripSourceNode(textNode);
153     return false;
154   }
155 
156   /**
157    * Supports the XPath function GenerateID by returning a unique
158    * identifier string for any given DOM Node.
159    * <p>
160    * Warning: The base implementation uses the Node object's hashCode(),
161    * which is NOT guaranteed to be unique. If that method hasn't been
162    * overridden in this DOM ipmlementation, most Java implementions will
163    * derive it from the object's address and should be OK... but if
164    * your DOM uses a different definition of hashCode (eg hashing the
165    * contents of the subtree), or if your DOM may have multiple objects
166    * that represent a single Node in the data structure (eg via proxying),
167    * you may need to find another way to assign a unique identifier.
168    * <p>
169    * Also, be aware that if nodes are destroyed and recreated, there is
170    * an open issue regarding whether an ID may be reused. Currently
171    * we're assuming that the input document is stable for the duration
172    * of the XPath/XSLT operation, so this shouldn't arise in this context.
173    * <p>
174    * (DOM Level 3 is investigating providing a unique node "key", but
175    * that won't help Level 1 and Level 2 implementations.)
176    *
177    * @param node whose identifier you want to obtain
178    *
179    * @return a string which should be different for every Node object.
180    */
getUniqueID(Node node)181   public String getUniqueID(Node node)
182   {
183     return "N" + Integer.toHexString(node.hashCode()).toUpperCase();
184   }
185 
186   /**
187    * Figure out whether node2 should be considered as being later
188    * in the document than node1, in Document Order as defined
189    * by the XPath model. This may not agree with the ordering defined
190    * by other XML applications.
191    * <p>
192    * There are some cases where ordering isn't defined, and neither are
193    * the results of this function -- though we'll generally return true.
194    *
195    * TODO: Make sure this does the right thing with attribute nodes!!!
196    *
197    * @param node1 DOM Node to perform position comparison on.
198    * @param node2 DOM Node to perform position comparison on .
199    *
200    * @return false if node2 comes before node1, otherwise return true.
201    * You can think of this as
202    * <code>(node1.documentOrderPosition &lt;= node2.documentOrderPosition)</code>.
203    */
isNodeAfter(Node node1, Node node2)204   public static boolean isNodeAfter(Node node1, Node node2)
205   {
206     if (node1 == node2 || isNodeTheSame(node1, node2))
207       return true;
208 
209         // Default return value, if there is no defined ordering
210     boolean isNodeAfter = true;
211 
212     Node parent1 = getParentOfNode(node1);
213     Node parent2 = getParentOfNode(node2);
214 
215     // Optimize for most common case
216     if (parent1 == parent2 || isNodeTheSame(parent1, parent2))  // then we know they are siblings
217     {
218       if (null != parent1)
219         isNodeAfter = isNodeAfterSibling(parent1, node1, node2);
220       else
221       {
222                   // If both parents are null, ordering is not defined.
223                   // We're returning a value in lieu of throwing an exception.
224                   // Not a case we expect to arise in XPath, but beware if you
225                   // try to reuse this method.
226 
227                   // We can just fall through in this case, which allows us
228                   // to hit the debugging code at the end of the function.
229           //return isNodeAfter;
230       }
231     }
232     else
233     {
234 
235       // General strategy: Figure out the lengths of the two
236       // ancestor chains, reconcile the lengths, and look for
237           // the lowest common ancestor. If that ancestor is one of
238           // the nodes being compared, it comes before the other.
239       // Otherwise perform a sibling compare.
240                 //
241                 // NOTE: If no common ancestor is found, ordering is undefined
242                 // and we return the default value of isNodeAfter.
243 
244       // Count parents in each ancestor chain
245       int nParents1 = 2, nParents2 = 2;  // include node & parent obtained above
246 
247       while (parent1 != null)
248       {
249         nParents1++;
250 
251         parent1 = getParentOfNode(parent1);
252       }
253 
254       while (parent2 != null)
255       {
256         nParents2++;
257 
258         parent2 = getParentOfNode(parent2);
259       }
260 
261           // Initially assume scan for common ancestor starts with
262           // the input nodes.
263       Node startNode1 = node1, startNode2 = node2;
264 
265       // If one ancestor chain is longer, adjust its start point
266           // so we're comparing at the same depths
267       if (nParents1 < nParents2)
268       {
269         // Adjust startNode2 to depth of startNode1
270         int adjust = nParents2 - nParents1;
271 
272         for (int i = 0; i < adjust; i++)
273         {
274           startNode2 = getParentOfNode(startNode2);
275         }
276       }
277       else if (nParents1 > nParents2)
278       {
279         // adjust startNode1 to depth of startNode2
280         int adjust = nParents1 - nParents2;
281 
282         for (int i = 0; i < adjust; i++)
283         {
284           startNode1 = getParentOfNode(startNode1);
285         }
286       }
287 
288       Node prevChild1 = null, prevChild2 = null;  // so we can "back up"
289 
290       // Loop up the ancestor chain looking for common parent
291       while (null != startNode1)
292       {
293         if (startNode1 == startNode2 || isNodeTheSame(startNode1, startNode2))  // common parent?
294         {
295           if (null == prevChild1)  // first time in loop?
296           {
297 
298             // Edge condition: one is the ancestor of the other.
299             isNodeAfter = (nParents1 < nParents2) ? true : false;
300 
301             break;  // from while loop
302           }
303           else
304           {
305                         // Compare ancestors below lowest-common as siblings
306             isNodeAfter = isNodeAfterSibling(startNode1, prevChild1,
307                                              prevChild2);
308 
309             break;  // from while loop
310           }
311         }  // end if(startNode1 == startNode2)
312 
313                 // Move up one level and try again
314         prevChild1 = startNode1;
315         startNode1 = getParentOfNode(startNode1);
316         prevChild2 = startNode2;
317         startNode2 = getParentOfNode(startNode2);
318       }  // end while(parents exist to examine)
319     }  // end big else (not immediate siblings)
320 
321         // WARNING: The following diagnostic won't report the early
322         // "same node" case. Fix if/when needed.
323 
324     /* -- please do not remove... very useful for diagnostics --
325     System.out.println("node1 = "+node1.getNodeName()+"("+node1.getNodeType()+")"+
326     ", node2 = "+node2.getNodeName()
327     +"("+node2.getNodeType()+")"+
328     ", isNodeAfter = "+isNodeAfter); */
329     return isNodeAfter;
330   }  // end isNodeAfter(Node node1, Node node2)
331 
332   /**
333    * Use DTMNodeProxy to determine whether two nodes are the same.
334    *
335    * @param node1 The first DOM node to compare.
336    * @param node2 The second DOM node to compare.
337    * @return true if the two nodes are the same.
338    */
isNodeTheSame(Node node1, Node node2)339   public static boolean isNodeTheSame(Node node1, Node node2)
340   {
341     if (node1 instanceof DTMNodeProxy && node2 instanceof DTMNodeProxy)
342       return ((DTMNodeProxy)node1).equals((DTMNodeProxy)node2);
343     else
344       return (node1 == node2);
345   }
346 
347   /**
348    * Figure out if child2 is after child1 in document order.
349    * <p>
350    * Warning: Some aspects of "document order" are not well defined.
351    * For example, the order of attributes is considered
352    * meaningless in XML, and the order reported by our model will
353    * be consistant for a given invocation but may not
354    * match that of either the source file or the serialized output.
355    *
356    * @param parent Must be the parent of both child1 and child2.
357    * @param child1 Must be the child of parent and not equal to child2.
358    * @param child2 Must be the child of parent and not equal to child1.
359    * @return true if child 2 is after child1 in document order.
360    */
isNodeAfterSibling(Node parent, Node child1, Node child2)361   private static boolean isNodeAfterSibling(Node parent, Node child1,
362                                             Node child2)
363   {
364 
365     boolean isNodeAfterSibling = false;
366     short child1type = child1.getNodeType();
367     short child2type = child2.getNodeType();
368 
369     if ((Node.ATTRIBUTE_NODE != child1type)
370             && (Node.ATTRIBUTE_NODE == child2type))
371     {
372 
373       // always sort attributes before non-attributes.
374       isNodeAfterSibling = false;
375     }
376     else if ((Node.ATTRIBUTE_NODE == child1type)
377              && (Node.ATTRIBUTE_NODE != child2type))
378     {
379 
380       // always sort attributes before non-attributes.
381       isNodeAfterSibling = true;
382     }
383     else if (Node.ATTRIBUTE_NODE == child1type)
384     {
385       NamedNodeMap children = parent.getAttributes();
386       int nNodes = children.getLength();
387       boolean found1 = false, found2 = false;
388 
389           // Count from the start until we find one or the other.
390       for (int i = 0; i < nNodes; i++)
391       {
392         Node child = children.item(i);
393 
394         if (child1 == child || isNodeTheSame(child1, child))
395         {
396           if (found2)
397           {
398             isNodeAfterSibling = false;
399 
400             break;
401           }
402 
403           found1 = true;
404         }
405         else if (child2 == child || isNodeTheSame(child2, child))
406         {
407           if (found1)
408           {
409             isNodeAfterSibling = true;
410 
411             break;
412           }
413 
414           found2 = true;
415         }
416       }
417     }
418     else
419     {
420                 // TODO: Check performance of alternate solution:
421                 // There are two choices here: Count from the start of
422                 // the document until we find one or the other, or count
423                 // from one until we find or fail to find the other.
424                 // Either can wind up scanning all the siblings in the worst
425                 // case, which on a wide document can be a lot of work but
426                 // is more typically is a short list.
427                 // Scanning from the start involves two tests per iteration,
428                 // but it isn't clear that scanning from the middle doesn't
429                 // yield more iterations on average.
430                 // We should run some testcases.
431       Node child = parent.getFirstChild();
432       boolean found1 = false, found2 = false;
433 
434       while (null != child)
435       {
436 
437         // Node child = children.item(i);
438         if (child1 == child || isNodeTheSame(child1, child))
439         {
440           if (found2)
441           {
442             isNodeAfterSibling = false;
443 
444             break;
445           }
446 
447           found1 = true;
448         }
449         else if (child2 == child || isNodeTheSame(child2, child))
450         {
451           if (found1)
452           {
453             isNodeAfterSibling = true;
454 
455             break;
456           }
457 
458           found2 = true;
459         }
460 
461         child = child.getNextSibling();
462       }
463     }
464 
465     return isNodeAfterSibling;
466   }  // end isNodeAfterSibling(Node parent, Node child1, Node child2)
467 
468   //==========================================================
469   // SECTION: Namespace resolution
470   //==========================================================
471 
472   /**
473    * Get the depth level of this node in the tree (equals 1 for
474    * a parentless node).
475    *
476    * @param n Node to be examined.
477    * @return the number of ancestors, plus one
478    * @xsl.usage internal
479    */
getLevel(Node n)480   public short getLevel(Node n)
481   {
482 
483     short level = 1;
484 
485     while (null != (n = getParentOfNode(n)))
486     {
487       level++;
488     }
489 
490     return level;
491   }
492 
493   /**
494    * Given an XML Namespace prefix and a context in which the prefix
495    * is to be evaluated, return the Namespace Name this prefix was
496    * bound to. Note that DOM Level 3 is expected to provide a version of
497    * this which deals with the DOM's "early binding" behavior.
498    *
499    * Default handling:
500    *
501    * @param prefix String containing namespace prefix to be resolved,
502    * without the ':' which separates it from the localname when used
503    * in a Node Name. The empty sting signifies the default namespace
504    * at this point in the document.
505    * @param namespaceContext Element which provides context for resolution.
506    * (We could extend this to work for other nodes by first seeking their
507    * nearest Element ancestor.)
508    *
509    * @return a String containing the Namespace URI which this prefix
510    * represents in the specified context.
511    */
getNamespaceForPrefix(String prefix, Element namespaceContext)512   public String getNamespaceForPrefix(String prefix, Element namespaceContext)
513   {
514 
515     int type;
516     Node parent = namespaceContext;
517     String namespace = null;
518 
519     if (prefix.equals("xml"))
520     {
521       namespace = QName.S_XMLNAMESPACEURI; // Hardcoded, per Namespace spec
522     }
523         else if(prefix.equals("xmlns"))
524     {
525           // Hardcoded in the DOM spec, expected to be adopted by
526           // Namespace spec. NOTE: Namespace declarations _must_ use
527           // the xmlns: prefix; other prefixes declared as belonging
528           // to this namespace will not be recognized and should
529           // probably be rejected by parsers as erroneous declarations.
530       namespace = "http://www.w3.org/2000/xmlns/";
531     }
532     else
533     {
534           // Attribute name for this prefix's declaration
535           String declname=(prefix=="")
536                         ? "xmlns"
537                         : "xmlns:"+prefix;
538 
539           // Scan until we run out of Elements or have resolved the namespace
540       while ((null != parent) && (null == namespace)
541              && (((type = parent.getNodeType()) == Node.ELEMENT_NODE)
542                  || (type == Node.ENTITY_REFERENCE_NODE)))
543       {
544         if (type == Node.ELEMENT_NODE)
545         {
546 
547                         // Look for the appropriate Namespace Declaration attribute,
548                         // either "xmlns:prefix" or (if prefix is "") "xmlns".
549                         // TODO: This does not handle "implicit declarations"
550                         // which may be created when the DOM is edited. DOM Level
551                         // 3 will define how those should be interpreted. But
552                         // this issue won't arise in freshly-parsed DOMs.
553 
554                 // NOTE: declname is set earlier, outside the loop.
555                         Attr attr=((Element)parent).getAttributeNode(declname);
556                         if(attr!=null)
557                         {
558                 namespace = attr.getNodeValue();
559                 break;
560                         }
561                 }
562 
563         parent = getParentOfNode(parent);
564       }
565     }
566 
567     return namespace;
568   }
569 
570   /**
571    * An experiment for the moment.
572    */
573   Hashtable m_NSInfos = new Hashtable();
574 
575   /** Object to put into the m_NSInfos table that tells that a node has not been
576    *  processed, but has xmlns namespace decls.  */
577   protected static final NSInfo m_NSInfoUnProcWithXMLNS = new NSInfo(false,
578                                                             true);
579 
580   /** Object to put into the m_NSInfos table that tells that a node has not been
581    *  processed, but has no xmlns namespace decls.  */
582   protected static final NSInfo m_NSInfoUnProcWithoutXMLNS = new NSInfo(false,
583                                                                false);
584 
585   /** Object to put into the m_NSInfos table that tells that a node has not been
586    *  processed, and has no xmlns namespace decls, and has no ancestor decls.  */
587   protected static final NSInfo m_NSInfoUnProcNoAncestorXMLNS =
588     new NSInfo(false, false, NSInfo.ANCESTORNOXMLNS);
589 
590   /** Object to put into the m_NSInfos table that tells that a node has been
591    *  processed, and has xmlns namespace decls.  */
592   protected static final NSInfo m_NSInfoNullWithXMLNS = new NSInfo(true,
593                                                           true);
594 
595   /** Object to put into the m_NSInfos table that tells that a node has been
596    *  processed, and has no xmlns namespace decls.  */
597   protected static final NSInfo m_NSInfoNullWithoutXMLNS = new NSInfo(true,
598                                                              false);
599 
600   /** Object to put into the m_NSInfos table that tells that a node has been
601    *  processed, and has no xmlns namespace decls. and has no ancestor decls.  */
602   protected static final NSInfo m_NSInfoNullNoAncestorXMLNS =
603     new NSInfo(true, false, NSInfo.ANCESTORNOXMLNS);
604 
605   /** Vector of node (odd indexes) and NSInfos (even indexes) that tell if
606    *  the given node is a candidate for ancestor namespace processing.  */
607   protected Vector m_candidateNoAncestorXMLNS = new Vector();
608 
609   /**
610    * Returns the namespace of the given node. Differs from simply getting
611    * the node's prefix and using getNamespaceForPrefix in that it attempts
612    * to cache some of the data in NSINFO objects, to avoid repeated lookup.
613    * TODO: Should we consider moving that logic into getNamespaceForPrefix?
614    *
615    * @param n Node to be examined.
616    *
617    * @return String containing the Namespace Name (uri) for this node.
618    * Note that this is undefined for any nodes other than Elements and
619    * Attributes.
620    */
getNamespaceOfNode(Node n)621   public String getNamespaceOfNode(Node n)
622   {
623 
624     String namespaceOfPrefix;
625     boolean hasProcessedNS;
626     NSInfo nsInfo;
627     short ntype = n.getNodeType();
628 
629     if (Node.ATTRIBUTE_NODE != ntype)
630     {
631       Object nsObj = m_NSInfos.get(n);  // return value
632 
633       nsInfo = (nsObj == null) ? null : (NSInfo) nsObj;
634       hasProcessedNS = (nsInfo == null) ? false : nsInfo.m_hasProcessedNS;
635     }
636     else
637     {
638       hasProcessedNS = false;
639       nsInfo = null;
640     }
641 
642     if (hasProcessedNS)
643     {
644       namespaceOfPrefix = nsInfo.m_namespace;
645     }
646     else
647     {
648       namespaceOfPrefix = null;
649 
650       String nodeName = n.getNodeName();
651       int indexOfNSSep = nodeName.indexOf(':');
652       String prefix;
653 
654       if (Node.ATTRIBUTE_NODE == ntype)
655       {
656         if (indexOfNSSep > 0)
657         {
658           prefix = nodeName.substring(0, indexOfNSSep);
659         }
660         else
661         {
662 
663           // Attributes don't use the default namespace, so if
664           // there isn't a prefix, we're done.
665           return namespaceOfPrefix;
666         }
667       }
668       else
669       {
670         prefix = (indexOfNSSep >= 0)
671                  ? nodeName.substring(0, indexOfNSSep) : "";
672       }
673 
674       boolean ancestorsHaveXMLNS = false;
675       boolean nHasXMLNS = false;
676 
677       if (prefix.equals("xml"))
678       {
679         namespaceOfPrefix = QName.S_XMLNAMESPACEURI;
680       }
681       else
682       {
683         int parentType;
684         Node parent = n;
685 
686         while ((null != parent) && (null == namespaceOfPrefix))
687         {
688           if ((null != nsInfo)
689                   && (nsInfo.m_ancestorHasXMLNSAttrs
690                       == NSInfo.ANCESTORNOXMLNS))
691           {
692             break;
693           }
694 
695           parentType = parent.getNodeType();
696 
697           if ((null == nsInfo) || nsInfo.m_hasXMLNSAttrs)
698           {
699             boolean elementHasXMLNS = false;
700 
701             if (parentType == Node.ELEMENT_NODE)
702             {
703               NamedNodeMap nnm = parent.getAttributes();
704 
705               for (int i = 0; i < nnm.getLength(); i++)
706               {
707                 Node attr = nnm.item(i);
708                 String aname = attr.getNodeName();
709 
710                 if (aname.charAt(0) == 'x')
711                 {
712                   boolean isPrefix = aname.startsWith("xmlns:");
713 
714                   if (aname.equals("xmlns") || isPrefix)
715                   {
716                     if (n == parent)
717                       nHasXMLNS = true;
718 
719                     elementHasXMLNS = true;
720                     ancestorsHaveXMLNS = true;
721 
722                     String p = isPrefix ? aname.substring(6) : "";
723 
724                     if (p.equals(prefix))
725                     {
726                       namespaceOfPrefix = attr.getNodeValue();
727 
728                       break;
729                     }
730                   }
731                 }
732               }
733             }
734 
735             if ((Node.ATTRIBUTE_NODE != parentType) && (null == nsInfo)
736                     && (n != parent))
737             {
738               nsInfo = elementHasXMLNS
739                        ? m_NSInfoUnProcWithXMLNS : m_NSInfoUnProcWithoutXMLNS;
740 
741               m_NSInfos.put(parent, nsInfo);
742             }
743           }
744 
745           if (Node.ATTRIBUTE_NODE == parentType)
746           {
747             parent = getParentOfNode(parent);
748           }
749           else
750           {
751             m_candidateNoAncestorXMLNS.addElement(parent);
752             m_candidateNoAncestorXMLNS.addElement(nsInfo);
753 
754             parent = parent.getParentNode();
755           }
756 
757           if (null != parent)
758           {
759             Object nsObj = m_NSInfos.get(parent);  // return value
760 
761             nsInfo = (nsObj == null) ? null : (NSInfo) nsObj;
762           }
763         }
764 
765         int nCandidates = m_candidateNoAncestorXMLNS.size();
766 
767         if (nCandidates > 0)
768         {
769           if ((false == ancestorsHaveXMLNS) && (null == parent))
770           {
771             for (int i = 0; i < nCandidates; i += 2)
772             {
773               Object candidateInfo = m_candidateNoAncestorXMLNS.elementAt(i
774                                        + 1);
775 
776               if (candidateInfo == m_NSInfoUnProcWithoutXMLNS)
777               {
778                 m_NSInfos.put(m_candidateNoAncestorXMLNS.elementAt(i),
779                               m_NSInfoUnProcNoAncestorXMLNS);
780               }
781               else if (candidateInfo == m_NSInfoNullWithoutXMLNS)
782               {
783                 m_NSInfos.put(m_candidateNoAncestorXMLNS.elementAt(i),
784                               m_NSInfoNullNoAncestorXMLNS);
785               }
786             }
787           }
788 
789           m_candidateNoAncestorXMLNS.removeAllElements();
790         }
791       }
792 
793       if (Node.ATTRIBUTE_NODE != ntype)
794       {
795         if (null == namespaceOfPrefix)
796         {
797           if (ancestorsHaveXMLNS)
798           {
799             if (nHasXMLNS)
800               m_NSInfos.put(n, m_NSInfoNullWithXMLNS);
801             else
802               m_NSInfos.put(n, m_NSInfoNullWithoutXMLNS);
803           }
804           else
805           {
806             m_NSInfos.put(n, m_NSInfoNullNoAncestorXMLNS);
807           }
808         }
809         else
810         {
811           m_NSInfos.put(n, new NSInfo(namespaceOfPrefix, nHasXMLNS));
812         }
813       }
814     }
815 
816     return namespaceOfPrefix;
817   }
818 
819   /**
820    * Returns the local name of the given node. If the node's name begins
821    * with a namespace prefix, this is the part after the colon; otherwise
822    * it's the full node name.
823    *
824    * @param n the node to be examined.
825    *
826    * @return String containing the Local Name
827    */
getLocalNameOfNode(Node n)828   public String getLocalNameOfNode(Node n)
829   {
830 
831     String qname = n.getNodeName();
832     int index = qname.indexOf(':');
833 
834     return (index < 0) ? qname : qname.substring(index + 1);
835   }
836 
837   /**
838    * Returns the element name with the namespace prefix (if any) replaced
839    * by the Namespace URI it was bound to. This is not a standard
840    * representation of a node name, but it allows convenient
841    * single-string comparison of the "universal" names of two nodes.
842    *
843    * @param elem Element to be examined.
844    *
845    * @return String in the form "namespaceURI:localname" if the node
846    * belongs to a namespace, or simply "localname" if it doesn't.
847    * @see #getExpandedAttributeName
848    */
getExpandedElementName(Element elem)849   public String getExpandedElementName(Element elem)
850   {
851 
852     String namespace = getNamespaceOfNode(elem);
853 
854     return (null != namespace)
855            ? namespace + ":" + getLocalNameOfNode(elem)
856            : getLocalNameOfNode(elem);
857   }
858 
859   /**
860    * Returns the attribute name with the namespace prefix (if any) replaced
861    * by the Namespace URI it was bound to. This is not a standard
862    * representation of a node name, but it allows convenient
863    * single-string comparison of the "universal" names of two nodes.
864    *
865    * @param attr Attr to be examined
866    *
867    * @return String in the form "namespaceURI:localname" if the node
868    * belongs to a namespace, or simply "localname" if it doesn't.
869    * @see #getExpandedElementName
870    */
getExpandedAttributeName(Attr attr)871   public String getExpandedAttributeName(Attr attr)
872   {
873 
874     String namespace = getNamespaceOfNode(attr);
875 
876     return (null != namespace)
877            ? namespace + ":" + getLocalNameOfNode(attr)
878            : getLocalNameOfNode(attr);
879   }
880 
881   //==========================================================
882   // SECTION: DOM Helper Functions
883   //==========================================================
884 
885   /**
886    * Tell if the node is ignorable whitespace. Note that this can
887    * be determined only in the context of a DTD or other Schema,
888    * and that DOM Level 2 has nostandardized DOM API which can
889    * return that information.
890    * @deprecated
891    *
892    * @param node Node to be examined
893    *
894    * @return CURRENTLY HARDCODED TO FALSE, but should return true if
895    * and only if the node is of type Text, contains only whitespace,
896    * and does not appear as part of the #PCDATA content of an element.
897    * (Note that determining this last may require allowing for
898    * Entity References.)
899    */
isIgnorableWhitespace(Text node)900   public boolean isIgnorableWhitespace(Text node)
901   {
902 
903     boolean isIgnorable = false;  // return value
904 
905     // TODO: I can probably do something to figure out if this
906     // space is ignorable from just the information in
907     // the DOM tree.
908         // -- You need to be able to distinguish whitespace
909         // that is #PCDATA from whitespace that isn't.  That requires
910         // DTD support, which won't be standardized until DOM Level 3.
911     return isIgnorable;
912   }
913 
914   /**
915    * Get the first unparented node in the ancestor chain.
916    * @deprecated
917    *
918    * @param node Starting node, to specify which chain to chase
919    *
920    * @return the topmost ancestor.
921    */
getRoot(Node node)922   public Node getRoot(Node node)
923   {
924 
925     Node root = null;
926 
927     while (node != null)
928     {
929       root = node;
930       node = getParentOfNode(node);
931     }
932 
933     return root;
934   }
935 
936   /**
937    * Get the root node of the document tree, regardless of
938    * whether or not the node passed in is a document node.
939    * <p>
940    * TODO: This doesn't handle DocumentFragments or "orphaned" subtrees
941    * -- it's currently returning ownerDocument even when the tree is
942    * not actually part of the main Document tree. We should either
943    * rewrite the description to say that it finds the Document node,
944    * or change the code to walk up the ancestor chain.
945 
946    *
947    * @param n Node to be examined
948    *
949    * @return the Document node. Note that this is not the correct answer
950    * if n was (or was a child of) a DocumentFragment or an orphaned node,
951    * as can arise if the DOM has been edited rather than being generated
952    * by a parser.
953    */
getRootNode(Node n)954   public Node getRootNode(Node n)
955   {
956     int nt = n.getNodeType();
957     return ( (Node.DOCUMENT_NODE == nt) || (Node.DOCUMENT_FRAGMENT_NODE == nt) )
958            ? n : n.getOwnerDocument();
959   }
960 
961   /**
962    * Test whether the given node is a namespace decl node. In DOM Level 2
963    * this can be done in a namespace-aware manner, but in Level 1 DOMs
964    * it has to be done by testing the node name.
965    *
966    * @param n Node to be examined.
967    *
968    * @return boolean -- true iff the node is an Attr whose name is
969    * "xmlns" or has the "xmlns:" prefix.
970    */
isNamespaceNode(Node n)971   public boolean isNamespaceNode(Node n)
972   {
973 
974     if (Node.ATTRIBUTE_NODE == n.getNodeType())
975     {
976       String attrName = n.getNodeName();
977 
978       return (attrName.startsWith("xmlns:") || attrName.equals("xmlns"));
979     }
980 
981     return false;
982   }
983 
984   /**
985    * Obtain the XPath-model parent of a DOM node -- ownerElement for Attrs,
986    * parent for other nodes.
987    * <p>
988    * Background: The DOM believes that you must be your Parent's
989    * Child, and thus Attrs don't have parents. XPath said that Attrs
990    * do have their owning Element as their parent. This function
991    * bridges the difference, either by using the DOM Level 2 ownerElement
992    * function or by using a "silly and expensive function" in Level 1
993    * DOMs.
994    * <p>
995    * (There's some discussion of future DOMs generalizing ownerElement
996    * into ownerNode and making it work on all types of nodes. This
997    * still wouldn't help the users of Level 1 or Level 2 DOMs)
998    * <p>
999    *
1000    * @param node Node whose XPath parent we want to obtain
1001    *
1002    * @return the parent of the node, or the ownerElement if it's an
1003    * Attr node, or null if the node is an orphan.
1004    *
1005    * @throws RuntimeException if the Document has no root element.
1006    * This can't arise if the Document was created
1007    * via the DOM Level 2 factory methods, but is possible if other
1008    * mechanisms were used to obtain it
1009    */
getParentOfNode(Node node)1010   public static Node getParentOfNode(Node node) throws RuntimeException
1011   {
1012     Node parent;
1013     short nodeType = node.getNodeType();
1014 
1015     if (Node.ATTRIBUTE_NODE == nodeType)
1016     {
1017       Document doc = node.getOwnerDocument();
1018           /*
1019       TBD:
1020       if(null == doc)
1021       {
1022         throw new RuntimeException(XSLMessages.createXPATHMessage(XPATHErrorResources.ER_CHILD_HAS_NO_OWNER_DOCUMENT, null));//"Attribute child does not have an owner document!");
1023       }
1024       */
1025 
1026           // Given how expensive the tree walk may be, we should first ask
1027           // whether this DOM can answer the question for us. The additional
1028           // test does slow down Level 1 DOMs slightly. DOMHelper2, which
1029           // is currently specialized for Xerces, assumes it can use the
1030           // Level 2 solution. We might want to have an intermediate stage,
1031           // which would assume DOM Level 2 but not assume Xerces.
1032           //
1033           // (Shouldn't have to check whether impl is null in a compliant DOM,
1034           // but let's be paranoid for a moment...)
1035           DOMImplementation impl=doc.getImplementation();
1036           if(impl!=null && impl.hasFeature("Core","2.0"))
1037           {
1038                   parent=((Attr)node).getOwnerElement();
1039                   return parent;
1040           }
1041 
1042           // DOM Level 1 solution, as fallback. Hugely expensive.
1043 
1044       Element rootElem = doc.getDocumentElement();
1045 
1046       if (null == rootElem)
1047       {
1048         throw new RuntimeException(
1049           XMLMessages.createXMLMessage(
1050             XMLErrorResources.ER_CHILD_HAS_NO_OWNER_DOCUMENT_ELEMENT,
1051             null));  //"Attribute child does not have an owner document element!");
1052       }
1053 
1054       parent = locateAttrParent(rootElem, node);
1055 
1056         }
1057     else
1058     {
1059       parent = node.getParentNode();
1060 
1061       // if((Node.DOCUMENT_NODE != nodeType) && (null == parent))
1062       // {
1063       //   throw new RuntimeException("Child does not have parent!");
1064       // }
1065     }
1066 
1067     return parent;
1068   }
1069 
1070   /**
1071    * Given an ID, return the element. This can work only if the document
1072    * is interpreted in the context of a DTD or Schema, since otherwise
1073    * we don't know which attributes are or aren't IDs.
1074    * <p>
1075    * Note that DOM Level 1 had no ability to retrieve this information.
1076    * DOM Level 2 introduced it but does not promise that it will be
1077    * supported in all DOMs; those which can't support it will always
1078    * return null.
1079    * <p>
1080    * TODO: getElementByID is currently unimplemented. Support DOM Level 2?
1081    *
1082    * @param id The unique identifier to be searched for.
1083    * @param doc The document to search within.
1084    * @return CURRENTLY HARDCODED TO NULL, but it should be:
1085    * The node which has this unique identifier, or null if there
1086    * is no such node or this DOM can't reliably recognize it.
1087    */
getElementByID(String id, Document doc)1088   public Element getElementByID(String id, Document doc)
1089   {
1090     return null;
1091   }
1092 
1093   /**
1094    * The getUnparsedEntityURI function returns the URI of the unparsed
1095    * entity with the specified name in the same document as the context
1096    * node (see [3.3 Unparsed Entities]). It returns the empty string if
1097    * there is no such entity.
1098    * <p>
1099    * XML processors may choose to use the System Identifier (if one
1100    * is provided) to resolve the entity, rather than the URI in the
1101    * Public Identifier. The details are dependent on the processor, and
1102    * we would have to support some form of plug-in resolver to handle
1103    * this properly. Currently, we simply return the System Identifier if
1104    * present, and hope that it a usable URI or that our caller can
1105    * map it to one.
1106    * TODO: Resolve Public Identifiers... or consider changing function name.
1107    * <p>
1108    * If we find a relative URI
1109    * reference, XML expects it to be resolved in terms of the base URI
1110    * of the document. The DOM doesn't do that for us, and it isn't
1111    * entirely clear whether that should be done here; currently that's
1112    * pushed up to a higher levelof our application. (Note that DOM Level
1113    * 1 didn't store the document's base URI.)
1114    * TODO: Consider resolving Relative URIs.
1115    * <p>
1116    * (The DOM's statement that "An XML processor may choose to
1117    * completely expand entities before the structure model is passed
1118    * to the DOM" refers only to parsed entities, not unparsed, and hence
1119    * doesn't affect this function.)
1120    *
1121    * @param name A string containing the Entity Name of the unparsed
1122    * entity.
1123    * @param doc Document node for the document to be searched.
1124    *
1125    * @return String containing the URI of the Unparsed Entity, or an
1126    * empty string if no such entity exists.
1127    */
getUnparsedEntityURI(String name, Document doc)1128   public String getUnparsedEntityURI(String name, Document doc)
1129   {
1130 
1131     String url = "";
1132     DocumentType doctype = doc.getDoctype();
1133 
1134     if (null != doctype)
1135     {
1136       NamedNodeMap entities = doctype.getEntities();
1137       if(null == entities)
1138         return url;
1139       Entity entity = (Entity) entities.getNamedItem(name);
1140       if(null == entity)
1141         return url;
1142 
1143       String notationName = entity.getNotationName();
1144 
1145       if (null != notationName)  // then it's unparsed
1146       {
1147         // The draft says: "The XSLT processor may use the public
1148         // identifier to generate a URI for the entity instead of the URI
1149         // specified in the system identifier. If the XSLT processor does
1150         // not use the public identifier to generate the URI, it must use
1151         // the system identifier; if the system identifier is a relative
1152         // URI, it must be resolved into an absolute URI using the URI of
1153         // the resource containing the entity declaration as the base
1154         // URI [RFC2396]."
1155         // So I'm falling a bit short here.
1156         url = entity.getSystemId();
1157 
1158         if (null == url)
1159         {
1160           url = entity.getPublicId();
1161         }
1162         else
1163         {
1164           // This should be resolved to an absolute URL, but that's hard
1165           // to do from here.
1166         }
1167       }
1168     }
1169 
1170     return url;
1171   }
1172 
1173   /**
1174    * Support for getParentOfNode; walks a DOM tree until it finds
1175    * the Element which owns the Attr. This is hugely expensive, and
1176    * if at all possible you should use the DOM Level 2 Attr.ownerElement()
1177    * method instead.
1178    *  <p>
1179    * The DOM Level 1 developers expected that folks would keep track
1180    * of the last Element they'd seen and could recover the info from
1181    * that source. Obviously that doesn't work very well if the only
1182    * information you've been presented with is the Attr. The DOM Level 2
1183    * getOwnerElement() method fixes that, but only for Level 2 and
1184    * later DOMs.
1185    *
1186    * @param elem Element whose subtree is to be searched for this Attr
1187    * @param attr Attr whose owner is to be located.
1188    *
1189    * @return the first Element whose attribute list includes the provided
1190    * attr. In modern DOMs, this will also be the only such Element. (Early
1191    * DOMs had some hope that Attrs might be sharable, but this idea has
1192    * been abandoned.)
1193    */
locateAttrParent(Element elem, Node attr)1194   private static Node locateAttrParent(Element elem, Node attr)
1195   {
1196 
1197     Node parent = null;
1198 
1199         // This should only be called for Level 1 DOMs, so we don't have to
1200         // worry about namespace issues. In later levels, it's possible
1201         // for a DOM to have two Attrs with the same NodeName but
1202         // different namespaces, and we'd need to get getAttributeNodeNS...
1203         // but later levels also have Attr.getOwnerElement.
1204         Attr check=elem.getAttributeNode(attr.getNodeName());
1205         if(check==attr)
1206                 parent = elem;
1207 
1208     if (null == parent)
1209     {
1210       for (Node node = elem.getFirstChild(); null != node;
1211               node = node.getNextSibling())
1212       {
1213         if (Node.ELEMENT_NODE == node.getNodeType())
1214         {
1215           parent = locateAttrParent((Element) node, attr);
1216 
1217           if (null != parent)
1218             break;
1219         }
1220       }
1221     }
1222 
1223     return parent;
1224   }
1225 
1226   /**
1227    * The factory object used for creating nodes
1228    * in the result tree.
1229    */
1230   protected Document m_DOMFactory = null;
1231 
1232   /**
1233    * Store the factory object required to create DOM nodes
1234    * in the result tree. In fact, that's just the result tree's
1235    * Document node...
1236    *
1237    * @param domFactory The DOM Document Node within whose context
1238    * the result tree will be built.
1239    */
setDOMFactory(Document domFactory)1240   public void setDOMFactory(Document domFactory)
1241   {
1242     this.m_DOMFactory = domFactory;
1243   }
1244 
1245   /**
1246    * Retrieve the factory object required to create DOM nodes
1247    * in the result tree.
1248    *
1249    * @return The result tree's DOM Document Node.
1250    */
getDOMFactory()1251   public Document getDOMFactory()
1252   {
1253 
1254     if (null == this.m_DOMFactory)
1255     {
1256       this.m_DOMFactory = createDocument();
1257     }
1258 
1259     return this.m_DOMFactory;
1260   }
1261 
1262   /**
1263    * Get the textual contents of the node. See
1264    * getNodeData(Node,FastStringBuffer) for discussion of how
1265    * whitespace nodes are handled.
1266    *
1267    * @param node DOM Node to be examined
1268    * @return String containing a concatenation of all the
1269    * textual content within that node.
1270    * @see #getNodeData(Node,FastStringBuffer)
1271    *
1272    */
getNodeData(Node node)1273   public static String getNodeData(Node node)
1274   {
1275 
1276     FastStringBuffer buf = StringBufferPool.get();
1277     String s;
1278 
1279     try
1280     {
1281       getNodeData(node, buf);
1282 
1283       s = (buf.length() > 0) ? buf.toString() : "";
1284     }
1285     finally
1286     {
1287       StringBufferPool.free(buf);
1288     }
1289 
1290     return s;
1291   }
1292 
1293   /**
1294    * Retrieve the text content of a DOM subtree, appending it into a
1295    * user-supplied FastStringBuffer object. Note that attributes are
1296    * not considered part of the content of an element.
1297    * <p>
1298    * There are open questions regarding whitespace stripping.
1299    * Currently we make no special effort in that regard, since the standard
1300    * DOM doesn't yet provide DTD-based information to distinguish
1301    * whitespace-in-element-context from genuine #PCDATA. Note that we
1302    * should probably also consider xml:space if/when we address this.
1303    * DOM Level 3 may solve the problem for us.
1304    *
1305    * @param node Node whose subtree is to be walked, gathering the
1306    * contents of all Text or CDATASection nodes.
1307    * @param buf FastStringBuffer into which the contents of the text
1308    * nodes are to be concatenated.
1309    */
getNodeData(Node node, FastStringBuffer buf)1310   public static void getNodeData(Node node, FastStringBuffer buf)
1311   {
1312 
1313     switch (node.getNodeType())
1314     {
1315     case Node.DOCUMENT_FRAGMENT_NODE :
1316     case Node.DOCUMENT_NODE :
1317     case Node.ELEMENT_NODE :
1318     {
1319       for (Node child = node.getFirstChild(); null != child;
1320               child = child.getNextSibling())
1321       {
1322         getNodeData(child, buf);
1323       }
1324     }
1325     break;
1326     case Node.TEXT_NODE :
1327     case Node.CDATA_SECTION_NODE :
1328       buf.append(node.getNodeValue());
1329       break;
1330     case Node.ATTRIBUTE_NODE :
1331       buf.append(node.getNodeValue());
1332       break;
1333     case Node.PROCESSING_INSTRUCTION_NODE :
1334       // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING);
1335       break;
1336     default :
1337       // ignore
1338       break;
1339     }
1340   }
1341 }
1342