1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the  "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 /*
19  * $Id: ToTextStream.java 468654 2006-10-28 07:09:23Z minchau $
20  */
21 package org.apache.xml.serializer;
22 
23 import java.io.IOException;
24 
25 import org.apache.xml.serializer.utils.MsgKey;
26 import org.apache.xml.serializer.utils.Utils;
27 import org.xml.sax.Attributes;
28 import org.xml.sax.SAXException;
29 
30 /**
31  * This class is not a public API.
32  * It is only public because it is used in other packages.
33  * This class converts SAX or SAX-like calls to a
34  * serialized document for xsl:output method of "text".
35  * @xsl.usage internal
36  */
37 public class ToTextStream extends ToStream
38 {
39 
40 
41   /**
42    * Default constructor.
43    */
ToTextStream()44   public ToTextStream()
45   {
46     super();
47   }
48 
49 
50 
51   /**
52    * Receive notification of the beginning of a document.
53    *
54    * <p>The SAX parser will invoke this method only once, before any
55    * other methods in this interface or in DTDHandler (except for
56    * setDocumentLocator).</p>
57    *
58    * @throws org.xml.sax.SAXException Any SAX exception, possibly
59    *            wrapping another exception.
60    *
61    * @throws org.xml.sax.SAXException
62    */
startDocumentInternal()63   protected void startDocumentInternal() throws org.xml.sax.SAXException
64   {
65     super.startDocumentInternal();
66 
67     m_needToCallStartDocument = false;
68 
69     // No action for the moment.
70   }
71 
72   /**
73    * Receive notification of the end of a document.
74    *
75    * <p>The SAX parser will invoke this method only once, and it will
76    * be the last method invoked during the parse.  The parser shall
77    * not invoke this method until it has either abandoned parsing
78    * (because of an unrecoverable error) or reached the end of
79    * input.</p>
80    *
81    * @throws org.xml.sax.SAXException Any SAX exception, possibly
82    *            wrapping another exception.
83    *
84    * @throws org.xml.sax.SAXException
85    */
endDocument()86   public void endDocument() throws org.xml.sax.SAXException
87   {
88     flushPending();
89     flushWriter();
90     if (m_tracer != null)
91         super.fireEndDoc();
92   }
93 
94   /**
95    * Receive notification of the beginning of an element.
96    *
97    * <p>The Parser will invoke this method at the beginning of every
98    * element in the XML document; there will be a corresponding
99    * endElement() event for every startElement() event (even when the
100    * element is empty). All of the element's content will be
101    * reported, in order, before the corresponding endElement()
102    * event.</p>
103    *
104    * <p>If the element name has a namespace prefix, the prefix will
105    * still be attached.  Note that the attribute list provided will
106    * contain only attributes with explicit values (specified or
107    * defaulted): #IMPLIED attributes will be omitted.</p>
108    *
109    *
110    * @param namespaceURI The Namespace URI, or the empty string if the
111    *        element has no Namespace URI or if Namespace
112    *        processing is not being performed.
113    * @param localName The local name (without prefix), or the
114    *        empty string if Namespace processing is not being
115    *        performed.
116    * @param name The qualified name (with prefix), or the
117    *        empty string if qualified names are not available.
118    * @param atts The attributes attached to the element, if any.
119    * @throws org.xml.sax.SAXException Any SAX exception, possibly
120    *            wrapping another exception.
121    * @see #endElement
122    * @see org.xml.sax.AttributeList
123    *
124    * @throws org.xml.sax.SAXException
125    */
startElement( String namespaceURI, String localName, String name, Attributes atts)126   public void startElement(
127           String namespaceURI, String localName, String name, Attributes atts)
128             throws org.xml.sax.SAXException
129   {
130     // time to fire off startElement event
131     if (m_tracer != null) {
132         super.fireStartElem(name);
133         this.firePseudoAttributes();
134     }
135     return;
136   }
137 
138   /**
139    * Receive notification of the end of an element.
140    *
141    * <p>The SAX parser will invoke this method at the end of every
142    * element in the XML document; there will be a corresponding
143    * startElement() event for every endElement() event (even when the
144    * element is empty).</p>
145    *
146    * <p>If the element name has a namespace prefix, the prefix will
147    * still be attached to the name.</p>
148    *
149    *
150    * @param namespaceURI The Namespace URI, or the empty string if the
151    *        element has no Namespace URI or if Namespace
152    *        processing is not being performed.
153    * @param localName The local name (without prefix), or the
154    *        empty string if Namespace processing is not being
155    *        performed.
156    * @param name The qualified name (with prefix), or the
157    *        empty string if qualified names are not available.
158    * @throws org.xml.sax.SAXException Any SAX exception, possibly
159    *            wrapping another exception.
160    *
161    * @throws org.xml.sax.SAXException
162    */
endElement(String namespaceURI, String localName, String name)163   public void endElement(String namespaceURI, String localName, String name)
164           throws org.xml.sax.SAXException
165   {
166         if (m_tracer != null)
167             super.fireEndElem(name);
168   }
169 
170   /**
171    * Receive notification of character data.
172    *
173    * <p>The Parser will call this method to report each chunk of
174    * character data.  SAX parsers may return all contiguous character
175    * data in a single chunk, or they may split it into several
176    * chunks; however, all of the characters in any single event
177    * must come from the same external entity, so that the Locator
178    * provides useful information.</p>
179    *
180    * <p>The application must not attempt to read from the array
181    * outside of the specified range.</p>
182    *
183    * <p>Note that some parsers will report whitespace using the
184    * ignorableWhitespace() method rather than this one (validating
185    * parsers must do so).</p>
186    *
187    * @param ch The characters from the XML document.
188    * @param start The start position in the array.
189    * @param length The number of characters to read from the array.
190    * @throws org.xml.sax.SAXException Any SAX exception, possibly
191    *            wrapping another exception.
192    * @see #ignorableWhitespace
193    * @see org.xml.sax.Locator
194    */
characters(char ch[], int start, int length)195   public void characters(char ch[], int start, int length)
196           throws org.xml.sax.SAXException
197   {
198 
199     flushPending();
200 
201     try
202     {
203         if (inTemporaryOutputState()) {
204             /* leave characters un-processed as we are
205              * creating temporary output, the output generated by
206              * this serializer will be input to a final serializer
207              * later on and it will do the processing in final
208              * output state (not temporary output state).
209              *
210              * A "temporary" ToTextStream serializer is used to
211              * evaluate attribute value templates (for example),
212              * and the result of evaluating such a thing
213              * is fed into a final serializer later on.
214              */
215             m_writer.write(ch, start, length);
216         }
217         else {
218             // In final output state we do process the characters!
219             writeNormalizedChars(ch, start, length, m_lineSepUse);
220         }
221 
222         if (m_tracer != null)
223             super.fireCharEvent(ch, start, length);
224     }
225     catch(IOException ioe)
226     {
227       throw new SAXException(ioe);
228     }
229   }
230 
231   /**
232    * If available, when the disable-output-escaping attribute is used,
233    * output raw text without escaping.
234    *
235    * @param ch The characters from the XML document.
236    * @param start The start position in the array.
237    * @param length The number of characters to read from the array.
238    *
239    * @throws org.xml.sax.SAXException Any SAX exception, possibly
240    *            wrapping another exception.
241    */
charactersRaw(char ch[], int start, int length)242   public void charactersRaw(char ch[], int start, int length)
243           throws org.xml.sax.SAXException
244   {
245 
246     try
247     {
248       writeNormalizedChars(ch, start, length, m_lineSepUse);
249     }
250     catch(IOException ioe)
251     {
252       throw new SAXException(ioe);
253     }
254   }
255 
256     /**
257      * Normalize the characters, but don't escape.  Different from
258      * SerializerToXML#writeNormalizedChars because it does not attempt to do
259      * XML escaping at all.
260      *
261      * @param ch The characters from the XML document.
262      * @param start The start position in the array.
263      * @param length The number of characters to read from the array.
264      * @param useLineSep true if the operating systems
265      * end-of-line separator should be output rather than a new-line character.
266      *
267      * @throws IOException
268      * @throws org.xml.sax.SAXException
269      */
writeNormalizedChars( final char ch[], final int start, final int length, final boolean useLineSep)270     void writeNormalizedChars(
271         final char ch[],
272             final int start,
273             final int length,
274             final boolean useLineSep)
275             throws IOException, org.xml.sax.SAXException
276     {
277         final String encoding = getEncoding();
278         final java.io.Writer writer = m_writer;
279         final int end = start + length;
280 
281         /* copy a few "constants" before the loop for performance */
282         final char S_LINEFEED = CharInfo.S_LINEFEED;
283 
284         // This for() loop always increments i by one at the end
285         // of the loop.  Additional increments of i adjust for when
286         // two input characters (a high/low UTF16 surrogate pair)
287         // are processed.
288         for (int i = start; i < end; i++) {
289             final char c = ch[i];
290 
291             if (S_LINEFEED == c && useLineSep) {
292                 writer.write(m_lineSep, 0, m_lineSepLen);
293                 // one input char processed
294             } else if (m_encodingInfo.isInEncoding(c)) {
295                 writer.write(c);
296                 // one input char processed
297             } else if (Encodings.isHighUTF16Surrogate(c)) {
298                 final int codePoint = writeUTF16Surrogate(c, ch, i, end);
299                 if (codePoint != 0) {
300                     // I think we can just emit the message,
301                     // not crash and burn.
302                     final String integralValue = Integer.toString(codePoint);
303                     final String msg = Utils.messages.createMessage(
304                         MsgKey.ER_ILLEGAL_CHARACTER,
305                         new Object[] { integralValue, encoding });
306 
307                     //Older behavior was to throw the message,
308                     //but newer gentler behavior is to write a message to System.err
309                     //throw new SAXException(msg);
310                     System.err.println(msg);
311 
312                 }
313                 i++; // two input chars processed
314             } else {
315                 // Don't know what to do with this char, it is
316                 // not in the encoding and not a high char in
317                 // a surrogate pair, so write out as an entity ref
318                 if (encoding != null) {
319                     /* The output encoding is known,
320                      * so somthing is wrong.
321                      */
322 
323                     // not in the encoding, so write out a character reference
324                     writer.write('&');
325                     writer.write('#');
326                     writer.write(Integer.toString(c));
327                     writer.write(';');
328 
329                     // I think we can just emit the message,
330                     // not crash and burn.
331                     final String integralValue = Integer.toString(c);
332                     final String msg = Utils.messages.createMessage(
333                         MsgKey.ER_ILLEGAL_CHARACTER,
334                         new Object[] { integralValue, encoding });
335 
336                     //Older behavior was to throw the message,
337                     //but newer gentler behavior is to write a message to System.err
338                     //throw new SAXException(msg);
339                     System.err.println(msg);
340                 } else {
341                     /* The output encoding is not known,
342                      * so just write it out as-is.
343                      */
344                     writer.write(c);
345                 }
346 
347                 // one input char was processed
348             }
349         }
350     }
351 
352   /**
353    * Receive notification of cdata.
354    *
355    * <p>The Parser will call this method to report each chunk of
356    * character data.  SAX parsers may return all contiguous character
357    * data in a single chunk, or they may split it into several
358    * chunks; however, all of the characters in any single event
359    * must come from the same external entity, so that the Locator
360    * provides useful information.</p>
361    *
362    * <p>The application must not attempt to read from the array
363    * outside of the specified range.</p>
364    *
365    * <p>Note that some parsers will report whitespace using the
366    * ignorableWhitespace() method rather than this one (validating
367    * parsers must do so).</p>
368    *
369    * @param ch The characters from the XML document.
370    * @param start The start position in the array.
371    * @param length The number of characters to read from the array.
372    * @throws org.xml.sax.SAXException Any SAX exception, possibly
373    *            wrapping another exception.
374    * @see #ignorableWhitespace
375    * @see org.xml.sax.Locator
376    */
cdata(char ch[], int start, int length)377   public void cdata(char ch[], int start, int length)
378           throws org.xml.sax.SAXException
379   {
380     try
381     {
382         writeNormalizedChars(ch, start, length, m_lineSepUse);
383         if (m_tracer != null)
384             super.fireCDATAEvent(ch, start, length);
385     }
386     catch(IOException ioe)
387     {
388       throw new SAXException(ioe);
389     }
390   }
391 
392   /**
393    * Receive notification of ignorable whitespace in element content.
394    *
395    * <p>Validating Parsers must use this method to report each chunk
396    * of ignorable whitespace (see the W3C XML 1.0 recommendation,
397    * section 2.10): non-validating parsers may also use this method
398    * if they are capable of parsing and using content models.</p>
399    *
400    * <p>SAX parsers may return all contiguous whitespace in a single
401    * chunk, or they may split it into several chunks; however, all of
402    * the characters in any single event must come from the same
403    * external entity, so that the Locator provides useful
404    * information.</p>
405    *
406    * <p>The application must not attempt to read from the array
407    * outside of the specified range.</p>
408    *
409    * @param ch The characters from the XML document.
410    * @param start The start position in the array.
411    * @param length The number of characters to read from the array.
412    * @throws org.xml.sax.SAXException Any SAX exception, possibly
413    *            wrapping another exception.
414    * @see #characters
415    *
416    * @throws org.xml.sax.SAXException
417    */
ignorableWhitespace(char ch[], int start, int length)418   public void ignorableWhitespace(char ch[], int start, int length)
419           throws org.xml.sax.SAXException
420   {
421 
422     try
423     {
424       writeNormalizedChars(ch, start, length, m_lineSepUse);
425     }
426     catch(IOException ioe)
427     {
428       throw new SAXException(ioe);
429     }
430   }
431 
432   /**
433    * Receive notification of a processing instruction.
434    *
435    * <p>The Parser will invoke this method once for each processing
436    * instruction found: note that processing instructions may occur
437    * before or after the main document element.</p>
438    *
439    * <p>A SAX parser should never report an XML declaration (XML 1.0,
440    * section 2.8) or a text declaration (XML 1.0, section 4.3.1)
441    * using this method.</p>
442    *
443    * @param target The processing instruction target.
444    * @param data The processing instruction data, or null if
445    *        none was supplied.
446    * @throws org.xml.sax.SAXException Any SAX exception, possibly
447    *            wrapping another exception.
448    *
449    * @throws org.xml.sax.SAXException
450    */
processingInstruction(String target, String data)451   public void processingInstruction(String target, String data)
452           throws org.xml.sax.SAXException
453   {
454     // flush anything pending first
455     flushPending();
456 
457     if (m_tracer != null)
458         super.fireEscapingEvent(target, data);
459   }
460 
461   /**
462    * Called when a Comment is to be constructed.
463    * Note that Xalan will normally invoke the other version of this method.
464    * %REVIEW% In fact, is this one ever needed, or was it a mistake?
465    *
466    * @param   data  The comment data.
467    * @throws org.xml.sax.SAXException Any SAX exception, possibly
468    *            wrapping another exception.
469    */
comment(String data)470   public void comment(String data) throws org.xml.sax.SAXException
471   {
472       final int length = data.length();
473       if (length > m_charsBuff.length)
474       {
475           m_charsBuff = new char[length*2 + 1];
476       }
477       data.getChars(0, length, m_charsBuff, 0);
478       comment(m_charsBuff, 0, length);
479   }
480 
481   /**
482    * Report an XML comment anywhere in the document.
483    *
484    * This callback will be used for comments inside or outside the
485    * document element, including comments in the external DTD
486    * subset (if read).
487    *
488    * @param ch An array holding the characters in the comment.
489    * @param start The starting position in the array.
490    * @param length The number of characters to use from the array.
491    * @throws org.xml.sax.SAXException The application may raise an exception.
492    */
comment(char ch[], int start, int length)493   public void comment(char ch[], int start, int length)
494           throws org.xml.sax.SAXException
495   {
496 
497     flushPending();
498     if (m_tracer != null)
499         super.fireCommentEvent(ch, start, length);
500   }
501 
502   /**
503    * Receive notivication of a entityReference.
504    *
505    * @param name non-null reference to the name of the entity.
506    *
507    * @throws org.xml.sax.SAXException
508    */
entityReference(String name)509   public void entityReference(String name) throws org.xml.sax.SAXException
510   {
511         if (m_tracer != null)
512             super.fireEntityReference(name);
513   }
514 
515     /**
516      * @see ExtendedContentHandler#addAttribute(String, String, String, String, String)
517      */
addAttribute( String uri, String localName, String rawName, String type, String value, boolean XSLAttribute)518     public void addAttribute(
519         String uri,
520         String localName,
521         String rawName,
522         String type,
523         String value,
524         boolean XSLAttribute)
525     {
526         // do nothing, just forget all about the attribute
527     }
528 
529     /**
530      * @see org.xml.sax.ext.LexicalHandler#endCDATA()
531      */
endCDATA()532     public void endCDATA() throws SAXException
533     {
534         // do nothing
535     }
536 
537     /**
538      * @see ExtendedContentHandler#endElement(String)
539      */
endElement(String elemName)540     public void endElement(String elemName) throws SAXException
541     {
542         if (m_tracer != null)
543             super.fireEndElem(elemName);
544     }
545 
546     /**
547      * From XSLTC
548      */
startElement( String elementNamespaceURI, String elementLocalName, String elementName)549     public void startElement(
550     String elementNamespaceURI,
551     String elementLocalName,
552     String elementName)
553     throws SAXException
554     {
555         if (m_needToCallStartDocument)
556             startDocumentInternal();
557         // time to fire off startlement event.
558         if (m_tracer != null) {
559             super.fireStartElem(elementName);
560             this.firePseudoAttributes();
561         }
562 
563         return;
564     }
565 
566 
567     /**
568      * From XSLTC
569      */
characters(String characters)570     public void characters(String characters)
571     throws SAXException
572     {
573         final int length = characters.length();
574         if (length > m_charsBuff.length)
575         {
576             m_charsBuff = new char[length*2 + 1];
577         }
578         characters.getChars(0, length, m_charsBuff, 0);
579         characters(m_charsBuff, 0, length);
580     }
581 
582 
583     /**
584      * From XSLTC
585      */
addAttribute(String name, String value)586     public void addAttribute(String name, String value)
587     {
588         // do nothing, forget about the attribute
589     }
590 
591     /**
592      * Add a unique attribute
593      */
addUniqueAttribute(String qName, String value, int flags)594     public void addUniqueAttribute(String qName, String value, int flags)
595         throws SAXException
596     {
597         // do nothing, forget about the attribute
598     }
599 
startPrefixMapping( String prefix, String uri, boolean shouldFlush)600     public boolean startPrefixMapping(
601         String prefix,
602         String uri,
603         boolean shouldFlush)
604         throws SAXException
605     {
606         // no namespace support for HTML
607         return false;
608     }
609 
610 
startPrefixMapping(String prefix, String uri)611     public void startPrefixMapping(String prefix, String uri)
612         throws org.xml.sax.SAXException
613     {
614         // no namespace support for HTML
615     }
616 
617 
namespaceAfterStartElement( final String prefix, final String uri)618     public void namespaceAfterStartElement(
619         final String prefix,
620         final String uri)
621         throws SAXException
622     {
623         // no namespace support for HTML
624     }
625 
flushPending()626     public void flushPending() throws org.xml.sax.SAXException
627     {
628             if (m_needToCallStartDocument)
629             {
630                 startDocumentInternal();
631                 m_needToCallStartDocument = false;
632             }
633     }
634 }
635