1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the  "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 /*
19  * $Id: ToHTMLStream.java 468654 2006-10-28 07:09:23Z minchau $
20  */
21 package org.apache.xml.serializer;
22 
23 import java.io.IOException;
24 import java.util.Properties;
25 
26 import javax.xml.transform.Result;
27 
28 import org.apache.xml.serializer.utils.MsgKey;
29 import org.apache.xml.serializer.utils.Utils;
30 import org.xml.sax.Attributes;
31 import org.xml.sax.SAXException;
32 
33 /**
34  * This serializer takes a series of SAX or
35  * SAX-like events and writes its output
36  * to the given stream.
37  *
38  * This class is not a public API, it is public
39  * because it is used from another package.
40  *
41  * @xsl.usage internal
42  */
43 public class ToHTMLStream extends ToStream
44 {
45 
46     /** This flag is set while receiving events from the DTD. */
47     protected boolean m_inDTD = false;
48 
49     /** True if the current element is a block element.
50      *  NOTE (sb): this probably needs to be a stack rather than a single flag. */
51     private boolean m_inBlockElem = false;
52 
53     /**
54      * HTML character map: tells which characters need special treatment and provides
55      * character to entity name lookup. The constructor installs it as m_charInfo.
56      */
57     private final CharInfo m_htmlcharInfo =
58 //        new CharInfo(CharInfo.HTML_ENTITIES_RESOURCE);
59         CharInfo.getCharInfo(CharInfo.HTML_ENTITIES_RESOURCE, Method.HTML);
60 
61     /** A digital search trie for fast, case insensitive lookup of ElemDesc objects; one table shared by the whole class. */
62     static final Trie m_elementFlags = new Trie();
63 
64     static { // fill the shared table exactly once, at class initialization
65         initTagReference(m_elementFlags);
66     }
initTagReference(Trie m_elementFlags)67     static void initTagReference(Trie m_elementFlags) {
68 
69         // HTML 4.0 loose DTD
70         m_elementFlags.put("BASEFONT", new ElemDesc(0 | ElemDesc.EMPTY));
71         m_elementFlags.put(
72             "FRAME",
73             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
74         m_elementFlags.put("FRAMESET", new ElemDesc(0 | ElemDesc.BLOCK));
75         m_elementFlags.put("NOFRAMES", new ElemDesc(0 | ElemDesc.BLOCK));
76         m_elementFlags.put(
77             "ISINDEX",
78             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
79         m_elementFlags.put(
80             "APPLET",
81             new ElemDesc(0 | ElemDesc.WHITESPACESENSITIVE));
82         m_elementFlags.put("CENTER", new ElemDesc(0 | ElemDesc.BLOCK));
83         m_elementFlags.put("DIR", new ElemDesc(0 | ElemDesc.BLOCK));
84         m_elementFlags.put("MENU", new ElemDesc(0 | ElemDesc.BLOCK));
85 
86         // HTML 4.0 strict DTD
87         m_elementFlags.put("TT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
88         m_elementFlags.put("I", new ElemDesc(0 | ElemDesc.FONTSTYLE));
89         m_elementFlags.put("B", new ElemDesc(0 | ElemDesc.FONTSTYLE));
90         m_elementFlags.put("BIG", new ElemDesc(0 | ElemDesc.FONTSTYLE));
91         m_elementFlags.put("SMALL", new ElemDesc(0 | ElemDesc.FONTSTYLE));
92         m_elementFlags.put("EM", new ElemDesc(0 | ElemDesc.PHRASE));
93         m_elementFlags.put("STRONG", new ElemDesc(0 | ElemDesc.PHRASE));
94         m_elementFlags.put("DFN", new ElemDesc(0 | ElemDesc.PHRASE));
95         m_elementFlags.put("CODE", new ElemDesc(0 | ElemDesc.PHRASE));
96         m_elementFlags.put("SAMP", new ElemDesc(0 | ElemDesc.PHRASE));
97         m_elementFlags.put("KBD", new ElemDesc(0 | ElemDesc.PHRASE));
98         m_elementFlags.put("VAR", new ElemDesc(0 | ElemDesc.PHRASE));
99         m_elementFlags.put("CITE", new ElemDesc(0 | ElemDesc.PHRASE));
100         m_elementFlags.put("ABBR", new ElemDesc(0 | ElemDesc.PHRASE));
101         m_elementFlags.put("ACRONYM", new ElemDesc(0 | ElemDesc.PHRASE));
102         m_elementFlags.put(
103             "SUP",
104             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
105         m_elementFlags.put(
106             "SUB",
107             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
108         m_elementFlags.put(
109             "SPAN",
110             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
111         m_elementFlags.put(
112             "BDO",
113             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
114         m_elementFlags.put(
115             "BR",
116             new ElemDesc(
117                 0
118                     | ElemDesc.SPECIAL
119                     | ElemDesc.ASPECIAL
120                     | ElemDesc.EMPTY
121                     | ElemDesc.BLOCK));
122         m_elementFlags.put("BODY", new ElemDesc(0 | ElemDesc.BLOCK));
123         m_elementFlags.put(
124             "ADDRESS",
125             new ElemDesc(
126                 0
127                     | ElemDesc.BLOCK
128                     | ElemDesc.BLOCKFORM
129                     | ElemDesc.BLOCKFORMFIELDSET));
130         m_elementFlags.put(
131             "DIV",
132             new ElemDesc(
133                 0
134                     | ElemDesc.BLOCK
135                     | ElemDesc.BLOCKFORM
136                     | ElemDesc.BLOCKFORMFIELDSET));
137         m_elementFlags.put("A", new ElemDesc(0 | ElemDesc.SPECIAL));
138         m_elementFlags.put(
139             "MAP",
140             new ElemDesc(
141                 0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL | ElemDesc.BLOCK));
142         m_elementFlags.put(
143             "AREA",
144             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
145         m_elementFlags.put(
146             "LINK",
147             new ElemDesc(
148                 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
149         m_elementFlags.put(
150             "IMG",
151             new ElemDesc(
152                 0
153                     | ElemDesc.SPECIAL
154                     | ElemDesc.ASPECIAL
155                     | ElemDesc.EMPTY
156                     | ElemDesc.WHITESPACESENSITIVE));
157         m_elementFlags.put(
158             "OBJECT",
159             new ElemDesc(
160                 0
161                     | ElemDesc.SPECIAL
162                     | ElemDesc.ASPECIAL
163                     | ElemDesc.HEADMISC
164                     | ElemDesc.WHITESPACESENSITIVE));
165         m_elementFlags.put("PARAM", new ElemDesc(0 | ElemDesc.EMPTY));
166         m_elementFlags.put(
167             "HR",
168             new ElemDesc(
169                 0
170                     | ElemDesc.BLOCK
171                     | ElemDesc.BLOCKFORM
172                     | ElemDesc.BLOCKFORMFIELDSET
173                     | ElemDesc.EMPTY));
174         m_elementFlags.put(
175             "P",
176             new ElemDesc(
177                 0
178                     | ElemDesc.BLOCK
179                     | ElemDesc.BLOCKFORM
180                     | ElemDesc.BLOCKFORMFIELDSET));
181         m_elementFlags.put(
182             "H1",
183             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
184         m_elementFlags.put(
185             "H2",
186             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
187         m_elementFlags.put(
188             "H3",
189             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
190         m_elementFlags.put(
191             "H4",
192             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
193         m_elementFlags.put(
194             "H5",
195             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
196         m_elementFlags.put(
197             "H6",
198             new ElemDesc(0 | ElemDesc.HEAD | ElemDesc.BLOCK));
199         m_elementFlags.put(
200             "PRE",
201             new ElemDesc(0 | ElemDesc.PREFORMATTED | ElemDesc.BLOCK));
202         m_elementFlags.put(
203             "Q",
204             new ElemDesc(0 | ElemDesc.SPECIAL | ElemDesc.ASPECIAL));
205         m_elementFlags.put(
206             "BLOCKQUOTE",
207             new ElemDesc(
208                 0
209                     | ElemDesc.BLOCK
210                     | ElemDesc.BLOCKFORM
211                     | ElemDesc.BLOCKFORMFIELDSET));
212         m_elementFlags.put("INS", new ElemDesc(0));
213         m_elementFlags.put("DEL", new ElemDesc(0));
214         m_elementFlags.put(
215             "DL",
216             new ElemDesc(
217                 0
218                     | ElemDesc.BLOCK
219                     | ElemDesc.BLOCKFORM
220                     | ElemDesc.BLOCKFORMFIELDSET));
221         m_elementFlags.put("DT", new ElemDesc(0 | ElemDesc.BLOCK));
222         m_elementFlags.put("DD", new ElemDesc(0 | ElemDesc.BLOCK));
223         m_elementFlags.put(
224             "OL",
225             new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
226         m_elementFlags.put(
227             "UL",
228             new ElemDesc(0 | ElemDesc.LIST | ElemDesc.BLOCK));
229         m_elementFlags.put("LI", new ElemDesc(0 | ElemDesc.BLOCK));
230         m_elementFlags.put("FORM", new ElemDesc(0 | ElemDesc.BLOCK));
231         m_elementFlags.put("LABEL", new ElemDesc(0 | ElemDesc.FORMCTRL));
232         m_elementFlags.put(
233             "INPUT",
234             new ElemDesc(
235                 0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL | ElemDesc.EMPTY));
236         m_elementFlags.put(
237             "SELECT",
238             new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
239         m_elementFlags.put("OPTGROUP", new ElemDesc(0));
240         m_elementFlags.put("OPTION", new ElemDesc(0));
241         m_elementFlags.put(
242             "TEXTAREA",
243             new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
244         m_elementFlags.put(
245             "FIELDSET",
246             new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.BLOCKFORM));
247         m_elementFlags.put("LEGEND", new ElemDesc(0));
248         m_elementFlags.put(
249             "BUTTON",
250             new ElemDesc(0 | ElemDesc.FORMCTRL | ElemDesc.INLINELABEL));
251         m_elementFlags.put(
252             "TABLE",
253             new ElemDesc(
254                 0
255                     | ElemDesc.BLOCK
256                     | ElemDesc.BLOCKFORM
257                     | ElemDesc.BLOCKFORMFIELDSET));
258         m_elementFlags.put("CAPTION", new ElemDesc(0 | ElemDesc.BLOCK));
259         m_elementFlags.put("THEAD", new ElemDesc(0 | ElemDesc.BLOCK));
260         m_elementFlags.put("TFOOT", new ElemDesc(0 | ElemDesc.BLOCK));
261         m_elementFlags.put("TBODY", new ElemDesc(0 | ElemDesc.BLOCK));
262         m_elementFlags.put("COLGROUP", new ElemDesc(0 | ElemDesc.BLOCK));
263         m_elementFlags.put(
264             "COL",
265             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
266         m_elementFlags.put("TR", new ElemDesc(0 | ElemDesc.BLOCK));
267         m_elementFlags.put("TH", new ElemDesc(0));
268         m_elementFlags.put("TD", new ElemDesc(0));
269         m_elementFlags.put(
270             "HEAD",
271             new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.HEADELEM));
272         m_elementFlags.put("TITLE", new ElemDesc(0 | ElemDesc.BLOCK));
273         m_elementFlags.put(
274             "BASE",
275             new ElemDesc(0 | ElemDesc.EMPTY | ElemDesc.BLOCK));
276         m_elementFlags.put(
277             "META",
278             new ElemDesc(
279                 0 | ElemDesc.HEADMISC | ElemDesc.EMPTY | ElemDesc.BLOCK));
280         m_elementFlags.put(
281             "STYLE",
282             new ElemDesc(
283                 0 | ElemDesc.HEADMISC | ElemDesc.RAW | ElemDesc.BLOCK));
284         m_elementFlags.put(
285             "SCRIPT",
286             new ElemDesc(
287                 0
288                     | ElemDesc.SPECIAL
289                     | ElemDesc.ASPECIAL
290                     | ElemDesc.HEADMISC
291                     | ElemDesc.RAW));
292         m_elementFlags.put(
293             "NOSCRIPT",
294             new ElemDesc(
295                 0
296                     | ElemDesc.BLOCK
297                     | ElemDesc.BLOCKFORM
298                     | ElemDesc.BLOCKFORMFIELDSET));
299         m_elementFlags.put("HTML", new ElemDesc(0 | ElemDesc.BLOCK | ElemDesc.HTMLELEM));
300 
301         // From "John Ky" <hand@syd.speednet.com.au
302         // Transitional Document Type Definition ()
303         // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/sgml/loosedtd.html#basefont
304         m_elementFlags.put("FONT", new ElemDesc(0 | ElemDesc.FONTSTYLE));
305 
306         // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-STRIKE
307         m_elementFlags.put("S", new ElemDesc(0 | ElemDesc.FONTSTYLE));
308         m_elementFlags.put("STRIKE", new ElemDesc(0 | ElemDesc.FONTSTYLE));
309 
310         // file:///C:/Documents%20and%20Settings/sboag.BOAG600E/My%20Documents/html/present/graphics.html#edef-U
311         m_elementFlags.put("U", new ElemDesc(0 | ElemDesc.FONTSTYLE));
312 
313         // From "John Ky" <hand@syd.speednet.com.au
314         m_elementFlags.put("NOBR", new ElemDesc(0 | ElemDesc.FONTSTYLE));
315 
316         // HTML 4.0, section 16.5
317         m_elementFlags.put(
318             "IFRAME",
319             new ElemDesc(
320                 0
321                     | ElemDesc.BLOCK
322                     | ElemDesc.BLOCKFORM
323                     | ElemDesc.BLOCKFORMFIELDSET));
324 
325         // Netscape 4 extension
326         m_elementFlags.put(
327             "LAYER",
328             new ElemDesc(
329                 0
330                     | ElemDesc.BLOCK
331                     | ElemDesc.BLOCKFORM
332                     | ElemDesc.BLOCKFORMFIELDSET));
333         // Netscape 4 extension
334         m_elementFlags.put(
335             "ILAYER",
336             new ElemDesc(
337                 0
338                     | ElemDesc.BLOCK
339                     | ElemDesc.BLOCKFORM
340                     | ElemDesc.BLOCKFORMFIELDSET));
341 
342         // NOW FOR ATTRIBUTE INFORMATION . . .
343         ElemDesc elemDesc;
344 
345 
346         // ----------------------------------------------
347         elemDesc = (ElemDesc) m_elementFlags.get("a");
348         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
349         elemDesc.setAttr("NAME", ElemDesc.ATTRURL);
350 
351         // ----------------------------------------------
352         elemDesc = (ElemDesc) m_elementFlags.get("area");
353 
354         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
355         elemDesc.setAttr("NOHREF", ElemDesc.ATTREMPTY);
356 
357         // ----------------------------------------------
358         elemDesc = (ElemDesc) m_elementFlags.get("base");
359 
360         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
361 
362         // ----------------------------------------------
363         elemDesc = (ElemDesc) m_elementFlags.get("button");
364         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
365 
366         // ----------------------------------------------
367         elemDesc = (ElemDesc) m_elementFlags.get("blockquote");
368 
369         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
370 
371         // ----------------------------------------------
372         elemDesc = (ElemDesc) m_elementFlags.get("del");
373         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
374 
375         // ----------------------------------------------
376         elemDesc = (ElemDesc) m_elementFlags.get("dir");
377         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
378 
379         // ----------------------------------------------
380 
381         elemDesc = (ElemDesc) m_elementFlags.get("div");
382         elemDesc.setAttr("SRC", ElemDesc.ATTRURL); // Netscape 4 extension
383         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
384 
385         // ----------------------------------------------
386         elemDesc = (ElemDesc) m_elementFlags.get("dl");
387         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
388 
389         // ----------------------------------------------
390         elemDesc = (ElemDesc) m_elementFlags.get("form");
391         elemDesc.setAttr("ACTION", ElemDesc.ATTRURL);
392 
393         // ----------------------------------------------
394         // Attribution to: "Voytenko, Dimitry" <DVoytenko@SECTORBASE.COM>
395         elemDesc = (ElemDesc) m_elementFlags.get("frame");
396         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
397         elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
398         elemDesc.setAttr("NORESIZE",ElemDesc.ATTREMPTY);
399 
400         // ----------------------------------------------
401         elemDesc = (ElemDesc) m_elementFlags.get("head");
402         elemDesc.setAttr("PROFILE", ElemDesc.ATTRURL);
403 
404         // ----------------------------------------------
405         elemDesc = (ElemDesc) m_elementFlags.get("hr");
406         elemDesc.setAttr("NOSHADE", ElemDesc.ATTREMPTY);
407 
408         // ----------------------------------------------
409         // HTML 4.0, section 16.5
410         elemDesc = (ElemDesc) m_elementFlags.get("iframe");
411         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
412         elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
413 
414         // ----------------------------------------------
415         // Netscape 4 extension
416         elemDesc = (ElemDesc) m_elementFlags.get("ilayer");
417         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
418 
419         // ----------------------------------------------
420         elemDesc = (ElemDesc) m_elementFlags.get("img");
421         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
422         elemDesc.setAttr("LONGDESC", ElemDesc.ATTRURL);
423         elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
424         elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
425 
426         // ----------------------------------------------
427         elemDesc = (ElemDesc) m_elementFlags.get("input");
428 
429         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
430         elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
431         elemDesc.setAttr("CHECKED", ElemDesc.ATTREMPTY);
432         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
433         elemDesc.setAttr("ISMAP", ElemDesc.ATTREMPTY);
434         elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
435 
436         // ----------------------------------------------
437         elemDesc = (ElemDesc) m_elementFlags.get("ins");
438         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
439 
440         // ----------------------------------------------
441         // Netscape 4 extension
442         elemDesc = (ElemDesc) m_elementFlags.get("layer");
443         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
444 
445         // ----------------------------------------------
446         elemDesc = (ElemDesc) m_elementFlags.get("link");
447         elemDesc.setAttr("HREF", ElemDesc.ATTRURL);
448 
449         // ----------------------------------------------
450         elemDesc = (ElemDesc) m_elementFlags.get("menu");
451         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
452 
453         // ----------------------------------------------
454         elemDesc = (ElemDesc) m_elementFlags.get("object");
455 
456         elemDesc.setAttr("CLASSID", ElemDesc.ATTRURL);
457         elemDesc.setAttr("CODEBASE", ElemDesc.ATTRURL);
458         elemDesc.setAttr("DATA", ElemDesc.ATTRURL);
459         elemDesc.setAttr("ARCHIVE", ElemDesc.ATTRURL);
460         elemDesc.setAttr("USEMAP", ElemDesc.ATTRURL);
461         elemDesc.setAttr("DECLARE", ElemDesc.ATTREMPTY);
462 
463         // ----------------------------------------------
464         elemDesc = (ElemDesc) m_elementFlags.get("ol");
465         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
466 
467         // ----------------------------------------------
468         elemDesc = (ElemDesc) m_elementFlags.get("optgroup");
469         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
470 
471         // ----------------------------------------------
472         elemDesc = (ElemDesc) m_elementFlags.get("option");
473         elemDesc.setAttr("SELECTED", ElemDesc.ATTREMPTY);
474         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
475 
476         // ----------------------------------------------
477         elemDesc = (ElemDesc) m_elementFlags.get("q");
478         elemDesc.setAttr("CITE", ElemDesc.ATTRURL);
479 
480         // ----------------------------------------------
481         elemDesc = (ElemDesc) m_elementFlags.get("script");
482         elemDesc.setAttr("SRC", ElemDesc.ATTRURL);
483         elemDesc.setAttr("FOR", ElemDesc.ATTRURL);
484         elemDesc.setAttr("DEFER", ElemDesc.ATTREMPTY);
485 
486         // ----------------------------------------------
487         elemDesc = (ElemDesc) m_elementFlags.get("select");
488         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
489         elemDesc.setAttr("MULTIPLE", ElemDesc.ATTREMPTY);
490 
491         // ----------------------------------------------
492         elemDesc = (ElemDesc) m_elementFlags.get("table");
493         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY); // Internet-Explorer extension
494 
495         // ----------------------------------------------
496         elemDesc = (ElemDesc) m_elementFlags.get("td");
497         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
498 
499         // ----------------------------------------------
500         elemDesc = (ElemDesc) m_elementFlags.get("textarea");
501         elemDesc.setAttr("DISABLED", ElemDesc.ATTREMPTY);
502         elemDesc.setAttr("READONLY", ElemDesc.ATTREMPTY);
503 
504         // ----------------------------------------------
505         elemDesc = (ElemDesc) m_elementFlags.get("th");
506         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
507 
508         // ----------------------------------------------
509         // The nowrap attribute of a tr element is both
510         // a Netscape and Internet-Explorer extension
511         elemDesc = (ElemDesc) m_elementFlags.get("tr");
512         elemDesc.setAttr("NOWRAP", ElemDesc.ATTREMPTY);
513 
514         // ----------------------------------------------
515         elemDesc = (ElemDesc) m_elementFlags.get("ul");
516         elemDesc.setAttr("COMPACT", ElemDesc.ATTREMPTY);
517     }
518 
519     /**
520      * Fallback descriptor (BLOCK flag only) that getElemDesc and getElemDesc2 return when a name has no entry.
521      */
522     static private final ElemDesc m_dummy = new ElemDesc(0 | ElemDesc.BLOCK);
523 
524     /** True if URLs should be specially escaped with the %xx form; true unless changed by the setter or setOutputFormat. */
525     private boolean m_specialEscapeURLs = true;
526 
527     /** True if the META tag should be omitted; false unless changed by the setter or setOutputFormat. */
528     private boolean m_omitMetaTag = false;
529 
530     /**
531      * Tells if the formatter should use special URL escaping.
532      *
533      * @param bool True if URLs should be specially escaped with the %xx form.
534      */
setSpecialEscapeURLs(boolean bool)535     public void setSpecialEscapeURLs(boolean bool)
536     {
537         m_specialEscapeURLs = bool;
538     }
539 
540     /**
541      * Tells if the formatter should omit the META tag.
542      *
543      * @param bool True if the META tag should be omitted.
544      */
setOmitMetaTag(boolean bool)545     public void setOmitMetaTag(boolean bool)
546     {
547         m_omitMetaTag = bool;
548     }
549 
550     /**
551      * Specifies an output format for this serializer. If the
552      * serializer has already been associated with an output format,
553      * it will switch to the new format. This method should not be
554      * called while the serializer is in the process of serializing
555      * a document.
556      *
557      * This method can be called multiple times before starting
558      * the serialization of a particular result-tree. In principle
559      * all serialization parameters can be changed, with the exception
560      * of method="html" (it must be method="html" otherwise we
561      * shouldn't even have a ToHTMLStream object here!)
562      *
563      * @param format The output format or serialization parameters
564      * to use.
565      */
setOutputFormat(Properties format)566     public void setOutputFormat(Properties format)
567     {
568         /*
569          * If "format" does not contain the property
570          * S_USE_URL_ESCAPING, then don't set this value at all,
571          * just leave as-is rather than explicitly setting it.
572          */
573         String value;
574         value = format.getProperty(OutputPropertiesFactory.S_USE_URL_ESCAPING);
575         if (value != null) {
576             m_specialEscapeURLs =
577                 OutputPropertyUtils.getBooleanProperty(
578                     OutputPropertiesFactory.S_USE_URL_ESCAPING,
579                     format);
580         }
581 
582         /*
583          * If "format" does not contain the property
584          * S_OMIT_META_TAG, then don't set this value at all,
585          * just leave as-is rather than explicitly setting it.
586          */
587         value = format.getProperty(OutputPropertiesFactory.S_OMIT_META_TAG);
588         if (value != null) {
589            m_omitMetaTag =
590                 OutputPropertyUtils.getBooleanProperty(
591                     OutputPropertiesFactory.S_OMIT_META_TAG,
592                     format);
593         }
594 
595         super.setOutputFormat(format);
596     }
597 
598     /**
599      * Tells if the formatter should use special URL escaping.
600      *
601      * @return True if URLs should be specially escaped with the %xx form.
602      */
getSpecialEscapeURLs()603     private final boolean getSpecialEscapeURLs()
604     {
605         return m_specialEscapeURLs;
606     }
607 
608     /**
609      * Tells if the formatter should omit the META tag.
610      *
611      * @return True if the META tag should be omitted.
612      */
getOmitMetaTag()613     private final boolean getOmitMetaTag()
614     {
615         return m_omitMetaTag;
616     }
617 
618     /**
619      * Get a description of the given element.
620      *
621      * @param name non-null name of element, case insensitive.
622      *
623      * @return non-null reference to ElemDesc, which may be m_dummy if no
624      *         element description matches the given name.
625      */
getElemDesc(String name)626     public static final ElemDesc getElemDesc(String name)
627     {
628         /* this method used to return m_dummy  when name was null
629          * but now it doesn't check and and requires non-null name.
630          */
631         Object obj = m_elementFlags.get(name);
632         if (null != obj)
633             return (ElemDesc)obj;
634         return m_dummy;
635     }
636 
637 
638     /**
639      * Per-instance Trie constructed from the static m_elementFlags table.
640      * Having its own Trie lets getElemDesc2 use the faster, but not thread-safe
641      * method Trie.get2(name)
642      */
643     private Trie m_htmlInfo = new Trie(m_elementFlags);
644     /**
645      * Calls to this method could be replaced with calls to
646      * getElemDesc(name), but this one should be faster.
647      */
getElemDesc2(String name)648     private ElemDesc getElemDesc2(String name)
649     {
650         Object obj = m_htmlInfo.get2(name);
651         if (null != obj)
652             return (ElemDesc)obj;
653         return m_dummy;
654     }
655 
656     /**
657      * Default constructor.
658      */
ToHTMLStream()659     public ToHTMLStream()
660     {
661 
662         super();
663         // we are just constructing this thing, no output properties
664         // have been used, so we will set the right default for
665         // indenting anyways
666         m_doIndent = true;
667         m_charInfo = m_htmlcharInfo;
668         // initialize namespaces
669         m_prefixMap = new NamespaceMappings();
670 
671     }
672 
673     /** The name of the current element. */
674 //    private String m_currentElementName = null;
675 
676     /**
677      * Receive notification of the beginning of a document.
678      *
679      * @throws org.xml.sax.SAXException Any SAX exception, possibly
680      *            wrapping another exception.
681      *
682      * @throws org.xml.sax.SAXException
683      */
startDocumentInternal()684     protected void startDocumentInternal() throws org.xml.sax.SAXException
685     {
686         super.startDocumentInternal();
687 
688         m_needToCallStartDocument = false;
689         m_needToOutputDocTypeDecl = true;
690         m_startNewLine = false;
691         setOmitXMLDeclaration(true);
692     }
693 
694     /**
695      * This method should only get called once.
696      * If a DOCTYPE declaration needs to get written out, it will
697      * be written out. If it doesn't need to be written out, then
698      * the call to this method has no effect.
699      */
outputDocTypeDecl(String name)700     private void outputDocTypeDecl(String name) throws SAXException {
701         if (true == m_needToOutputDocTypeDecl)
702         {
703             String doctypeSystem = getDoctypeSystem();
704             String doctypePublic = getDoctypePublic();
705             if ((null != doctypeSystem) || (null != doctypePublic))
706             {
707                 final java.io.Writer writer = m_writer;
708                 try
709                 {
710                 writer.write("<!DOCTYPE ");
711                 writer.write(name);
712 
713                 if (null != doctypePublic)
714                 {
715                     writer.write(" PUBLIC \"");
716                     writer.write(doctypePublic);
717                     writer.write('"');
718                 }
719 
720                 if (null != doctypeSystem)
721                 {
722                     if (null == doctypePublic)
723                         writer.write(" SYSTEM \"");
724                     else
725                         writer.write(" \"");
726 
727                     writer.write(doctypeSystem);
728                     writer.write('"');
729                 }
730 
731                 writer.write('>');
732                 outputLineSep();
733                 }
734                 catch(IOException e)
735                 {
736                     throw new SAXException(e);
737                 }
738             }
739         }
740 
741         m_needToOutputDocTypeDecl = false;
742     }
743 
744     /**
745      * Receive notification of the end of a document.
746      *
747      * @throws org.xml.sax.SAXException Any SAX exception, possibly
748      *            wrapping another exception.
749      *
750      * @throws org.xml.sax.SAXException
751      */
endDocument()752     public final void endDocument() throws org.xml.sax.SAXException
753     {
754 
755         flushPending();
756         if (m_doIndent && !m_isprevtext)
757         {
758             try
759             {
760             outputLineSep();
761             }
762             catch(IOException e)
763             {
764                 throw new SAXException(e);
765             }
766         }
767 
768         flushWriter();
769         if (m_tracer != null)
770             super.fireEndDoc();
771     }
772 
773     /**
774      *  Receive notification of the beginning of an element.
775      *
776      *
777      *  @param namespaceURI
778      *  @param localName
779      *  @param name The element type name.
780      *  @param atts The attributes attached to the element, if any.
781      *  @throws org.xml.sax.SAXException Any SAX exception, possibly
782      *             wrapping another exception.
783      *  @see #endElement
784      *  @see org.xml.sax.AttributeList
785      */
startElement( String namespaceURI, String localName, String name, Attributes atts)786     public void startElement(
787         String namespaceURI,
788         String localName,
789         String name,
790         Attributes atts)
791         throws org.xml.sax.SAXException
792     {
793 
794         ElemContext elemContext = m_elemContext;
795 
796         // clean up any pending things first
797         if (elemContext.m_startTagOpen)
798         {
799             closeStartTag();
800             elemContext.m_startTagOpen = false;
801         }
802         else if (m_cdataTagOpen)
803         {
804             closeCDATA();
805             m_cdataTagOpen = false;
806         }
807         else if (m_needToCallStartDocument)
808         {
809             startDocumentInternal();
810             m_needToCallStartDocument = false;
811         }
812 
813         if (m_needToOutputDocTypeDecl) {
814             String n = name;
815             if (n == null || n.length() == 0) {
816                 // If the lexical QName is not given
817                 // use the localName in the DOCTYPE
818                 n = localName;
819             }
820             outputDocTypeDecl(n);
821         }
822 
823 
824         // if this element has a namespace then treat it like XML
825         if (null != namespaceURI && namespaceURI.length() > 0)
826         {
827             super.startElement(namespaceURI, localName, name, atts);
828 
829             return;
830         }
831 
832         try
833         {
834             // getElemDesc2(name) is faster than getElemDesc(name)
835             ElemDesc elemDesc = getElemDesc2(name);
836             int elemFlags = elemDesc.getFlags();
837 
838             // deal with indentation issues first
839             if (m_doIndent)
840             {
841 
842                 boolean isBlockElement = (elemFlags & ElemDesc.BLOCK) != 0;
843                 if (m_ispreserve)
844                     m_ispreserve = false;
845                 else if (
846                     (null != elemContext.m_elementName)
847                     && (!m_inBlockElem
848                         || isBlockElement) /* && !isWhiteSpaceSensitive */
849                     )
850                 {
851                     m_startNewLine = true;
852 
853                     indent();
854 
855                 }
856                 m_inBlockElem = !isBlockElement;
857             }
858 
859             // save any attributes for later processing
860             if (atts != null)
861                 addAttributes(atts);
862 
863             m_isprevtext = false;
864             final java.io.Writer writer = m_writer;
865             writer.write('<');
866             writer.write(name);
867 
868 
869 
870             if (m_tracer != null)
871                 firePseudoAttributes();
872 
873             if ((elemFlags & ElemDesc.EMPTY) != 0)
874             {
875                 // an optimization for elements which are expected
876                 // to be empty.
877                 m_elemContext = elemContext.push();
878                 /* XSLTC sometimes calls namespaceAfterStartElement()
879                  * so we need to remember the name
880                  */
881                 m_elemContext.m_elementName = name;
882                 m_elemContext.m_elementDesc = elemDesc;
883                 return;
884             }
885             else
886             {
887                 elemContext = elemContext.push(namespaceURI,localName,name);
888                 m_elemContext = elemContext;
889                 elemContext.m_elementDesc = elemDesc;
890                 elemContext.m_isRaw = (elemFlags & ElemDesc.RAW) != 0;
891             }
892 
893 
894             if ((elemFlags & ElemDesc.HEADELEM) != 0)
895             {
896                 // This is the <HEAD> element, do some special processing
897                 closeStartTag();
898                 elemContext.m_startTagOpen = false;
899                 if (!m_omitMetaTag)
900                 {
901                     if (m_doIndent)
902                         indent();
903                     writer.write(
904                         "<META http-equiv=\"Content-Type\" content=\"text/html; charset=");
905                     String encoding = getEncoding();
906                     String encode = Encodings.getMimeEncoding(encoding);
907                     writer.write(encode);
908                     writer.write("\">");
909                 }
910             }
911         }
912         catch (IOException e)
913         {
914             throw new SAXException(e);
915         }
916     }
917 
918     /**
919      *  Receive notification of the end of an element.
920      *
921      *
922      *  @param namespaceURI
923      *  @param localName
924      *  @param name The element type name
925      *  @throws org.xml.sax.SAXException Any SAX exception, possibly
926      *             wrapping another exception.
927      */
endElement( final String namespaceURI, final String localName, final String name)928     public final void endElement(
929         final String namespaceURI,
930         final String localName,
931         final String name)
932         throws org.xml.sax.SAXException
933     {
934         // deal with any pending issues
935         if (m_cdataTagOpen)
936             closeCDATA();
937 
938         // if the element has a namespace, treat it like XML, not HTML
939         if (null != namespaceURI && namespaceURI.length() > 0)
940         {
941             super.endElement(namespaceURI, localName, name);
942 
943             return;
944         }
945 
946         try
947         {
948 
949             ElemContext elemContext = m_elemContext;
950             final ElemDesc elemDesc = elemContext.m_elementDesc;
951             final int elemFlags = elemDesc.getFlags();
952             final boolean elemEmpty = (elemFlags & ElemDesc.EMPTY) != 0;
953 
954             // deal with any indentation issues
955             if (m_doIndent)
956             {
957                 final boolean isBlockElement = (elemFlags&ElemDesc.BLOCK) != 0;
958                 boolean shouldIndent = false;
959 
960                 if (m_ispreserve)
961                 {
962                     m_ispreserve = false;
963                 }
964                 else if (m_doIndent && (!m_inBlockElem || isBlockElement))
965                 {
966                     m_startNewLine = true;
967                     shouldIndent = true;
968                 }
969                 if (!elemContext.m_startTagOpen && shouldIndent)
970                     indent(elemContext.m_currentElemDepth - 1);
971                 m_inBlockElem = !isBlockElement;
972             }
973 
974             final java.io.Writer writer = m_writer;
975             if (!elemContext.m_startTagOpen)
976             {
977                 writer.write("</");
978                 writer.write(name);
979                 writer.write('>');
980             }
981             else
982             {
983                 // the start-tag open when this method was called,
984                 // so we need to process it now.
985 
986                 if (m_tracer != null)
987                     super.fireStartElem(name);
988 
989                 // the starting tag was still open when we received this endElement() call
990                 // so we need to process any gathered attributes NOW, before they go away.
991                 int nAttrs = m_attributes.getLength();
992                 if (nAttrs > 0)
993                 {
994                     processAttributes(m_writer, nAttrs);
995                     // clear attributes object for re-use with next element
996                     m_attributes.clear();
997                 }
998                 if (!elemEmpty)
999                 {
1000                     // As per Dave/Paul recommendation 12/06/2000
1001                     // if (shouldIndent)
1002                     // writer.write('>');
1003                     //  indent(m_currentIndent);
1004 
1005                     writer.write("></");
1006                     writer.write(name);
1007                     writer.write('>');
1008                 }
1009                 else
1010                 {
1011                     writer.write('>');
1012                 }
1013             }
1014 
1015             // clean up because the element has ended
1016             if ((elemFlags & ElemDesc.WHITESPACESENSITIVE) != 0)
1017                 m_ispreserve = true;
1018             m_isprevtext = false;
1019 
1020             // fire off the end element event
1021             if (m_tracer != null)
1022                 super.fireEndElem(name);
1023 
1024             // OPTIMIZE-EMPTY
1025             if (elemEmpty)
1026             {
1027                 // a quick exit if the HTML element had no children.
1028                 // This block of code can be removed if the corresponding block of code
1029                 // in startElement() also labeled with "OPTIMIZE-EMPTY" is also removed
1030                 m_elemContext = elemContext.m_prev;
1031                 return;
1032             }
1033 
1034             // some more clean because the element has ended.
1035             if (!elemContext.m_startTagOpen)
1036             {
1037                 if (m_doIndent && !m_preserves.isEmpty())
1038                     m_preserves.pop();
1039             }
1040             m_elemContext = elemContext.m_prev;
1041 //            m_isRawStack.pop();
1042         }
1043         catch (IOException e)
1044         {
1045             throw new SAXException(e);
1046         }
1047     }
1048 
1049     /**
1050      * Process an attribute.
1051      * @param   writer The writer to write the processed output to.
1052      * @param   name   The name of the attribute.
1053      * @param   value   The value of the attribute.
1054      * @param   elemDesc The description of the HTML element
1055      *           that has this attribute.
1056      *
1057      * @throws org.xml.sax.SAXException
1058      */
processAttribute( java.io.Writer writer, String name, String value, ElemDesc elemDesc)1059     protected void processAttribute(
1060         java.io.Writer writer,
1061         String name,
1062         String value,
1063         ElemDesc elemDesc)
1064         throws IOException
1065     {
1066         writer.write(' ');
1067 
1068         if (   ((value.length() == 0) || value.equalsIgnoreCase(name))
1069             && elemDesc != null
1070             && elemDesc.isAttrFlagSet(name, ElemDesc.ATTREMPTY))
1071         {
1072             writer.write(name);
1073         }
1074         else
1075         {
1076             // %REVIEW% %OPT%
1077             // Two calls to single-char write may NOT
1078             // be more efficient than one to string-write...
1079             writer.write(name);
1080             writer.write("=\"");
1081             if (   elemDesc != null
1082                 && elemDesc.isAttrFlagSet(name, ElemDesc.ATTRURL))
1083                 writeAttrURI(writer, value, m_specialEscapeURLs);
1084             else
1085                 writeAttrString(writer, value, this.getEncoding());
1086             writer.write('"');
1087 
1088         }
1089     }
1090 
1091     /**
1092      * Tell if a character is an ASCII digit.
1093      */
isASCIIDigit(char c)1094     private boolean isASCIIDigit(char c)
1095     {
1096         return (c >= '0' && c <= '9');
1097     }
1098 
1099     /**
1100      * Make an integer into an HH hex value.
1101      * Does no checking on the size of the input, since this
1102      * is only meant to be used locally by writeAttrURI.
1103      *
1104      * @param i must be a value less than 255.
1105      *
1106      * @return should be a two character string.
1107      */
makeHHString(int i)1108     private static String makeHHString(int i)
1109     {
1110         String s = Integer.toHexString(i).toUpperCase();
1111         if (s.length() == 1)
1112         {
1113             s = "0" + s;
1114         }
1115         return s;
1116     }
1117 
1118     /**
1119     * Dmitri Ilyin: Makes sure if the String is HH encoded sign.
1120     * @param str must be 2 characters long
1121     *
1122     * @return true or false
1123     */
isHHSign(String str)1124     private boolean isHHSign(String str)
1125     {
1126         boolean sign = true;
1127         try
1128         {
1129             char r = (char) Integer.parseInt(str, 16);
1130         }
1131         catch (NumberFormatException e)
1132         {
1133             sign = false;
1134         }
1135         return sign;
1136     }
1137 
1138     /**
1139      * Write the specified <var>string</var> after substituting non ASCII characters,
1140      * with <CODE>%HH</CODE>, where HH is the hex of the byte value.
1141      *
1142      * @param   string      String to convert to XML format.
1143      * @param doURLEscaping True if we should try to encode as
1144      *                      per http://www.ietf.org/rfc/rfc2396.txt.
1145      *
1146      * @throws org.xml.sax.SAXException if a bad surrogate pair is detected.
1147      */
writeAttrURI( final java.io.Writer writer, String string, boolean doURLEscaping)1148     public void writeAttrURI(
1149         final java.io.Writer writer, String string, boolean doURLEscaping)
1150         throws IOException
1151     {
1152         // http://www.ietf.org/rfc/rfc2396.txt says:
1153         // A URI is always in an "escaped" form, since escaping or unescaping a
1154         // completed URI might change its semantics.  Normally, the only time
1155         // escape encodings can safely be made is when the URI is being created
1156         // from its component parts; each component may have its own set of
1157         // characters that are reserved, so only the mechanism responsible for
1158         // generating or interpreting that component can determine whether or
1159         // not escaping a character will change its semantics. Likewise, a URI
1160         // must be separated into its components before the escaped characters
1161         // within those components can be safely decoded.
1162         //
1163         // ...So we do our best to do limited escaping of the URL, without
1164         // causing damage.  If the URL is already properly escaped, in theory, this
1165         // function should not change the string value.
1166 
1167         final int end = string.length();
1168         if (end > m_attrBuff.length)
1169         {
1170            m_attrBuff = new char[end*2 + 1];
1171         }
1172         string.getChars(0,end, m_attrBuff, 0);
1173         final char[] chars = m_attrBuff;
1174 
1175         int cleanStart = 0;
1176         int cleanLength = 0;
1177 
1178 
1179         char ch = 0;
1180         for (int i = 0; i < end; i++)
1181         {
1182             ch = chars[i];
1183 
1184             if ((ch < 32) || (ch > 126))
1185             {
1186                 if (cleanLength > 0)
1187                 {
1188                     writer.write(chars, cleanStart, cleanLength);
1189                     cleanLength = 0;
1190                 }
1191                 if (doURLEscaping)
1192                 {
1193                     // Encode UTF16 to UTF8.
1194                     // Reference is Unicode, A Primer, by Tony Graham.
1195                     // Page 92.
1196 
1197                     // Note that Kay doesn't escape 0x20...
1198                     //  if(ch == 0x20) // Not sure about this... -sb
1199                     //  {
1200                     //    writer.write(ch);
1201                     //  }
1202                     //  else
1203                     if (ch <= 0x7F)
1204                     {
1205                         writer.write('%');
1206                         writer.write(makeHHString(ch));
1207                     }
1208                     else if (ch <= 0x7FF)
1209                     {
1210                         // Clear low 6 bits before rotate, put high 4 bits in low byte,
1211                         // and set two high bits.
1212                         int high = (ch >> 6) | 0xC0;
1213                         int low = (ch & 0x3F) | 0x80;
1214                         // First 6 bits, + high bit
1215                         writer.write('%');
1216                         writer.write(makeHHString(high));
1217                         writer.write('%');
1218                         writer.write(makeHHString(low));
1219                     }
1220                     else if (Encodings.isHighUTF16Surrogate(ch)) // high surrogate
1221                     {
1222                         // I'm sure this can be done in 3 instructions, but I choose
1223                         // to try and do it exactly like it is done in the book, at least
1224                         // until we are sure this is totally clean.  I don't think performance
1225                         // is a big issue with this particular function, though I could be
1226                         // wrong.  Also, the stuff below clearly does more masking than
1227                         // it needs to do.
1228 
1229                         // Clear high 6 bits.
1230                         int highSurrogate = ((int) ch) & 0x03FF;
1231 
1232                         // Middle 4 bits (wwww) + 1
1233                         // "Note that the value of wwww from the high surrogate bit pattern
1234                         // is incremented to make the uuuuu bit pattern in the scalar value
1235                         // so the surrogate pair don't address the BMP."
1236                         int wwww = ((highSurrogate & 0x03C0) >> 6);
1237                         int uuuuu = wwww + 1;
1238 
1239                         // next 4 bits
1240                         int zzzz = (highSurrogate & 0x003C) >> 2;
1241 
1242                         // low 2 bits
1243                         int yyyyyy = ((highSurrogate & 0x0003) << 4) & 0x30;
1244 
1245                         // Get low surrogate character.
1246                         ch = chars[++i];
1247 
1248                         // Clear high 6 bits.
1249                         int lowSurrogate = ((int) ch) & 0x03FF;
1250 
1251                         // put the middle 4 bits into the bottom of yyyyyy (byte 3)
1252                         yyyyyy = yyyyyy | ((lowSurrogate & 0x03C0) >> 6);
1253 
1254                         // bottom 6 bits.
1255                         int xxxxxx = (lowSurrogate & 0x003F);
1256 
1257                         int byte1 = 0xF0 | (uuuuu >> 2); // top 3 bits of uuuuu
1258                         int byte2 =
1259                             0x80 | (((uuuuu & 0x03) << 4) & 0x30) | zzzz;
1260                         int byte3 = 0x80 | yyyyyy;
1261                         int byte4 = 0x80 | xxxxxx;
1262 
1263                         writer.write('%');
1264                         writer.write(makeHHString(byte1));
1265                         writer.write('%');
1266                         writer.write(makeHHString(byte2));
1267                         writer.write('%');
1268                         writer.write(makeHHString(byte3));
1269                         writer.write('%');
1270                         writer.write(makeHHString(byte4));
1271                     }
1272                     else
1273                     {
1274                         int high = (ch >> 12) | 0xE0; // top 4 bits
1275                         int middle = ((ch & 0x0FC0) >> 6) | 0x80;
1276                         // middle 6 bits
1277                         int low = (ch & 0x3F) | 0x80;
1278                         // First 6 bits, + high bit
1279                         writer.write('%');
1280                         writer.write(makeHHString(high));
1281                         writer.write('%');
1282                         writer.write(makeHHString(middle));
1283                         writer.write('%');
1284                         writer.write(makeHHString(low));
1285                     }
1286 
1287                 }
1288                 else if (escapingNotNeeded(ch))
1289                 {
1290                     writer.write(ch);
1291                 }
1292                 else
1293                 {
1294                     writer.write("&#");
1295                     writer.write(Integer.toString(ch));
1296                     writer.write(';');
1297                 }
1298                 // In this character range we have first written out any previously accumulated
1299                 // "clean" characters, then processed the current more complicated character,
1300                 // which may have incremented "i".
1301                 // We now we reset the next possible clean character.
1302                 cleanStart = i + 1;
1303             }
1304             // Since http://www.ietf.org/rfc/rfc2396.txt refers to the URI grammar as
1305             // not allowing quotes in the URI proper syntax, nor in the fragment
1306             // identifier, we believe that it's OK to double escape quotes.
1307             else if (ch == '"')
1308             {
1309                 // If the character is a '%' number number, try to avoid double-escaping.
1310                 // There is a question if this is legal behavior.
1311 
1312                 // Dmitri Ilyin: to check if '%' number number is invalid. It must be checked if %xx is a sign, that would be encoded
1313                 // The encoded signes are in Hex form. So %xx my be in form %3C that is "<" sign. I will try to change here a little.
1314 
1315                 //        if( ((i+2) < len) && isASCIIDigit(stringArray[i+1]) && isASCIIDigit(stringArray[i+2]) )
1316 
1317                 // We are no longer escaping '%'
1318 
1319                 if (cleanLength > 0)
1320                 {
1321                     writer.write(chars, cleanStart, cleanLength);
1322                     cleanLength = 0;
1323                 }
1324 
1325 
1326                 // Mike Kay encodes this as &#34;, so he may know something I don't?
1327                 if (doURLEscaping)
1328                     writer.write("%22");
1329                 else
1330                     writer.write("&quot;"); // we have to escape this, I guess.
1331 
1332                 // We have written out any clean characters, then the escaped '%' and now we
1333                 // We now we reset the next possible clean character.
1334                 cleanStart = i + 1;
1335             }
1336             else if (ch == '&')
1337             {
1338                 // HTML 4.01 reads, "Authors should use "&amp;" (ASCII decimal 38)
1339                 // instead of "&" to avoid confusion with the beginning of a character
1340                 // reference (entity reference open delimiter).
1341                 if (cleanLength > 0)
1342                 {
1343                     writer.write(chars, cleanStart, cleanLength);
1344                     cleanLength = 0;
1345                 }
1346                 writer.write("&amp;");
1347                 cleanStart = i + 1;
1348             }
1349             else
1350             {
1351                 // no processing for this character, just count how
1352                 // many characters in a row that we have that need no processing
1353                 cleanLength++;
1354             }
1355         }
1356 
1357         // are there any clean characters at the end of the array
1358         // that we haven't processed yet?
1359         if (cleanLength > 1)
1360         {
1361             // if the whole string can be written out as-is do so
1362             // otherwise write out the clean chars at the end of the
1363             // array
1364             if (cleanStart == 0)
1365                 writer.write(string);
1366             else
1367                 writer.write(chars, cleanStart, cleanLength);
1368         }
1369         else if (cleanLength == 1)
1370         {
1371             // a little optimization for 1 clean character
1372             // (we could have let the previous if(...) handle them all)
1373             writer.write(ch);
1374         }
1375     }
1376 
1377     /**
1378      * Writes the specified <var>string</var> after substituting <VAR>specials</VAR>,
1379      * and UTF-16 surrogates for character references <CODE>&amp;#xnn</CODE>.
1380      *
1381      * @param   string      String to convert to XML format.
1382      * @param   encoding    CURRENTLY NOT IMPLEMENTED.
1383      *
1384      * @throws org.xml.sax.SAXException
1385      */
writeAttrString( final java.io.Writer writer, String string, String encoding)1386     public void writeAttrString(
1387         final java.io.Writer writer, String string, String encoding)
1388         throws IOException
1389     {
1390         final int end = string.length();
1391         if (end > m_attrBuff.length)
1392         {
1393             m_attrBuff = new char[end * 2 + 1];
1394         }
1395         string.getChars(0, end, m_attrBuff, 0);
1396         final char[] chars = m_attrBuff;
1397 
1398 
1399 
1400         int cleanStart = 0;
1401         int cleanLength = 0;
1402 
1403         char ch = 0;
1404         for (int i = 0; i < end; i++)
1405         {
1406             ch = chars[i];
1407 
1408             // System.out.println("SPECIALSSIZE: "+SPECIALSSIZE);
1409             // System.out.println("ch: "+(int)ch);
1410             // System.out.println("m_maxCharacter: "+(int)m_maxCharacter);
1411             // System.out.println("m_attrCharsMap[ch]: "+(int)m_attrCharsMap[ch]);
1412             if (escapingNotNeeded(ch) && (!m_charInfo.shouldMapAttrChar(ch)))
1413             {
1414                 cleanLength++;
1415             }
1416             else if ('<' == ch || '>' == ch)
1417             {
1418                 cleanLength++; // no escaping in this case, as specified in 15.2
1419             }
1420             else if (
1421                 ('&' == ch) && ((i + 1) < end) && ('{' == chars[i + 1]))
1422             {
1423                 cleanLength++; // no escaping in this case, as specified in 15.2
1424             }
1425             else
1426             {
1427                 if (cleanLength > 0)
1428                 {
1429                     writer.write(chars,cleanStart,cleanLength);
1430                     cleanLength = 0;
1431                 }
1432                 int pos = accumDefaultEntity(writer, ch, i, chars, end, false, true);
1433 
1434                 if (i != pos)
1435                 {
1436                     i = pos - 1;
1437                 }
1438                 else
1439                 {
1440                     if (Encodings.isHighUTF16Surrogate(ch))
1441                     {
1442 
1443                             writeUTF16Surrogate(ch, chars, i, end);
1444                             i++; // two input characters processed
1445                                  // this increments by one and the for()
1446                                  // loop itself increments by another one.
1447                     }
1448 
1449                     // The next is kind of a hack to keep from escaping in the case
1450                     // of Shift_JIS and the like.
1451 
1452                     /*
1453                     else if ((ch < m_maxCharacter) && (m_maxCharacter == 0xFFFF)
1454                     && (ch != 160))
1455                     {
1456                     writer.write(ch);  // no escaping in this case
1457                     }
1458                     else
1459                     */
1460                     String outputStringForChar = m_charInfo.getOutputStringForChar(ch);
1461                     if (null != outputStringForChar)
1462                     {
1463                         writer.write(outputStringForChar);
1464                     }
1465                     else if (escapingNotNeeded(ch))
1466                     {
1467                         writer.write(ch); // no escaping in this case
1468                     }
1469                     else
1470                     {
1471                         writer.write("&#");
1472                         writer.write(Integer.toString(ch));
1473                         writer.write(';');
1474                     }
1475                 }
1476                 cleanStart = i + 1;
1477             }
1478         } // end of for()
1479 
1480         // are there any clean characters at the end of the array
1481         // that we haven't processed yet?
1482         if (cleanLength > 1)
1483         {
1484             // if the whole string can be written out as-is do so
1485             // otherwise write out the clean chars at the end of the
1486             // array
1487             if (cleanStart == 0)
1488                 writer.write(string);
1489             else
1490                 writer.write(chars, cleanStart, cleanLength);
1491         }
1492         else if (cleanLength == 1)
1493         {
1494             // a little optimization for 1 clean character
1495             // (we could have let the previous if(...) handle them all)
1496             writer.write(ch);
1497         }
1498     }
1499 
1500 
1501 
1502     /**
1503      * Receive notification of character data.
1504      *
1505      * <p>The Parser will call this method to report each chunk of
1506      * character data.  SAX parsers may return all contiguous character
1507      * data in a single chunk, or they may split it into several
1508      * chunks; however, all of the characters in any single event
1509      * must come from the same external entity, so that the Locator
1510      * provides useful information.</p>
1511      *
1512      * <p>The application must not attempt to read from the array
1513      * outside of the specified range.</p>
1514      *
1515      * <p>Note that some parsers will report whitespace using the
1516      * ignorableWhitespace() method rather than this one (validating
1517      * parsers must do so).</p>
1518      *
1519      * @param chars The characters from the XML document.
1520      * @param start The start position in the array.
1521      * @param length The number of characters to read from the array.
1522      * @throws org.xml.sax.SAXException Any SAX exception, possibly
1523      *            wrapping another exception.
1524      * @see #ignorableWhitespace
1525      * @see org.xml.sax.Locator
1526      *
1527      * @throws org.xml.sax.SAXException
1528      */
characters(char chars[], int start, int length)1529     public final void characters(char chars[], int start, int length)
1530         throws org.xml.sax.SAXException
1531     {
1532 
1533         if (m_elemContext.m_isRaw)
1534         {
1535             try
1536             {
1537                 // Clean up some pending issues.
1538                 if (m_elemContext.m_startTagOpen)
1539                 {
1540                     closeStartTag();
1541                     m_elemContext.m_startTagOpen = false;
1542                 }
1543 
1544                 m_ispreserve = true;
1545 
1546                 writeNormalizedChars(chars, start, length, false, m_lineSepUse);
1547 
1548                 // time to generate characters event
1549                 if (m_tracer != null)
1550                     super.fireCharEvent(chars, start, length);
1551 
1552                 return;
1553             }
1554             catch (IOException ioe)
1555             {
1556                 throw new org.xml.sax.SAXException(
1557                     Utils.messages.createMessage(MsgKey.ER_OIERROR,null),ioe);
1558             }
1559         }
1560         else
1561         {
1562             super.characters(chars, start, length);
1563         }
1564     }
1565 
1566     /**
1567      *  Receive notification of cdata.
1568      *
1569      *  <p>The Parser will call this method to report each chunk of
1570      *  character data.  SAX parsers may return all contiguous character
1571      *  data in a single chunk, or they may split it into several
1572      *  chunks; however, all of the characters in any single event
1573      *  must come from the same external entity, so that the Locator
1574      *  provides useful information.</p>
1575      *
1576      *  <p>The application must not attempt to read from the array
1577      *  outside of the specified range.</p>
1578      *
1579      *  <p>Note that some parsers will report whitespace using the
1580      *  ignorableWhitespace() method rather than this one (validating
1581      *  parsers must do so).</p>
1582      *
1583      *  @param ch The characters from the XML document.
1584      *  @param start The start position in the array.
1585      *  @param length The number of characters to read from the array.
1586      *  @throws org.xml.sax.SAXException Any SAX exception, possibly
1587      *             wrapping another exception.
1588      *  @see #ignorableWhitespace
1589      *  @see org.xml.sax.Locator
1590      *
1591      * @throws org.xml.sax.SAXException
1592      */
cdata(char ch[], int start, int length)1593     public final void cdata(char ch[], int start, int length)
1594         throws org.xml.sax.SAXException
1595     {
1596 
1597         if ((null != m_elemContext.m_elementName)
1598             && (m_elemContext.m_elementName.equalsIgnoreCase("SCRIPT")
1599                 || m_elemContext.m_elementName.equalsIgnoreCase("STYLE")))
1600         {
1601             try
1602             {
1603                 if (m_elemContext.m_startTagOpen)
1604                 {
1605                     closeStartTag();
1606                     m_elemContext.m_startTagOpen = false;
1607                 }
1608 
1609                 m_ispreserve = true;
1610 
1611                 if (shouldIndent())
1612                     indent();
1613 
1614                 // writer.write(ch, start, length);
1615                 writeNormalizedChars(ch, start, length, true, m_lineSepUse);
1616             }
1617             catch (IOException ioe)
1618             {
1619                 throw new org.xml.sax.SAXException(
1620                     Utils.messages.createMessage(
1621                         MsgKey.ER_OIERROR,
1622                         null),
1623                     ioe);
1624                 //"IO error", ioe);
1625             }
1626         }
1627         else
1628         {
1629             super.cdata(ch, start, length);
1630         }
1631     }
1632 
1633     /**
1634      *  Receive notification of a processing instruction.
1635      *
1636      *  @param target The processing instruction target.
1637      *  @param data The processing instruction data, or null if
1638      *         none was supplied.
1639      *  @throws org.xml.sax.SAXException Any SAX exception, possibly
1640      *             wrapping another exception.
1641      *
1642      * @throws org.xml.sax.SAXException
1643      */
processingInstruction(String target, String data)1644     public void processingInstruction(String target, String data)
1645         throws org.xml.sax.SAXException
1646     {
1647 
1648         // Process any pending starDocument and startElement first.
1649         flushPending();
1650 
1651         // Use a fairly nasty hack to tell if the next node is supposed to be
1652         // unescaped text.
1653         if (target.equals(Result.PI_DISABLE_OUTPUT_ESCAPING))
1654         {
1655             startNonEscaping();
1656         }
1657         else if (target.equals(Result.PI_ENABLE_OUTPUT_ESCAPING))
1658         {
1659             endNonEscaping();
1660         }
1661         else
1662         {
1663             try
1664             {
1665                 // clean up any pending things first
1666                 if (m_elemContext.m_startTagOpen)
1667                 {
1668                     closeStartTag();
1669                     m_elemContext.m_startTagOpen = false;
1670                 }
1671                 else if (m_cdataTagOpen)
1672                 {
1673                     closeCDATA();
1674                 }
1675                 else if (m_needToCallStartDocument)
1676                 {
1677                     startDocumentInternal();
1678                 }
1679 
1680 
1681             /*
1682              * Perhaps processing instructions can be written out in HTML before
1683              * the DOCTYPE, in which case this could be emitted with the
1684              * startElement call, that knows the name of the document element
1685              * doing it right.
1686              */
1687             if (true == m_needToOutputDocTypeDecl)
1688                 outputDocTypeDecl("html"); // best guess for the upcoming element
1689 
1690 
1691             if (shouldIndent())
1692                 indent();
1693 
1694             final java.io.Writer writer = m_writer;
1695             //writer.write("<?" + target);
1696             writer.write("<?");
1697             writer.write(target);
1698 
1699             if (data.length() > 0 && !Character.isSpaceChar(data.charAt(0)))
1700                 writer.write(' ');
1701 
1702             //writer.write(data + ">"); // different from XML
1703             writer.write(data); // different from XML
1704             writer.write('>'); // different from XML
1705 
1706             // Always output a newline char if not inside of an
1707             // element. The whitespace is not significant in that
1708             // case.
1709             if (m_elemContext.m_currentElemDepth <= 0)
1710                 outputLineSep();
1711 
1712             m_startNewLine = true;
1713             }
1714             catch(IOException e)
1715             {
1716                 throw new SAXException(e);
1717             }
1718         }
1719 
1720         // now generate the PI event
1721         if (m_tracer != null)
1722             super.fireEscapingEvent(target, data);
1723      }
1724 
1725     /**
1726      * Receive notivication of a entityReference.
1727      *
1728      * @param name non-null reference to entity name string.
1729      *
1730      * @throws org.xml.sax.SAXException
1731      */
entityReference(String name)1732     public final void entityReference(String name)
1733         throws org.xml.sax.SAXException
1734     {
1735         try
1736         {
1737 
1738         final java.io.Writer writer = m_writer;
1739         writer.write('&');
1740         writer.write(name);
1741         writer.write(';');
1742 
1743         } catch(IOException e)
1744         {
1745             throw new SAXException(e);
1746         }
1747     }
    /**
     * Ends the element that is identified by its name only.
     *
     * <p>This is a convenience form that forwards to the three-argument
     * {@code endElement}, passing null for the first two arguments.</p>
     *
     * @param elemName the name of the element that is ending.
     * @throws SAXException propagated from the three-argument
     *             {@code endElement}.
     * @see ExtendedContentHandler#endElement(String)
     */
    public final void endElement(String elemName) throws SAXException
    {
        endElement(null, null, elemName);
    }
1755 
1756     /**
1757      * Process the attributes, which means to write out the currently
1758      * collected attributes to the writer. The attributes are not
1759      * cleared by this method
1760      *
1761      * @param writer the writer to write processed attributes to.
1762      * @param nAttrs the number of attributes in m_attributes
1763      * to be processed
1764      *
1765      * @throws org.xml.sax.SAXException
1766      */
processAttributes(java.io.Writer writer, int nAttrs)1767     public void processAttributes(java.io.Writer writer, int nAttrs)
1768         throws IOException,SAXException
1769     {
1770             /*
1771              * process the collected attributes
1772              */
1773             for (int i = 0; i < nAttrs; i++)
1774             {
1775                 processAttribute(
1776                     writer,
1777                     m_attributes.getQName(i),
1778                     m_attributes.getValue(i),
1779                     m_elemContext.m_elementDesc);
1780             }
1781     }
1782 
1783     /**
1784      * For the enclosing elements starting tag write out out any attributes
1785      * followed by ">". At this point we also mark if this element is
1786      * a cdata-section-element.
1787      *
1788      *@throws org.xml.sax.SAXException
1789      */
closeStartTag()1790     protected void closeStartTag() throws SAXException
1791     {
1792             try
1793             {
1794 
1795             // finish processing attributes, time to fire off the start element event
1796             if (m_tracer != null)
1797                 super.fireStartElem(m_elemContext.m_elementName);
1798 
1799             int nAttrs = m_attributes.getLength();
1800             if (nAttrs>0)
1801             {
1802                 processAttributes(m_writer, nAttrs);
1803                 // clear attributes object for re-use with next element
1804                 m_attributes.clear();
1805             }
1806 
1807             m_writer.write('>');
1808 
1809             /* At this point we have the prefix mappings now, so
1810              * lets determine if the current element is specified in the cdata-
1811              * section-elements list.
1812              */
1813             if (m_CdataElems != null) // if there are any cdata sections
1814                 m_elemContext.m_isCdataSection = isCdataSection();
1815             if (m_doIndent)
1816             {
1817                 m_isprevtext = false;
1818                 m_preserves.push(m_ispreserve);
1819             }
1820 
1821             }
1822             catch(IOException e)
1823             {
1824                 throw new SAXException(e);
1825             }
1826     }
1827 
1828 
1829 
1830         /**
1831          * This method is used when a prefix/uri namespace mapping
1832          * is indicated after the element was started with a
1833          * startElement() and before and endElement().
1834          * startPrefixMapping(prefix,uri) would be used before the
1835          * startElement() call.
1836          * @param uri the URI of the namespace
1837          * @param prefix the prefix associated with the given URI.
1838          *
1839          * @see ExtendedContentHandler#namespaceAfterStartElement(String, String)
1840          */
namespaceAfterStartElement(String prefix, String uri)1841         public void namespaceAfterStartElement(String prefix, String uri)
1842             throws SAXException
1843         {
1844             // hack for XSLTC with finding URI for default namespace
1845             if (m_elemContext.m_elementURI == null)
1846             {
1847                 String prefix1 = getPrefixPart(m_elemContext.m_elementName);
1848                 if (prefix1 == null && EMPTYSTRING.equals(prefix))
1849                 {
1850                     // the elements URI is not known yet, and it
1851                     // doesn't have a prefix, and we are currently
1852                     // setting the uri for prefix "", so we have
1853                     // the uri for the element... lets remember it
1854                     m_elemContext.m_elementURI = uri;
1855                 }
1856             }
1857             startPrefixMapping(prefix,uri,false);
1858         }
1859 
    /**
     * Report the start of DTD declarations.
     *
     * <p>Sets the m_inDTD flag, which stays set until endDTD() and makes
     * comment() discard comments, then forwards the event to the
     * superclass.</p>
     *
     * @param name passed through to the superclass.
     * @param publicId passed through to the superclass.
     * @param systemId passed through to the superclass.
     * @throws SAXException propagated from the superclass.
     * @see #endDTD
     */
    public void startDTD(String name, String publicId, String systemId)
        throws SAXException
    {
        // flag first, so the state is already set while the superclass runs
        m_inDTD = true;
        super.startDTD(name, publicId, systemId);
    }
1866 
    /**
     * Report the end of DTD declarations.
     *
     * <p>Only clears the m_inDTD flag that startDTD() set; nothing is
     * written and the superclass is not called.</p>
     *
     * @throws org.xml.sax.SAXException declared for the interface; this
     *             implementation does not throw it.
     * @see #startDTD
     */
    public void endDTD() throws org.xml.sax.SAXException
    {
        m_inDTD = false;
        /* for ToHTMLStream the DOCTYPE is entirely output in the
         * startDocumentInternal() method, so don't do anything here
         */
    }
    /**
     * Intentionally does nothing: attribute declarations belong to the
     * internal DTD subset, which this serializer does not write out.
     * All arguments are ignored.
     *
     * @throws SAXException declared only; never thrown here.
     */
    public void attributeDecl(
        String eName,
        String aName,
        String type,
        String valueDefault,
        String value)
        throws SAXException
    {
        // The internal DTD subset is not serialized by the ToHTMLStream serializer
    }
1892 
    /**
     * Intentionally does nothing: element declarations belong to the
     * internal DTD subset, which this serializer does not write out.
     * Both arguments are ignored.
     *
     * @throws SAXException declared only; never thrown here.
     */
    public void elementDecl(String name, String model) throws SAXException
    {
        // The internal DTD subset is not serialized by the ToHTMLStream serializer
    }
    /**
     * Intentionally does nothing: internal entity declarations belong to
     * the internal DTD subset, which this serializer does not write out.
     * Both arguments are ignored.
     *
     * @throws SAXException declared only; never thrown here.
     */
    public void internalEntityDecl(String name, String value)
        throws SAXException
    {
        // The internal DTD subset is not serialized by the ToHTMLStream serializer
    }
    /**
     * Intentionally does nothing: external entity declarations are part
     * of the DTD, which this serializer does not write out.
     * All arguments are ignored.
     *
     * @throws SAXException declared only; never thrown here.
     */
    public void externalEntityDecl(
        String name,
        String publicId,
        String systemId)
        throws SAXException
    {
        // The internal DTD subset is not serialized by the ToHTMLStream serializer
    }
1919 
1920     /**
1921      * This method is used to add an attribute to the currently open element.
1922      * The caller has guaranted that this attribute is unique, which means that it
1923      * not been seen before and will not be seen again.
1924      *
1925      * @param name the qualified name of the attribute
1926      * @param value the value of the attribute which can contain only
1927      * ASCII printable characters characters in the range 32 to 127 inclusive.
1928      * @param flags the bit values of this integer give optimization information.
1929      */
addUniqueAttribute(String name, String value, int flags)1930     public void addUniqueAttribute(String name, String value, int flags)
1931         throws SAXException
1932     {
1933         try
1934         {
1935             final java.io.Writer writer = m_writer;
1936             if ((flags & NO_BAD_CHARS) > 0 && m_htmlcharInfo.onlyQuotAmpLtGt)
1937             {
1938                 // "flags" has indicated that the characters
1939                 // '>'  '<'   '&'  and '"' are not in the value and
1940                 // m_htmlcharInfo has recorded that there are no other
1941                 // entities in the range 0 to 127 so we write out the
1942                 // value directly
1943                 writer.write(' ');
1944                 writer.write(name);
1945                 writer.write("=\"");
1946                 writer.write(value);
1947                 writer.write('"');
1948             }
1949             else if (
1950                 (flags & HTML_ATTREMPTY) > 0
1951                     && (value.length() == 0 || value.equalsIgnoreCase(name)))
1952             {
1953                 writer.write(' ');
1954                 writer.write(name);
1955             }
1956             else
1957             {
1958                 writer.write(' ');
1959                 writer.write(name);
1960                 writer.write("=\"");
1961                 if ((flags & HTML_ATTRURL) > 0)
1962                 {
1963                     writeAttrURI(writer, value, m_specialEscapeURLs);
1964                 }
1965                 else
1966                 {
1967                     writeAttrString(writer, value, this.getEncoding());
1968                 }
1969                 writer.write('"');
1970             }
1971         } catch (IOException e) {
1972             throw new SAXException(e);
1973         }
1974     }
1975 
comment(char ch[], int start, int length)1976     public void comment(char ch[], int start, int length)
1977             throws SAXException
1978     {
1979         // The internal DTD subset is not serialized by the ToHTMLStream serializer
1980         if (m_inDTD)
1981             return;
1982 
1983         // Clean up some pending issues, just in case
1984         // this call is coming right after a startElement()
1985         // or we are in the middle of writing out CDATA
1986         // or if a startDocument() call was not received
1987         if (m_elemContext.m_startTagOpen)
1988         {
1989             closeStartTag();
1990             m_elemContext.m_startTagOpen = false;
1991         }
1992         else if (m_cdataTagOpen)
1993         {
1994             closeCDATA();
1995         }
1996         else if (m_needToCallStartDocument)
1997         {
1998             startDocumentInternal();
1999         }
2000 
2001         /*
2002          * Perhaps comments can be written out in HTML before the DOCTYPE.
2003          * In this case we might delete this call to writeOutDOCTYPE, and
2004          * it would be handled within the startElement() call.
2005          */
2006         if (m_needToOutputDocTypeDecl)
2007             outputDocTypeDecl("html"); // best guess for the upcoming element
2008 
2009         super.comment(ch, start, length);
2010     }
2011 
reset()2012     public boolean reset()
2013     {
2014         boolean ret = super.reset();
2015         if (!ret)
2016             return false;
2017         resetToHTMLStream();
2018         return true;
2019     }
2020 
resetToHTMLStream()2021     private void resetToHTMLStream()
2022     {
2023         // m_htmlcharInfo remains unchanged
2024         // m_htmlInfo = null;  // Don't reset
2025         m_inBlockElem = false;
2026         m_inDTD = false;
2027         m_omitMetaTag = false;
2028         m_specialEscapeURLs = true;
2029     }
2030 
2031     static class Trie
2032     {
2033         /**
2034          * A digital search trie for 7-bit ASCII text
2035          * The API is a subset of java.util.Hashtable
2036          * The key must be a 7-bit ASCII string
2037          * The value may be any Java Object
2038          * One can get an object stored in a trie from its key,
2039          * but the search is either case sensitive or case
2040          * insensitive to the characters in the key, and this
2041          * choice of sensitivity or insensitivity is made when
2042          * the Trie is created, before any objects are put in it.
2043          *
2044          * This class is a copy of the one in org.apache.xml.utils.
2045          * It exists to cut the serializers dependancy on that package.
2046          *
2047          * @xsl.usage internal
2048          */
2049 
2050         /** Size of the m_nextChar array.  */
2051         public static final int ALPHA_SIZE = 128;
2052 
2053         /** The root node of the tree.    */
2054         final Node m_Root;
2055 
2056         /** helper buffer to convert Strings to char arrays */
2057         private char[] m_charBuffer = new char[0];
2058 
2059         /** true if the search for an object is lower case only with the key */
2060         private final boolean m_lowerCaseOnly;
2061 
2062         /**
2063          * Construct the trie that has a case insensitive search.
2064          */
Trie()2065         public Trie()
2066         {
2067             m_Root = new Node();
2068             m_lowerCaseOnly = false;
2069         }
2070 
2071         /**
2072          * Construct the trie given the desired case sensitivity with the key.
2073          * @param lowerCaseOnly true if the search keys are to be loser case only,
2074          * not case insensitive.
2075          */
Trie(boolean lowerCaseOnly)2076         public Trie(boolean lowerCaseOnly)
2077         {
2078             m_Root = new Node();
2079             m_lowerCaseOnly = lowerCaseOnly;
2080         }
2081 
2082         /**
2083          * Put an object into the trie for lookup.
2084          *
2085          * @param key must be a 7-bit ASCII string
2086          * @param value any java object.
2087          *
2088          * @return The old object that matched key, or null.
2089          */
put(String key, Object value)2090         public Object put(String key, Object value)
2091         {
2092 
2093             final int len = key.length();
2094             if (len > m_charBuffer.length)
2095             {
2096                 // make the biggest buffer ever needed in get(String)
2097                 m_charBuffer = new char[len];
2098             }
2099 
2100             Node node = m_Root;
2101 
2102             for (int i = 0; i < len; i++)
2103             {
2104                 Node nextNode =
2105                     node.m_nextChar[Character.toLowerCase(key.charAt(i))];
2106 
2107                 if (nextNode != null)
2108                 {
2109                     node = nextNode;
2110                 }
2111                 else
2112                 {
2113                     for (; i < len; i++)
2114                     {
2115                         Node newNode = new Node();
2116                         if (m_lowerCaseOnly)
2117                         {
2118                             // put this value into the tree only with a lower case key
2119                             node.m_nextChar[Character.toLowerCase(
2120                                 key.charAt(i))] =
2121                                 newNode;
2122                         }
2123                         else
2124                         {
2125                             // put this value into the tree with a case insensitive key
2126                             node.m_nextChar[Character.toUpperCase(
2127                                 key.charAt(i))] =
2128                                 newNode;
2129                             node.m_nextChar[Character.toLowerCase(
2130                                 key.charAt(i))] =
2131                                 newNode;
2132                         }
2133                         node = newNode;
2134                     }
2135                     break;
2136                 }
2137             }
2138 
2139             Object ret = node.m_Value;
2140 
2141             node.m_Value = value;
2142 
2143             return ret;
2144         }
2145 
2146         /**
2147          * Get an object that matches the key.
2148          *
2149          * @param key must be a 7-bit ASCII string
2150          *
2151          * @return The object that matches the key, or null.
2152          */
get(final String key)2153         public Object get(final String key)
2154         {
2155 
2156             final int len = key.length();
2157 
2158             /* If the name is too long, we won't find it, this also keeps us
2159              * from overflowing m_charBuffer
2160              */
2161             if (m_charBuffer.length < len)
2162                 return null;
2163 
2164             Node node = m_Root;
2165             switch (len) // optimize the look up based on the number of chars
2166             {
2167                 // case 0 looks silly, but the generated bytecode runs
2168                 // faster for lookup of elements of length 2 with this in
2169                 // and a fair bit faster.  Don't know why.
2170                 case 0 :
2171                     {
2172                         return null;
2173                     }
2174 
2175                 case 1 :
2176                     {
2177                         final char ch = key.charAt(0);
2178                         if (ch < ALPHA_SIZE)
2179                         {
2180                             node = node.m_nextChar[ch];
2181                             if (node != null)
2182                                 return node.m_Value;
2183                         }
2184                         return null;
2185                     }
2186                     //                comment out case 2 because the default is faster
2187                     //                case 2 :
2188                     //                    {
2189                     //                        final char ch0 = key.charAt(0);
2190                     //                        final char ch1 = key.charAt(1);
2191                     //                        if (ch0 < ALPHA_SIZE && ch1 < ALPHA_SIZE)
2192                     //                        {
2193                     //                            node = node.m_nextChar[ch0];
2194                     //                            if (node != null)
2195                     //                            {
2196                     //
2197                     //                                if (ch1 < ALPHA_SIZE)
2198                     //                                {
2199                     //                                    node = node.m_nextChar[ch1];
2200                     //                                    if (node != null)
2201                     //                                        return node.m_Value;
2202                     //                                }
2203                     //                            }
2204                     //                        }
2205                     //                        return null;
2206                     //                   }
2207                 default :
2208                     {
2209                         for (int i = 0; i < len; i++)
2210                         {
2211                             // A thread-safe way to loop over the characters
2212                             final char ch = key.charAt(i);
2213                             if (ALPHA_SIZE <= ch)
2214                             {
2215                                 // the key is not 7-bit ASCII so we won't find it here
2216                                 return null;
2217                             }
2218 
2219                             node = node.m_nextChar[ch];
2220                             if (node == null)
2221                                 return null;
2222                         }
2223 
2224                         return node.m_Value;
2225                     }
2226             }
2227         }
2228 
2229         /**
2230          * The node representation for the trie.
2231          * @xsl.usage internal
2232          */
2233         private class Node
2234         {
2235 
2236             /**
2237              * Constructor, creates a Node[ALPHA_SIZE].
2238              */
Node()2239             Node()
2240             {
2241                 m_nextChar = new Node[ALPHA_SIZE];
2242                 m_Value = null;
2243             }
2244 
2245             /** The next nodes.   */
2246             final Node m_nextChar[];
2247 
2248             /** The value.   */
2249             Object m_Value;
2250         }
2251         /**
2252          * Construct the trie from another Trie.
2253          * Both the existing Trie and this new one share the same table for
2254          * lookup, and it is assumed that the table is fully populated and
2255          * not changing anymore.
2256          *
2257          * @param existingTrie the Trie that this one is a copy of.
2258          */
Trie(Trie existingTrie)2259         public Trie(Trie existingTrie)
2260         {
2261             // copy some fields from the existing Trie into this one.
2262             m_Root = existingTrie.m_Root;
2263             m_lowerCaseOnly = existingTrie.m_lowerCaseOnly;
2264 
2265             // get a buffer just big enough to hold the longest key in the table.
2266             int max = existingTrie.getLongestKeyLength();
2267             m_charBuffer = new char[max];
2268         }
2269 
2270         /**
2271          * Get an object that matches the key.
2272          * This method is faster than get(), but is not thread-safe.
2273          *
2274          * @param key must be a 7-bit ASCII string
2275          *
2276          * @return The object that matches the key, or null.
2277          */
get2(final String key)2278         public Object get2(final String key)
2279         {
2280 
2281             final int len = key.length();
2282 
2283             /* If the name is too long, we won't find it, this also keeps us
2284              * from overflowing m_charBuffer
2285              */
2286             if (m_charBuffer.length < len)
2287                 return null;
2288 
2289             Node node = m_Root;
2290             switch (len) // optimize the look up based on the number of chars
2291             {
2292                 // case 0 looks silly, but the generated bytecode runs
2293                 // faster for lookup of elements of length 2 with this in
2294                 // and a fair bit faster.  Don't know why.
2295                 case 0 :
2296                     {
2297                         return null;
2298                     }
2299 
2300                 case 1 :
2301                     {
2302                         final char ch = key.charAt(0);
2303                         if (ch < ALPHA_SIZE)
2304                         {
2305                             node = node.m_nextChar[ch];
2306                             if (node != null)
2307                                 return node.m_Value;
2308                         }
2309                         return null;
2310                     }
2311                 default :
2312                     {
2313                         /* Copy string into array. This is not thread-safe because
2314                          * it modifies the contents of m_charBuffer. If multiple
2315                          * threads were to use this Trie they all would be
2316                          * using this same array (not good). So this
2317                          * method is not thread-safe, but it is faster because
2318                          * converting to a char[] and looping over elements of
2319                          * the array is faster than a String's charAt(i).
2320                          */
2321                         key.getChars(0, len, m_charBuffer, 0);
2322 
2323                         for (int i = 0; i < len; i++)
2324                         {
2325                             final char ch = m_charBuffer[i];
2326                             if (ALPHA_SIZE <= ch)
2327                             {
2328                                 // the key is not 7-bit ASCII so we won't find it here
2329                                 return null;
2330                             }
2331 
2332                             node = node.m_nextChar[ch];
2333                             if (node == null)
2334                                 return null;
2335                         }
2336 
2337                         return node.m_Value;
2338                     }
2339             }
2340         }
2341 
2342         /**
2343          * Get the length of the longest key used in the table.
2344          */
getLongestKeyLength()2345         public int getLongestKeyLength()
2346         {
2347             return m_charBuffer.length;
2348         }
2349     }
2350 }
2351