1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the  "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *     http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 /*
19  * $Id$
20  */
21 
22 package org.apache.xml.utils;
23 
/**
 * This class defines the basic XML character properties. The data
 * in this class can be used to verify that a character is a valid
 * XML character or if the character is a space, name start, or name
 * character.
 * <p>
 * The table of character properties (<code>CHARS</code>) is private to
 * this class; the properties are queried through the static convenience
 * methods, each of which tests the table entry of a character against
 * one of the public mask constants (e.g. <code>MASK_VALID</code>).
 * The table only covers the code points 0x0000 to 0xFFFF. Code points
 * outside of that range (including negative values) are never reported
 * as having a table property; only <code>isValid</code> and
 * <code>isContent</code> additionally accept the supplemental range
 * 0x10000 to 0x10FFFF.
 *
 * @author Glenn Marcy, IBM
 * @author Andy Clark, IBM
 * @author Eric Ye, IBM
 * @author Arnaud  Le Hors, IBM
 * @author Rahul Srivastava, Sun Microsystems Inc.
 *
 * @version $Id: XMLChar.java,v 1.7 2002/01/29 01:15:18 lehors Exp $
 */
public class XMLChar {

    //
    // Constants
    //

    /** Character flags, indexed by UTF-16 code unit (0x0000 - 0xFFFF). */
    private static final byte[] CHARS = new byte[1 << 16];

    /** Valid character mask. */
    public static final int MASK_VALID = 0x01;

    /** Space character mask. */
    public static final int MASK_SPACE = 0x02;

    /** Name start character mask. */
    public static final int MASK_NAME_START = 0x04;

    /** Name character mask. */
    public static final int MASK_NAME = 0x08;

    /** Pubid character mask. */
    public static final int MASK_PUBID = 0x10;

    /**
     * Content character mask. Special characters are those that can
     * be considered the start of markup, such as '&lt;' and '&amp;'.
     * The various newline characters are considered special as well.
     * All other valid XML characters can be considered content.
     * <p>
     * This is an optimization for the inner loop of character scanning.
     */
    public static final int MASK_CONTENT = 0x20;

    /** NCName start character mask. */
    public static final int MASK_NCNAME_START = 0x40;

    /** NCName character mask. */
    public static final int MASK_NCNAME = 0x80;

    //
    // Static initialization
    //

    static {

        //
        // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] |
        //              [#xE000-#xFFFD] | [#x10000-#x10FFFF]
        //

        int[] charRange = {
            0x0009, 0x000A, 0x000D, 0x000D, 0x0020, 0xD7FF, 0xE000, 0xFFFD,
        };

        //
        // [3] S ::= (#x20 | #x9 | #xD | #xA)+
        //

        int[] spaceChar = {
            0x0020, 0x0009, 0x000D, 0x000A,
        };

        //
        // [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
        //                  CombiningChar | Extender
        //

        int[] nameChar = {
            0x002D, 0x002E, // '-' and '.'
        };

        //
        // [5] Name ::= (Letter | '_' | ':') (NameChar)*
        //

        int[] nameStartChar = {
            0x003A, 0x005F, // ':' and '_'
        };

        //
        // [13] PubidChar ::= #x20 | 0xD | 0xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
        //

        int[] pubidChar = {
            0x000A, 0x000D, 0x0020, 0x0021, 0x0023, 0x0024, 0x0025, 0x003D,
            0x005F
        };

        int[] pubidRange = {
            0x0027, 0x003B, 0x003F, 0x005A, 0x0061, 0x007A
        };

        //
        // [84] Letter ::= BaseChar | Ideographic
        //

        int[] letterRange = {
            // BaseChar
            0x0041, 0x005A, 0x0061, 0x007A, 0x00C0, 0x00D6, 0x00D8, 0x00F6,
            0x00F8, 0x0131, 0x0134, 0x013E, 0x0141, 0x0148, 0x014A, 0x017E,
            0x0180, 0x01C3, 0x01CD, 0x01F0, 0x01F4, 0x01F5, 0x01FA, 0x0217,
            0x0250, 0x02A8, 0x02BB, 0x02C1, 0x0388, 0x038A, 0x038E, 0x03A1,
            0x03A3, 0x03CE, 0x03D0, 0x03D6, 0x03E2, 0x03F3, 0x0401, 0x040C,
            0x040E, 0x044F, 0x0451, 0x045C, 0x045E, 0x0481, 0x0490, 0x04C4,
            0x04C7, 0x04C8, 0x04CB, 0x04CC, 0x04D0, 0x04EB, 0x04EE, 0x04F5,
            0x04F8, 0x04F9, 0x0531, 0x0556, 0x0561, 0x0586, 0x05D0, 0x05EA,
            0x05F0, 0x05F2, 0x0621, 0x063A, 0x0641, 0x064A, 0x0671, 0x06B7,
            0x06BA, 0x06BE, 0x06C0, 0x06CE, 0x06D0, 0x06D3, 0x06E5, 0x06E6,
            0x0905, 0x0939, 0x0958, 0x0961, 0x0985, 0x098C, 0x098F, 0x0990,
            0x0993, 0x09A8, 0x09AA, 0x09B0, 0x09B6, 0x09B9, 0x09DC, 0x09DD,
            0x09DF, 0x09E1, 0x09F0, 0x09F1, 0x0A05, 0x0A0A, 0x0A0F, 0x0A10,
            0x0A13, 0x0A28, 0x0A2A, 0x0A30, 0x0A32, 0x0A33, 0x0A35, 0x0A36,
            0x0A38, 0x0A39, 0x0A59, 0x0A5C, 0x0A72, 0x0A74, 0x0A85, 0x0A8B,
            0x0A8F, 0x0A91, 0x0A93, 0x0AA8, 0x0AAA, 0x0AB0, 0x0AB2, 0x0AB3,
            0x0AB5, 0x0AB9, 0x0B05, 0x0B0C, 0x0B0F, 0x0B10, 0x0B13, 0x0B28,
            0x0B2A, 0x0B30, 0x0B32, 0x0B33, 0x0B36, 0x0B39, 0x0B5C, 0x0B5D,
            0x0B5F, 0x0B61, 0x0B85, 0x0B8A, 0x0B8E, 0x0B90, 0x0B92, 0x0B95,
            0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, 0x0BAA,
            0x0BAE, 0x0BB5, 0x0BB7, 0x0BB9, 0x0C05, 0x0C0C, 0x0C0E, 0x0C10,
            0x0C12, 0x0C28, 0x0C2A, 0x0C33, 0x0C35, 0x0C39, 0x0C60, 0x0C61,
            0x0C85, 0x0C8C, 0x0C8E, 0x0C90, 0x0C92, 0x0CA8, 0x0CAA, 0x0CB3,
            0x0CB5, 0x0CB9, 0x0CE0, 0x0CE1, 0x0D05, 0x0D0C, 0x0D0E, 0x0D10,
            0x0D12, 0x0D28, 0x0D2A, 0x0D39, 0x0D60, 0x0D61, 0x0E01, 0x0E2E,
            0x0E32, 0x0E33, 0x0E40, 0x0E45, 0x0E81, 0x0E82, 0x0E87, 0x0E88,
            0x0E94, 0x0E97, 0x0E99, 0x0E9F, 0x0EA1, 0x0EA3, 0x0EAA, 0x0EAB,
            0x0EAD, 0x0EAE, 0x0EB2, 0x0EB3, 0x0EC0, 0x0EC4, 0x0F40, 0x0F47,
            0x0F49, 0x0F69, 0x10A0, 0x10C5, 0x10D0, 0x10F6, 0x1102, 0x1103,
            0x1105, 0x1107, 0x110B, 0x110C, 0x110E, 0x1112, 0x1154, 0x1155,
            0x115F, 0x1161, 0x116D, 0x116E, 0x1172, 0x1173, 0x11AE, 0x11AF,
            0x11B7, 0x11B8, 0x11BC, 0x11C2, 0x1E00, 0x1E9B, 0x1EA0, 0x1EF9,
            0x1F00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, 0x1F48, 0x1F4D,
            0x1F50, 0x1F57, 0x1F5F, 0x1F7D, 0x1F80, 0x1FB4, 0x1FB6, 0x1FBC,
            0x1FC2, 0x1FC4, 0x1FC6, 0x1FCC, 0x1FD0, 0x1FD3, 0x1FD6, 0x1FDB,
            0x1FE0, 0x1FEC, 0x1FF2, 0x1FF4, 0x1FF6, 0x1FFC, 0x212A, 0x212B,
            0x2180, 0x2182, 0x3041, 0x3094, 0x30A1, 0x30FA, 0x3105, 0x312C,
            0xAC00, 0xD7A3,
            // Ideographic
            0x3021, 0x3029, 0x4E00, 0x9FA5,
        };
        int[] letterChar = {
            // BaseChar
            0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559, 0x06D5,
            0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0, 0x0B3D, 0x0B9C,
            0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EB0,
            0x0EBD, 0x1100, 0x1109, 0x113C, 0x113E, 0x1140, 0x114C, 0x114E,
            0x1150, 0x1159, 0x1163, 0x1165, 0x1167, 0x1169, 0x1175, 0x119E,
            0x11A8, 0x11AB, 0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B,
            0x1F5D, 0x1FBE, 0x2126, 0x212E,
            // Ideographic
            0x3007,
        };

        //
        // [87] CombiningChar ::= ...
        //

        int[] combiningCharRange = {
            0x0300, 0x0345, 0x0360, 0x0361, 0x0483, 0x0486, 0x0591, 0x05A1,
            0x05A3, 0x05B9, 0x05BB, 0x05BD, 0x05C1, 0x05C2, 0x064B, 0x0652,
            0x06D6, 0x06DC, 0x06DD, 0x06DF, 0x06E0, 0x06E4, 0x06E7, 0x06E8,
            0x06EA, 0x06ED, 0x0901, 0x0903, 0x093E, 0x094C, 0x0951, 0x0954,
            0x0962, 0x0963, 0x0981, 0x0983, 0x09C0, 0x09C4, 0x09C7, 0x09C8,
            0x09CB, 0x09CD, 0x09E2, 0x09E3, 0x0A40, 0x0A42, 0x0A47, 0x0A48,
            0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A83, 0x0ABE, 0x0AC5,
            0x0AC7, 0x0AC9, 0x0ACB, 0x0ACD, 0x0B01, 0x0B03, 0x0B3E, 0x0B43,
            0x0B47, 0x0B48, 0x0B4B, 0x0B4D, 0x0B56, 0x0B57, 0x0B82, 0x0B83,
            0x0BBE, 0x0BC2, 0x0BC6, 0x0BC8, 0x0BCA, 0x0BCD, 0x0C01, 0x0C03,
            0x0C3E, 0x0C44, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56,
            0x0C82, 0x0C83, 0x0CBE, 0x0CC4, 0x0CC6, 0x0CC8, 0x0CCA, 0x0CCD,
            0x0CD5, 0x0CD6, 0x0D02, 0x0D03, 0x0D3E, 0x0D43, 0x0D46, 0x0D48,
            0x0D4A, 0x0D4D, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB4, 0x0EB9,
            0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F71, 0x0F84,
            0x0F86, 0x0F8B, 0x0F90, 0x0F95, 0x0F99, 0x0FAD, 0x0FB1, 0x0FB7,
            0x20D0, 0x20DC, 0x302A, 0x302F,
        };

        int[] combiningCharChar = {
            0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE, 0x09BF,
            0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC, 0x0B3C, 0x0BD7,
            0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F,
            0x0F97, 0x0FB9, 0x20E1, 0x3099, 0x309A,
        };

        //
        // [88] Digit ::= ...
        //

        int[] digitRange = {
            0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
            0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
            0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
            0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29,
        };

        //
        // [89] Extender ::= ...
        //

        int[] extenderRange = {
            0x3031, 0x3035, 0x309D, 0x309E, 0x30FC, 0x30FE,
        };

        int[] extenderChar = {
            0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005,
        };

        //
        // SpecialChar ::= '<', '&', '\n', '\r', ']'
        //

        int[] specialChar = {
            '<', '&', '\n', '\r', ']',
        };

        //
        // Initialize
        //

        // flag combinations shared by several tables
        final int nameStartFlags = MASK_NAME_START | MASK_NAME |
                                   MASK_NCNAME_START | MASK_NCNAME;
        final int nameFlags = MASK_NAME | MASK_NCNAME;

        // set valid characters; every valid character starts out as content
        markRanges(charRange, MASK_VALID | MASK_CONTENT);

        // remove special characters (must run after the content flag is set)
        for (int i = 0; i < specialChar.length; i++) {
            CHARS[specialChar[i]] &= ~MASK_CONTENT;
        }

        // set space characters
        markChars(spaceChar, MASK_SPACE);

        // set name start characters
        markChars(nameStartChar, nameStartFlags);
        markRanges(letterRange, nameStartFlags);
        markChars(letterChar, nameStartFlags);

        // set name characters
        markChars(nameChar, nameFlags);
        markRanges(digitRange, nameFlags);
        markRanges(combiningCharRange, nameFlags);
        markChars(combiningCharChar, nameFlags);
        markRanges(extenderRange, nameFlags);
        markChars(extenderChar, nameFlags);

        // remove ':' from allowable MASK_NCNAME_START and MASK_NCNAME chars
        // (must run after the name start characters have been set)
        CHARS[':'] &= ~(MASK_NCNAME_START | MASK_NCNAME);

        // set Pubid characters
        markChars(pubidChar, MASK_PUBID);
        markRanges(pubidRange, MASK_PUBID);

    } // <clinit>()

    //
    // Private static methods
    //

    /**
     * Sets the given flags on every character of every inclusive range.
     *
     * @param ranges Pairs of (first, last) characters; the length is even.
     * @param flags  The mask bits to OR into the table entries.
     */
    private static void markRanges(int[] ranges, int flags) {
        for (int i = 0; i < ranges.length; i += 2) {
            for (int c = ranges[i]; c <= ranges[i + 1]; c++) {
                CHARS[c] |= flags;
            }
        }
    } // markRanges(int[],int)

    /**
     * Sets the given flags on each of the listed characters.
     *
     * @param chars The characters to flag.
     * @param flags The mask bits to OR into the table entries.
     */
    private static void markChars(int[] chars, int flags) {
        for (int i = 0; i < chars.length; i++) {
            CHARS[chars[i]] |= flags;
        }
    } // markChars(int[],int)

    /**
     * Returns true if the character lies inside the property table and
     * has at least one of the given mask bits set. Values outside of
     * 0x0000 - 0xFFFF (negative values included) yield false rather than
     * an <code>ArrayIndexOutOfBoundsException</code>.
     *
     * @param c    The character to check.
     * @param mask The mask bit(s) to test.
     */
    private static boolean hasFlag(int c, int mask) {
        return c >= 0 && c < 0x10000 && (CHARS[c] & mask) != 0;
    } // hasFlag(int,int):boolean

    /**
     * Returns true if the character is an ASCII letter.
     *
     * @param c The character to check.
     */
    private static boolean isAsciiLetter(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    } // isAsciiLetter(char):boolean

    /**
     * Returns true if the character may appear inside an encoding name:
     * an ASCII letter, an ASCII digit, '.', '_' or '-'.
     *
     * @param c The character to check.
     */
    private static boolean isEncodingNameChar(char c) {
        return isAsciiLetter(c) || (c >= '0' && c <= '9') ||
               c == '.' || c == '_' || c == '-';
    } // isEncodingNameChar(char):boolean

    //
    // Public static methods
    //

    /**
     * Returns true if the specified character is a supplemental character.
     *
     * @param c The character to check.
     */
    public static boolean isSupplemental(int c) {
        return (c >= 0x10000 && c <= 0x10FFFF);
    }

    /**
     * Returns the supplemental character corresponding to the given
     * surrogates. The arguments are not checked to be surrogates.
     *
     * @param h The high surrogate.
     * @param l The low surrogate.
     */
    public static int supplemental(char h, char l) {
        return (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000;
    }

    /**
     * Returns the high surrogate of a supplemental character
     *
     * @param c The supplemental character to "split".
     */
    public static char highSurrogate(int c) {
        return (char) (((c - 0x00010000) >> 10) + 0xD800);
    }

    /**
     * Returns the low surrogate of a supplemental character
     *
     * @param c The supplemental character to "split".
     */
    public static char lowSurrogate(int c) {
        return (char) (((c - 0x00010000) & 0x3FF) + 0xDC00);
    }

    /**
     * Returns whether the given character is a high surrogate
     *
     * @param c The character to check.
     */
    public static boolean isHighSurrogate(int c) {
        return (0xD800 <= c && c <= 0xDBFF);
    }

    /**
     * Returns whether the given character is a low surrogate
     *
     * @param c The character to check.
     */
    public static boolean isLowSurrogate(int c) {
        return (0xDC00 <= c && c <= 0xDFFF);
    }


    /**
     * Returns true if the specified character is valid. This method
     * also checks the surrogate character range from 0x10000 to 0x10FFFF,
     * which is not covered by the character property table. Negative
     * values and values above 0x10FFFF are reported as not valid.
     *
     * @param c The character to check.
     */
    public static boolean isValid(int c) {
        return hasFlag(c, MASK_VALID) || isSupplemental(c);
    } // isValid(int):boolean

    /**
     * Returns true if the specified character is invalid.
     *
     * @param c The character to check.
     */
    public static boolean isInvalid(int c) {
        return !isValid(c);
    } // isInvalid(int):boolean

    /**
     * Returns true if the specified character can be considered content.
     * Characters in the supplemental range 0x10000 to 0x10FFFF are
     * always considered content.
     *
     * @param c The character to check.
     */
    public static boolean isContent(int c) {
        return hasFlag(c, MASK_CONTENT) || isSupplemental(c);
    } // isContent(int):boolean

    /**
     * Returns true if the specified character can be considered markup.
     * Markup characters include '&lt;', '&amp;', and '%'.
     *
     * @param c The character to check.
     */
    public static boolean isMarkup(int c) {
        return c == '<' || c == '&' || c == '%';
    } // isMarkup(int):boolean

    /**
     * Returns true if the specified character is a space character
     * as defined by production [3] in the XML 1.0 specification.
     *
     * @param c The character to check.
     */
    public static boolean isSpace(int c) {
        return hasFlag(c, MASK_SPACE);
    } // isSpace(int):boolean

    /**
     * Returns true if the specified character is a valid name start
     * character as defined by production [5] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isNameStart(int c) {
        return hasFlag(c, MASK_NAME_START);
    } // isNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid name
     * character as defined by production [4] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isName(int c) {
        return hasFlag(c, MASK_NAME);
    } // isName(int):boolean

    /**
     * Returns true if the specified character is a valid NCName start
     * character as defined by production [4] in Namespaces in XML
     * recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isNCNameStart(int c) {
        return hasFlag(c, MASK_NCNAME_START);
    } // isNCNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid NCName
     * character as defined by production [5] in Namespaces in XML
     * recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isNCName(int c) {
        return hasFlag(c, MASK_NCNAME);
    } // isNCName(int):boolean

    /**
     * Returns true if the specified character is a valid Pubid
     * character as defined by production [13] in the XML 1.0
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isPubid(int c) {
        return hasFlag(c, MASK_PUBID);
    } // isPubid(int):boolean

    /*
     * [5] Name ::= (Letter | '_' | ':') (NameChar)*
     */
    /**
     * Check to see if a string is a valid Name according to [5]
     * in the XML 1.0 Recommendation
     *
     * @param name string to check; <code>null</code> is not a valid Name
     * @return true if name is a valid Name
     */
    public static boolean isValidName(String name) {
        if (name == null || name.length() == 0) {
            return false;
        }
        if (!isNameStart(name.charAt(0))) {
            return false;
        }
        for (int i = 1; i < name.length(); i++) {
            if (!isName(name.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidName(String):boolean


    /*
     * from the namespace rec
     * [4] NCName ::= (Letter | '_') (NCNameChar)*
     */
    /**
     * Check to see if a string is a valid NCName according to [4]
     * from the XML Namespaces 1.0 Recommendation
     *
     * @param ncName string to check; <code>null</code> is not a valid NCName
     * @return true if name is a valid NCName
     */
    public static boolean isValidNCName(String ncName) {
        if (ncName == null || ncName.length() == 0) {
            return false;
        }
        if (!isNCNameStart(ncName.charAt(0))) {
            return false;
        }
        for (int i = 1; i < ncName.length(); i++) {
            if (!isNCName(ncName.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidNCName(String):boolean

    /*
     * [7] Nmtoken ::= (NameChar)+
     */
    /**
     * Check to see if a string is a valid Nmtoken according to [7]
     * in the XML 1.0 Recommendation
     *
     * @param nmtoken string to check; <code>null</code> is not a valid Nmtoken
     * @return true if nmtoken is a valid Nmtoken
     */
    public static boolean isValidNmtoken(String nmtoken) {
        if (nmtoken == null || nmtoken.length() == 0) {
            return false;
        }
        for (int i = 0; i < nmtoken.length(); i++) {
            if (!isName(nmtoken.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidNmtoken(String):boolean





    // encodings

    /**
     * Returns true if the encoding name is a valid IANA encoding.
     * This method does not verify that there is a decoder available
     * for this encoding, only that the characters are valid for an
     * IANA encoding name: an ASCII letter followed by any number of
     * ASCII letters, ASCII digits, '.', '_' or '-'.
     *
     * @param ianaEncoding The IANA encoding name; <code>null</code> and
     *                     the empty string are not valid.
     */
    public static boolean isValidIANAEncoding(String ianaEncoding) {
        if (ianaEncoding == null || ianaEncoding.length() == 0) {
            return false;
        }
        if (!isAsciiLetter(ianaEncoding.charAt(0))) {
            return false;
        }
        for (int i = 1; i < ianaEncoding.length(); i++) {
            if (!isEncodingNameChar(ianaEncoding.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidIANAEncoding(String):boolean

    /**
     * Returns true if the encoding name is a valid Java encoding.
     * This method does not verify that there is a decoder available
     * for this encoding, only that the characters are valid for a
     * Java encoding name: one or more ASCII letters, ASCII digits,
     * '.', '_' or '-'. Unlike an IANA name, a Java encoding name may
     * start with any of these characters (e.g. a digit).
     *
     * @param javaEncoding The Java encoding name; <code>null</code> and
     *                     the empty string are not valid.
     */
    public static boolean isValidJavaEncoding(String javaEncoding) {
        if (javaEncoding == null || javaEncoding.length() == 0) {
            return false;
        }
        // start at index 0: the first character must be checked as well
        for (int i = 0; i < javaEncoding.length(); i++) {
            if (!isEncodingNameChar(javaEncoding.charAt(i))) {
                return false;
            }
        }
        return true;
    } // isValidJavaEncoding(String):boolean

    /**
     * Simple check to determine if qname is legal. If it returns false
     * then <code>str</code> is illegal; if it returns true then
     * <code>str</code> is legal. A legal qname is either a single NCName
     * or two NCNames (prefix and local part) separated by one ':'.
     *
     * @param str string to check; <code>null</code> is not a legal qname
     * @return true if str is a legal qname
     */
    public static boolean isValidQName(String str) {
        if (str == null) {
            return false;
        }

        final int colon = str.indexOf(':');

        // a leading or trailing colon leaves the prefix or local part empty;
        // for the empty string colon == -1 == length - 1, so it is rejected too
        if (colon == 0 || colon == str.length() - 1) {
            return false;
        }

        if (colon > 0) {
            final String prefix = str.substring(0, colon);
            final String localPart = str.substring(colon + 1);
            // any further ':' in the local part fails the NCName check
            return isValidNCName(prefix) && isValidNCName(localPart);
        }
        return isValidNCName(str);
    } // isValidQName(String):boolean

} // class XMLChar
669