/*
 * Copyright (c) 1995, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
25 
package java.io;

import java.util.Arrays;
29 
/**
 * The {@code StreamTokenizer} class takes an input stream and
 * parses it into "tokens", allowing the tokens to be
 * read one at a time. The parsing process is controlled by a table
 * and a number of flags that can be set to various states. The
 * stream tokenizer can recognize identifiers, numbers, quoted
 * strings, and various comment styles.
 * <p>
 * Each byte read from the input stream is regarded as a character
 * in the range {@code '\u005Cu0000'} through {@code '\u005Cu00FF'}.
 * The character value is used to look up five possible attributes of
 * the character: <i>white space</i>, <i>alphabetic</i>,
 * <i>numeric</i>, <i>string quote</i>, and <i>comment character</i>.
 * Each character can have zero or more of these attributes.
 * <p>
 * In addition, an instance has four flags. These flags indicate:
 * <ul>
 * <li>Whether line terminators are to be returned as tokens or treated
 *     as white space that merely separates tokens.
 * <li>Whether C-style comments are to be recognized and skipped.
 * <li>Whether C++-style comments are to be recognized and skipped.
 * <li>Whether the characters of identifiers are converted to lowercase.
 * </ul>
 * <p>
 * A typical application first constructs an instance of this class,
 * sets up the syntax tables, and then repeatedly loops calling the
 * {@code nextToken} method in each iteration of the loop until
 * it returns the value {@code TT_EOF}.
 *
 * @author  James Gosling
 * @see     java.io.StreamTokenizer#nextToken()
 * @see     java.io.StreamTokenizer#TT_EOF
 * @since   1.0
 */
64 
public class StreamTokenizer {

    /* Only one of these will be non-null */
    private Reader reader = null;
    private InputStream input = null;

    /*
     * Scratch buffer in which the characters of the current word or
     * quoted-string token are accumulated. nextToken doubles its size
     * whenever it fills up.
     */
    private char buf[] = new char[20];

    /**
     * The next character to be considered by the nextToken method.  May also
     * be NEED_CHAR to indicate that a new character should be read, or SKIP_LF
     * to indicate that a new character should be read and, if it is a '\n'
     * character, it should be discarded and a second new character should be
     * read.
     */
    private int peekc = NEED_CHAR;

    private static final int NEED_CHAR = Integer.MAX_VALUE;
    private static final int SKIP_LF = Integer.MAX_VALUE - 1;

    /* Set by pushBack(); makes the next nextToken() call return ttype again. */
    private boolean pushedBack;
    /* Set by lowerCaseMode(); when true, word tokens are lowercased. */
    private boolean forceLower;
    /** The line number of the last token read */
    private int LINENO = 1;

    private boolean eolIsSignificantP = false;
    private boolean slashSlashCommentsP = false;
    private boolean slashStarCommentsP = false;

    /*
     * Syntax table: one entry per character value 0..255, each entry a bit
     * set built from the CT_* constants below.
     */
    private byte ctype[] = new byte[256];
    private static final byte CT_WHITESPACE = 1;
    private static final byte CT_DIGIT = 2;
    private static final byte CT_ALPHA = 4;
    private static final byte CT_QUOTE = 8;
    private static final byte CT_COMMENT = 16;

    /**
     * After a call to the {@code nextToken} method, this field
     * contains the type of the token just read. For a single character
     * token, its value is the single character, converted to an integer.
     * For a quoted string token, its value is the quote character.
     * Otherwise, its value is one of the following:
     * <ul>
     * <li>{@code TT_WORD} indicates that the token is a word.
     * <li>{@code TT_NUMBER} indicates that the token is a number.
     * <li>{@code TT_EOL} indicates that the end of line has been read.
     *     The field can only have this value if the
     *     {@code eolIsSignificant} method has been called with the
     *     argument {@code true}.
     * <li>{@code TT_EOF} indicates that the end of the input stream
     *     has been reached.
     * </ul>
     * <p>
     * The initial value of this field is -4.
     *
     * @see     java.io.StreamTokenizer#eolIsSignificant(boolean)
     * @see     java.io.StreamTokenizer#nextToken()
     * @see     java.io.StreamTokenizer#quoteChar(int)
     * @see     java.io.StreamTokenizer#TT_EOF
     * @see     java.io.StreamTokenizer#TT_EOL
     * @see     java.io.StreamTokenizer#TT_NUMBER
     * @see     java.io.StreamTokenizer#TT_WORD
     */
    public int ttype = TT_NOTHING;

    /**
     * A constant indicating that the end of the stream has been read.
     */
    public static final int TT_EOF = -1;

    /**
     * A constant indicating that the end of the line has been read.
     */
    public static final int TT_EOL = '\n';

    /**
     * A constant indicating that a number token has been read.
     */
    public static final int TT_NUMBER = -2;

    /**
     * A constant indicating that a word token has been read.
     */
    public static final int TT_WORD = -3;

    /* A constant indicating that no token has been read, used for
     * initializing ttype.  FIXME This could be made public and
     * made available as the part of the API in a future release.
     */
    private static final int TT_NOTHING = -4;

    /**
     * If the current token is a word token, this field contains a
     * string giving the characters of the word token. When the current
     * token is a quoted string token, this field contains the body of
     * the string.
     * <p>
     * The current token is a word when the value of the
     * {@code ttype} field is {@code TT_WORD}. The current token is
     * a quoted string token when the value of the {@code ttype} field is
     * a quote character.
     * <p>
     * The initial value of this field is null.
     *
     * @see     java.io.StreamTokenizer#quoteChar(int)
     * @see     java.io.StreamTokenizer#TT_WORD
     * @see     java.io.StreamTokenizer#ttype
     */
    public String sval;

    /**
     * If the current token is a number, this field contains the value
     * of that number. The current token is a number when the value of
     * the {@code ttype} field is {@code TT_NUMBER}.
     * <p>
     * The initial value of this field is 0.0.
     *
     * @see     java.io.StreamTokenizer#TT_NUMBER
     * @see     java.io.StreamTokenizer#ttype
     */
    public double nval;

    /** Private constructor that initializes everything except the streams. */
    private StreamTokenizer() {
        wordChars('a', 'z');
        wordChars('A', 'Z');
        wordChars(128 + 32, 255);
        whitespaceChars(0, ' ');
        commentChar('/');
        quoteChar('"');
        quoteChar('\'');
        parseNumbers();
    }

    /**
     * Creates a stream tokenizer that parses the specified input
     * stream. The stream tokenizer is initialized to the following
     * default state:
     * <ul>
     * <li>All byte values {@code 'A'} through {@code 'Z'},
     *     {@code 'a'} through {@code 'z'}, and
     *     {@code '\u005Cu00A0'} through {@code '\u005Cu00FF'} are
     *     considered to be alphabetic.
     * <li>All byte values {@code '\u005Cu0000'} through
     *     {@code '\u005Cu0020'} are considered to be white space.
     * <li>{@code '/'} is a comment character.
     * <li>Single quote {@code '\u005C''} and double quote {@code '"'}
     *     are string quote characters.
     * <li>Numbers are parsed.
     * <li>Ends of lines are treated as white space, not as separate tokens.
     * <li>C-style and C++-style comments are not recognized.
     * </ul>
     *
     * @deprecated As of JDK version 1.1, the preferred way to tokenize an
     * input stream is to convert it into a character stream, for example:
     * <blockquote><pre>
     *   Reader r = new BufferedReader(new InputStreamReader(is));
     *   StreamTokenizer st = new StreamTokenizer(r);
     * </pre></blockquote>
     *
     * @param      is        an input stream.
     * @throws     NullPointerException if {@code is} is {@code null}.
     * @see        java.io.BufferedReader
     * @see        java.io.InputStreamReader
     * @see        java.io.StreamTokenizer#StreamTokenizer(java.io.Reader)
     */
    @Deprecated
    public StreamTokenizer(InputStream is) {
        this();
        if (is == null) {
            throw new NullPointerException();
        }
        input = is;
    }

    /**
     * Create a tokenizer that parses the given character stream.
     *
     * @param r  a Reader object providing the input stream.
     * @throws  NullPointerException if {@code r} is {@code null}.
     * @since   1.1
     */
    public StreamTokenizer(Reader r) {
        this();
        if (r == null) {
            throw new NullPointerException();
        }
        reader = r;
    }

    /**
     * Resets this tokenizer's syntax table so that all characters are
     * "ordinary." See the {@code ordinaryChar} method
     * for more information on a character being ordinary.
     *
     * @see     java.io.StreamTokenizer#ordinaryChar(int)
     */
    public void resetSyntax() {
        Arrays.fill(ctype, (byte) 0);
    }

    /**
     * Specifies that all characters <i>c</i> in the range
     * <code>low&nbsp;&lt;=&nbsp;<i>c</i>&nbsp;&lt;=&nbsp;high</code>
     * are word constituents. A word token consists of a word constituent
     * followed by zero or more word constituents or number constituents.
     * <p>
     * The range is clipped to the bounds of the syntax table; the
     * attribute is added to whatever attributes the characters already have.
     *
     * @param   low   the low end of the range.
     * @param   hi    the high end of the range.
     */
    public void wordChars(int low, int hi) {
        if (low < 0)
            low = 0;
        if (hi >= ctype.length)
            hi = ctype.length - 1;
        while (low <= hi)
            ctype[low++] |= CT_ALPHA;
    }

    /**
     * Specifies that all characters <i>c</i> in the range
     * <code>low&nbsp;&lt;=&nbsp;<i>c</i>&nbsp;&lt;=&nbsp;high</code>
     * are white space characters. White space characters serve only to
     * separate tokens in the input stream.
     *
     * <p>Any other attribute settings for the characters in the specified
     * range are cleared.
     *
     * @param   low   the low end of the range.
     * @param   hi    the high end of the range.
     */
    public void whitespaceChars(int low, int hi) {
        if (low < 0)
            low = 0;
        if (hi >= ctype.length)
            hi = ctype.length - 1;
        while (low <= hi)
            ctype[low++] = CT_WHITESPACE;
    }

    /**
     * Specifies that all characters <i>c</i> in the range
     * <code>low&nbsp;&lt;=&nbsp;<i>c</i>&nbsp;&lt;=&nbsp;high</code>
     * are "ordinary" in this tokenizer. See the
     * {@code ordinaryChar} method for more information on a
     * character being ordinary.
     *
     * @param   low   the low end of the range.
     * @param   hi    the high end of the range.
     * @see     java.io.StreamTokenizer#ordinaryChar(int)
     */
    public void ordinaryChars(int low, int hi) {
        if (low < 0)
            low = 0;
        if (hi >= ctype.length)
            hi = ctype.length - 1;
        while (low <= hi)
            ctype[low++] = 0;
    }

    /**
     * Specifies that the character argument is "ordinary"
     * in this tokenizer. It removes any special significance the
     * character has as a comment character, word component, string
     * delimiter, white space, or number character. When such a character
     * is encountered by the parser, the parser treats it as a
     * single-character token and sets {@code ttype} field to the
     * character value.
     *
     * <p>Making a line terminator character "ordinary" may interfere
     * with the ability of a {@code StreamTokenizer} to count
     * lines. The {@code lineno} method may no longer reflect
     * the presence of such terminator characters in its line count.
     *
     * @param   ch   the character.
     * @see     java.io.StreamTokenizer#ttype
     */
    public void ordinaryChar(int ch) {
        if (ch >= 0 && ch < ctype.length)
            ctype[ch] = 0;
    }

    /**
     * Specifies that the character argument starts a single-line
     * comment. All characters from the comment character to the end of
     * the line are ignored by this stream tokenizer.
     *
     * <p>Any other attribute settings for the specified character are cleared.
     *
     * @param   ch   the character.
     */
    public void commentChar(int ch) {
        if (ch >= 0 && ch < ctype.length)
            ctype[ch] = CT_COMMENT;
    }

    /**
     * Specifies that matching pairs of this character delimit string
     * constants in this tokenizer.
     * <p>
     * When the {@code nextToken} method encounters a string
     * constant, the {@code ttype} field is set to the string
     * delimiter and the {@code sval} field is set to the body of
     * the string.
     * <p>
     * If a string quote character is encountered, then a string is
     * recognized, consisting of all characters after (but not including)
     * the string quote character, up to (but not including) the next
     * occurrence of that same string quote character, or a line
     * terminator, or end of file. The usual escape sequences such as
     * {@code "\u005Cn"} and {@code "\u005Ct"} are recognized and
     * converted to single characters as the string is parsed.
     *
     * <p>Any other attribute settings for the specified character are cleared.
     *
     * @param   ch   the character.
     * @see     java.io.StreamTokenizer#nextToken()
     * @see     java.io.StreamTokenizer#sval
     * @see     java.io.StreamTokenizer#ttype
     */
    public void quoteChar(int ch) {
        if (ch >= 0 && ch < ctype.length)
            ctype[ch] = CT_QUOTE;
    }

    /**
     * Specifies that numbers should be parsed by this tokenizer. The
     * syntax table of this tokenizer is modified so that each of the twelve
     * characters:
     * <blockquote><pre>
     *      0 1 2 3 4 5 6 7 8 9 . -
     * </pre></blockquote>
     * <p>
     * has the "numeric" attribute.
     * <p>
     * When the parser encounters a word token that has the format of a
     * double precision floating-point number, it treats the token as a
     * number rather than a word, by setting the {@code ttype}
     * field to the value {@code TT_NUMBER} and putting the numeric
     * value of the token into the {@code nval} field.
     *
     * @see     java.io.StreamTokenizer#nval
     * @see     java.io.StreamTokenizer#TT_NUMBER
     * @see     java.io.StreamTokenizer#ttype
     */
    public void parseNumbers() {
        for (int i = '0'; i <= '9'; i++)
            ctype[i] |= CT_DIGIT;
        ctype['.'] |= CT_DIGIT;
        ctype['-'] |= CT_DIGIT;
    }

    /**
     * Determines whether or not ends of line are treated as tokens.
     * If the flag argument is true, this tokenizer treats end of lines
     * as tokens; the {@code nextToken} method returns
     * {@code TT_EOL} and also sets the {@code ttype} field to
     * this value when an end of line is read.
     * <p>
     * A line is a sequence of characters ending with either a
     * carriage-return character ({@code '\u005Cr'}) or a newline
     * character ({@code '\u005Cn'}). In addition, a carriage-return
     * character followed immediately by a newline character is treated
     * as a single end-of-line token.
     * <p>
     * If the {@code flag} is false, end-of-line characters are
     * treated as white space and serve only to separate tokens.
     *
     * @param   flag   {@code true} indicates that end-of-line characters
     *                 are separate tokens; {@code false} indicates that
     *                 end-of-line characters are white space.
     * @see     java.io.StreamTokenizer#nextToken()
     * @see     java.io.StreamTokenizer#ttype
     * @see     java.io.StreamTokenizer#TT_EOL
     */
    public void eolIsSignificant(boolean flag) {
        eolIsSignificantP = flag;
    }

    /**
     * Determines whether or not the tokenizer recognizes C-style comments.
     * If the flag argument is {@code true}, this stream tokenizer
     * recognizes C-style comments. All text between successive
     * occurrences of {@code /*} and <code>*&#47;</code> are discarded.
     * <p>
     * If the flag argument is {@code false}, then C-style comments
     * are not treated specially.
     *
     * @param   flag   {@code true} indicates to recognize and ignore
     *                 C-style comments.
     */
    public void slashStarComments(boolean flag) {
        slashStarCommentsP = flag;
    }

    /**
     * Determines whether or not the tokenizer recognizes C++-style comments.
     * If the flag argument is {@code true}, this stream tokenizer
     * recognizes C++-style comments. Any occurrence of two consecutive
     * slash characters ({@code '/'}) is treated as the beginning of
     * a comment that extends to the end of the line.
     * <p>
     * If the flag argument is {@code false}, then C++-style
     * comments are not treated specially.
     *
     * @param   flag   {@code true} indicates to recognize and ignore
     *                 C++-style comments.
     */
    public void slashSlashComments(boolean flag) {
        slashSlashCommentsP = flag;
    }

    /**
     * Determines whether or not word tokens are automatically lowercased.
     * If the flag argument is {@code true}, then the value in the
     * {@code sval} field is lowercased whenever a word token is
     * returned (the {@code ttype} field has the
     * value {@code TT_WORD}) by the {@code nextToken} method
     * of this tokenizer.
     * <p>
     * If the flag argument is {@code false}, then the
     * {@code sval} field is not modified.
     *
     * @param   fl   {@code true} indicates that all word tokens should
     *               be lowercased.
     * @see     java.io.StreamTokenizer#nextToken()
     * @see     java.io.StreamTokenizer#ttype
     * @see     java.io.StreamTokenizer#TT_WORD
     */
    public void lowerCaseMode(boolean fl) {
        forceLower = fl;
    }

    /** Read the next character */
    private int read() throws IOException {
        if (reader != null)
            return reader.read();
        else if (input != null)
            return input.read();
        else
            throw new IllegalStateException();
    }

    /**
     * Discards input up to, but not including, the next line terminator.
     *
     * @param  c  the first character to examine; it must already have been
     *            read from the input. If it is itself a line terminator or
     *            a negative (end of input) value, nothing more is read.
     * @return the {@code '\n'} or {@code '\r'} that stopped the scan, or a
     *         negative value if the input ended first.
     */
    private int skipRestOfLine(int c) throws IOException {
        while (c != '\n' && c != '\r' && c >= 0) {
            c = read();
        }
        return c;
    }

    /**
     * Discards the body of a C-style comment whose opening delimiter has
     * already been consumed, counting every line terminator on the way.
     * A carriage return immediately followed by a newline counts as one
     * line. Each character is examined exactly once, so consecutive line
     * terminators are all counted.
     *
     * @return {@code true} if the closing delimiter was found and consumed,
     *         {@code false} if the input ended inside the comment.
     */
    private boolean skipBlockComment() throws IOException {
        int prev = 0;
        while (true) {
            int c = read();
            if (c < 0)
                return false;
            if (c == '/' && prev == '*')
                return true;
            if (c == '\r' || (c == '\n' && prev != '\r'))
                LINENO++;
            prev = c;
        }
    }

    /**
     * Parses the next token from the input stream of this tokenizer.
     * The type of the next token is returned in the {@code ttype}
     * field. Additional information about the token may be in the
     * {@code nval} field or the {@code sval} field of this
     * tokenizer.
     * <p>
     * Typical clients of this
     * class first set up the syntax tables and then sit in a loop
     * calling nextToken to parse successive tokens until TT_EOF
     * is returned.
     *
     * @return     the value of the {@code ttype} field.
     * @exception  IOException  if an I/O error occurs.
     * @see        java.io.StreamTokenizer#nval
     * @see        java.io.StreamTokenizer#sval
     * @see        java.io.StreamTokenizer#ttype
     */
    public int nextToken() throws IOException {
        if (pushedBack) {
            pushedBack = false;
            return ttype;
        }
        byte ct[] = ctype;
        sval = null;

        int c = peekc;
        if (c < 0)
            c = NEED_CHAR;
        if (c == SKIP_LF) {
            c = read();
            if (c < 0)
                return ttype = TT_EOF;
            if (c == '\n')
                c = NEED_CHAR;
        }
        if (c == NEED_CHAR) {
            c = read();
            if (c < 0)
                return ttype = TT_EOF;
        }
        ttype = c;              /* Just to be safe */

        /* Set peekc so that the next invocation of nextToken will read
         * another character unless peekc is reset in this invocation
         */
        peekc = NEED_CHAR;

        /* Note: this local shadows the ctype field, which is aliased as ct.
         * Characters beyond the table are treated as alphabetic.
         */
        int ctype = c < 256 ? ct[c] : CT_ALPHA;
        while ((ctype & CT_WHITESPACE) != 0) {
            if (c == '\r') {
                LINENO++;
                if (eolIsSignificantP) {
                    /* A '\n' directly after this '\r' belongs to the same
                     * line end and must be dropped by the next call.
                     */
                    peekc = SKIP_LF;
                    return ttype = TT_EOL;
                }
                c = read();
                if (c == '\n')
                    c = read();
            } else {
                if (c == '\n') {
                    LINENO++;
                    if (eolIsSignificantP) {
                        return ttype = TT_EOL;
                    }
                }
                c = read();
            }
            if (c < 0)
                return ttype = TT_EOF;
            ctype = c < 256 ? ct[c] : CT_ALPHA;
        }

        if ((ctype & CT_DIGIT) != 0) {
            boolean neg = false;
            if (c == '-') {
                c = read();
                if (c != '.' && (c < '0' || c > '9')) {
                    /* A '-' not followed by a digit or '.' is an ordinary
                     * character token.
                     */
                    peekc = c;
                    return ttype = '-';
                }
                neg = true;
            }
            double v = 0;
            int decexp = 0;
            int seendot = 0;
            while (true) {
                if (c == '.' && seendot == 0)
                    seendot = 1;
                else if ('0' <= c && c <= '9') {
                    v = v * 10 + (c - '0');
                    decexp += seendot;
                } else
                    break;
                c = read();
            }
            peekc = c;
            if (decexp != 0) {
                double denom = 10;
                decexp--;
                while (decexp > 0) {
                    denom *= 10;
                    decexp--;
                }
                /* Do one division of a likely-to-be-more-accurate number */
                v = v / denom;
            }
            nval = neg ? -v : v;
            return ttype = TT_NUMBER;
        }

        if ((ctype & CT_ALPHA) != 0) {
            int i = 0;
            do {
                if (i >= buf.length) {
                    buf = Arrays.copyOf(buf, buf.length * 2);
                }
                buf[i++] = (char) c;
                c = read();
                /* End of input terminates the word like white space does. */
                ctype = c < 0 ? CT_WHITESPACE : c < 256 ? ct[c] : CT_ALPHA;
            } while ((ctype & (CT_ALPHA | CT_DIGIT)) != 0);
            peekc = c;
            sval = String.copyValueOf(buf, 0, i);
            if (forceLower)
                sval = sval.toLowerCase();
            return ttype = TT_WORD;
        }

        if ((ctype & CT_QUOTE) != 0) {
            ttype = c;
            int i = 0;
            /* Invariants (because \Octal needs a lookahead):
             *   (i)  c contains char value
             *   (ii) d contains the lookahead
             */
            int d = read();
            while (d >= 0 && d != ttype && d != '\n' && d != '\r') {
                if (d == '\\') {
                    c = read();
                    if (c < 0) {
                        /* The input ended right after the backslash. Stop
                         * here instead of storing the end-of-input marker
                         * (cast to char) as part of the string body.
                         */
                        d = c;
                        break;
                    }
                    int first = c;   /* To allow \377, but not \477 */
                    if (c >= '0' && c <= '7') {
                        c = c - '0';
                        int c2 = read();
                        if ('0' <= c2 && c2 <= '7') {
                            c = (c << 3) + (c2 - '0');
                            c2 = read();
                            if ('0' <= c2 && c2 <= '7' && first <= '3') {
                                c = (c << 3) + (c2 - '0');
                                d = read();
                            } else
                                d = c2;
                        } else
                          d = c2;
                    } else {
                        switch (c) {
                        case 'a':
                            c = 0x7;
                            break;
                        case 'b':
                            c = '\b';
                            break;
                        case 'f':
                            c = 0xC;
                            break;
                        case 'n':
                            c = '\n';
                            break;
                        case 'r':
                            c = '\r';
                            break;
                        case 't':
                            c = '\t';
                            break;
                        case 'v':
                            c = 0xB;
                            break;
                        }
                        d = read();
                    }
                } else {
                    c = d;
                    d = read();
                }
                if (i >= buf.length) {
                    buf = Arrays.copyOf(buf, buf.length * 2);
                }
                buf[i++] = (char)c;
            }

            /* If we broke out of the loop because we found a matching quote
             * character then arrange to read a new character next time
             * around; otherwise, save the character.
             */
            peekc = (d == ttype) ? NEED_CHAR : d;

            sval = String.copyValueOf(buf, 0, i);
            return ttype;
        }

        if (c == '/' && (slashSlashCommentsP || slashStarCommentsP)) {
            c = read();
            if (c == '*' && slashStarCommentsP) {
                if (!skipBlockComment())
                    return ttype = TT_EOF;
                return nextToken();
            } else if (c == '/' && slashSlashCommentsP) {
                peekc = skipRestOfLine(read());
                return nextToken();
            } else {
                /* Now see if it is still a single line comment */
                if ((ct['/'] & CT_COMMENT) != 0) {
                    /* c is the character after the '/' and has already been
                     * consumed. Start the scan with it, so that a line
                     * terminator directly after the '/' ends the comment
                     * rather than being skipped along with the next line.
                     */
                    peekc = skipRestOfLine(c);
                    return nextToken();
                } else {
                    peekc = c;
                    return ttype = '/';
                }
            }
        }

        if ((ctype & CT_COMMENT) != 0) {
            peekc = skipRestOfLine(read());
            return nextToken();
        }

        return ttype = c;
    }

    /**
     * Causes the next call to the {@code nextToken} method of this
     * tokenizer to return the current value in the {@code ttype}
     * field, and not to modify the value in the {@code nval} or
     * {@code sval} field.
     *
     * @see     java.io.StreamTokenizer#nextToken()
     * @see     java.io.StreamTokenizer#nval
     * @see     java.io.StreamTokenizer#sval
     * @see     java.io.StreamTokenizer#ttype
     */
    public void pushBack() {
        if (ttype != TT_NOTHING)   /* No-op if nextToken() not called */
            pushedBack = true;
    }

    /**
     * Return the current line number.
     *
     * @return  the current line number of this stream tokenizer.
     */
    public int lineno() {
        return LINENO;
    }

    /**
     * Returns the string representation of the current stream token and
     * the line number it occurs on.
     *
     * <p>The precise string returned is unspecified, although the following
     * example can be considered typical:
     *
     * <blockquote><pre>Token['a'], line 10</pre></blockquote>
     *
     * @return  a string representation of the token
     * @see     java.io.StreamTokenizer#nval
     * @see     java.io.StreamTokenizer#sval
     * @see     java.io.StreamTokenizer#ttype
     */
    @Override
    public String toString() {
        String ret;
        switch (ttype) {
          case TT_EOF:
            ret = "EOF";
            break;
          case TT_EOL:
            ret = "EOL";
            break;
          case TT_WORD:
            ret = sval;
            break;
          case TT_NUMBER:
            ret = "n=" + nval;
            break;
          case TT_NOTHING:
            ret = "NOTHING";
            break;
          default: {
                /*
                 * ttype is normally the first character of either a quoted
                 * string or an ordinary character. Because ttype is a
                 * public field it may also have been assigned some other
                 * negative value, so the lower bound is checked before
                 * the syntax table is indexed.
                 */
                if (ttype >= 0 && ttype < 256 &&
                    ((ctype[ttype] & CT_QUOTE) != 0)) {
                    ret = sval;
                    break;
                }

                char s[] = new char[3];
                s[0] = s[2] = '\'';
                s[1] = (char) ttype;
                ret = new String(s);
                break;
            }
        }
        return "Token[" + ret + "], line " + LINENO;
    }

}
835