1 /*
2 * Conditions Of Use
3 *
4 * This software was developed by employees of the National Institute of
5 * Standards and Technology (NIST), an agency of the Federal Government.
6 * Pursuant to title 15 United States Code Section 105, works of NIST
7 * employees are not subject to copyright protection in the United States
8 * and are considered to be in the public domain.  As a result, a formal
9 * license is not needed to use the software.
10 *
11 * This software is provided by NIST as a service and is expressly
12 * provided "AS IS."  NIST MAKES NO WARRANTY OF ANY KIND, EXPRESS, IMPLIED
13 * OR STATUTORY, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF
14 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT
15 * AND DATA ACCURACY.  NIST does not warrant or make any representations
16 * regarding the use of the software or the results thereof, including but
17 * not limited to the correctness, accuracy, reliability or usefulness of
18 * the software.
19 *
20 * Permission to use this software is contingent upon your acceptance
21 * of the terms of this agreement
22 *
23 * .
24 *
25 */
26 package gov.nist.core;
27 
28 import java.text.ParseException;
29 import java.util.Hashtable;
30 
31 /** A lexical analyzer that is used by all parsers in our implementation.
32  *
33  *@version 1.2
34  *@since 1.1
35  *
36  *@author M. Ranganathan
37  */
38 public class LexerCore extends StringTokenizer {
39 
40     // IMPORTANT - All keyword matches should be between START and END
41     public static final int START = 2048;
42     public static final int END = START + 2048;
43     // IMPORTANT -- This should be < END
44     public static final int ID = END - 1;
45     public static final int SAFE = END - 2;
46     // Individial token classes.
47     public static final int WHITESPACE = END + 1;
48     public static final int DIGIT = END + 2;
49     public static final int ALPHA = END + 3;
50     public static final int BACKSLASH = (int) '\\';
51     public static final int QUOTE = (int) '\'';
52     public static final int AT = (int) '@';
53     public static final int SP = (int) ' ';
54     public static final int HT = (int) '\t';
55     public static final int COLON = (int) ':';
56     public static final int STAR = (int) '*';
57     public static final int DOLLAR = (int) '$';
58     public static final int PLUS = (int) '+';
59     public static final int POUND = (int) '#';
60     public static final int MINUS = (int) '-';
61     public static final int DOUBLEQUOTE = (int) '\"';
62     public static final int TILDE = (int) '~';
63     public static final int BACK_QUOTE = (int) '`';
64     public static final int NULL = (int) '\0';
65     public static final int EQUALS = (int) '=';
66     public static final int SEMICOLON = (int) ';';
67     public static final int SLASH = (int) '/';
68     public static final int L_SQUARE_BRACKET = (int) '[';
69     public static final int R_SQUARE_BRACKET = (int) ']';
70     public static final int R_CURLY = (int) '}';
71     public static final int L_CURLY = (int) '{';
72     public static final int HAT = (int) '^';
73     public static final int BAR = (int) '|';
74     public static final int DOT = (int) '.';
75     public static final int EXCLAMATION = (int) '!';
76     public static final int LPAREN = (int) '(';
77     public static final int RPAREN = (int) ')';
78     public static final int GREATER_THAN = (int) '>';
79     public static final int LESS_THAN = (int) '<';
80     public static final int PERCENT = (int) '%';
81     public static final int QUESTION = (int) '?';
82     public static final int AND = (int) '&';
83     public static final int UNDERSCORE = (int) '_';
84 
85     protected static final Hashtable globalSymbolTable;
86     protected static final Hashtable lexerTables;
87     protected Hashtable currentLexer;
88     protected String currentLexerName;
89     protected Token currentMatch;
90 
91     static {
92         globalSymbolTable = new Hashtable();
93         lexerTables = new Hashtable();
94     }
95 
addKeyword(String name, int value)96     protected void addKeyword(String name, int value) {
97         // System.out.println("addKeyword " + name + " value = " + value);
98         // new Exception().printStackTrace();
99         Integer val = Integer.valueOf(value);
100         currentLexer.put(name, val);
101         if (!globalSymbolTable.containsKey(val))
102             globalSymbolTable.put(val, name);
103     }
104 
lookupToken(int value)105     public String lookupToken(int value) {
106         if (value > START) {
107             return (String) globalSymbolTable.get(Integer.valueOf(value));
108         } else {
109             Character ch = Character.valueOf((char) value);
110             return ch.toString();
111         }
112     }
113 
addLexer(String lexerName)114     protected Hashtable addLexer(String lexerName) {
115         currentLexer = (Hashtable) lexerTables.get(lexerName);
116         if (currentLexer == null) {
117             currentLexer = new Hashtable();
118             lexerTables.put(lexerName, currentLexer);
119         }
120         return currentLexer;
121     }
122 
123     //public abstract void selectLexer(String lexerName);
124 
selectLexer(String lexerName)125     public void selectLexer(String lexerName) {
126         this.currentLexerName = lexerName;
127     }
128 
LexerCore()129     protected LexerCore() {
130         this.currentLexer = new Hashtable();
131         this.currentLexerName = "charLexer";
132     }
133 
134     /** Initialize the lexer with a buffer.
135      */
LexerCore(String lexerName, String buffer)136     public LexerCore(String lexerName, String buffer) {
137         super(buffer);
138         this.currentLexerName = lexerName;
139     }
140 
141     /** Peek the next id but dont move the buffer pointer forward.
142      */
143 
peekNextId()144     public String peekNextId() {
145         int oldPtr = ptr;
146         String retval = ttoken();
147         savedPtr = ptr;
148         ptr = oldPtr;
149         return retval;
150     }
151 
152     /** Get the next id.
153      */
getNextId()154     public String getNextId() {
155         return ttoken();
156     }
157 
158     // call this after you call match
getNextToken()159     public Token getNextToken() {
160         return this.currentMatch;
161 
162     }
163 
164     /** Look ahead for one token.
165      */
peekNextToken()166     public Token peekNextToken() throws ParseException {
167         return (Token) peekNextToken(1)[0];
168     }
169 
peekNextToken(int ntokens)170     public Token[] peekNextToken(int ntokens) throws ParseException {
171         int old = ptr;
172         Token[] retval = new Token[ntokens];
173         for (int i = 0; i < ntokens; i++) {
174             Token tok = new Token();
175             if (startsId()) {
176                 String id = ttoken();
177                 tok.tokenValue = id;
178                 String idUppercase = id.toUpperCase();
179                 if (currentLexer.containsKey(idUppercase)) {
180                     Integer type = (Integer) currentLexer.get(idUppercase);
181                     tok.tokenType = type.intValue();
182                 } else
183                     tok.tokenType = ID;
184             } else {
185                 char nextChar = getNextChar();
186                 tok.tokenValue = String.valueOf(nextChar);
187                 if (isAlpha(nextChar)) {
188                     tok.tokenType = ALPHA;
189                 } else if (isDigit(nextChar)) {
190                     tok.tokenType = DIGIT;
191                 } else
192                     tok.tokenType = (int) nextChar;
193             }
194             retval[i] = tok;
195         }
196         savedPtr = ptr;
197         ptr = old;
198         return retval;
199     }
200 
201     /** Match the given token or throw an exception if no such token
202      * can be matched.
203      */
match(int tok)204     public Token match(int tok) throws ParseException {
205         if (Debug.parserDebug) {
206             Debug.println("match " + tok);
207         }
208         if (tok > START && tok < END) {
209             if (tok == ID) {
210                 // Generic ID sought.
211                 if (!startsId())
212                     throw new ParseException(buffer + "\nID expected", ptr);
213                 String id = getNextId();
214                 this.currentMatch = new Token();
215                 this.currentMatch.tokenValue = id;
216                 this.currentMatch.tokenType = ID;
217             } else if (tok == SAFE) {
218                 if (!startsSafeToken())
219                     throw new ParseException(buffer + "\nID expected", ptr);
220                 String id = ttokenSafe();
221                 this.currentMatch = new Token();
222                 this.currentMatch.tokenValue = id;
223                 this.currentMatch.tokenType = SAFE;
224             } else {
225                 String nexttok = getNextId();
226                 Integer cur = (Integer) currentLexer.get(nexttok.toUpperCase());
227 
228                 if (cur == null || cur.intValue() != tok)
229                     throw new ParseException(
230                         buffer + "\nUnexpected Token : " + nexttok,
231                         ptr);
232                 this.currentMatch = new Token();
233                 this.currentMatch.tokenValue = nexttok;
234                 this.currentMatch.tokenType = tok;
235             }
236         } else if (tok > END) {
237             // Character classes.
238             char next = lookAhead(0);
239             if (tok == DIGIT) {
240                 if (!isDigit(next))
241                     throw new ParseException(buffer + "\nExpecting DIGIT", ptr);
242                 this.currentMatch = new Token();
243                 this.currentMatch.tokenValue =
244                     String.valueOf(next);
245                 this.currentMatch.tokenType = tok;
246                 consume(1);
247 
248             } else if (tok == ALPHA) {
249                 if (!isAlpha(next))
250                     throw new ParseException(buffer + "\nExpecting ALPHA", ptr);
251                 this.currentMatch = new Token();
252                 this.currentMatch.tokenValue =
253                     String.valueOf(next);
254                 this.currentMatch.tokenType = tok;
255                 consume(1);
256 
257             }
258 
259         } else {
260             // This is a direct character spec.
261             char ch = (char) tok;
262             char next = lookAhead(0);
263             if (next == ch) {
264                 /*this.currentMatch = new Token();
265                 this.currentMatch.tokenValue =
266                     String.valueOf(ch);
267                 this.currentMatch.tokenType = tok;*/
268                 consume(1);
269             } else
270                 throw new ParseException(
271                     buffer + "\nExpecting  >>>" + ch + "<<< got >>>"
272                     + next + "<<<", ptr);
273         }
274         return this.currentMatch;
275     }
276 
SPorHT()277     public void SPorHT() {
278         try {
279             char c = lookAhead(0);
280             while (c == ' ' || c == '\t') {
281                 consume(1);
282                 c = lookAhead(0);
283             }
284         } catch (ParseException ex) {
285             // Ignore
286         }
287     }
288 
289     /**
290      * JvB: utility function added to validate tokens
291      *
292      * @see RFC3261 section 25.1:
293      * token       =  1*(alphanum / "-" / "." / "!" / "%" / "*"
294                      / "_" / "+" / "`" / "'" / "~" )
295 
296      * @param c - character to check
297      * @return true iff character c is a valid token character as per RFC3261
298      */
isTokenChar( char c )299     public static final boolean isTokenChar( char c ) {
300         if ( isAlphaDigit(c) ) return true;
301         else switch (c)
302         {
303             case '-':
304             case '.':
305             case '!':
306             case '%':
307             case '*':
308             case '_':
309             case '+':
310             case '`':
311             case '\'':
312             case '~':
313                 return true;
314             default:
315                 return false;
316         }
317     }
318 
319 
startsId()320     public boolean startsId() {
321         try {
322             char nextChar = lookAhead(0);
323             return isTokenChar(nextChar);
324         } catch (ParseException ex) {
325             return false;
326         }
327     }
328 
startsSafeToken()329     public boolean startsSafeToken() {
330         try {
331             char nextChar = lookAhead(0);
332             if (isAlphaDigit(nextChar)) {
333                 return true;
334             }
335             else {
336                 switch (nextChar) {
337                     case '_':
338                     case '+':
339                     case '-':
340                     case '!':
341                     case '`':
342                     case '\'':
343                     case '.':
344                     case '/':
345                     case '}':
346                     case '{':
347                     case ']':
348                     case '[':
349                     case '^':
350                     case '|':
351                     case '~':
352                     case '%': // bug fix by Bruno Konik, JvB copied here
353                     case '#':
354                     case '@':
355                     case '$':
356                     case ':':
357                     case ';':
358                     case '?':
359                     case '\"':
360                     case '*':
361                     case '=': // Issue 155 on java.net
362                         return true;
363                     default:
364                         return false;
365                 }
366             }
367         } catch (ParseException ex) {
368             return false;
369         }
370     }
371 
ttoken()372     public String ttoken() {
373         int startIdx = ptr;
374         try {
375             while (hasMoreChars()) {
376                 char nextChar = lookAhead(0);
377                 if ( isTokenChar(nextChar) ) {
378                     consume(1);
379                 } else {
380                     break;
381                 }
382             }
383             return buffer.substring(startIdx, ptr);
384         } catch (ParseException ex) {
385             return null;
386         }
387     }
388 
389     /* JvB: unreferenced
390     public String ttokenAllowSpace() {
391         int startIdx = ptr;
392         try {
393             while (hasMoreChars()) {
394                 char nextChar = lookAhead(0);
395                 if (isAlphaDigit(nextChar)) {
396                     consume(1);
397                 }
398                 else {
399                     boolean isValidChar = false;
400                     switch (nextChar) {
401                         case '_':
402                         case '+':
403                         case '-':
404                         case '!':
405                         case '`':
406                         case '\'':
407                         case '~':
408                         case '%': // bug fix by Bruno Konik, JvB copied here
409                         case '.':
410                         case ' ':
411                         case '\t':
412                         case '*':
413                             isValidChar = true;
414                     }
415                     if (isValidChar) {
416                         consume(1);
417                     }
418                     else {
419                         break;
420                     }
421                 }
422 
423             }
424             return buffer.substring(startIdx, ptr);
425         } catch (ParseException ex) {
426             return null;
427         }
428     }*/
429 
ttokenSafe()430     public String ttokenSafe() {
431         int startIdx = ptr;
432         try {
433             while (hasMoreChars()) {
434                 char nextChar = lookAhead(0);
435                 if (isAlphaDigit(nextChar)) {
436                     consume(1);
437                 }
438                 else {
439                     boolean isValidChar = false;
440                     switch (nextChar) {
441                         case '_':
442                         case '+':
443                         case '-':
444                         case '!':
445                         case '`':
446                         case '\'':
447                         case '.':
448                         case '/':
449                         case '}':
450                         case '{':
451                         case ']':
452                         case '[':
453                         case '^':
454                         case '|':
455                         case '~':
456                         case '%': // bug fix by Bruno Konik, JvB copied here
457                         case '#':
458                         case '@':
459                         case '$':
460                         case ':':
461                         case ';':
462                         case '?':
463                         case '\"':
464                         case '*':
465                             isValidChar = true;
466                     }
467                     if (isValidChar) {
468                         consume(1);
469                     }
470                     else {
471                         break;
472                     }
473                 }
474             }
475             return buffer.substring(startIdx, ptr);
476         } catch (ParseException ex) {
477             return null;
478         }
479     }
480 
481     static final char ALPHA_VALID_CHARS = Character.MAX_VALUE;
482     static final char DIGIT_VALID_CHARS = Character.MAX_VALUE - 1;
483     static final char ALPHADIGIT_VALID_CHARS = Character.MAX_VALUE - 2;
consumeValidChars(char[] validChars)484     public void consumeValidChars(char[] validChars) {
485         int validCharsLength = validChars.length;
486         try {
487             while (hasMoreChars()) {
488                 char nextChar = lookAhead(0);
489                 boolean isValid = false;
490                 for (int i = 0; i < validCharsLength; i++) {
491                     char validChar = validChars[i];
492                     switch(validChar) {
493                         case ALPHA_VALID_CHARS:
494                             isValid = isAlpha(nextChar);
495                             break;
496                         case DIGIT_VALID_CHARS:
497                             isValid = isDigit(nextChar);
498                             break;
499                         case ALPHADIGIT_VALID_CHARS:
500                             isValid = isAlphaDigit(nextChar);
501                             break;
502                         default:
503                             isValid = nextChar == validChar;
504                     }
505                     if (isValid) {
506                         break;
507                     }
508                 }
509                 if (isValid) {
510                     consume(1);
511                 }
512                 else {
513                     break;
514                 }
515             }
516         } catch (ParseException ex) {
517 
518         }
519     }
520 
521     /** Parse a comment string cursor is at a ". Leave cursor at closing "
522     *@return the substring containing the quoted string excluding the
523     * closing quote.
524     */
quotedString()525     public String quotedString() throws ParseException {
526         int startIdx = ptr + 1;
527         if (lookAhead(0) != '\"')
528             return null;
529         consume(1);
530         while (true) {
531             char next = getNextChar();
532             if (next == '\"') {
533                 // Got to the terminating quote.
534                 break;
535             } else if (next == '\0') {
536                 throw new ParseException(
537                     this.buffer + " :unexpected EOL",
538                     this.ptr);
539             } else if (next == '\\') {
540                 consume(1);
541             }
542         }
543         return buffer.substring(startIdx, ptr - 1);
544     }
545 
546     /** Parse a comment string cursor is at a "(". Leave cursor at )
547     *@return the substring containing the comment excluding the
548     * closing brace.
549     */
comment()550     public String comment() throws ParseException {
551         StringBuffer retval = new StringBuffer();
552         if (lookAhead(0) != '(')
553             return null;
554         consume(1);
555         while (true) {
556             char next = getNextChar();
557             if (next == ')') {
558                 break;
559             } else if (next == '\0') {
560                 throw new ParseException(
561                     this.buffer + " :unexpected EOL",
562                     this.ptr);
563             } else if (next == '\\') {
564                 retval.append(next);
565                 next = getNextChar();
566                 if (next == '\0')
567                     throw new ParseException(
568                         this.buffer + " : unexpected EOL",
569                         this.ptr);
570                 retval.append(next);
571             } else {
572                 retval.append(next);
573             }
574         }
575         return retval.toString();
576     }
577 
578     /** Return a substring containing no semicolons.
579     *@return a substring containing no semicolons.
580     */
byteStringNoSemicolon()581     public String byteStringNoSemicolon() {
582         StringBuffer retval = new StringBuffer();
583         try {
584             while (true) {
585                 char next = lookAhead(0);
586                 // bug fix from Ben Evans.
587                 if (next == '\0' || next == '\n' || next == ';' || next == ',' ) {
588                     break;
589                 } else {
590                     consume(1);
591                     retval.append(next);
592                 }
593             }
594         } catch (ParseException ex) {
595             return retval.toString();
596         }
597         return retval.toString();
598     }
599 
600     /**
601      * Scan until you see a slash or an EOL.
602      *
603      * @return substring containing no slash.
604      */
byteStringNoSlash()605     public String byteStringNoSlash() {
606         StringBuffer retval = new StringBuffer();
607         try {
608             while (true) {
609                 char next = lookAhead(0);
610                 // bug fix from Ben Evans.
611                 if (next == '\0' || next == '\n' || next == '/'  ) {
612                     break;
613                 } else {
614                     consume(1);
615                     retval.append(next);
616                 }
617             }
618         } catch (ParseException ex) {
619             return retval.toString();
620         }
621         return retval.toString();
622     }
623 
624     /** Return a substring containing no commas
625     *@return a substring containing no commas.
626     */
627 
byteStringNoComma()628     public String byteStringNoComma() {
629         StringBuffer retval = new StringBuffer();
630         try {
631             while (true) {
632                 char next = lookAhead(0);
633                 if (next == '\n' || next == ',') {
634                     break;
635                 } else {
636                     consume(1);
637                     retval.append(next);
638                 }
639             }
640         } catch (ParseException ex) {
641         }
642         return retval.toString();
643     }
644 
charAsString(char ch)645     public static String charAsString(char ch) {
646         return String.valueOf(ch);
647     }
648 
649     /** Lookahead in the inputBuffer for n chars and return as a string.
650      * Do not consume the input.
651      */
charAsString(int nchars)652     public String charAsString(int nchars) {
653         return buffer.substring(ptr, ptr + nchars);
654     }
655 
656     /** Get and consume the next number.
657      *@return a substring corresponding to a number
658      *(i.e. sequence of digits).
659      */
number()660     public String number() throws ParseException {
661 
662         int startIdx = ptr;
663         try {
664             if (!isDigit(lookAhead(0))) {
665                 throw new ParseException(
666                     buffer + ": Unexpected token at " + lookAhead(0),
667                     ptr);
668             }
669             consume(1);
670             while (true) {
671                 char next = lookAhead(0);
672                 if (isDigit(next)) {
673                     consume(1);
674                 } else
675                     break;
676             }
677             return buffer.substring(startIdx, ptr);
678         } catch (ParseException ex) {
679             return buffer.substring(startIdx, ptr);
680         }
681     }
682 
683     /** Mark the position for backtracking.
684      *@return the current location of the pointer.
685      */
markInputPosition()686     public int markInputPosition() {
687         return ptr;
688     }
689 
690     /** Rewind the input ptr to the marked position.
691      *@param position - the position to rewind the parser to.
692      */
rewindInputPosition(int position)693     public void rewindInputPosition(int position) {
694         this.ptr = position;
695     }
696 
697     /** Get the rest of the String
698      * @return rest of the buffer.
699      */
getRest()700     public String getRest() {
701         if (ptr >= buffer.length())
702             return null;
703         else
704             return buffer.substring(ptr);
705     }
706 
707     /** Get the sub-String until the character is encountered
708      * @param c the character to match
709      * @return the substring that matches.
710      */
getString(char c)711     public String getString(char c) throws ParseException {
712         StringBuffer retval = new StringBuffer();
713         while (true) {
714             char next = lookAhead(0);
715             //System.out.println(" next = [" + next + ']' + "ptr = " + ptr);
716             //System.out.println(next == '\0');
717 
718             if (next == '\0') {
719                 throw new ParseException(
720                     this.buffer + "unexpected EOL",
721                     this.ptr);
722             } else if (next == c) {
723                 consume(1);
724                 break;
725             } else if (next == '\\') {
726                 consume(1);
727                 char nextchar = lookAhead(0);
728                 if (nextchar == '\0') {
729                     throw new ParseException(
730                         this.buffer + "unexpected EOL",
731                         this.ptr);
732                 } else {
733                     consume(1);
734                     retval.append(nextchar);
735                 }
736             } else {
737                 consume(1);
738                 retval.append(next);
739             }
740         }
741         return retval.toString();
742     }
743 
744     /** Get the read pointer.
745      */
getPtr()746     public int getPtr() {
747         return this.ptr;
748     }
749 
750     /** Get the buffer.
751      */
getBuffer()752     public String getBuffer() {
753         return this.buffer;
754     }
755 
756     /** Create a parse exception.
757      */
createParseException()758     public ParseException createParseException() {
759         return new ParseException(this.buffer, this.ptr);
760     }
761 }
762