1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package dasm;
18 
19 
20 import dasm.tokens.number_token;
21 import dasm.tokens.relative_number_token;
22 
23 import java.io.IOException;
24 import java.io.Reader;
25 import java.util.Hashtable;
26 
27 import java_cup.runtime.int_token;
28 import java_cup.runtime.str_token;
29 import java_cup.runtime.token;
30 
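/**
 * Tokenizer: reads characters from a {@link java.io.Reader} and splits them
 * into tokens, which are handed out one at a time by next_token().
 */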
34 
35 
36 class Scanner {
37     /**
38      * Chars buffer with autoexpanding.
39      */
40     class CharBuffer {
41         private int buffer_size = 256;
42         private char[] buffer = new char[buffer_size];
43         private int cur_pos = 0;
44 
expand()45         private void expand() {
46             char tmp[] = new char[buffer_size * 2];
47             System.arraycopy(buffer, 0, tmp, 0, buffer_size);
48             buffer_size *= 2;
49             buffer = tmp;
50         }
51 
add(char c)52         void add(char c) {
53             buffer[cur_pos] = c;
54             cur_pos++;
55             if (cur_pos == buffer_size) expand();
56         }
57 
size()58         int size() {
59             return cur_pos;
60         }
61 
charAt(int idx)62         char charAt(int idx) {
63             return buffer[idx];
64         }
65 
toString()66         public String toString() {
67             return new String(buffer, 0, cur_pos);
68         }
69 
reset()70         void reset() {
71             cur_pos = 0;
72         }
73     }
74 
75 
76     protected Reader inputReader;
77 
78     /**
79      * next character in input stream
80      */
81     protected int nextChar;
82 
83     protected CharBuffer charBuf = new CharBuffer();
84 
85     /**
86      * Whitespace characters
87      */
88     protected static final String WHITESPACE = " \n\t\r";
89 
90     /**
91      * Separator characters
92      */
93     protected static final String SEPARATORS = WHITESPACE + ":=" + ",";
94 
95     /**
96      * error reporting
97      */
98     public int line_num, token_line_num, char_num;
99     public StringBuffer line;
100 
101     /**
102      * Holds new variables defined by .set directive
103      */
104     public Hashtable dict = new Hashtable();
105 
Scanner(Reader i)106     public Scanner(Reader i) throws IOException, DasmError {
107         inputReader = i;
108         line_num = 1;
109         char_num = 0;
110         line = new StringBuffer();
111         nextChar = 0;
112         skipEmptyLines();
113         if (nextChar == -1) throw new DasmError("empty source file");
114     }
115 
116     /**
117      * Checks if a character code is a whitespace character
118      */
isWhitespace(int c)119     protected static boolean isWhitespace(int c) {
120         return (WHITESPACE.indexOf(c) != -1);
121     }
122 
123     /**
124      * Checks if a character code is a separator character
125      */
isSeparator(int c)126     protected static boolean isSeparator(int c) {
127         return (c == -1 || SEPARATORS.indexOf(c) != -1);
128     }
129 
130     /**
131      * Gets next char from input
132      */
readNextChar()133     protected void readNextChar() throws IOException {
134         nextChar = inputReader.read();
135         switch (nextChar) {
136         case -1: // EOF
137             if (char_num == 0) {
138                 char_num = -1;
139                 break;
140             }
141             nextChar = '\n';
142             // fall thru
143         case '\n':
144             line_num++;
145             char_num = 0;
146             break;
147         default:
148             line.append((char) nextChar);
149             char_num++;
150             return;
151         }
152         line.setLength(0);
153     }
154 
155     /**
156      * Skips empty lines in input stream
157      */
skipEmptyLines()158     private void skipEmptyLines() throws IOException {
159         for (;;) {
160             if (nextChar != ';') {
161                 do {
162                     readNextChar();
163                 } while (isWhitespace(nextChar));
164                 if (nextChar != ';') return;
165             }
166             do {
167                 readNextChar();
168                 if (nextChar == -1) return;
169             } while (nextChar != '\n');
170         }
171     }
172 
173     /**
174      * Reads unicode char (\\uXXXX)
175      */
readUnicodeChar()176     private char readUnicodeChar() throws IOException, DasmError {
177         int result = 0;
178         for (int i = 0; i < 4; i++) {
179             readNextChar();
180             if (nextChar == -1) return 0;
181 
182             int tmp = Character.digit((char) nextChar, 16);
183             if (tmp == -1)
184                 throw new DasmError("Invalid '\\u' escape sequence");
185             result = (result << 4) | tmp;
186         }
187         return (char) result;
188     }
189 
nameEscape()190     private char nameEscape() throws IOException, DasmError {
191         readNextChar();
192         if (nextChar != 'u')
193             throw new DasmError("Only '\\u' escape sequence allowed in names");
194         char chval = readUnicodeChar();
195         if (nextChar == -1)
196             throw new DasmError("Left over '\\u' escape sequence");
197         return chval;
198     }
199 
200     /**
201      * Read and recognize next token
202      */
next_token()203     public token next_token() throws IOException, DasmError {
204         token_line_num = line_num;
205 
206         for (;;)
207             switch (nextChar) {
208             case ';': // a comment
209             case '\n':
210                 // return single SEP token (skip multiple newlines
211                 // interspersed with whitespace or comments)
212                 skipEmptyLines();
213                 token_line_num = line_num;
214                 return new token(sym.SEP);
215 
216             case ' ':
217             case '\t':
218             case '\r':
219             case ',': // whitespace
220                 readNextChar();
221                 break;
222 
223             case -1: // EOF token
224                 char_num = -1;
225                 return new token(sym.EOF);
226 
227             case '=': // EQUALS token
228                 readNextChar();
229                 return new token(sym.EQ);
230 
231             case ':': // COLON token
232                 readNextChar();
233                 return new token(sym.COLON);
234 
235             case '-':
236             case '+':
237             case '0':
238             case '1':
239             case '2':
240             case '3':
241             case '4':
242             case '5':
243             case '6':
244             case '7':
245             case '8':
246             case '9':
247             case '.': // a number
248             {
249                 return readNumber();
250             }
251 
252             case '"': // quoted string
253             {
254                 return readQuotedString();
255             }
256 
257             case '{': // list of registers
258             {
259                 return readRegList();
260             }
261 
262             case '\'': // quotation for overloading reserved words
263                 return readQuotedReservedWord();
264 
265             default: {
266                 // read up until a separatorcharacter
267                 boolean only_name = false;
268 
269                 charBuf.reset();
270                 do {
271                     char chval = (char) nextChar;
272                     if (nextChar == '\\') {
273                         chval = nameEscape();
274                         only_name = true;
275                     }
276                     charBuf.add(chval);
277                     readNextChar();
278                 } while (!isSeparator(nextChar));
279 
280                 String str = charBuf.toString();
281 
282                 if (!only_name) {
283                     token tok;
284 
285                     // keyword or directive?
286                     if ((tok = ReservedWords.get(str)) != null) return tok;
287 
288                     // VM instruction?
289                     if (DopInfo.contains(str))
290                         return new str_token(sym.Insn, str);
291 
292                     if (str.charAt(0) == '$') {
293                         String s = str.substring(1);
294                         Object v;
295                         int n = 10;
296                         boolean neg = false;
297                         switch (s.charAt(0)) {
298                         default:
299                             break;
300 
301                         case '-':
302                             neg = true;
303                         case '+':
304                             s = s.substring(1);
305                             if (s.startsWith("0x")) {
306                                 n = 16;
307                                 s = s.substring(2);
308                             }
309                             try {
310                                 n = Integer.parseInt(s, n);
311                             } catch (NumberFormatException e) {
312                                 throw new DasmError(
313                                         "Bad relative offset number");
314                             }
315                             if (neg) n = -n;
316                             return new relative_number_token(sym.Relative, n);
317                         }
318                         // Do variable substitution
319                         if ((v = dict.get(s)) != null) return (token) v;
320                     } // not begin from '$'
321                 } // !only_name
322                 // Unrecognized string token (e.g. a classname)
323                 return new str_token(sym.Word, str);
324             }
325             }
326     }
327 
328     /**
329      * Reads "-quoted string
330      */
readQuotedString()331     protected token readQuotedString() throws IOException, DasmError {
332         boolean f = false;
333         charBuf.reset();
334         for (;;) {
335             if (f)
336                 f = false;
337             else
338                 readNextChar();
339 
340             if (nextChar == '"') {
341                 readNextChar(); // skip closing quote
342                 return new str_token(sym.Str, charBuf.toString());
343             }
344 
345             if (nextChar == -1) throw new DasmError("Unterminated string");
346 
347             char chval = (char) nextChar;
348 
349             if (chval == '\\') {
350                 readNextChar();
351                 switch (nextChar) {
352                 case -1:
353                     f = true;
354                     continue;
355                 case 'n':
356                     chval = '\n';
357                     break;
358                 case 'r':
359                     chval = '\r';
360                     break;
361                 case 't':
362                     chval = '\t';
363                     break;
364                 case 'f':
365                     chval = '\f';
366                     break;
367                 case 'b':
368                     chval = '\b';
369                     break;
370                 case '"':
371                     chval = '"';
372                     break;
373                 case '\'':
374                     chval = '\'';
375                     break;
376                 case '\\':
377                     chval = '\\';
378                     break;
379 
380                 case 'u':
381                     chval = readUnicodeChar();
382                     if (nextChar == -1) {
383                         f = true;
384                         continue;
385                     }
386                     break;
387 
388                 // octals
389                 case '0':
390                 case '1':
391                 case '2':
392                 case '3':
393                 case '4':
394                 case '5':
395                 case '6':
396                 case '7': {
397                     int res = nextChar & 7;
398                     readNextChar();
399                     if (nextChar < '0' || nextChar > '7')
400                         f = true;
401                     else {
402                         res = res * 8 + (nextChar & 7);
403                         readNextChar();
404                         if (nextChar < '0' || nextChar > '7')
405                             f = true;
406                         else {
407                             int val = res * 8 + (nextChar & 7);
408                             if (val >= 0x100)
409                                 f = true;
410                             else
411                                 res = val;
412                         }
413                     }
414                     chval = (char) res;
415                 }
416                     break;
417 
418                 default:
419                     throw new DasmError("Incorrect backslash escape sequence");
420                 }
421             }
422             charBuf.add(chval);
423         }
424     }
425 
426     /**
427      * Reads list of registers ({v1, v2, v3} or {v1..v3})
428      */
readRegList()429     protected token readRegList() throws IOException, DasmError {
430         charBuf.reset();
431         for (;;) {
432             readNextChar();
433 
434             if (nextChar == '}') {
435                 readNextChar(); // skip closing quote
436                 return new str_token(sym.Word, charBuf.toString());
437             }
438 
439             if (nextChar == -1)
440                 throw new DasmError("Unterminated list of registers");
441 
442 
443             charBuf.add((char) nextChar);
444         }
445     }
446 
447     /**
448      * Reads number
449      */
readNumber()450     protected token readNumber() throws IOException, DasmError {
451         charBuf.reset();
452 
453         do {
454             charBuf.add((char) nextChar);
455             readNextChar();
456         } while (!isSeparator(nextChar));
457 
458         String str = charBuf.toString();
459         token tok;
460 
461         // directive?
462         if ((tok = ReservedWords.get(str)) != null) return tok;
463 
464         Number num;
465         try {
466             num = Utils.stringToNumber(str);
467         } catch (NumberFormatException e) {
468             if (charBuf.charAt(0) != '.') // directive?
469                 throw new DasmError("Bad number format");
470             throw new DasmError("Unknown directive or bad number format");
471         }
472 
473         if (num instanceof Integer) {
474             return new int_token(sym.Int, num.intValue());
475         }
476 
477         return new number_token(sym.Num, num);
478     }
479 
480     /**
481      * Reads ''-quoted overloaded reserved words
482      */
readQuotedReservedWord()483     protected token readQuotedReservedWord() throws IOException, DasmError {
484         charBuf.reset();
485         for (;;) {
486             readNextChar();
487             if (isSeparator(nextChar))
488                 throw new DasmError("Unterminated ''-enclosed name");
489             if (nextChar == '\'') {
490                 if (charBuf.size() == 0)
491                     throw new DasmError("Empty ''-enclosed name");
492                 readNextChar(); // skip close quote
493                 if (!isSeparator(nextChar))
494                     throw new DasmError(
495                             "Missed separator after ''-enclosed name");
496                 return new str_token(sym.Word, charBuf.toString());
497             }
498             char chval = (char) nextChar;
499             if (nextChar == '\\') chval = nameEscape();
500             charBuf.add(chval);
501         }
502     }
503 };
504