/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package dasm;


import dasm.tokens.number_token;
import dasm.tokens.relative_number_token;

import java.io.IOException;
import java.io.Reader;
import java.util.Hashtable;

import java_cup.runtime.int_token;
import java_cup.runtime.str_token;
import java_cup.runtime.token;

/**
 * Tokenizer.
 *
 * Reads characters from a {@link Reader} one at a time (with a single
 * character of lookahead held in {@link #nextChar}) and groups them into
 * tokens via {@link #next_token()}.
 */
class Scanner {
    /**
     * Chars buffer with autoexpanding.
     *
     * The backing array starts at 256 chars and doubles whenever it
     * becomes full.
     */
    class CharBuffer {
        private int buffer_size = 256;
        private char[] buffer = new char[buffer_size];
        private int cur_pos = 0;

        /**
         * Doubles the capacity of the backing array, keeping its contents.
         */
        private void expand() {
            char tmp[] = new char[buffer_size * 2];
            System.arraycopy(buffer, 0, tmp, 0, buffer_size);
            buffer_size *= 2;
            buffer = tmp;
        }

        /**
         * Appends a character, growing the buffer as soon as it is full so
         * that the next add() always has a free slot.
         */
        void add(char c) {
            buffer[cur_pos] = c;
            cur_pos++;
            if (cur_pos == buffer_size) expand();
        }

        /**
         * Returns the number of characters currently stored.
         */
        int size() {
            return cur_pos;
        }

        /**
         * Returns the character stored at the given index of the backing
         * array (no check against size() is made).
         */
        char charAt(int idx) {
            return buffer[idx];
        }

        /**
         * Returns the stored characters as a string.
         */
        public String toString() {
            return new String(buffer, 0, cur_pos);
        }

        /**
         * Empties the buffer; the allocated capacity is kept.
         */
        void reset() {
            cur_pos = 0;
        }
    }


    protected Reader inputReader;

    /**
     * next character in input stream
     */
    protected int nextChar;

    protected CharBuffer charBuf = new CharBuffer();

    /**
     * Whitespace characters
     */
    protected static final String WHITESPACE = " \n\t\r";

    /**
     * Separator characters
     */
    protected static final String SEPARATORS = WHITESPACE + ":=" + ",";

    /**
     * error reporting
     */
    public int line_num, token_line_num, char_num;
    public StringBuffer line;

    /**
     * Holds new variables defined by .set directive
     */
    public Hashtable dict = new Hashtable();

    /**
     * Creates a scanner over the given reader and positions it on the
     * first character that is neither whitespace nor part of a comment.
     *
     * @param i source of characters
     * @throws IOException if reading from the input fails
     * @throws DasmError if the input holds nothing but whitespace and
     *         comments
     */
    public Scanner(Reader i) throws IOException, DasmError {
        inputReader = i;
        line_num = 1;
        char_num = 0;
        line = new StringBuffer();
        nextChar = 0;
        skipEmptyLines();
        if (nextChar == -1) throw new DasmError("empty source file");
    }

    /**
     * Checks if a character code is a whitespace character
     */
    protected static boolean isWhitespace(int c) {
        return (WHITESPACE.indexOf(c) != -1);
    }

    /**
     * Checks if a character code is a separator character.
     * End of input (-1) counts as a separator too.
     */
    protected static boolean isSeparator(int c) {
        return (c == -1 || SEPARATORS.indexOf(c) != -1);
    }

    /**
     * Gets next char from input.
     *
     * Updates {@link #nextChar}, {@link #line_num}, {@link #char_num} and
     * the text of the current {@link #line}. When the input ends in the
     * middle of a line (char_num != 0) a synthetic '\n' is delivered
     * first; -1 is reported only when the end is hit at the start of a
     * line.
     */
    protected void readNextChar() throws IOException {
        nextChar = inputReader.read();
        switch (nextChar) {
        case -1: // EOF
            if (char_num == 0) {
                char_num = -1;
                break;
            }
            nextChar = '\n';
            // fall thru
        case '\n':
            line_num++;
            char_num = 0;
            break;
        default:
            line.append((char) nextChar);
            char_num++;
            return;
        }
        // a line has ended (or input is exhausted): drop its saved text
        line.setLength(0);
    }

    /**
     * Skips empty lines in input stream.
     *
     * Consumes whitespace and ';' comments until a character that is
     * neither is found, or until end of input.
     */
    private void skipEmptyLines() throws IOException {
        for (;;) {
            if (nextChar != ';') {
                do {
                    readNextChar();
                } while (isWhitespace(nextChar));
                if (nextChar != ';') return;
            }
            // inside a comment: discard everything up to the end of line
            do {
                readNextChar();
                if (nextChar == -1) return;
            } while (nextChar != '\n');
        }
    }

    /**
     * Reads unicode char (\\uXXXX).
     *
     * Consumes the four hexadecimal digits that follow the 'u'.
     *
     * @return the decoded character, or 0 if end of input was reached
     *         (callers detect that case through nextChar == -1)
     * @throws DasmError if one of the four characters is not a hex digit
     */
    private char readUnicodeChar() throws IOException, DasmError {
        int result = 0;
        for (int i = 0; i < 4; i++) {
            readNextChar();
            if (nextChar == -1) return 0;

            int tmp = Character.digit((char) nextChar, 16);
            if (tmp == -1)
                throw new DasmError("Invalid '\\u' escape sequence");
            result = (result << 4) | tmp;
        }
        return (char) result;
    }

    /**
     * Reads the escape sequence that follows a backslash inside a name.
     * Only the '\\uXXXX' form is accepted there.
     *
     * @return the decoded character
     * @throws DasmError if the escape is not '\\u', is malformed, or is
     *         cut short by end of input
     */
    private char nameEscape() throws IOException, DasmError {
        readNextChar();
        if (nextChar != 'u')
            throw new DasmError("Only '\\u' escape sequence allowed in names");
        char chval = readUnicodeChar();
        if (nextChar == -1)
            throw new DasmError("Left over '\\u' escape sequence");
        return chval;
    }

    /**
     * Read and recognize next token.
     *
     * @return the next token; sym.EOF once the input is exhausted
     * @throws IOException if reading from the input fails
     * @throws DasmError on malformed input
     */
    public token next_token() throws IOException, DasmError {
        token_line_num = line_num;

        for (;;) {
            switch (nextChar) {
            case ';': // a comment
            case '\n':
                // return single SEP token (skip multiple newlines
                // interspersed with whitespace or comments)
                skipEmptyLines();
                token_line_num = line_num;
                return new token(sym.SEP);

            case ' ':
            case '\t':
            case '\r':
            case ',': // whitespace
                readNextChar();
                break;

            case -1: // EOF token
                char_num = -1;
                return new token(sym.EOF);

            case '=': // EQUALS token
                readNextChar();
                return new token(sym.EQ);

            case ':': // COLON token
                readNextChar();
                return new token(sym.COLON);

            case '-':
            case '+':
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
            case '.': // a number
            {
                return readNumber();
            }

            case '"': // quoted string
            {
                return readQuotedString();
            }

            case '{': // list of registers
            {
                return readRegList();
            }

            case '\'': // quotation for overloading reserved words
                return readQuotedReservedWord();

            default: {
                // read up until a separator character
                boolean only_name = false;

                charBuf.reset();
                do {
                    char chval = (char) nextChar;
                    if (nextChar == '\\') {
                        chval = nameEscape();
                        // a word containing an escape is always a plain name
                        only_name = true;
                    }
                    charBuf.add(chval);
                    readNextChar();
                } while (!isSeparator(nextChar));

                String str = charBuf.toString();

                if (!only_name) {
                    token tok;

                    // keyword or directive?
                    if ((tok = ReservedWords.get(str)) != null) return tok;

                    // VM instruction?
                    if (DopInfo.contains(str))
                        return new str_token(sym.Insn, str);

                    // The length check keeps a lone "$" from reaching
                    // s.charAt(0) on an empty string; such a token is
                    // returned below as an ordinary word.
                    if (str.length() > 1 && str.charAt(0) == '$') {
                        String s = str.substring(1);
                        Object v;
                        int n = 10;
                        boolean neg = false;
                        switch (s.charAt(0)) {
                        default:
                            break;

                        case '-':
                            neg = true;
                            // fall thru
                        case '+':
                            // relative offset: $+N, $-N, $+0xN, $-0xN
                            s = s.substring(1);
                            if (s.startsWith("0x")) {
                                n = 16;
                                s = s.substring(2);
                            }
                            try {
                                n = Integer.parseInt(s, n);
                            } catch (NumberFormatException e) {
                                throw new DasmError(
                                        "Bad relative offset number");
                            }
                            if (neg) n = -n;
                            return new relative_number_token(sym.Relative, n);
                        }
                        // Do variable substitution
                        if ((v = dict.get(s)) != null) return (token) v;
                    } // not begin from '$'
                } // !only_name
                // Unrecognized string token (e.g. a classname)
                return new str_token(sym.Word, str);
            }
            }
        }
    }

    /**
     * Reads "-quoted string.
     *
     * Called with nextChar on the opening quote. Handles the backslash
     * escapes n, r, t, f, b, ", ', \, uXXXX and octal escapes of up to
     * three digits (values below 0x100).
     *
     * @return a sym.Str token holding the unescaped text
     * @throws DasmError if the string is not terminated or contains an
     *         unsupported escape sequence
     */
    protected token readQuotedString() throws IOException, DasmError {
        // true when nextChar already holds a character that has not been
        // processed yet, so the next iteration must not read another one
        boolean lookaheadPending = false;
        charBuf.reset();
        for (;;) {
            if (lookaheadPending)
                lookaheadPending = false;
            else
                readNextChar();

            if (nextChar == '"') {
                readNextChar(); // skip closing quote
                return new str_token(sym.Str, charBuf.toString());
            }

            if (nextChar == -1) throw new DasmError("Unterminated string");

            char chval = (char) nextChar;

            if (chval == '\\') {
                readNextChar();
                switch (nextChar) {
                case -1:
                    // let the top of the loop report the unterminated string
                    lookaheadPending = true;
                    continue;
                case 'n':
                    chval = '\n';
                    break;
                case 'r':
                    chval = '\r';
                    break;
                case 't':
                    chval = '\t';
                    break;
                case 'f':
                    chval = '\f';
                    break;
                case 'b':
                    chval = '\b';
                    break;
                case '"':
                    chval = '"';
                    break;
                case '\'':
                    chval = '\'';
                    break;
                case '\\':
                    chval = '\\';
                    break;

                case 'u':
                    chval = readUnicodeChar();
                    if (nextChar == -1) {
                        lookaheadPending = true;
                        continue;
                    }
                    break;

                // octals
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7': {
                    int res = nextChar & 7;
                    readNextChar();
                    if (nextChar < '0' || nextChar > '7')
                        lookaheadPending = true;
                    else {
                        res = res * 8 + (nextChar & 7);
                        readNextChar();
                        if (nextChar < '0' || nextChar > '7')
                            lookaheadPending = true;
                        else {
                            int val = res * 8 + (nextChar & 7);
                            // a third digit that would overflow one byte
                            // is not part of the escape
                            if (val >= 0x100)
                                lookaheadPending = true;
                            else
                                res = val;
                        }
                    }
                    chval = (char) res;
                }
                    break;

                default:
                    throw new DasmError("Incorrect backslash escape sequence");
                }
            }
            charBuf.add(chval);
        }
    }

    /**
     * Reads list of registers ({v1, v2, v3} or {v1..v3}).
     *
     * Called with nextChar on the opening brace. Everything between the
     * braces is returned verbatim, without the braces themselves.
     *
     * @return a sym.Word token holding the text between the braces
     * @throws DasmError if the closing brace is missing
     */
    protected token readRegList() throws IOException, DasmError {
        charBuf.reset();
        for (;;) {
            readNextChar();

            if (nextChar == '}') {
                readNextChar(); // skip closing brace
                return new str_token(sym.Word, charBuf.toString());
            }

            if (nextChar == -1)
                throw new DasmError("Unterminated list of registers");


            charBuf.add((char) nextChar);
        }
    }

    /**
     * Reads number.
     *
     * Also used for words starting with '.', which may be directives: the
     * text is looked up in ReservedWords before it is parsed as a number.
     *
     * @return the reserved-word token, a sym.Int token for Integer values,
     *         or a sym.Num token for any other Number
     * @throws DasmError if the text is neither a reserved word nor a
     *         valid number
     */
    protected token readNumber() throws IOException, DasmError {
        charBuf.reset();

        do {
            charBuf.add((char) nextChar);
            readNextChar();
        } while (!isSeparator(nextChar));

        String str = charBuf.toString();
        token tok;

        // directive?
        if ((tok = ReservedWords.get(str)) != null) return tok;

        Number num;
        try {
            num = Utils.stringToNumber(str);
        } catch (NumberFormatException e) {
            if (charBuf.charAt(0) != '.') // directive?
                throw new DasmError("Bad number format");
            throw new DasmError("Unknown directive or bad number format");
        }

        if (num instanceof Integer) {
            return new int_token(sym.Int, num.intValue());
        }

        return new number_token(sym.Num, num);
    }

    /**
     * Reads ''-quoted overloaded reserved words.
     *
     * Called with nextChar on the opening quote. The name must be
     * non-empty, must not contain separator characters, and must be
     * followed by a separator.
     *
     * @return a sym.Word token holding the name without the quotes
     * @throws DasmError if the name is empty, unterminated, or not
     *         followed by a separator
     */
    protected token readQuotedReservedWord() throws IOException, DasmError {
        charBuf.reset();
        for (;;) {
            readNextChar();
            if (isSeparator(nextChar))
                throw new DasmError("Unterminated ''-enclosed name");
            if (nextChar == '\'') {
                if (charBuf.size() == 0)
                    throw new DasmError("Empty ''-enclosed name");
                readNextChar(); // skip close quote
                if (!isSeparator(nextChar))
                    throw new DasmError(
                            "Missed separator after ''-enclosed name");
                return new str_token(sym.Word, charBuf.toString());
            }
            char chval = (char) nextChar;
            if (nextChar == '\\') chval = nameEscape();
            charBuf.add(chval);
        }
    }
}