// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { Token, TokenType } from "./token.js";

/**
 * Splits an input string into Tokens, one per call to next().
 *
 * The lexer keeps a cursor (cur_pos_) into the input and a 1-based line
 * counter (cur_line_). Each try* method either consumes characters and
 * returns a Token, or returns undefined to signal "no match".
 */
export default class Lexer {
  /**
   * @param {String} input The input string to tokenize.
   */
  constructor(input) {
    this.input_ = input;
    this.len_ = input.length;
    this.cur_pos_ = 0;
    this.cur_line_ = 1;

    this.num_regex_ = /^[0-9]+$/;
    this.alpha_regex_ = /^[a-zA-Z_]+$/;
    // Not referenced inside this class; kept so the set of instance
    // fields is unchanged.
    this.op_regex_ = /^Op[A-Z][^\s]*$/;
    this.hex_regex_ = /^[0-9a-fA-F]$/;
  }

  /**
   * Parses the next token from the input stream.
   *
   * Leading whitespace and `;` comments are skipped first. At the end of
   * the input a kEOF token is returned. If no matcher accepts the input a
   * kError token is returned and the cursor is left where it was.
   *
   * @return {Token} the next token.
   */
  next() {
    this.skipWhitespace();
    this.skipComments();

    if (this.cur_pos_ >= this.len_) {
      return new Token(TokenType.kEOF, this.cur_line_);
    }

    // Order matters: hex must be tried before the decimal forms (otherwise
    // "0x10" would lex as the integer 0), float before integer (otherwise
    // "1.5" would lex as the integer 1), and Op before the generic
    // identifier matcher.
    const matchers = [
      this.tryHexInteger,
      this.tryFloat,
      this.tryInteger,
      this.tryString,
      this.tryOp,
      this.tryPunctuation,
      this.tryResultId,
      this.tryIdent,
    ];
    for (const matcher of matchers) {
      const token = matcher.call(this);
      if (token !== undefined) {
        return token;
      }
    }

    return new Token(TokenType.kError, this.cur_line_, "Failed to match token");
  }

  /**
   * Tests whether the input at the cursor starts with |str|.
   * The cursor is not moved.
   *
   * @param {String} str The text to look for.
   * @return {boolean} true if the input at the cursor begins with |str|.
   */
  is(str) {
    // Not enough input left to hold |str|.
    if (this.len_ <= this.cur_pos_ + (str.length - 1)) {
      return false;
    }

    for (let i = 0; i < str.length; ++i) {
      if (this.input_[this.cur_pos_ + i] !== str[i]) {
        return false;
      }
    }

    return true;
  }

  /**
   * @param {String} ch The text to test.
   * @return {Array|null} a truthy match result if |ch| consists only of
   *                      decimal digits, null otherwise.
   */
  isNum(ch) {
    return ch.match(this.num_regex_);
  }

  /**
   * @param {String} ch The text to test.
   * @return {Array|null} a truthy match result if |ch| consists only of
   *                      ASCII letters and underscores, null otherwise.
   */
  isAlpha(ch) {
    return ch.match(this.alpha_regex_);
  }

  /**
   * @param {String} ch The text to test.
   * @return {Array|null} a truthy match result if |ch| passes isNum or
   *                      isAlpha, null otherwise.
   */
  isAlphaNum(ch) {
    return this.isNum(ch) || this.isAlpha(ch);
  }

  /**
   * @param {String} char A single character to test.
   * @return {Array|null} a truthy match result if |char| is exactly one
   *                      hexadecimal digit, null otherwise.
   */
  isHex(char) {
    return char.match(this.hex_regex_);
  }

  /**
   * @return {boolean} true if the character at the cursor is a space, tab,
   *                   carriage return or newline.
   */
  isCurWhitespace() {
    return this.is(" ") || this.is("\t") || this.is("\r") || this.is("\n");
  }

  /**
   * Advances the cursor past any run of whitespace and comments,
   * incrementing the line counter for every newline skipped.
   */
  skipWhitespace() {
    for (;;) {
      const startPos = this.cur_pos_;
      while (this.cur_pos_ < this.len_ && this.isCurWhitespace()) {
        if (this.is("\n")) {
          this.cur_line_ += 1;
        }
        this.cur_pos_ += 1;
      }

      this.skipComments();

      // Cursor didn't move so neither whitespace nor a comment matched.
      if (startPos === this.cur_pos_) {
        break;
      }
    }
  }

  /**
   * If the cursor is on a `;`, advances it up to (but not past) the next
   * newline or the end of the input. Otherwise does nothing.
   */
  skipComments() {
    if (!this.is(";")) {
      return;
    }

    while (this.cur_pos_ < this.len_ && !this.is("\n")) {
      this.cur_pos_ += 1;
    }
  }

  /**
   * Attempt to parse the next part of the input as a float.
   *
   * Accepts an optional leading "-", optional digits, a mandatory ".", and
   * optional digits; a bare "." or "-." is rejected.
   *
   * @return {Token|undefined} returns a Token if a float is matched,
   *                           undefined otherwise.
   */
  tryFloat() {
    const start = this.cur_pos_;
    let end = start;

    if (this.cur_pos_ >= this.len_) {
      return undefined;
    }
    if (this.input_[end] === "-") {
      end += 1;
    }

    while (end < this.len_ && this.isNum(this.input_[end])) {
      end += 1;
    }

    // Must have a "." in a float
    if (end >= this.len_ || this.input_[end] !== ".") {
      return undefined;
    }

    end += 1;
    while (end < this.len_ && this.isNum(this.input_[end])) {
      end += 1;
    }

    const text = this.input_.slice(start, end);
    if (text === "." || text === "-.") {
      return undefined;
    }

    this.cur_pos_ = end;

    return new Token(TokenType.kFloatLiteral, this.cur_line_, parseFloat(text));
  }

  /**
   * Attempt to parse a hex encoded integer.
   *
   * The input must start with "0x" followed by at least one hexadecimal
   * digit; otherwise nothing is consumed.
   *
   * @return {Token|undefined} returns a Token if a Hex number is matched,
   *                           undefined otherwise.
   */
  tryHexInteger() {
    const start = this.cur_pos_;
    let end = start;

    if (this.cur_pos_ >= this.len_) {
      return undefined;
    }
    if (end + 2 >= this.len_ || this.input_[end] !== "0" ||
        this.input_[end + 1] !== "x") {
      return undefined;
    }

    end += 2;

    // Without at least one digit parseInt would produce NaN, so treat a
    // bare "0x" prefix as "not a hex literal".
    if (!this.isHex(this.input_[end])) {
      return undefined;
    }

    while (end < this.len_ && this.isHex(this.input_[end])) {
      end += 1;
    }

    this.cur_pos_ = end;

    const val = parseInt(this.input_.slice(start, end), 16);
    return new Token(TokenType.kIntegerLiteral, this.cur_line_, val);
  }

  /**
   * Attempt to parse an encoded integer.
   *
   * Accepts an optional leading "-" followed by one or more decimal digits.
   *
   * @return {Token|undefined} returns a Token if a number is matched,
   *                           undefined otherwise.
   */
  tryInteger() {
    const start = this.cur_pos_;
    let end = start;

    if (this.cur_pos_ >= this.len_) {
      return undefined;
    }
    if (this.input_[end] === "-") {
      end += 1;
    }

    if (end >= this.len_ || !this.isNum(this.input_[end])) {
      return undefined;
    }

    while (end < this.len_ && this.isNum(this.input_[end])) {
      end += 1;
    }

    this.cur_pos_ = end;

    const val = parseInt(this.input_.slice(start, end), 10);
    return new Token(TokenType.kIntegerLiteral, this.cur_line_, val);
  }

  /**
   * Attempt to parse a result id.
   *
   * A result id is a "%" followed by letters, digits and underscores. The
   * token data holds the text after the "%" as |name| and, when that text
   * is entirely numeric, its integer value as |val| (undefined otherwise).
   *
   * @return {Token|undefined} returns a Token if a result id is matched,
   *                           undefined otherwise.
   */
  tryResultId() {
    if (this.cur_pos_ >= this.len_) {
      return undefined;
    }
    if (!this.is("%")) {
      return undefined;
    }

    // Skip the "%"; it is not part of the name.
    this.cur_pos_ += 1;
    const start = this.cur_pos_;
    while (this.cur_pos_ < this.len_ &&
           (this.isAlphaNum(this.input_[this.cur_pos_]) || this.is("_"))) {
      this.cur_pos_ += 1;
    }

    const ident = this.input_.slice(start, this.cur_pos_);
    let value = undefined;
    if (ident.match(this.num_regex_)) {
      value = parseInt(ident, 10);
    }

    return new Token(TokenType.kResultId, this.cur_line_, {
      name: ident,
      val: value
    });
  }

  /**
   * Attempt to parse an identifier.
   *
   * An identifier is one or more letters, digits or underscores. If the
   * character at the cursor is none of those, nothing is consumed and
   * undefined is returned so that next() can report an error instead of
   * handing back an empty identifier without making progress.
   *
   * @return {Token|undefined} returns a Token if an identifier is matched,
   *                           undefined otherwise.
   */
  tryIdent() {
    const start = this.cur_pos_;
    if (start >= this.len_) {
      return undefined;
    }

    while (this.cur_pos_ < this.len_ &&
           (this.isAlphaNum(this.input_[this.cur_pos_]) || this.is("_"))) {
      this.cur_pos_ += 1;
    }

    // Nothing matched: do not emit an empty identifier.
    if (this.cur_pos_ === start) {
      return undefined;
    }

    const ident = this.input_.slice(start, this.cur_pos_);
    return new Token(TokenType.kIdentifier, this.cur_line_, ident);
  }

  /**
   * Attempt to parse an Op command.
   *
   * Matches when the input at the cursor starts with "Op" and consumes
   * everything up to the next whitespace character (or the end of input).
   *
   * @return {Token|undefined} returns a Token if an Op command is matched,
   *                           undefined otherwise.
   */
  tryOp() {
    const start = this.cur_pos_;
    if (this.cur_pos_ >= this.len_ || (this.cur_pos_ + 1 >= this.len_)) {
      return undefined;
    }

    if (this.input_[this.cur_pos_] !== "O" ||
        this.input_[this.cur_pos_ + 1] !== "p") {
      return undefined;
    }

    while (this.cur_pos_ < this.len_ && !this.isCurWhitespace()) {
      this.cur_pos_ += 1;
    }

    return new Token(TokenType.kOp, this.cur_line_, {
      name: this.input_.slice(start, this.cur_pos_)
    });
  }

  /**
   * Attempts to match punctuation strings against the input
   * @return {Token|undefined} Returns the Token for the punctuation or
   *                           undefined if no matches found.
   */
  tryPunctuation() {
    const punctuation = [
      ["=", TokenType.kEqual],
      ["|", TokenType.kPipe],
    ];

    for (const [text, type] of punctuation) {
      if (this.is(text)) {
        // Advance by the matched text rather than by the TokenType value so
        // the cursor does not depend on how TokenType is represented.
        this.cur_pos_ += text.length;
        return new Token(type, this.cur_line_, type);
      }
    }

    return undefined;
  }

  /**
   * Attempts to match strings against the input
   *
   * A string starts and ends with a double quote. A backslash causes the
   * following character to be taken literally (so `\"` yields `"` and
   * `\\` yields `\`). An unterminated string is not matched; in that case
   * the cursor is left at the end of the input.
   *
   * @return {Token|undefined} Returns the Token for the string or undefined
   *                           if no match found.
   */
  tryString() {
    // Must have at least 2 chars for a string.
    if (this.cur_pos_ >= this.len_ || (this.cur_pos_ + 1 >= this.len_)) {
      return undefined;
    }
    if (!this.is("\"")) {
      return undefined;
    }

    this.cur_pos_ += 1;
    let str = "";
    // Stop strictly before the end of the input so that an unterminated
    // string never reads past the last character.
    while (this.cur_pos_ < this.len_) {
      if (this.is("\"")) {
        break;
      }

      if (this.is("\\")) {
        // Step over the backslash; the character after it is kept as-is.
        this.cur_pos_ += 1;
        if (this.cur_pos_ >= this.len_) {
          return undefined;
        }
      }

      str += this.input_[this.cur_pos_];
      this.cur_pos_ += 1;
    }

    // Ran out of input before finding the closing quote.
    if (this.cur_pos_ >= this.len_) {
      return undefined;
    }

    // Consume the closing quote.
    this.cur_pos_ += 1;

    return new Token(TokenType.kStringLiteral, this.cur_line_, str);
  }
}