// Copyright 2019 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. import { Token, TokenType } from "./token.js"; export default class Lexer { /** * @param {String} input The input string to tokenize. */ constructor(input) { this.input_ = input; this.len_ = input.length; this.cur_pos_ = 0; this.cur_line_ = 1; this.num_regex_ = /^[0-9]+$/; this.alpha_regex_ = /^[a-zA-Z_]+$/; this.op_regex_ = /^Op[A-Z][^\s]*$/; this.hex_regex_ = /^[0-9a-fA-F]$/; } /** * Parses the next token from the input stream. * @return {Token} the next token. */ next() { this.skipWhitespace(); this.skipComments(); if (this.cur_pos_ >= this.len_) return new Token(TokenType.kEOF, this.cur_line_); let n = this.tryHexInteger(); if (n !== undefined) return n; n = this.tryFloat(); if (n !== undefined) return n; n = this.tryInteger(); if (n !== undefined) return n; n = this.tryString(); if (n !== undefined) return n; n = this.tryOp(); if (n !== undefined) return n; n = this.tryPunctuation(); if (n !== undefined) return n; n = this.tryResultId(); if (n !== undefined) return n; n = this.tryIdent(); if (n !== undefined) return n; return new Token(TokenType.kError, this.cur_line_, "Failed to match token"); } is(str) { if (this.len_ <= this.cur_pos_ + (str.length - 1)) return false; for (let i = 0; i < str.length; ++i) { if (this.input_[this.cur_pos_ + i] !== str[i]) return false; } return true; } isNum(ch) { return ch.match(this.num_regex_); } isAlpha(ch) { return ch.match(this.alpha_regex_); } isAlphaNum(ch) { return this.isNum(ch) || this.isAlpha(ch); } isHex(char) { return char.match(this.hex_regex_); } isCurWhitespace() { return this.is(" ") || this.is("\t") || this.is("\r") || this.is("\n"); } skipWhitespace() { for(;;) { let cur_pos = this.cur_pos_; while (this.cur_pos_ < this.len_ && this.isCurWhitespace()) { if (this.is("\n")) this.cur_line_ += 1; this.cur_pos_ += 1; } this.skipComments(); // Cursor didn't move so no whitespace matched. if (cur_pos === this.cur_pos_) break; } } skipComments() { if (!this.is(";")) return; while (this.cur_pos_ < this.len_ && !this.is("\n")) this.cur_pos_ += 1; } /** * Attempt to parse the next part of the input as a float. * @return {Token|undefined} returns a Token if a float is matched, * undefined otherwise. */ tryFloat() { let start = this.cur_pos_; let end = start; if (this.cur_pos_ >= this.len_) return undefined; if (this.input_[end] === "-") end += 1; while (end < this.len_ && this.isNum(this.input_[end])) end += 1; // Must have a "." in a float if (end >= this.len_ || this.input_[end] !== ".") return undefined; end += 1; while (end < this.len_ && this.isNum(this.input_[end])) end += 1; let substr = this.input_.substr(start, end - start); if (substr === "." || substr === "-.") return undefined; this.cur_pos_ = end; return new Token(TokenType.kFloatLiteral, this.cur_line_, parseFloat(substr)); } /** * Attempt to parse a hex encoded integer. * @return {Token|undefined} returns a Token if a Hex number is matched, * undefined otherwise. */ tryHexInteger() { let start = this.cur_pos_; let end = start; if (this.cur_pos_ >= this.len_) return undefined; if (end + 2 >= this.len_ || this.input_[end] !== "0" || this.input_[end + 1] !== "x") { return undefined; } end += 2; while (end < this.len_ && this.isHex(this.input_[end])) end += 1; this.cur_pos_ = end; let val = parseInt(this.input_.substr(start, end - start), 16); return new Token(TokenType.kIntegerLiteral, this.cur_line_, val); } /** * Attempt to parse an encoded integer. * @return {Token|undefined} returns a Token if a number is matched, * undefined otherwise. */ tryInteger() { let start = this.cur_pos_; let end = start; if (this.cur_pos_ >= this.len_) return undefined; if (this.input_[end] === "-") end += 1; if (end >= this.len_ || !this.isNum(this.input_[end])) return undefined; while (end < this.len_ && this.isNum(this.input_[end])) end += 1; this.cur_pos_ = end; let val = parseInt(this.input_.substr(start, end - start), 10); return new Token(TokenType.kIntegerLiteral, this.cur_line_, val); } /** * Attempt to parse a result id. * @return {Token|undefined} returns a Token if a result id is matched, * undefined otherwise. */ tryResultId() { let start = this.cur_pos_; if (start >= this.len_) return undefined; if (!this.is("%")) return undefined; start += 1; this.cur_pos_ += 1; while (this.cur_pos_ < this.len_ && (this.isAlphaNum(this.input_[this.cur_pos_]) || this.is("_"))) { this.cur_pos_ += 1; } let ident = this.input_.substr(start, this.cur_pos_ - start); let value = undefined; if (ident.match(this.num_regex_)) value = parseInt(ident, 10); return new Token(TokenType.kResultId, this.cur_line_, { name: ident, val: value }); } /** * Attempt to parse an identifier. * @return {Token|undefined} returns a Token if an identifier is matched, * undefined otherwise. */ tryIdent() { let start = this.cur_pos_; if (start >= this.len_) return undefined; while (this.cur_pos_ < this.len_ && (this.isAlphaNum(this.input_[this.cur_pos_]) || this.is("_"))) { this.cur_pos_ += 1; } let ident = this.input_.substr(start, this.cur_pos_ - start); return new Token(TokenType.kIdentifier, this.cur_line_, ident); } /** * Attempt to parse an Op command. * @return {Token|undefined} returns a Token if an Op command is matched, * undefined otherwise. */ tryOp() { let start = this.cur_pos_; if (this.cur_pos_ >= this.len_ || (this.cur_pos_ + 1 >= this.len_)) return undefined; if (this.input_[this.cur_pos_] !== "O" || this.input_[this.cur_pos_ + 1] !== "p") { return undefined; } while (this.cur_pos_ < this.len_ && !this.isCurWhitespace()) { this.cur_pos_ += 1; } return new Token(TokenType.kOp, this.cur_line_, { name: this.input_.substr(start, this.cur_pos_ - start) }); } /** * Attempts to match punctuation strings against the input * @return {Token|undefined} Returns the Token for the punctuation or * undefined if no matches found. */ tryPunctuation() { let type = undefined; if (this.is("=")) type = TokenType.kEqual; else if (this.is("|")) type = TokenType.kPipe; if (type === undefined) return undefined; this.cur_pos_ += type.length; return new Token(type, this.cur_line_, type); } /** * Attempts to match strings against the input * @return {Token|undefined} Returns the Token for the string or undefined * if no match found. */ tryString() { let start = this.cur_pos_; // Must have at least 2 chars for a string. if (this.cur_pos_ >= this.len_ || (this.cur_pos_ + 1 >= this.len_)) return undefined; if (!this.is("\"")) return undefined; this.cur_pos_ += 1; let str = ""; while (this.cur_pos_ <= this.len_) { if (this.is("\"")) break; if (this.is("\\")) { this.cur_pos_ += 1; if (this.cur_pos_ >= this.len_) return undefined; if (this.is("\\")) { str += "\\"; } else if (this.is("\"")) { str += '"'; } else { str += this.input_[this.cur_pos_]; } } else { str += this.input_[this.cur_pos_]; } this.cur_pos_ += 1; } if (this.cur_pos_ >= this.len_) return undefined; this.cur_pos_ += 1; return new Token(TokenType.kStringLiteral, this.cur_line_, str); } }