1// Copyright 2019 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15import { Token, TokenType } from "./token.js";
16
export default class Lexer {
  /**
   * Creates a lexer positioned at the start of `input`, on line 1.
   * @param {String} input The input string to tokenize.
   */
  constructor(input) {
    this.input_ = input;
    this.len_ = input.length;
    this.cur_pos_ = 0;
    this.cur_line_ = 1;

    this.num_regex_ = /^[0-9]+$/;
    // Note that "_" counts as an alpha character.
    this.alpha_regex_ = /^[a-zA-Z_]+$/;
    this.op_regex_ = /^Op[A-Z][^\s]*$/;
    this.hex_regex_ = /^[0-9a-fA-F]$/;
  }

  /**
   * Parses the next token from the input stream.
   *
   * Leading whitespace and `;` comments are skipped first. If no rule matches
   * the text at the cursor a kError token is returned and the cursor steps
   * past the offending character, so repeated calls always reach kEOF.
   *
   * @return {Token} the next token.
   */
  next() {
    // skipWhitespace() also consumes comments, nothing else needs skipping.
    this.skipWhitespace();

    if (this.cur_pos_ >= this.len_)
      return new Token(TokenType.kEOF, this.cur_line_);

    // Order matters: hex must be tried before the decimal forms (they would
    // match the leading "0"), floats before integers (an integer would match
    // the digits before the "."), and Op before identifiers.
    const token = this.tryHexInteger() || this.tryFloat() ||
        this.tryInteger() || this.tryString() || this.tryOp() ||
        this.tryPunctuation() || this.tryResultId() || this.tryIdent();
    if (token !== undefined)
      return token;

    const error =
        new Token(TokenType.kError, this.cur_line_, "Failed to match token");

    // Guarantee forward progress so callers looping until kEOF terminate.
    if (this.cur_pos_ < this.len_)
      this.cur_pos_ += 1;

    return error;
  }

  /**
   * Checks whether the input at the cursor starts with `str`.
   * @param {String} str The text to look for.
   * @return {boolean} true if `str` is found at the current position.
   */
  is(str) {
    return this.input_.startsWith(str, this.cur_pos_);
  }

  /**
   * @param {String} ch The text to test.
   * @return {boolean} true if `ch` consists only of decimal digits.
   */
  isNum(ch) {
    return ch.match(this.num_regex_) !== null;
  }

  /**
   * @param {String} ch The text to test.
   * @return {boolean} true if `ch` consists only of ASCII letters or "_".
   */
  isAlpha(ch) {
    return ch.match(this.alpha_regex_) !== null;
  }

  /**
   * @param {String} ch The text to test.
   * @return {boolean} true if `ch` is numeric or alphabetic (including "_").
   */
  isAlphaNum(ch) {
    return this.isNum(ch) || this.isAlpha(ch);
  }

  /**
   * @param {String} char The character to test.
   * @return {boolean} true if `char` is a single hexadecimal digit.
   */
  isHex(char) {
    return char.match(this.hex_regex_) !== null;
  }

  /**
   * @return {boolean} true if the cursor is on a space, tab, carriage return
   *                   or newline.
   */
  isCurWhitespace() {
    return this.is(" ") || this.is("\t") || this.is("\r") || this.is("\n");
  }

  /**
   * Advances the cursor over any run of whitespace and comments, counting
   * the newlines that are crossed.
   */
  skipWhitespace() {
    for (;;) {
      const before = this.cur_pos_;
      while (this.cur_pos_ < this.len_ && this.isCurWhitespace()) {
        if (this.is("\n"))
          this.cur_line_ += 1;

        this.cur_pos_ += 1;
      }

      // A comment stops at its newline, which the next iteration consumes.
      this.skipComments();

      // Cursor didn't move so neither whitespace nor a comment matched.
      if (before === this.cur_pos_)
        break;
    }
  }

  /**
   * If the cursor is on a ";", advances to (but not past) the end of the
   * line or the end of the input.
   */
  skipComments() {
    if (!this.is(";"))
      return;

    while (this.cur_pos_ < this.len_ && !this.is("\n"))
      this.cur_pos_ += 1;
  }

  /**
   * Attempt to parse the next part of the input as a float. A float is an
   * optional "-", optional digits, a mandatory ".", and optional digits, with
   * at least one digit overall.
   * @return {Token|undefined} returns a Token if a float is matched,
   *                           undefined otherwise.
   */
  tryFloat() {
    const start = this.cur_pos_;
    let end = start;

    if (start >= this.len_)
      return undefined;
    if (this.input_[end] === "-")
      end += 1;

    while (end < this.len_ && this.isNum(this.input_[end]))
      end += 1;

    // Must have a "." in a float
    if (end >= this.len_ || this.input_[end] !== ".")
      return undefined;

    end += 1;
    while (end < this.len_ && this.isNum(this.input_[end]))
      end += 1;

    // A bare "." or "-." has no digits and is not a number.
    const text = this.input_.slice(start, end);
    if (text === "." || text === "-.")
      return undefined;

    this.cur_pos_ = end;

    return new Token(TokenType.kFloatLiteral, this.cur_line_, parseFloat(text));
  }

  /**
   * Attempt to parse a hex encoded integer: "0x" followed by one or more
   * hexadecimal digits.
   * @return {Token|undefined} returns a Token if a Hex number is matched,
   *                           undefined otherwise.
   */
  tryHexInteger() {
    const start = this.cur_pos_;

    // Need the "0x" prefix plus at least one more character.
    if (start + 2 >= this.len_ || !this.is("0x"))
      return undefined;

    let end = start + 2;
    while (end < this.len_ && this.isHex(this.input_[end]))
      end += 1;

    // "0x" without digits is not a hex literal; matching it would yield NaN.
    if (end === start + 2)
      return undefined;

    this.cur_pos_ = end;

    const val = parseInt(this.input_.slice(start, end), 16);
    return new Token(TokenType.kIntegerLiteral, this.cur_line_, val);
  }

  /**
   * Attempt to parse a decimal integer with an optional leading "-".
   * @return {Token|undefined} returns a Token if a number is matched,
   *                           undefined otherwise.
   */
  tryInteger() {
    const start = this.cur_pos_;
    let end = start;

    if (start >= this.len_)
      return undefined;
    if (this.input_[end] === "-")
      end += 1;

    // At least one digit is required.
    if (end >= this.len_ || !this.isNum(this.input_[end]))
      return undefined;

    while (end < this.len_ && this.isNum(this.input_[end]))
      end += 1;

    this.cur_pos_ = end;

    const val = parseInt(this.input_.slice(start, end), 10);
    return new Token(TokenType.kIntegerLiteral, this.cur_line_, val);
  }

  /**
   * Attempt to parse a result id: "%" followed by letters, digits or "_".
   * The token data is `{name, val}` where `val` is the numeric value when the
   * name is all digits and undefined otherwise.
   * @return {Token|undefined} returns a Token if a result id is matched,
   *                           undefined otherwise.
   */
  tryResultId() {
    if (this.cur_pos_ >= this.len_ || !this.is("%"))
      return undefined;

    this.cur_pos_ += 1;
    const start = this.cur_pos_;

    // isAlphaNum() accepts "_" as well as letters and digits.
    while (this.cur_pos_ < this.len_ &&
        this.isAlphaNum(this.input_[this.cur_pos_])) {
      this.cur_pos_ += 1;
    }

    const ident = this.input_.slice(start, this.cur_pos_);
    const value = this.isNum(ident) ? parseInt(ident, 10) : undefined;

    return new Token(TokenType.kResultId, this.cur_line_, {
      name: ident,
      val: value
    });
  }

  /**
   * Attempt to parse an identifier made of letters, digits or "_".
   * @return {Token|undefined} returns a Token if an identifier is matched,
   *                           undefined otherwise.
   */
  tryIdent() {
    const start = this.cur_pos_;
    if (start >= this.len_)
      return undefined;

    // isAlphaNum() accepts "_" as well as letters and digits.
    while (this.cur_pos_ < this.len_ &&
        this.isAlphaNum(this.input_[this.cur_pos_])) {
      this.cur_pos_ += 1;
    }

    // Nothing consumed means no identifier here. Returning an empty token
    // would leave the cursor stuck on the same character forever.
    if (this.cur_pos_ === start)
      return undefined;

    const ident = this.input_.slice(start, this.cur_pos_);
    return new Token(TokenType.kIdentifier, this.cur_line_, ident);
  }

  /**
   * Attempt to parse an Op command: text starting with "Op" and running up
   * to the next whitespace character or the end of the input.
   * @return {Token|undefined} returns a Token if an Op command is matched,
   *                           undefined otherwise.
   */
  tryOp() {
    const start = this.cur_pos_;

    // Need at least the two characters of the "Op" prefix.
    if (start + 1 >= this.len_ || !this.is("Op"))
      return undefined;

    while (this.cur_pos_ < this.len_ && !this.isCurWhitespace())
      this.cur_pos_ += 1;

    return new Token(TokenType.kOp, this.cur_line_, {
      name: this.input_.slice(start, this.cur_pos_)
    });
  }

  /**
   * Attempts to match punctuation strings against the input
   * @return {Token|undefined} Returns the Token for the punctuation or
   *                           undefined if no matches found.
   */
  tryPunctuation() {
    const punctuation = [
      ["=", TokenType.kEqual],
      ["|", TokenType.kPipe]
    ];

    for (const [literal, type] of punctuation) {
      if (!this.is(literal))
        continue;

      // Advance by the matched text, not by the TokenType value, so the
      // cursor does not depend on how token types are represented.
      this.cur_pos_ += literal.length;
      return new Token(type, this.cur_line_, type);
    }

    return undefined;
  }

  /**
   * Attempts to match a double-quoted string against the input. A backslash
   * escapes the character that follows it. The token reports the line the
   * string starts on; newlines inside the string advance the line counter
   * for the tokens that follow.
   *
   * If the string is unterminated, undefined is returned and the cursor is
   * left at the end of the input (the rest of the input was part of the
   * unfinished string).
   *
   * @return {Token|undefined} Returns the Token for the string or undefined
   *                           if no match found.
   */
  tryString() {
    // Must have at least 2 chars for a string.
    if (this.cur_pos_ + 1 >= this.len_ || !this.is("\""))
      return undefined;

    this.cur_pos_ += 1;

    let str = "";
    let newlines = 0;
    let terminated = false;
    while (this.cur_pos_ < this.len_) {
      if (this.is("\"")) {
        terminated = true;
        break;
      }

      if (this.is("\\")) {
        // Skip the backslash; the escaped character is taken literally.
        this.cur_pos_ += 1;
        if (this.cur_pos_ >= this.len_)
          break;
      }

      const ch = this.input_[this.cur_pos_];
      if (ch === "\n")
        newlines += 1;

      str += ch;
      this.cur_pos_ += 1;
    }

    if (!terminated)
      return undefined;

    // Step over the closing quote.
    this.cur_pos_ += 1;

    const token = new Token(TokenType.kStringLiteral, this.cur_line_, str);
    this.cur_line_ += newlines;
    return token;
  }
}
364