1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_PARSING_TOKEN_H_
6 #define V8_PARSING_TOKEN_H_
7 
8 #include "src/base/logging.h"
9 #include "src/globals.h"
10 
11 namespace v8 {
12 namespace internal {
13 
14 // TOKEN_LIST takes a list of 3 macros M, all of which satisfy the
15 // same signature M(name, string, precedence), where name is the
16 // symbolic token name, string is the corresponding syntactic symbol
17 // (or nullptr, for literals), and precedence is the precedence (or 0).
18 // The parameters are invoked for token categories as follows:
19 //
20 //   T: Non-keyword tokens
21 //   K: Keyword tokens
22 //   C: Contextual keyword token
23 //
// Contextual keyword tokens are tokens that are scanned as Token::IDENTIFIER,
// but that in some contexts are treated as keywords. This mostly happens
// when ECMAScript introduces new keywords, but for backwards compatibility
// allows them to still be used as identifiers in most contexts.
28 
// IGNORE_TOKEN is a convenience macro that can be supplied as
// an argument (at any position) for a TOKEN_LIST call. It expands
// to nothing, so tokens belonging to the respective category are
// skipped; e.g. TOKEN_LIST(IGNORE_TOKEN, M, IGNORE_TOKEN) applies M
// to keyword tokens only.

#define IGNORE_TOKEN(name, string, precedence)
34 
// The position of an entry determines the value of its Token::Value
// enumerator, and the Token range predicates (IsAssignmentOp, IsBinaryOp,
// IsCompareOp, IsUnaryOp, IsBitOp, IsShiftOp) compare against the first and
// last entry of a group. Keep the marked groups contiguous and in the listed
// order when adding or moving tokens.
#define TOKEN_LIST(T, K, C)                                        \
  /* End of source indicator. */                                   \
  T(EOS, "EOS", 0)                                                 \
                                                                   \
  /* Punctuators (ECMA-262, section 7.7, page 15). */              \
  T(LPAREN, "(", 0)                                                \
  T(RPAREN, ")", 0)                                                \
  T(LBRACK, "[", 0)                                                \
  T(RBRACK, "]", 0)                                                \
  T(LBRACE, "{", 0)                                                \
  T(RBRACE, "}", 0)                                                \
  T(COLON, ":", 0)                                                 \
  T(SEMICOLON, ";", 0)                                             \
  T(PERIOD, ".", 0)                                                \
  T(ELLIPSIS, "...", 0)                                            \
  T(CONDITIONAL, "?", 3)                                           \
  T(INC, "++", 0)                                                  \
  T(DEC, "--", 0)                                                  \
  T(ARROW, "=>", 0)                                                \
                                                                   \
  /* Assignment operators. */                                      \
  /* IsAssignmentOp() relies on this block of enum values being */ \
  /* contiguous and sorted in the same order! */                   \
  T(INIT, "=init", 2) /* AST-use only. */                          \
  T(ASSIGN, "=", 2)                                                \
  T(ASSIGN_BIT_OR, "|=", 2)                                        \
  T(ASSIGN_BIT_XOR, "^=", 2)                                       \
  T(ASSIGN_BIT_AND, "&=", 2)                                       \
  T(ASSIGN_SHL, "<<=", 2)                                          \
  T(ASSIGN_SAR, ">>=", 2)                                          \
  T(ASSIGN_SHR, ">>>=", 2)                                         \
  T(ASSIGN_ADD, "+=", 2)                                           \
  T(ASSIGN_SUB, "-=", 2)                                           \
  T(ASSIGN_MUL, "*=", 2)                                           \
  T(ASSIGN_DIV, "/=", 2)                                           \
  T(ASSIGN_MOD, "%=", 2)                                           \
  T(ASSIGN_EXP, "**=", 2)                                          \
                                                                   \
  /* Binary operators sorted by precedence. */                     \
  /* IsBinaryOp() relies on this block of enum values */           \
  /* being contiguous and sorted in the same order! */             \
  T(COMMA, ",", 1)                                                 \
  T(OR, "||", 4)                                                   \
  T(AND, "&&", 5)                                                  \
  T(BIT_OR, "|", 6)                                                \
  T(BIT_XOR, "^", 7)                                               \
  T(BIT_AND, "&", 8)                                               \
  T(SHL, "<<", 11)                                                 \
  T(SAR, ">>", 11)                                                 \
  T(SHR, ">>>", 11)                                                \
  T(ADD, "+", 12)                                                  \
  T(SUB, "-", 12)                                                  \
  T(MUL, "*", 13)                                                  \
  T(DIV, "/", 13)                                                  \
  T(MOD, "%", 13)                                                  \
  T(EXP, "**", 14)                                                 \
                                                                   \
  /* Compare operators sorted by precedence. */                    \
  /* IsCompareOp() relies on this block of enum values */          \
  /* being contiguous and sorted in the same order! */             \
  T(EQ, "==", 9)                                                   \
  T(NE, "!=", 9)                                                   \
  T(EQ_STRICT, "===", 9)                                           \
  T(NE_STRICT, "!==", 9)                                           \
  T(LT, "<", 10)                                                   \
  T(GT, ">", 10)                                                   \
  T(LTE, "<=", 10)                                                 \
  T(GTE, ">=", 10)                                                 \
  K(INSTANCEOF, "instanceof", 10)                                  \
  K(IN, "in", 10)                                                  \
                                                                   \
  /* Unary operators. */                                           \
  /* IsUnaryOp() relies on this block of enum values */            \
  /* being contiguous and sorted in the same order! */             \
  T(NOT, "!", 0)                                                   \
  T(BIT_NOT, "~", 0)                                               \
  K(DELETE, "delete", 0)                                           \
  K(TYPEOF, "typeof", 0)                                           \
  K(VOID, "void", 0)                                               \
                                                                   \
  /* Keywords (ECMA-262, section 7.5.2, page 13). */               \
  /* Commented-out names (e.g. DELETE) are keywords already */     \
  /* defined in the operator groups above. */                      \
  K(BREAK, "break", 0)                                             \
  K(CASE, "case", 0)                                               \
  K(CATCH, "catch", 0)                                             \
  K(CONTINUE, "continue", 0)                                       \
  K(DEBUGGER, "debugger", 0)                                       \
  K(DEFAULT, "default", 0)                                         \
  /* DELETE */                                                     \
  K(DO, "do", 0)                                                   \
  K(ELSE, "else", 0)                                               \
  K(FINALLY, "finally", 0)                                         \
  K(FOR, "for", 0)                                                 \
  K(FUNCTION, "function", 0)                                       \
  K(IF, "if", 0)                                                   \
  /* IN */                                                         \
  /* INSTANCEOF */                                                 \
  K(NEW, "new", 0)                                                 \
  K(RETURN, "return", 0)                                           \
  K(SWITCH, "switch", 0)                                           \
  K(THIS, "this", 0)                                               \
  K(THROW, "throw", 0)                                             \
  K(TRY, "try", 0)                                                 \
  /* TYPEOF */                                                     \
  K(VAR, "var", 0)                                                 \
  /* VOID */                                                       \
  K(WHILE, "while", 0)                                             \
  K(WITH, "with", 0)                                               \
                                                                   \
  /* Literals (ECMA-262, section 7.8, page 16). */                 \
  K(NULL_LITERAL, "null", 0)                                       \
  K(TRUE_LITERAL, "true", 0)                                       \
  K(FALSE_LITERAL, "false", 0)                                     \
  T(NUMBER, nullptr, 0)                                            \
  T(SMI, nullptr, 0)                                               \
  T(STRING, nullptr, 0)                                            \
  T(BIGINT, nullptr, 0)                                            \
                                                                   \
  /* Identifiers (not keywords or future reserved words). */       \
  T(IDENTIFIER, nullptr, 0)                                        \
  T(PRIVATE_NAME, nullptr, 0)                                      \
                                                                   \
  /* Future reserved words (ECMA-262, section 7.6.1.2). */         \
  T(FUTURE_STRICT_RESERVED_WORD, nullptr, 0)                       \
  K(ASYNC, "async", 0)                                             \
  /* `await` is a reserved word in module code only */             \
  K(AWAIT, "await", 0)                                             \
  K(CLASS, "class", 0)                                             \
  K(CONST, "const", 0)                                             \
  K(ENUM, "enum", 0)                                               \
  K(EXPORT, "export", 0)                                           \
  K(EXTENDS, "extends", 0)                                         \
  K(IMPORT, "import", 0)                                           \
  K(LET, "let", 0)                                                 \
  K(STATIC, "static", 0)                                           \
  K(YIELD, "yield", 0)                                             \
  K(SUPER, "super", 0)                                             \
                                                                   \
  /* Illegal token - not able to scan. */                          \
  T(ILLEGAL, "ILLEGAL", 0)                                         \
  T(ESCAPED_KEYWORD, nullptr, 0)                                   \
  T(ESCAPED_STRICT_RESERVED_WORD, nullptr, 0)                      \
                                                                   \
  /* Scanner-internal use only. */                                 \
  T(WHITESPACE, nullptr, 0)                                        \
  T(UNINITIALIZED, nullptr, 0)                                     \
  T(REGEXP_LITERAL, nullptr, 0)                                    \
                                                                   \
  /* ES6 Template Literals */                                      \
  T(TEMPLATE_SPAN, nullptr, 0)                                     \
  T(TEMPLATE_TAIL, nullptr, 0)                                     \
                                                                   \
  /* Contextual keyword tokens */                                  \
  C(GET, "get", 0)                                                 \
  C(SET, "set", 0)                                                 \
  C(OF, "of", 0)                                                   \
  C(TARGET, "target", 0)                                           \
  C(META, "meta", 0)                                               \
  C(AS, "as", 0)                                                   \
  C(FROM, "from", 0)                                               \
  C(NAME, "name", 0)                                               \
  C(PROTO_UNDERSCORED, "__proto__", 0)                             \
  C(CONSTRUCTOR, "constructor", 0)                                 \
  C(PRIVATE_CONSTRUCTOR, "#constructor", 0)                        \
  C(PROTOTYPE, "prototype", 0)                                     \
  C(EVAL, "eval", 0)                                               \
  C(ARGUMENTS, "arguments", 0)                                     \
  C(UNDEFINED, "undefined", 0)                                     \
  C(ANONYMOUS, "anonymous", 0)
203 
204 class Token {
205  public:
206   // All token values.
207 #define T(name, string, precedence) name,
208   enum Value { TOKEN_LIST(T, T, T) NUM_TOKENS };
209 #undef T
210 
211   // Returns a string corresponding to the C++ token name
212   // (e.g. "LT" for the token LT).
Name(Value tok)213   static const char* Name(Value tok) {
214     DCHECK(tok < NUM_TOKENS);  // tok is unsigned
215     return name_[tok];
216   }
217 
218   // Predicates
IsKeyword(Value tok)219   static bool IsKeyword(Value tok) {
220     return token_type[tok] == 'K';
221   }
IsContextualKeyword(Value tok)222   static bool IsContextualKeyword(Value tok) { return token_type[tok] == 'C'; }
223 
IsIdentifier(Value tok,LanguageMode language_mode,bool is_generator,bool disallow_await)224   static bool IsIdentifier(Value tok, LanguageMode language_mode,
225                            bool is_generator, bool disallow_await) {
226     switch (tok) {
227       case IDENTIFIER:
228       case ASYNC:
229         return true;
230       case ESCAPED_STRICT_RESERVED_WORD:
231       case FUTURE_STRICT_RESERVED_WORD:
232       case LET:
233       case STATIC:
234         return is_sloppy(language_mode);
235       case YIELD:
236         return !is_generator && is_sloppy(language_mode);
237       case AWAIT:
238         return !disallow_await;
239       default:
240         return false;
241     }
242     UNREACHABLE();
243   }
244 
IsAssignmentOp(Value tok)245   static bool IsAssignmentOp(Value tok) {
246     return INIT <= tok && tok <= ASSIGN_EXP;
247   }
248 
IsBinaryOp(Value op)249   static bool IsBinaryOp(Value op) { return COMMA <= op && op <= EXP; }
250 
IsCompareOp(Value op)251   static bool IsCompareOp(Value op) {
252     return EQ <= op && op <= IN;
253   }
254 
IsOrderedRelationalCompareOp(Value op)255   static bool IsOrderedRelationalCompareOp(Value op) {
256     return op == LT || op == LTE || op == GT || op == GTE;
257   }
258 
IsEqualityOp(Value op)259   static bool IsEqualityOp(Value op) {
260     return op == EQ || op == EQ_STRICT;
261   }
262 
BinaryOpForAssignment(Value op)263   static Value BinaryOpForAssignment(Value op) {
264     DCHECK(IsAssignmentOp(op));
265     switch (op) {
266       case Token::ASSIGN_BIT_OR:
267         return Token::BIT_OR;
268       case Token::ASSIGN_BIT_XOR:
269         return Token::BIT_XOR;
270       case Token::ASSIGN_BIT_AND:
271         return Token::BIT_AND;
272       case Token::ASSIGN_SHL:
273         return Token::SHL;
274       case Token::ASSIGN_SAR:
275         return Token::SAR;
276       case Token::ASSIGN_SHR:
277         return Token::SHR;
278       case Token::ASSIGN_ADD:
279         return Token::ADD;
280       case Token::ASSIGN_SUB:
281         return Token::SUB;
282       case Token::ASSIGN_MUL:
283         return Token::MUL;
284       case Token::ASSIGN_DIV:
285         return Token::DIV;
286       case Token::ASSIGN_MOD:
287         return Token::MOD;
288       case Token::ASSIGN_EXP:
289         return Token::EXP;
290       default:
291         UNREACHABLE();
292     }
293   }
294 
IsBitOp(Value op)295   static bool IsBitOp(Value op) {
296     return (BIT_OR <= op && op <= SHR) || op == BIT_NOT;
297   }
298 
IsUnaryOp(Value op)299   static bool IsUnaryOp(Value op) {
300     return (NOT <= op && op <= VOID) || op == ADD || op == SUB;
301   }
302 
IsCountOp(Value op)303   static bool IsCountOp(Value op) {
304     return op == INC || op == DEC;
305   }
306 
IsShiftOp(Value op)307   static bool IsShiftOp(Value op) {
308     return (SHL <= op) && (op <= SHR);
309   }
310 
311   // Returns a string corresponding to the JS token string
312   // (.e., "<" for the token LT) or nullptr if the token doesn't
313   // have a (unique) string (e.g. an IDENTIFIER).
String(Value tok)314   static const char* String(Value tok) {
315     DCHECK(tok < NUM_TOKENS);  // tok is unsigned.
316     return string_[tok];
317   }
318 
StringLength(Value tok)319   static uint8_t StringLength(Value tok) {
320     DCHECK(tok < NUM_TOKENS);
321     return string_length_[tok];
322   }
323 
324   // Returns the precedence > 0 for binary and compare
325   // operators; returns 0 otherwise.
Precedence(Value tok)326   static int Precedence(Value tok) {
327     DCHECK(tok < NUM_TOKENS);  // tok is unsigned.
328     return precedence_[tok];
329   }
330 
331  private:
332   static const char* const name_[NUM_TOKENS];
333   static const char* const string_[NUM_TOKENS];
334   static const uint8_t string_length_[NUM_TOKENS];
335   static const int8_t precedence_[NUM_TOKENS];
336   static const char token_type[NUM_TOKENS];
337 };
338 
339 }  // namespace internal
340 }  // namespace v8
341 
342 #endif  // V8_PARSING_TOKEN_H_
343