1#!/usr/bin/env python
2#
3# Copyright 2010 The Closure Linter Authors. All Rights Reserved.
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS-IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""Metadata pass for annotating tokens in EcmaScript files."""
18
# Module author, recorded as 'email (full name)'.
__author__ = 'robbyw@google.com (Robert Walker)'
20
import functools

from closure_linter import javascripttokens
from closure_linter import tokenutil
23
24
# Short alias for javascripttokens.JavaScriptTokenType; the classes below
# refer to token type constants through this name.
TokenType = javascripttokens.JavaScriptTokenType
26
27
class ParseError(Exception):
  """Raised when the token stream cannot be parsed at a particular token.

  Attributes:
    token: The token at which the parse failure was detected.
  """

  def __init__(self, token, message=None):
    """Creates a parse error tied to a token, optionally with a message.

    Args:
      token: The token at which the parse failure was detected.
      message: Optional text describing what went wrong.
    """
    super(ParseError, self).__init__(message)
    self.token = token
44
45
class EcmaContext(object):
  """Context object for EcmaScript languages.

  Contexts form a tree: every context keeps a reference to its parent and an
  ordered list of its children.

  Attributes:
    type: The context type.
    start_token: The token where this context starts.
    end_token: The token where this context ends.
    parent: The parent context.
    children: The child contexts of this context, in order.
  """

  # The root context.
  ROOT = 'root'

  # A block of code.
  BLOCK = 'block'

  # A pseudo-block of code for a given case or default section.
  CASE_BLOCK = 'case_block'

  # Block of statements in a for loop's parentheses.
  FOR_GROUP_BLOCK = 'for_block'

  # An implied block of code for 1 line if, while, and for statements
  IMPLIED_BLOCK = 'implied_block'

  # An index in to an array or object.
  INDEX = 'index'

  # An array literal in [].
  ARRAY_LITERAL = 'array_literal'

  # An object literal in {}.
  OBJECT_LITERAL = 'object_literal'

  # An individual element in an array or object literal.
  LITERAL_ELEMENT = 'literal_element'

  # The portion of a ternary statement between ? and :
  TERNARY_TRUE = 'ternary_true'

  # The portion of a ternary statement after :
  TERNARY_FALSE = 'ternary_false'

  # The entire switch statement.  This will contain a GROUP with the variable
  # and a BLOCK with the code.
  # Since that BLOCK is not a normal block, it can not contain statements except
  # for case and default.
  SWITCH = 'switch'

  # A normal comment.
  COMMENT = 'comment'

  # A JsDoc comment.
  DOC = 'doc'

  # An individual statement.
  STATEMENT = 'statement'

  # Code within parentheses.
  GROUP = 'group'

  # Parameter names in a function declaration.
  PARAMETERS = 'parameters'

  # A set of variable declarations appearing after the 'var' keyword.
  VAR = 'var'

  # Context types that are blocks.
  BLOCK_TYPES = frozenset([
      ROOT, BLOCK, CASE_BLOCK, FOR_GROUP_BLOCK, IMPLIED_BLOCK])

  def __init__(self, context_type, start_token, parent=None):
    """Initializes the context object.

    If a parent is given, the new context registers itself as a child of that
    parent, which also sets this context's parent attribute.

    Args:
      context_type: The context type.
      start_token: The token where this context starts.
      parent: The parent context.

    Attributes:
      type: The context type.
      start_token: The token where this context starts.
      end_token: The token where this context ends.
      parent: The parent context.
      children: The child contexts of this context, in order.
    """
    self.type = context_type
    self.start_token = start_token
    self.end_token = None

    self.parent = None
    self.children = []

    if parent:
      parent.AddChild(self)

  def __repr__(self):
    """Returns a string representation of the context object.

    The representation lists the context types from this context up to the
    outermost ancestor, e.g. 'Context(statement > root)'.
    """
    stack = []
    context = self
    while context:
      stack.append(context.type)
      context = context.parent
    return 'Context(%s)' % ' > '.join(stack)

  def AddChild(self, child):
    """Adds a child to this context and sets child's parent to this context.

    The children list is re-sorted after the insertion using
    _CompareContexts.

    Args:
      child: A child EcmaContext.  The child's parent will be set to this
          context.
    """

    child.parent = self

    self.children.append(child)
    # list.sort() only accepts a positional comparison function on Python 2;
    # wrapping it with cmp_to_key works on Python 2.7 and Python 3 alike.
    self.children.sort(key=functools.cmp_to_key(EcmaContext._CompareContexts))

  def GetRoot(self):
    """Get the root context that contains this context, if any.

    Returns:
      The nearest context of type ROOT found by walking up the parent chain
      (starting with this context itself), or None if there is none.
    """
    context = self
    while context:
      # Compare by value: identity comparison of strings is not reliable.
      if context.type == EcmaContext.ROOT:
        return context
      context = context.parent
    return None

  @staticmethod
  def _CompareContexts(context1, context2):
    """Sorts contexts 1 and 2 by start token document position.

    Args:
      context1: The first context to compare.
      context2: The second context to compare.

    Returns:
      The result of tokenutil.Compare on the two contexts' start tokens.
    """
    return tokenutil.Compare(context1.start_token, context2.start_token)
177
178
class EcmaMetaData(object):
  """Per-token metadata record for EcmaScript languages.

  Attributes:
    last_code: The last code token to appear before this one.
    context: The context this token appears in.
    operator_type: The operator type, will be one of the *_OPERATOR constants
        defined below.
    is_implied_semicolon: Whether an implied semicolon follows the token.
    is_implied_block: Whether the token opens an implied block.
    is_implied_block_close: Whether the token closes an implied block.
    aliased_symbol: The full symbol being identified, as a string (e.g. an
        'XhrIo' alias for 'goog.net.XhrIo'). Only applicable to identifier
        tokens. This is set in aliaspass.py and is a best guess.
    is_alias_definition: True if the symbol is part of an alias definition.
        If so, these symbols won't be counted towards goog.requires/provides.
  """

  # Operator type constants.
  UNARY_OPERATOR = 'unary'
  UNARY_POST_OPERATOR = 'unary_post'
  BINARY_OPERATOR = 'binary'
  TERNARY_OPERATOR = 'ternary'

  def __init__(self):
    """Creates a metadata object with every field at its empty default."""
    # Links to other objects; filled in by the metadata pass.
    self.last_code = None
    self.context = None
    self.operator_type = None
    self.aliased_symbol = None

    # Boolean flags.
    self.is_implied_semicolon = False
    self.is_implied_block = False
    self.is_implied_block_close = False
    self.is_alias_definition = False

  def __repr__(self):
    """Returns a string representation of the metadata object."""
    description = '%r' % self.context
    if self.operator_type:
      description += ', optype: %r' % self.operator_type
    if self.is_implied_semicolon:
      description += ', implied;'
    if self.aliased_symbol:
      description += ', alias for: %s' % self.aliased_symbol
    return 'MetaData(%s)' % description

  def IsUnaryOperator(self):
    """Returns whether the operator type is unary (prefix or postfix)."""
    if self.operator_type == EcmaMetaData.UNARY_OPERATOR:
      return True
    return self.operator_type == EcmaMetaData.UNARY_POST_OPERATOR

  def IsUnaryPostOperator(self):
    """Returns whether the operator type is a postfix unary operator."""
    return EcmaMetaData.UNARY_POST_OPERATOR == self.operator_type
230
231
class EcmaMetaDataPass(object):
  """A pass that iterates over all tokens and builds metadata about them.

  The pass keeps a current context (the top of a stack linked through the
  contexts' parent references) and the last code token seen, and attaches a
  metadata object to every token it processes.
  """

  def __init__(self):
    """Initialize the meta data pass object."""
    self.Reset()

  def Reset(self):
    """Resets the metadata pass to prepare for the next file."""
    self._token = None
    self._context = None
    self._AddContext(EcmaContext.ROOT)
    self._last_code = None

  def _CreateContext(self, context_type):
    """Overridable by subclasses to create the appropriate context type.

    Args:
      context_type: The type of context to create.

    Returns:
      A new context starting at the current token whose parent is the current
      context.
    """
    return EcmaContext(context_type, self._token, self._context)

  def _CreateMetaData(self):
    """Overridable by subclasses to create the appropriate metadata type."""
    return EcmaMetaData()

  def _AddContext(self, context_type):
    """Adds a context of the given type to the context stack.

    Args:
      context_type: The type of context to create
    """
    self._context = self._CreateContext(context_type)

  def _PopContext(self):
    """Moves up one level in the context stack.

    The popped context's end token is set to the current token.

    Returns:
      The former context.

    Raises:
      ParseError: If the root context is popped.
    """
    top_context = self._context
    top_context.end_token = self._token
    self._context = top_context.parent
    if self._context:
      return top_context
    else:
      raise ParseError(self._token)

  def _PopContextType(self, *stop_types):
    """Pops the context stack until a context of the given type is popped.

    Args:
      *stop_types: The types of context to pop to - stops at the first match.

    Returns:
      The context object of the given type that was popped.

    Raises:
      ParseError: If the root context is popped before a match is found.
    """
    last = None
    while not last or last.type not in stop_types:
      last = self._PopContext()
    return last

  def _EndStatement(self):
    """Process the end of a statement.

    Pops up to and including the enclosing statement context.  If that leaves
    an implied block on top of the stack, the current token is marked as
    closing it and the implied block is popped as well.
    """
    self._PopContextType(EcmaContext.STATEMENT)
    if self._context.type == EcmaContext.IMPLIED_BLOCK:
      self._token.metadata.is_implied_block_close = True
      self._PopContext()

  def _ProcessContext(self):
    """Process the context at the current token.

    Returns:
      The context that should be assigned to the current token, or None if
      the current context after this method should be used.

    Raises:
      ParseError: When the token appears in an invalid context.
    """
    token = self._token
    token_type = token.type

    if self._context.type in EcmaContext.BLOCK_TYPES:
      # Whenever we're in a block, we add a statement context.  We make an
      # exception for switch statements since they can only contain case: and
      # default: and therefore don't directly contain statements.
      # The block we add here may be immediately removed in some cases, but
      # that causes no harm.
      parent = self._context.parent
      if not parent or parent.type != EcmaContext.SWITCH:
        self._AddContext(EcmaContext.STATEMENT)

    elif self._context.type == EcmaContext.ARRAY_LITERAL:
      self._AddContext(EcmaContext.LITERAL_ELEMENT)

    if token_type == TokenType.START_PAREN:
      if self._last_code and self._last_code.IsKeyword('for'):
        # for loops contain multiple statements in the group unlike while,
        # switch, if, etc.
        self._AddContext(EcmaContext.FOR_GROUP_BLOCK)
      else:
        self._AddContext(EcmaContext.GROUP)

    elif token_type == TokenType.END_PAREN:
      result = self._PopContextType(EcmaContext.GROUP,
                                    EcmaContext.FOR_GROUP_BLOCK)
      keyword_token = result.start_token.metadata.last_code
      # keyword_token will not exist if the open paren is the first line of the
      # file, for example if all code is wrapped in an immediately executed
      # anonymous function.
      if keyword_token and keyword_token.string in ('if', 'for', 'while'):
        next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
        # next_code is None when the closing paren is the last code token in
        # the file (e.g. "do {...} while (x)" without a trailing semicolon).
        if not next_code or next_code.type != TokenType.START_BLOCK:
          # Check for do-while.
          is_do_while = False
          pre_keyword_token = keyword_token.metadata.last_code
          if (pre_keyword_token and
              pre_keyword_token.type == TokenType.END_BLOCK):
            start_block_token = pre_keyword_token.metadata.context.start_token
            # The token before the block start does not exist when the block
            # starts with the first code token of the file.
            do_token = start_block_token.metadata.last_code
            is_do_while = do_token is not None and do_token.string == 'do'

          # If it's not do-while, it's an implied block.
          if not is_do_while:
            self._AddContext(EcmaContext.IMPLIED_BLOCK)
            token.metadata.is_implied_block = True

      return result

    # else (not else if) with no open brace after it should be considered the
    # start of an implied block, similar to the case with if, for, and while
    # above.
    elif (token_type == TokenType.KEYWORD and
          token.string == 'else'):
      next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
      # next_code is None when 'else' is the last code token in the file.
      if (not next_code or
          (next_code.type != TokenType.START_BLOCK and
           (next_code.type != TokenType.KEYWORD or
            next_code.string != 'if'))):
        self._AddContext(EcmaContext.IMPLIED_BLOCK)
        token.metadata.is_implied_block = True

    elif token_type == TokenType.START_PARAMETERS:
      self._AddContext(EcmaContext.PARAMETERS)

    elif token_type == TokenType.END_PARAMETERS:
      return self._PopContextType(EcmaContext.PARAMETERS)

    elif token_type == TokenType.START_BRACKET:
      if (self._last_code and
          self._last_code.type in TokenType.EXPRESSION_ENDER_TYPES):
        self._AddContext(EcmaContext.INDEX)
      else:
        self._AddContext(EcmaContext.ARRAY_LITERAL)

    elif token_type == TokenType.END_BRACKET:
      return self._PopContextType(EcmaContext.INDEX, EcmaContext.ARRAY_LITERAL)

    elif token_type == TokenType.START_BLOCK:
      last_code = self._last_code
      # last_code is None when the brace is the first code token in the file;
      # in that case fall through to the object literal branch rather than
      # failing on an attribute access.
      if (last_code and
          (last_code.type in (TokenType.END_PAREN,
                              TokenType.END_PARAMETERS) or
           last_code.IsKeyword('else') or
           last_code.IsKeyword('do') or
           last_code.IsKeyword('try') or
           last_code.IsKeyword('finally') or
           (last_code.IsOperator(':') and
            last_code.metadata.context.type == EcmaContext.CASE_BLOCK))):
        # else, do, try, and finally all might have no () before {.
        # Also, handle the bizarre syntax case 10: {...}.
        self._AddContext(EcmaContext.BLOCK)
      else:
        self._AddContext(EcmaContext.OBJECT_LITERAL)

    elif token_type == TokenType.END_BLOCK:
      context = self._PopContextType(EcmaContext.BLOCK,
                                     EcmaContext.OBJECT_LITERAL)
      if self._context.type == EcmaContext.SWITCH:
        # The end of the block also means the end of the switch statement it
        # applies to.
        return self._PopContext()
      return context

    elif token.IsKeyword('switch'):
      self._AddContext(EcmaContext.SWITCH)

    elif (token_type == TokenType.KEYWORD and
          token.string in ('case', 'default') and
          self._context.type != EcmaContext.OBJECT_LITERAL):
      # Pop up to but not including the switch block.
      while self._context.parent.type != EcmaContext.SWITCH:
        self._PopContext()
        if self._context.parent is None:
          raise ParseError(token, 'Encountered case/default statement '
                           'without switch statement')

    elif token.IsOperator('?'):
      self._AddContext(EcmaContext.TERNARY_TRUE)

    elif token.IsOperator(':'):
      if self._context.type == EcmaContext.OBJECT_LITERAL:
        self._AddContext(EcmaContext.LITERAL_ELEMENT)

      elif self._context.type == EcmaContext.TERNARY_TRUE:
        self._PopContext()
        self._AddContext(EcmaContext.TERNARY_FALSE)

      # Handle nested ternary statements like:
      # foo = bar ? baz ? 1 : 2 : 3
      # When we encounter the second ":" the context is
      # ternary_false > ternary_true > statement > root
      elif (self._context.type == EcmaContext.TERNARY_FALSE and
            self._context.parent.type == EcmaContext.TERNARY_TRUE):
        self._PopContext()  # Leave current ternary false context.
        self._PopContext()  # Leave current parent ternary true
        self._AddContext(EcmaContext.TERNARY_FALSE)

      elif self._context.parent.type == EcmaContext.SWITCH:
        self._AddContext(EcmaContext.CASE_BLOCK)

    elif token.IsKeyword('var'):
      self._AddContext(EcmaContext.VAR)

    elif token.IsOperator(','):
      while self._context.type not in (EcmaContext.VAR,
                                       EcmaContext.ARRAY_LITERAL,
                                       EcmaContext.OBJECT_LITERAL,
                                       EcmaContext.STATEMENT,
                                       EcmaContext.PARAMETERS,
                                       EcmaContext.GROUP):
        self._PopContext()

    elif token_type == TokenType.SEMICOLON:
      self._EndStatement()

  def Process(self, first_token):
    """Processes the token stream starting with the given token.

    Args:
      first_token: The first token of the stream; following tokens are reached
          through each token's next attribute.
    """
    self._token = first_token
    while self._token:
      self._ProcessToken()

      if self._token.IsCode():
        self._last_code = self._token

      self._token = self._token.next

    # Close every context that is still open.  Popping the root context always
    # raises ParseError, which marks the normal end of this unwinding.
    try:
      self._PopContextType(EcmaContext.ROOT)
    except ParseError:
      # Ignore the "popped to root" error.
      pass

  def _ProcessToken(self):
    """Process the current token.

    Attaches a fresh metadata object to the token, records its context, last
    code token and operator type, and ends the current statement if an
    implied semicolon follows the token.
    """
    token = self._token
    token.metadata = self._CreateMetaData()
    context = (self._ProcessContext() or self._context)
    token.metadata.context = context
    token.metadata.last_code = self._last_code

    # Determine the operator type of the token, if applicable.
    if token.type == TokenType.OPERATOR:
      token.metadata.operator_type = self._GetOperatorType(token)

    # Determine if there is an implied semicolon after the token.
    if token.type != TokenType.SEMICOLON:
      next_code = tokenutil.SearchExcept(token, TokenType.NON_CODE_TYPES)
      # A statement like if (x) does not need a semicolon after it
      is_implied_block = self._context.type == EcmaContext.IMPLIED_BLOCK
      is_last_code_in_line = token.IsCode() and (
          not next_code or next_code.line_number != token.line_number)
      is_continued_operator = (token.type == TokenType.OPERATOR and
                               not token.metadata.IsUnaryPostOperator())
      is_continued_dot = token.string == '.'
      next_code_is_operator = next_code and next_code.type == TokenType.OPERATOR
      is_end_of_block = (
          token.type == TokenType.END_BLOCK and
          token.metadata.context.type != EcmaContext.OBJECT_LITERAL)
      is_multiline_string = token.type == TokenType.STRING_TEXT
      is_continued_var_decl = (token.IsKeyword('var') and
                               next_code and
                               (next_code.type in [TokenType.IDENTIFIER,
                                                   TokenType.SIMPLE_LVALUE]) and
                               token.line_number < next_code.line_number)
      next_code_is_block = next_code and next_code.type == TokenType.START_BLOCK
      if (is_last_code_in_line and
          self._StatementCouldEndInContext() and
          not is_multiline_string and
          not is_end_of_block and
          not is_continued_var_decl and
          not is_continued_operator and
          not is_continued_dot and
          not next_code_is_operator and
          not is_implied_block and
          not next_code_is_block):
        token.metadata.is_implied_semicolon = True
        self._EndStatement()

  def _StatementCouldEndInContext(self):
    """Returns if the current statement (if any) may end in this context."""
    # In the basic statement or variable declaration context, statement can
    # always end in this context.
    if self._context.type in (EcmaContext.STATEMENT, EcmaContext.VAR):
      return True

    # End of a ternary false branch inside a statement can also be the
    # end of the statement, for example:
    # var x = foo ? foo.bar() : null
    # In this case the statement ends after the null, when the context stack
    # looks like ternary_false > var > statement > root.
    if (self._context.type == EcmaContext.TERNARY_FALSE and
        self._context.parent.type in (EcmaContext.STATEMENT, EcmaContext.VAR)):
      return True

    # In all other contexts like object and array literals, ternary true, etc.
    # the statement can't yet end.
    return False

  def _GetOperatorType(self, token):
    """Returns the operator type of the given operator token.

    Args:
      token: The token to get arity for.

    Returns:
      The type of the operator.  One of the *_OPERATOR constants defined in
      EcmaMetaData.
    """
    if token.string == '?':
      return EcmaMetaData.TERNARY_OPERATOR

    if token.string in TokenType.UNARY_OPERATORS:
      return EcmaMetaData.UNARY_OPERATOR

    # With no preceding code token, or directly after the end of a block, the
    # operator is classified as unary.
    last_code = token.metadata.last_code
    if not last_code or last_code.type == TokenType.END_BLOCK:
      return EcmaMetaData.UNARY_OPERATOR

    if (token.string in TokenType.UNARY_POST_OPERATORS and
        last_code.type in TokenType.EXPRESSION_ENDER_TYPES):
      return EcmaMetaData.UNARY_POST_OPERATOR

    if (token.string in TokenType.UNARY_OK_OPERATORS and
        last_code.type not in TokenType.EXPRESSION_ENDER_TYPES and
        last_code.string not in TokenType.UNARY_POST_OPERATORS):
      return EcmaMetaData.UNARY_OPERATOR

    return EcmaMetaData.BINARY_OPERATOR
575