#!/usr/bin/env python
#
# Copyright 2007 The Closure Linter Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

17"""Token utility functions."""
18
19__author__ = ('robbyw@google.com (Robert Walker)',
20              'ajp@google.com (Andy Perelson)')
21
22import copy
23import StringIO
24
25from closure_linter.common import tokens
26from closure_linter.javascripttokens import JavaScriptToken
27from closure_linter.javascripttokens import JavaScriptTokenType
28
29# Shorthand
30Type = tokens.TokenType
31
32
def GetFirstTokenInSameLine(token):
  """Returns the first token in the same line as token.

  Args:
    token: Any token in the line.

  Returns:
    The first token in the same line as token.
  """
  while not token.IsFirstInLine():
    token = token.previous
  return token


def GetFirstTokenInPreviousLine(token):
  """Returns the first token in the line preceding the line containing token.

  Args:
    token: Any token in the line.

  Returns:
    The first token in the previous line, or None if token is on the first
    line.
  """
  first_in_line = GetFirstTokenInSameLine(token)
  if first_in_line.previous:
    return GetFirstTokenInSameLine(first_in_line.previous)

  return None


def GetLastTokenInSameLine(token):
  """Returns the last token in the same line as token.

  Args:
    token: Any token in the line.

  Returns:
    The last token in the same line as token.
  """
  while not token.IsLastInLine():
    token = token.next
  return token


def GetAllTokensInSameLine(token):
  """Returns all tokens in the same line as the given token.

  Args:
    token: Any token in the line.

  Returns:
    All tokens on the same line as the given token.
  """
  first_token = GetFirstTokenInSameLine(token)
  last_token = GetLastTokenInSameLine(token)

  tokens_in_line = []
  while first_token != last_token:
    tokens_in_line.append(first_token)
    first_token = first_token.next
  tokens_in_line.append(last_token)

  return tokens_in_line
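
# A minimal usage sketch, assuming `tok` is any token produced by the
# tokenizer (the name `tok` is hypothetical):
#
#   line_text = ''.join(t.string for t in GetAllTokensInSameLine(tok))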


def CustomSearch(start_token, func, end_func=None, distance=None,
                 reverse=False):
  """Returns the first token where func is True within distance of this token.

  Args:
    start_token: The token to start searching from.
    func: The function to call to test a token for applicability.
    end_func: The function to call to test a token to determine whether to
        abort the search.
    distance: The number of tokens to look through before failing the search.
        Must be positive.  If unspecified, the search continues to the end of
        the token chain.
    reverse: When True, search the tokens before this one instead of the
        tokens after it.

  Returns:
    The first token matching func within distance of start_token, or None if
    no such token is found.
  """
  token = start_token
  if reverse:
    while token and (distance is None or distance > 0):
      previous = token.previous
      if previous:
        if func(previous):
          return previous
        if end_func and end_func(previous):
          return None

      token = previous
      if distance is not None:
        distance -= 1

  else:
    while token and (distance is None or distance > 0):
      next_token = token.next
      if next_token:
        if func(next_token):
          return next_token
        if end_func and end_func(next_token):
          return None

      token = next_token
      if distance is not None:
        distance -= 1

  return None
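
# Usage sketch: find the nearest preceding identifier, giving up once a
# blank line is crossed (`tok` is a hypothetical token in the stream):
#
#   match = CustomSearch(
#       tok,
#       lambda t: t.type == JavaScriptTokenType.IDENTIFIER,
#       end_func=lambda t: t.type == Type.BLANK_LINE,
#       reverse=True)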


def Search(start_token, token_types, distance=None, reverse=False):
  """Returns the first token of a type in token_types within distance.

  Args:
    start_token: The token to start searching from.
    token_types: The allowable types of the token being searched for.
    distance: The number of tokens to look through before failing the search.
        Must be positive.  If unspecified, the search continues to the end of
        the token chain.
    reverse: When True, search the tokens before this one instead of the
        tokens after it.

  Returns:
    The first token of any type in token_types within distance of this token,
    or None if no such token is found.
  """
  return CustomSearch(start_token, lambda token: token.IsAnyType(token_types),
                      None, distance, reverse)


def SearchExcept(start_token, token_types, distance=None, reverse=False):
  """Returns the first token not of any type in token_types within distance.

  Args:
    start_token: The token to start searching from.
    token_types: The disallowed types of the token being searched for.
    distance: The number of tokens to look through before failing the search.
        Must be positive.  If unspecified, the search continues to the end of
        the token chain.
    reverse: When True, search the tokens before this one instead of the
        tokens after it.

  Returns:
    The first token not of any type in token_types within distance of this
    token, or None if no such token is found.
  """
  return CustomSearch(start_token,
                      lambda token: not token.IsAnyType(token_types),
                      None, distance, reverse)


def SearchUntil(start_token, token_types, end_types, distance=None,
                reverse=False):
  """Returns the first token of type in token_types before a token of end_type.

  Args:
    start_token: The token to start searching from.
    token_types: The allowable types of the token being searched for.
    end_types: Types of tokens that abort the search if found.
    distance: The number of tokens to look through before failing the search.
        Must be positive.  If unspecified, the search continues to the end of
        the token chain.
    reverse: When True, search the tokens before this one instead of the
        tokens after it.

  Returns:
    The first token of any type in token_types within distance of this token
    and before any token of a type in end_types, or None if no such token is
    found.
  """
  return CustomSearch(start_token, lambda token: token.IsAnyType(token_types),
                      lambda token: token.IsAnyType(end_types),
                      distance, reverse)
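
# Usage sketches for the three wrappers above (`tok` is hypothetical):
#
#   operator = Search(tok, JavaScriptTokenType.OPERATOR, distance=5)
#   code = SearchExcept(tok, JavaScriptTokenType.NON_CODE_TYPES)
#   text = SearchUntil(tok, JavaScriptTokenType.STRING_TEXT,
#                      [JavaScriptTokenType.DOUBLE_QUOTE_STRING_END])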


def DeleteToken(token):
  """Deletes the given token from the linked list.

  Args:
    token: The token to delete.
  """
  # When deleting a token, we do not update the deleted token itself, so that
  # its previous and next pointers still point to tokens which have not been
  # deleted.  It is also very hard to keep track of all previously deleted
  # tokens in order to update them when their pointers become invalid.  So we
  # set this flag, letting any token linked-list iteration logic safely skip
  # deleted nodes when its current token has been deleted.
  token.is_deleted = True
  if token.previous:
    token.previous.next = token.next

  if token.next:
    token.next.previous = token.previous

    following_token = token.next
    while following_token and following_token.metadata.last_code == token:
      following_token.metadata.last_code = token.metadata.last_code
      following_token = following_token.next


def DeleteTokens(token, token_count):
  """Deletes the given number of tokens starting with the given token.

  Args:
    token: The token to start deleting at.
    token_count: The total number of tokens to delete.
  """
  for _ in xrange(1, token_count):
    DeleteToken(token.next)
  DeleteToken(token)
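
# Usage sketch: remove a hypothetical token `tok` and the two tokens that
# follow it, leaving the rest of the chain linked together:
#
#   DeleteTokens(tok, 3)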


def InsertTokenBefore(new_token, token):
  """Insert new_token before token.

  Args:
    new_token: A token to be added to the stream
    token: A token already in the stream
  """
  new_token.next = token
  new_token.previous = token.previous

  new_token.metadata = copy.copy(token.metadata)

  if new_token.IsCode():
    old_last_code = token.metadata.last_code
    following_token = token
    while (following_token and
           following_token.metadata.last_code == old_last_code):
      following_token.metadata.last_code = new_token
      following_token = following_token.next

  token.previous = new_token
  if new_token.previous:
    new_token.previous.next = new_token

  if new_token.start_index is None:
    if new_token.line_number == token.line_number:
      new_token.start_index = token.start_index
    else:
      previous_token = new_token.previous
      if previous_token:
        new_token.start_index = (previous_token.start_index +
                                 len(previous_token.string))
      else:
        new_token.start_index = 0

    iterator = new_token.next
    while iterator and iterator.line_number == new_token.line_number:
      iterator.start_index += len(new_token.string)
      iterator = iterator.next


def InsertTokenAfter(new_token, token):
  """Insert new_token after token.

  Args:
    new_token: A token to be added to the stream
    token: A token already in the stream
  """
  new_token.previous = token
  new_token.next = token.next

  new_token.metadata = copy.copy(token.metadata)

  if token.IsCode():
    new_token.metadata.last_code = token

  if new_token.IsCode():
    following_token = token.next
    while following_token and following_token.metadata.last_code == token:
      following_token.metadata.last_code = new_token
      following_token = following_token.next

  token.next = new_token
  if new_token.next:
    new_token.next.previous = new_token

  if new_token.start_index is None:
    if new_token.line_number == token.line_number:
      new_token.start_index = token.start_index + len(token.string)
    else:
      new_token.start_index = 0

    iterator = new_token.next
    while iterator and iterator.line_number == new_token.line_number:
      iterator.start_index += len(new_token.string)
      iterator = iterator.next
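
# Usage sketch, mirroring what InsertSpaceTokenAfter does below (`tok` is a
# hypothetical token already in the stream):
#
#   space = JavaScriptToken(' ', Type.WHITESPACE, tok.line, tok.line_number)
#   InsertTokenAfter(space, tok)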


def InsertTokensAfter(new_tokens, token):
  """Insert multiple tokens after token.

  Args:
    new_tokens: An array of tokens to be added to the stream
    token: A token already in the stream
  """
  # TODO(user): It would be nicer to have InsertTokenAfter defer to here
  # instead of vice-versa.
  current_token = token
  for new_token in new_tokens:
    InsertTokenAfter(new_token, current_token)
    current_token = new_token


def InsertSpaceTokenAfter(token):
  """Inserts a space token after the given token.

  Args:
    token: The token to insert a space token after.

  Returns:
    The new space token.
  """
  space_token = JavaScriptToken(' ', Type.WHITESPACE, token.line,
                                token.line_number)
  InsertTokenAfter(space_token, token)
  return space_token


def InsertBlankLineAfter(token):
  """Inserts a blank line after the given token.

  Args:
    token: The token to insert a blank line after.
  """
  blank_token = JavaScriptToken('', Type.BLANK_LINE, '',
                                token.line_number + 1)
  InsertLineAfter(token, [blank_token])


def InsertLineAfter(token, new_tokens):
  """Inserts a new line consisting of new_tokens after the given token.

  Args:
    token: The token to insert after.
    new_tokens: The tokens that will make up the new line.
  """
  insert_location = token
  for new_token in new_tokens:
    InsertTokenAfter(new_token, insert_location)
    insert_location = new_token

  # Update all subsequent line numbers.
  next_token = new_tokens[-1].next
  while next_token:
    next_token.line_number += 1
    next_token = next_token.next


def SplitToken(token, position):
  """Splits the token into two tokens at position.

  Args:
    token: The token to split
    position: The position to split at. Will be the beginning of second token.

  Returns:
    The new second token.
  """
  new_string = token.string[position:]
  token.string = token.string[:position]

  new_token = JavaScriptToken(new_string, token.type, token.line,
                              token.line_number)
  InsertTokenAfter(new_token, token)

  return new_token
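
# Usage sketch: split a hypothetical token whose string is 'foobar' at
# position 3, so `tok.string` becomes 'foo' and the returned token holds
# 'bar':
#
#   second = SplitToken(tok, 3)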


def Compare(token1, token2):
  """Compares two tokens and determines their relative order.

  Args:
    token1: The first token to compare.
    token2: The second token to compare.

  Returns:
    A negative integer, zero, or a positive integer as the first token is
    before, equal, or after the second in the token stream.
  """
  if token2.line_number != token1.line_number:
    return token1.line_number - token2.line_number
  else:
    return token1.start_index - token2.start_index
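
# Usage sketch: Compare is a classic cmp-style function, so under Python 2 it
# can order a list of hypothetical tokens directly:
#
#   ordered = sorted([tok_a, tok_b, tok_c], cmp=Compare)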


def GoogScopeOrNoneFromStartBlock(token):
  """Determines if the given START_BLOCK is part of a goog.scope statement.

  Args:
    token: A token of type START_BLOCK.

  Returns:
    The goog.scope function call token, or None if no such call exists.
  """
  if token.type != JavaScriptTokenType.START_BLOCK:
    return None

  # Search for a goog.scope statement, which will be 5 tokens before the
  # block. Illustration of the tokens found prior to the start block:
  # goog.scope(function() {
  #      5    4    3   21 ^

  maybe_goog_scope = token
  for unused_i in xrange(5):
    maybe_goog_scope = (maybe_goog_scope.previous if maybe_goog_scope and
                        maybe_goog_scope.previous else None)
  if maybe_goog_scope and maybe_goog_scope.string == 'goog.scope':
    return maybe_goog_scope


def GetTokenRange(start_token, end_token):
  """Returns a list of tokens between the two given, inclusive.

  Args:
    start_token: Start token in the range.
    end_token: End token in the range.

  Returns:
    A list of tokens, in order, from start_token to end_token (including start
    and end).  Returns None if the tokens do not describe a valid range.
  """

  token_range = []
  token = start_token

  while token:
    token_range.append(token)

    if token == end_token:
      return token_range

    token = token.next


def TokensToString(token_iterable):
  """Convert a sequence of tokens into a string.

  Newlines will be inserted whenever the line_number of two neighboring
  tokens differ.

  Args:
    token_iterable: The tokens to turn into a string.

  Returns:
    A string representation of the given tokens.
  """

  buf = StringIO.StringIO()
  token_list = list(token_iterable)
  if not token_list:
    return ''

  line_number = token_list[0].line_number

  for token in token_list:

    while line_number < token.line_number:
      line_number += 1
      buf.write('\n')

    if line_number > token.line_number:
      line_number = token.line_number
      buf.write('\n')

    buf.write(token.string)

  return buf.getvalue()
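
# Usage sketch: render a span of the stream back to source text, assuming
# `start` and `end` are hypothetical tokens bounding a valid range:
#
#   source_text = TokensToString(GetTokenRange(start, end))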


def GetPreviousCodeToken(token):
  """Returns the code token before the specified token.

  Args:
    token: A token.

  Returns:
    The code token before the specified token or None if no such token
    exists.
  """

  return CustomSearch(
      token,
      lambda t: t and t.type not in JavaScriptTokenType.NON_CODE_TYPES,
      reverse=True)


def GetNextCodeToken(token):
  """Returns the next code token after the specified token.

  Args:
    token: A token.

  Returns:
    The next code token after the specified token or None if no such token
    exists.
  """

  return CustomSearch(
      token,
      lambda t: t and t.type not in JavaScriptTokenType.NON_CODE_TYPES,
      reverse=False)


def GetIdentifierStart(token):
  """Returns the first token in an identifier.

  Given a token which is part of an identifier, returns the token at the start
  of the identifier.

  Args:
    token: A token which is part of an identifier.

  Returns:
    The token at the start of the identifier or None if the identifier was not
    of the form 'a.b.c' (e.g. "['a']['b'].c").
  """

  start_token = token
  previous_code_token = GetPreviousCodeToken(token)

  while (previous_code_token and (
      previous_code_token.IsType(JavaScriptTokenType.IDENTIFIER) or
      IsDot(previous_code_token))):
    start_token = previous_code_token
    previous_code_token = GetPreviousCodeToken(previous_code_token)

  if IsDot(start_token):
    return None

  return start_token
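
# Usage sketch: given the token for 'c' in a hypothetical 'a.b.c' identifier,
# GetIdentifierStart returns the token for 'a'; for bracketed access such as
# "['a']['b'].c" it returns None.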


def GetIdentifierForToken(token):
  """Get the symbol specified by a token.

  Given a token, this function additionally concatenates any parts of the
  identifier being specified that are split by whitespace or a newline.

  Args:
    token: The first token of a symbol.

  Returns:
    The whole symbol, as a string, or None if the token is not the first
    token of an identifier.
  """

  # Search backward to determine if this token is the first token of the
  # identifier. If it is not the first token, return None to signal that this
  # token should be ignored.
  prev_token = token.previous
  while prev_token:
    if (prev_token.IsType(JavaScriptTokenType.IDENTIFIER) or
        IsDot(prev_token)):
      return None

    if (prev_token.IsType(Type.WHITESPACE) or
        prev_token.IsAnyType(JavaScriptTokenType.COMMENT_TYPES)):
      prev_token = prev_token.previous
    else:
      break

  # A "function foo()" declaration.
  if token.type is JavaScriptTokenType.FUNCTION_NAME:
    return token.string

  # A "var foo" declaration (if the previous code token is 'var').
  previous_code_token = GetPreviousCodeToken(token)

  if previous_code_token and previous_code_token.IsKeyword('var'):
    return token.string

  # Otherwise, this is potentially a namespaced (goog.foo.bar) identifier that
  # could span multiple lines or be broken up by whitespace.  We need
  # to concatenate.
  identifier_types = set([
      JavaScriptTokenType.IDENTIFIER,
      JavaScriptTokenType.SIMPLE_LVALUE
      ])

  assert token.type in identifier_types

  # Start with the first token.
  symbol_tokens = [token]

  if token.next:
    for t in token.next:
      last_symbol_token = symbol_tokens[-1]

      # A dot is part of the previous symbol.
      if IsDot(t):
        symbol_tokens.append(t)
        continue

      # An identifier is part of the previous symbol if the previous one was a
      # dot.
      if t.type in identifier_types:
        if IsDot(last_symbol_token):
          symbol_tokens.append(t)
          continue
        else:
          break

      # Skip any whitespace.
      if t.type in JavaScriptTokenType.NON_CODE_TYPES:
        continue

      # This is the end of the identifier. Stop iterating.
      break

  if symbol_tokens:
    return ''.join([t.string for t in symbol_tokens])
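
# Usage sketch: for the first token of a hypothetical identifier split
# across lines, such as 'goog.\n    foo.bar', this returns 'goog.foo.bar';
# called on any later token of the same identifier it returns None.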


def GetStringAfterToken(token):
  """Get the string after the given token.

  The search is limited by the end of the string because an empty string has
  no Type.STRING_TEXT token, and we do not want to return the next string
  instead.  E.g. given

    a = '';
    b = 'test';

  an unlimited search for the string after 'a' would return 'test', which is
  undesirable because there is an empty string before it.

  Args:
    token: Search will be done after this token.

  Returns:
    The string found after token, or None if the string is empty or no string
    is found (in both cases there is no Type.STRING_TEXT token).
  """
  string_token = SearchUntil(token, JavaScriptTokenType.STRING_TEXT,
                             [JavaScriptTokenType.SINGLE_QUOTE_STRING_END,
                              JavaScriptTokenType.DOUBLE_QUOTE_STRING_END])
  if string_token:
    return string_token.string
  else:
    return None
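
# Usage sketch (`a_token` is a hypothetical token for the variable `a`):
#
#   # a = 'test';  ->  GetStringAfterToken(a_token) returns 'test'
#   # a = '';      ->  GetStringAfterToken(a_token) returns None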


def IsDot(token):
  """Whether the token represents a "dot" operator (foo.bar)."""
  return token.type is JavaScriptTokenType.OPERATOR and token.string == '.'


def IsIdentifierOrDot(token):
  """Whether the token is either an identifier or a '.'."""
  return (token.type in [JavaScriptTokenType.IDENTIFIER,
                         JavaScriptTokenType.SIMPLE_LVALUE] or
          IsDot(token))