1#!/usr/bin/env python 2# 3# Copyright 2007 The Closure Linter Authors. All Rights Reserved. 4# 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS-IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16 17"""Token utility functions.""" 18 19__author__ = ('robbyw@google.com (Robert Walker)', 20 'ajp@google.com (Andy Perelson)') 21 22import copy 23import StringIO 24 25from closure_linter.common import tokens 26from closure_linter.javascripttokens import JavaScriptToken 27from closure_linter.javascripttokens import JavaScriptTokenType 28 29# Shorthand 30Type = tokens.TokenType 31 32 33def GetFirstTokenInSameLine(token): 34 """Returns the first token in the same line as token. 35 36 Args: 37 token: Any token in the line. 38 39 Returns: 40 The first token in the same line as token. 41 """ 42 while not token.IsFirstInLine(): 43 token = token.previous 44 return token 45 46 47def GetFirstTokenInPreviousLine(token): 48 """Returns the first token in the previous line as token. 49 50 Args: 51 token: Any token in the line. 52 53 Returns: 54 The first token in the previous line as token, or None if token is on the 55 first line. 56 """ 57 first_in_line = GetFirstTokenInSameLine(token) 58 if first_in_line.previous: 59 return GetFirstTokenInSameLine(first_in_line.previous) 60 61 return None 62 63 64def GetLastTokenInSameLine(token): 65 """Returns the last token in the same line as token. 66 67 Args: 68 token: Any token in the line. 69 70 Returns: 71 The last token in the same line as token. 72 """ 73 while not token.IsLastInLine(): 74 token = token.next 75 return token 76 77 78def GetAllTokensInSameLine(token): 79 """Returns all tokens in the same line as the given token. 80 81 Args: 82 token: Any token in the line. 83 84 Returns: 85 All tokens on the same line as the given token. 86 """ 87 first_token = GetFirstTokenInSameLine(token) 88 last_token = GetLastTokenInSameLine(token) 89 90 tokens_in_line = [] 91 while first_token != last_token: 92 tokens_in_line.append(first_token) 93 first_token = first_token.next 94 tokens_in_line.append(last_token) 95 96 return tokens_in_line 97 98 99def CustomSearch(start_token, func, end_func=None, distance=None, 100 reverse=False): 101 """Returns the first token where func is True within distance of this token. 102 103 Args: 104 start_token: The token to start searching from 105 func: The function to call to test a token for applicability 106 end_func: The function to call to test a token to determine whether to abort 107 the search. 108 distance: The number of tokens to look through before failing search. Must 109 be positive. If unspecified, will search until the end of the token 110 chain 111 reverse: When true, search the tokens before this one instead of the tokens 112 after it 113 114 Returns: 115 The first token matching func within distance of this token, or None if no 116 such token is found. 117 """ 118 token = start_token 119 if reverse: 120 while token and (distance is None or distance > 0): 121 previous = token.previous 122 if previous: 123 if func(previous): 124 return previous 125 if end_func and end_func(previous): 126 return None 127 128 token = previous 129 if distance is not None: 130 distance -= 1 131 132 else: 133 while token and (distance is None or distance > 0): 134 next_token = token.next 135 if next_token: 136 if func(next_token): 137 return next_token 138 if end_func and end_func(next_token): 139 return None 140 141 token = next_token 142 if distance is not None: 143 distance -= 1 144 145 return None 146 147 148def Search(start_token, token_types, distance=None, reverse=False): 149 """Returns the first token of type in token_types within distance. 150 151 Args: 152 start_token: The token to start searching from 153 token_types: The allowable types of the token being searched for 154 distance: The number of tokens to look through before failing search. Must 155 be positive. If unspecified, will search until the end of the token 156 chain 157 reverse: When true, search the tokens before this one instead of the tokens 158 after it 159 160 Returns: 161 The first token of any type in token_types within distance of this token, or 162 None if no such token is found. 163 """ 164 return CustomSearch(start_token, lambda token: token.IsAnyType(token_types), 165 None, distance, reverse) 166 167 168def SearchExcept(start_token, token_types, distance=None, reverse=False): 169 """Returns the first token not of any type in token_types within distance. 170 171 Args: 172 start_token: The token to start searching from 173 token_types: The unallowable types of the token being searched for 174 distance: The number of tokens to look through before failing search. Must 175 be positive. If unspecified, will search until the end of the token 176 chain 177 reverse: When true, search the tokens before this one instead of the tokens 178 after it 179 180 Returns: 181 The first token of any type in token_types within distance of this token, or 182 None if no such token is found. 183 """ 184 return CustomSearch(start_token, 185 lambda token: not token.IsAnyType(token_types), 186 None, distance, reverse) 187 188 189def SearchUntil(start_token, token_types, end_types, distance=None, 190 reverse=False): 191 """Returns the first token of type in token_types before a token of end_type. 192 193 Args: 194 start_token: The token to start searching from. 195 token_types: The allowable types of the token being searched for. 196 end_types: Types of tokens to abort search if we find. 197 distance: The number of tokens to look through before failing search. Must 198 be positive. If unspecified, will search until the end of the token 199 chain 200 reverse: When true, search the tokens before this one instead of the tokens 201 after it 202 203 Returns: 204 The first token of any type in token_types within distance of this token 205 before any tokens of type in end_type, or None if no such token is found. 206 """ 207 return CustomSearch(start_token, lambda token: token.IsAnyType(token_types), 208 lambda token: token.IsAnyType(end_types), 209 distance, reverse) 210 211 212def DeleteToken(token): 213 """Deletes the given token from the linked list. 214 215 Args: 216 token: The token to delete 217 """ 218 # When deleting a token, we do not update the deleted token itself to make 219 # sure the previous and next pointers are still pointing to tokens which are 220 # not deleted. Also it is very hard to keep track of all previously deleted 221 # tokens to update them when their pointers become invalid. So we add this 222 # flag that any token linked list iteration logic can skip deleted node safely 223 # when its current token is deleted. 224 token.is_deleted = True 225 if token.previous: 226 token.previous.next = token.next 227 228 if token.next: 229 token.next.previous = token.previous 230 231 following_token = token.next 232 while following_token and following_token.metadata.last_code == token: 233 following_token.metadata.last_code = token.metadata.last_code 234 following_token = following_token.next 235 236 237def DeleteTokens(token, token_count): 238 """Deletes the given number of tokens starting with the given token. 239 240 Args: 241 token: The token to start deleting at. 242 token_count: The total number of tokens to delete. 243 """ 244 for i in xrange(1, token_count): 245 DeleteToken(token.next) 246 DeleteToken(token) 247 248 249def InsertTokenBefore(new_token, token): 250 """Insert new_token before token. 251 252 Args: 253 new_token: A token to be added to the stream 254 token: A token already in the stream 255 """ 256 new_token.next = token 257 new_token.previous = token.previous 258 259 new_token.metadata = copy.copy(token.metadata) 260 261 if new_token.IsCode(): 262 old_last_code = token.metadata.last_code 263 following_token = token 264 while (following_token and 265 following_token.metadata.last_code == old_last_code): 266 following_token.metadata.last_code = new_token 267 following_token = following_token.next 268 269 token.previous = new_token 270 if new_token.previous: 271 new_token.previous.next = new_token 272 273 if new_token.start_index is None: 274 if new_token.line_number == token.line_number: 275 new_token.start_index = token.start_index 276 else: 277 previous_token = new_token.previous 278 if previous_token: 279 new_token.start_index = (previous_token.start_index + 280 len(previous_token.string)) 281 else: 282 new_token.start_index = 0 283 284 iterator = new_token.next 285 while iterator and iterator.line_number == new_token.line_number: 286 iterator.start_index += len(new_token.string) 287 iterator = iterator.next 288 289 290def InsertTokenAfter(new_token, token): 291 """Insert new_token after token. 292 293 Args: 294 new_token: A token to be added to the stream 295 token: A token already in the stream 296 """ 297 new_token.previous = token 298 new_token.next = token.next 299 300 new_token.metadata = copy.copy(token.metadata) 301 302 if token.IsCode(): 303 new_token.metadata.last_code = token 304 305 if new_token.IsCode(): 306 following_token = token.next 307 while following_token and following_token.metadata.last_code == token: 308 following_token.metadata.last_code = new_token 309 following_token = following_token.next 310 311 token.next = new_token 312 if new_token.next: 313 new_token.next.previous = new_token 314 315 if new_token.start_index is None: 316 if new_token.line_number == token.line_number: 317 new_token.start_index = token.start_index + len(token.string) 318 else: 319 new_token.start_index = 0 320 321 iterator = new_token.next 322 while iterator and iterator.line_number == new_token.line_number: 323 iterator.start_index += len(new_token.string) 324 iterator = iterator.next 325 326 327def InsertTokensAfter(new_tokens, token): 328 """Insert multiple tokens after token. 329 330 Args: 331 new_tokens: An array of tokens to be added to the stream 332 token: A token already in the stream 333 """ 334 # TODO(user): It would be nicer to have InsertTokenAfter defer to here 335 # instead of vice-versa. 336 current_token = token 337 for new_token in new_tokens: 338 InsertTokenAfter(new_token, current_token) 339 current_token = new_token 340 341 342def InsertSpaceTokenAfter(token): 343 """Inserts a space token after the given token. 344 345 Args: 346 token: The token to insert a space token after 347 348 Returns: 349 A single space token 350 """ 351 space_token = JavaScriptToken(' ', Type.WHITESPACE, token.line, 352 token.line_number) 353 InsertTokenAfter(space_token, token) 354 355 356def InsertBlankLineAfter(token): 357 """Inserts a blank line after the given token. 358 359 Args: 360 token: The token to insert a blank line after 361 362 Returns: 363 A single space token 364 """ 365 blank_token = JavaScriptToken('', Type.BLANK_LINE, '', 366 token.line_number + 1) 367 InsertLineAfter(token, [blank_token]) 368 369 370def InsertLineAfter(token, new_tokens): 371 """Inserts a new line consisting of new_tokens after the given token. 372 373 Args: 374 token: The token to insert after. 375 new_tokens: The tokens that will make up the new line. 376 """ 377 insert_location = token 378 for new_token in new_tokens: 379 InsertTokenAfter(new_token, insert_location) 380 insert_location = new_token 381 382 # Update all subsequent line numbers. 383 next_token = new_tokens[-1].next 384 while next_token: 385 next_token.line_number += 1 386 next_token = next_token.next 387 388 389def SplitToken(token, position): 390 """Splits the token into two tokens at position. 391 392 Args: 393 token: The token to split 394 position: The position to split at. Will be the beginning of second token. 395 396 Returns: 397 The new second token. 398 """ 399 new_string = token.string[position:] 400 token.string = token.string[:position] 401 402 new_token = JavaScriptToken(new_string, token.type, token.line, 403 token.line_number) 404 InsertTokenAfter(new_token, token) 405 406 return new_token 407 408 409def Compare(token1, token2): 410 """Compares two tokens and determines their relative order. 411 412 Args: 413 token1: The first token to compare. 414 token2: The second token to compare. 415 416 Returns: 417 A negative integer, zero, or a positive integer as the first token is 418 before, equal, or after the second in the token stream. 419 """ 420 if token2.line_number != token1.line_number: 421 return token1.line_number - token2.line_number 422 else: 423 return token1.start_index - token2.start_index 424 425 426def GoogScopeOrNoneFromStartBlock(token): 427 """Determines if the given START_BLOCK is part of a goog.scope statement. 428 429 Args: 430 token: A token of type START_BLOCK. 431 432 Returns: 433 The goog.scope function call token, or None if such call doesn't exist. 434 """ 435 if token.type != JavaScriptTokenType.START_BLOCK: 436 return None 437 438 # Search for a goog.scope statement, which will be 5 tokens before the 439 # block. Illustration of the tokens found prior to the start block: 440 # goog.scope(function() { 441 # 5 4 3 21 ^ 442 443 maybe_goog_scope = token 444 for unused_i in xrange(5): 445 maybe_goog_scope = (maybe_goog_scope.previous if maybe_goog_scope and 446 maybe_goog_scope.previous else None) 447 if maybe_goog_scope and maybe_goog_scope.string == 'goog.scope': 448 return maybe_goog_scope 449 450 451def GetTokenRange(start_token, end_token): 452 """Returns a list of tokens between the two given, inclusive. 453 454 Args: 455 start_token: Start token in the range. 456 end_token: End token in the range. 457 458 Returns: 459 A list of tokens, in order, from start_token to end_token (including start 460 and end). Returns none if the tokens do not describe a valid range. 461 """ 462 463 token_range = [] 464 token = start_token 465 466 while token: 467 token_range.append(token) 468 469 if token == end_token: 470 return token_range 471 472 token = token.next 473 474 475def TokensToString(token_iterable): 476 """Convert a number of tokens into a string. 477 478 Newlines will be inserted whenever the line_number of two neighboring 479 strings differ. 480 481 Args: 482 token_iterable: The tokens to turn to a string. 483 484 Returns: 485 A string representation of the given tokens. 486 """ 487 488 buf = StringIO.StringIO() 489 token_list = list(token_iterable) 490 if not token_list: 491 return '' 492 493 line_number = token_list[0].line_number 494 495 for token in token_list: 496 497 while line_number < token.line_number: 498 line_number += 1 499 buf.write('\n') 500 501 if line_number > token.line_number: 502 line_number = token.line_number 503 buf.write('\n') 504 505 buf.write(token.string) 506 507 return buf.getvalue() 508 509 510def GetPreviousCodeToken(token): 511 """Returns the code token before the specified token. 512 513 Args: 514 token: A token. 515 516 Returns: 517 The code token before the specified token or None if no such token 518 exists. 519 """ 520 521 return CustomSearch( 522 token, 523 lambda t: t and t.type not in JavaScriptTokenType.NON_CODE_TYPES, 524 reverse=True) 525 526 527def GetNextCodeToken(token): 528 """Returns the next code token after the specified token. 529 530 Args: 531 token: A token. 532 533 Returns: 534 The next code token after the specified token or None if no such token 535 exists. 536 """ 537 538 return CustomSearch( 539 token, 540 lambda t: t and t.type not in JavaScriptTokenType.NON_CODE_TYPES, 541 reverse=False) 542 543 544def GetIdentifierStart(token): 545 """Returns the first token in an identifier. 546 547 Given a token which is part of an identifier, returns the token at the start 548 of the identifier. 549 550 Args: 551 token: A token which is part of an identifier. 552 553 Returns: 554 The token at the start of the identifier or None if the identifier was not 555 of the form 'a.b.c' (e.g. "['a']['b'].c"). 556 """ 557 558 start_token = token 559 previous_code_token = GetPreviousCodeToken(token) 560 561 while (previous_code_token and ( 562 previous_code_token.IsType(JavaScriptTokenType.IDENTIFIER) or 563 IsDot(previous_code_token))): 564 start_token = previous_code_token 565 previous_code_token = GetPreviousCodeToken(previous_code_token) 566 567 if IsDot(start_token): 568 return None 569 570 return start_token 571 572 573def GetIdentifierForToken(token): 574 """Get the symbol specified by a token. 575 576 Given a token, this function additionally concatenates any parts of an 577 identifying symbol being identified that are split by whitespace or a 578 newline. 579 580 The function will return None if the token is not the first token of an 581 identifier. 582 583 Args: 584 token: The first token of a symbol. 585 586 Returns: 587 The whole symbol, as a string. 588 """ 589 590 # Search backward to determine if this token is the first token of the 591 # identifier. If it is not the first token, return None to signal that this 592 # token should be ignored. 593 prev_token = token.previous 594 while prev_token: 595 if (prev_token.IsType(JavaScriptTokenType.IDENTIFIER) or 596 IsDot(prev_token)): 597 return None 598 599 if (prev_token.IsType(tokens.TokenType.WHITESPACE) or 600 prev_token.IsAnyType(JavaScriptTokenType.COMMENT_TYPES)): 601 prev_token = prev_token.previous 602 else: 603 break 604 605 # A "function foo()" declaration. 606 if token.type is JavaScriptTokenType.FUNCTION_NAME: 607 return token.string 608 609 # A "var foo" declaration (if the previous token is 'var') 610 previous_code_token = GetPreviousCodeToken(token) 611 612 if previous_code_token and previous_code_token.IsKeyword('var'): 613 return token.string 614 615 # Otherwise, this is potentially a namespaced (goog.foo.bar) identifier that 616 # could span multiple lines or be broken up by whitespace. We need 617 # to concatenate. 618 identifier_types = set([ 619 JavaScriptTokenType.IDENTIFIER, 620 JavaScriptTokenType.SIMPLE_LVALUE 621 ]) 622 623 assert token.type in identifier_types 624 625 # Start with the first token 626 symbol_tokens = [token] 627 628 if token.next: 629 for t in token.next: 630 last_symbol_token = symbol_tokens[-1] 631 632 # A dot is part of the previous symbol. 633 if IsDot(t): 634 symbol_tokens.append(t) 635 continue 636 637 # An identifier is part of the previous symbol if the previous one was a 638 # dot. 639 if t.type in identifier_types: 640 if IsDot(last_symbol_token): 641 symbol_tokens.append(t) 642 continue 643 else: 644 break 645 646 # Skip any whitespace 647 if t.type in JavaScriptTokenType.NON_CODE_TYPES: 648 continue 649 650 # This is the end of the identifier. Stop iterating. 651 break 652 653 if symbol_tokens: 654 return ''.join([t.string for t in symbol_tokens]) 655 656 657def GetStringAfterToken(token): 658 """Get string after token. 659 660 Args: 661 token: Search will be done after this token. 662 663 Returns: 664 String if found after token else None (empty string will also 665 return None). 666 667 Search until end of string as in case of empty string Type.STRING_TEXT is not 668 present/found and don't want to return next string. 669 E.g. 670 a = ''; 671 b = 'test'; 672 When searching for string after 'a' if search is not limited by end of string 673 then it will return 'test' which is not desirable as there is a empty string 674 before that. 675 676 This will return None for cases where string is empty or no string found 677 as in both cases there is no Type.STRING_TEXT. 678 """ 679 string_token = SearchUntil(token, JavaScriptTokenType.STRING_TEXT, 680 [JavaScriptTokenType.SINGLE_QUOTE_STRING_END, 681 JavaScriptTokenType.DOUBLE_QUOTE_STRING_END]) 682 if string_token: 683 return string_token.string 684 else: 685 return None 686 687 688def IsDot(token): 689 """Whether the token represents a "dot" operator (foo.bar).""" 690 return token.type is JavaScriptTokenType.OPERATOR and token.string == '.' 691 692 693def IsIdentifierOrDot(token): 694 """Whether the token is either an identifier or a '.'.""" 695 return (token.type in [JavaScriptTokenType.IDENTIFIER, 696 JavaScriptTokenType.SIMPLE_LVALUE] or 697 IsDot(token)) 698