1#!/usr/bin/python 2"""A glorified C pre-processor parser.""" 3 4import ctypes 5import logging 6import os 7import re 8import site 9import utils 10 11top = os.getenv('ANDROID_BUILD_TOP') 12if top is None: 13 utils.panic('ANDROID_BUILD_TOP not set.\n') 14 15# Set up the env vars for libclang. 16site.addsitedir(os.path.join(top, 'external/clang/bindings/python')) 17 18import clang.cindex 19from clang.cindex import conf 20from clang.cindex import Cursor 21from clang.cindex import CursorKind 22from clang.cindex import SourceLocation 23from clang.cindex import SourceRange 24from clang.cindex import TokenGroup 25from clang.cindex import TokenKind 26from clang.cindex import TranslationUnit 27 28# Set up LD_LIBRARY_PATH to include libclang.so, libLLVM.so, and etc. 29# Note that setting LD_LIBRARY_PATH with os.putenv() sometimes doesn't help. 30clang.cindex.Config.set_library_path(os.path.join(top, 'prebuilts/sdk/tools/linux/lib64')) 31 32from defaults import kCppUndefinedMacro 33from defaults import kernel_remove_config_macros 34from defaults import kernel_token_replacements 35 36 37debugBlockParser = False 38debugCppExpr = False 39debugOptimIf01 = False 40 41############################################################################### 42############################################################################### 43##### ##### 44##### C P P T O K E N S ##### 45##### ##### 46############################################################################### 47############################################################################### 48 49# the list of supported C-preprocessor tokens 50# plus a couple of C tokens as well 51tokEOF = "\0" 52tokLN = "\n" 53tokSTRINGIFY = "#" 54tokCONCAT = "##" 55tokLOGICAND = "&&" 56tokLOGICOR = "||" 57tokSHL = "<<" 58tokSHR = ">>" 59tokEQUAL = "==" 60tokNEQUAL = "!=" 61tokLT = "<" 62tokLTE = "<=" 63tokGT = ">" 64tokGTE = ">=" 65tokELLIPSIS = "..." 66tokSPACE = " " 67tokDEFINED = "defined" 68tokLPAREN = "(" 69tokRPAREN = ")" 70tokNOT = "!" 71tokPLUS = "+" 72tokMINUS = "-" 73tokMULTIPLY = "*" 74tokDIVIDE = "/" 75tokMODULUS = "%" 76tokBINAND = "&" 77tokBINOR = "|" 78tokBINXOR = "^" 79tokCOMMA = "," 80tokLBRACE = "{" 81tokRBRACE = "}" 82tokARROW = "->" 83tokINCREMENT = "++" 84tokDECREMENT = "--" 85tokNUMBER = "<number>" 86tokIDENT = "<ident>" 87tokSTRING = "<string>" 88 89 90class Token(clang.cindex.Token): 91 """A class that represents one token after parsing. 92 93 It inherits the class in libclang, with an extra id property to hold the 94 new spelling of the token. The spelling property in the base class is 95 defined as read-only. New names after macro instantiation are saved in 96 their ids now. It also facilitates the renaming of directive optimizations 97 like replacing 'ifndef X' with 'if !defined(X)'. 98 99 It also overrides the cursor property of the base class. Because the one 100 in libclang always queries based on a single token, which usually doesn't 101 hold useful information. The cursor in this class can be set by calling 102 CppTokenizer.getTokensWithCursors(). Otherwise it returns the one in the 103 base class. 104 """ 105 106 def __init__(self, tu=None, group=None, int_data=None, ptr_data=None, 107 cursor=None): 108 clang.cindex.Token.__init__(self) 109 self._id = None 110 self._tu = tu 111 self._group = group 112 self._cursor = cursor 113 # self.int_data and self.ptr_data are from the base class. But 114 # self.int_data doesn't accept a None value. 115 if int_data is not None: 116 self.int_data = int_data 117 self.ptr_data = ptr_data 118 119 @property 120 def id(self): 121 """Name of the token.""" 122 if self._id is None: 123 return self.spelling 124 else: 125 return self._id 126 127 @id.setter 128 def id(self, new_id): 129 """Setting name of the token.""" 130 self._id = new_id 131 132 @property 133 def cursor(self): 134 if self._cursor is None: 135 self._cursor = clang.cindex.Token.cursor 136 return self._cursor 137 138 @cursor.setter 139 def cursor(self, new_cursor): 140 self._cursor = new_cursor 141 142 def __repr__(self): 143 if self.id == 'defined': 144 return self.id 145 elif self.kind == TokenKind.IDENTIFIER: 146 return "(ident %s)" % self.id 147 148 return self.id 149 150 def __str__(self): 151 return self.id 152 153 154class BadExpectedToken(Exception): 155 """An exception that will be raised for unexpected tokens.""" 156 pass 157 158 159# The __contains__ function in libclang SourceRange class contains a bug. It 160# gives wrong result when dealing with single line range. 161# Bug filed with upstream: 162# http://llvm.org/bugs/show_bug.cgi?id=22243, http://reviews.llvm.org/D7277 163def SourceRange__contains__(self, other): 164 """Determine if a given location is inside the range.""" 165 if not isinstance(other, SourceLocation): 166 return False 167 if other.file is None and self.start.file is None: 168 pass 169 elif (self.start.file.name != other.file.name or 170 other.file.name != self.end.file.name): 171 # same file name 172 return False 173 # same file, in between lines 174 if self.start.line < other.line < self.end.line: 175 return True 176 # same file, same line 177 elif self.start.line == other.line == self.end.line: 178 if self.start.column <= other.column <= self.end.column: 179 return True 180 elif self.start.line == other.line: 181 # same file first line 182 if self.start.column <= other.column: 183 return True 184 elif other.line == self.end.line: 185 # same file last line 186 if other.column <= self.end.column: 187 return True 188 return False 189 190 191SourceRange.__contains__ = SourceRange__contains__ 192 193 194################################################################################ 195################################################################################ 196##### ##### 197##### C P P T O K E N I Z E R ##### 198##### ##### 199################################################################################ 200################################################################################ 201 202 203class CppTokenizer(object): 204 """A tokenizer that converts some input text into a list of tokens. 205 206 It calls libclang's tokenizer to get the parsed tokens. In addition, it 207 updates the cursor property in each token after parsing, by calling 208 getTokensWithCursors(). 209 """ 210 211 clang_flags = ['-E', '-x', 'c'] 212 options = TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD 213 214 def __init__(self): 215 """Initialize a new CppTokenizer object.""" 216 self._indexer = clang.cindex.Index.create() 217 self._tu = None 218 self._index = 0 219 self.tokens = None 220 221 def _getTokensWithCursors(self): 222 """Helper method to return all tokens with their cursors. 223 224 The cursor property in a clang Token doesn't provide enough 225 information. Because it is queried based on single token each time 226 without any context, i.e. via calling conf.lib.clang_annotateTokens() 227 with only one token given. So we often see 'INVALID_FILE' in one 228 token's cursor. In this function it passes all the available tokens 229 to get more informative cursors. 230 """ 231 232 tokens_memory = ctypes.POINTER(clang.cindex.Token)() 233 tokens_count = ctypes.c_uint() 234 235 conf.lib.clang_tokenize(self._tu, self._tu.cursor.extent, 236 ctypes.byref(tokens_memory), 237 ctypes.byref(tokens_count)) 238 239 count = int(tokens_count.value) 240 241 # If we get no tokens, no memory was allocated. Be sure not to return 242 # anything and potentially call a destructor on nothing. 243 if count < 1: 244 return 245 246 cursors = (Cursor * count)() 247 cursors_memory = ctypes.cast(cursors, ctypes.POINTER(Cursor)) 248 249 conf.lib.clang_annotateTokens(self._tu, tokens_memory, count, 250 cursors_memory) 251 252 tokens_array = ctypes.cast( 253 tokens_memory, 254 ctypes.POINTER(clang.cindex.Token * count)).contents 255 token_group = TokenGroup(self._tu, tokens_memory, tokens_count) 256 257 tokens = [] 258 for i in xrange(0, count): 259 token = Token(self._tu, token_group, 260 int_data=tokens_array[i].int_data, 261 ptr_data=tokens_array[i].ptr_data, 262 cursor=cursors[i]) 263 # We only want non-comment tokens. 264 if token.kind != TokenKind.COMMENT: 265 tokens.append(token) 266 267 return tokens 268 269 def parseString(self, lines): 270 """Parse a list of text lines into a BlockList object.""" 271 file_ = 'dummy.c' 272 self._tu = self._indexer.parse(file_, self.clang_flags, 273 unsaved_files=[(file_, lines)], 274 options=self.options) 275 self.tokens = self._getTokensWithCursors() 276 277 def parseFile(self, file_): 278 """Parse a file into a BlockList object.""" 279 self._tu = self._indexer.parse(file_, self.clang_flags, 280 options=self.options) 281 self.tokens = self._getTokensWithCursors() 282 283 def nextToken(self): 284 """Return next token from the list.""" 285 if self._index < len(self.tokens): 286 t = self.tokens[self._index] 287 self._index += 1 288 return t 289 else: 290 return None 291 292 293class CppStringTokenizer(CppTokenizer): 294 """A CppTokenizer derived class that accepts a string of text as input.""" 295 296 def __init__(self, line): 297 CppTokenizer.__init__(self) 298 self.parseString(line) 299 300 301class CppFileTokenizer(CppTokenizer): 302 """A CppTokenizer derived class that accepts a file as input.""" 303 304 def __init__(self, file_): 305 CppTokenizer.__init__(self) 306 self.parseFile(file_) 307 308 309# Unit testing 310# 311class CppTokenizerTester(object): 312 """A class used to test CppTokenizer classes.""" 313 314 def __init__(self, tokenizer=None): 315 self._tokenizer = tokenizer 316 self._token = None 317 318 def setTokenizer(self, tokenizer): 319 self._tokenizer = tokenizer 320 321 def expect(self, id): 322 self._token = self._tokenizer.nextToken() 323 if self._token is None: 324 tokid = '' 325 else: 326 tokid = self._token.id 327 if tokid == id: 328 return 329 raise BadExpectedToken("### BAD TOKEN: '%s' expecting '%s'" % ( 330 tokid, id)) 331 332 def expectToken(self, id, line, col): 333 self.expect(id) 334 if self._token.location.line != line: 335 raise BadExpectedToken( 336 "### BAD LINENO: token '%s' got '%d' expecting '%d'" % ( 337 id, self._token.lineno, line)) 338 if self._token.location.column != col: 339 raise BadExpectedToken("### BAD COLNO: '%d' expecting '%d'" % ( 340 self._token.colno, col)) 341 342 def expectTokens(self, tokens): 343 for id, line, col in tokens: 344 self.expectToken(id, line, col) 345 346 def expectList(self, list_): 347 for item in list_: 348 self.expect(item) 349 350 351def test_CppTokenizer(): 352 tester = CppTokenizerTester() 353 354 tester.setTokenizer(CppStringTokenizer("#an/example && (01923_xy)")) 355 tester.expectList(["#", "an", "/", "example", tokLOGICAND, tokLPAREN, 356 "01923_xy", tokRPAREN]) 357 358 tester.setTokenizer(CppStringTokenizer("FOO(BAR) && defined(BAZ)")) 359 tester.expectList(["FOO", tokLPAREN, "BAR", tokRPAREN, tokLOGICAND, 360 "defined", tokLPAREN, "BAZ", tokRPAREN]) 361 362 tester.setTokenizer(CppStringTokenizer("/*\n#\n*/")) 363 tester.expectList([]) 364 365 tester.setTokenizer(CppStringTokenizer("first\nsecond")) 366 tester.expectList(["first", "second"]) 367 368 tester.setTokenizer(CppStringTokenizer("first second\n third")) 369 tester.expectTokens([("first", 1, 1), 370 ("second", 1, 7), 371 ("third", 2, 3)]) 372 373 tester.setTokenizer(CppStringTokenizer("boo /* what the\nhell */")) 374 tester.expectTokens([("boo", 1, 1)]) 375 376 tester.setTokenizer(CppStringTokenizer("an \\\n example")) 377 tester.expectTokens([("an", 1, 1), 378 ("example", 2, 2)]) 379 return True 380 381 382################################################################################ 383################################################################################ 384##### ##### 385##### C P P E X P R E S S I O N S ##### 386##### ##### 387################################################################################ 388################################################################################ 389 390 391class CppExpr(object): 392 """A class that models the condition of #if directives into an expr tree. 393 394 Each node in the tree is of the form (op, arg) or (op, arg1, arg2) where 395 "op" is a string describing the operation 396 """ 397 398 unaries = ["!", "~"] 399 binaries = ["+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%", 400 "&", "|", "^", "<<", ">>", "==", "!=", "?", ":"] 401 precedences = { 402 "?": 1, ":": 1, 403 "||": 2, 404 "&&": 3, 405 "|": 4, 406 "^": 5, 407 "&": 6, 408 "==": 7, "!=": 7, 409 "<": 8, "<=": 8, ">": 8, ">=": 8, 410 "<<": 9, ">>": 9, 411 "+": 10, "-": 10, 412 "*": 11, "/": 11, "%": 11, 413 "!": 12, "~": 12 414 } 415 416 def __init__(self, tokens): 417 """Initialize a CppExpr. 'tokens' must be a CppToken list.""" 418 self.tokens = tokens 419 self._num_tokens = len(tokens) 420 self._index = 0 421 422 if debugCppExpr: 423 print "CppExpr: trying to parse %s" % repr(tokens) 424 self.expr = self.parseExpression(0) 425 if debugCppExpr: 426 print "CppExpr: got " + repr(self.expr) 427 if self._index != self._num_tokens: 428 self.throw(BadExpectedToken, "crap at end of input (%d != %d): %s" 429 % (self._index, self._num_tokens, repr(tokens))) 430 431 def throw(self, exception, msg): 432 if self._index < self._num_tokens: 433 tok = self.tokens[self._index] 434 print "%d:%d: %s" % (tok.location.line, tok.location.column, msg) 435 else: 436 print "EOF: %s" % msg 437 raise exception(msg) 438 439 def expectId(self, id): 440 """Check that a given token id is at the current position.""" 441 token = self.tokens[self._index] 442 if self._index >= self._num_tokens or token.id != id: 443 self.throw(BadExpectedToken, 444 "### expecting '%s' in expression, got '%s'" % ( 445 id, token.id)) 446 self._index += 1 447 448 def is_decimal(self): 449 token = self.tokens[self._index].id 450 if token[-1] in "ULul": 451 token = token[:-1] 452 try: 453 val = int(token, 10) 454 self._index += 1 455 return ('int', val) 456 except ValueError: 457 return None 458 459 def is_octal(self): 460 token = self.tokens[self._index].id 461 if token[-1] in "ULul": 462 token = token[:-1] 463 if len(token) < 2 or token[0] != '0': 464 return None 465 try: 466 val = int(token, 8) 467 self._index += 1 468 return ('oct', val) 469 except ValueError: 470 return None 471 472 def is_hexadecimal(self): 473 token = self.tokens[self._index].id 474 if token[-1] in "ULul": 475 token = token[:-1] 476 if len(token) < 3 or (token[:2] != '0x' and token[:2] != '0X'): 477 return None 478 try: 479 val = int(token, 16) 480 self._index += 1 481 return ('hex', val) 482 except ValueError: 483 return None 484 485 def is_integer(self): 486 if self.tokens[self._index].kind != TokenKind.LITERAL: 487 return None 488 489 c = self.is_hexadecimal() 490 if c: 491 return c 492 493 c = self.is_octal() 494 if c: 495 return c 496 497 c = self.is_decimal() 498 if c: 499 return c 500 501 return None 502 503 def is_number(self): 504 t = self.tokens[self._index] 505 if t.id == tokMINUS and self._index + 1 < self._num_tokens: 506 self._index += 1 507 c = self.is_integer() 508 if c: 509 op, val = c 510 return (op, -val) 511 if t.id == tokPLUS and self._index + 1 < self._num_tokens: 512 self._index += 1 513 c = self.is_integer() 514 if c: 515 return c 516 517 return self.is_integer() 518 519 def is_defined(self): 520 t = self.tokens[self._index] 521 if t.id != tokDEFINED: 522 return None 523 524 # We have the defined keyword, check the rest. 525 self._index += 1 526 used_parens = False 527 if (self._index < self._num_tokens and 528 self.tokens[self._index].id == tokLPAREN): 529 used_parens = True 530 self._index += 1 531 532 if self._index >= self._num_tokens: 533 self.throw(BadExpectedToken, 534 "### 'defined' must be followed by macro name or left " 535 "paren") 536 537 t = self.tokens[self._index] 538 if t.kind != TokenKind.IDENTIFIER: 539 self.throw(BadExpectedToken, 540 "### 'defined' must be followed by macro name") 541 542 self._index += 1 543 if used_parens: 544 self.expectId(tokRPAREN) 545 546 return ("defined", t.id) 547 548 def is_call_or_ident(self): 549 if self._index >= self._num_tokens: 550 return None 551 552 t = self.tokens[self._index] 553 if t.kind != TokenKind.IDENTIFIER: 554 return None 555 556 name = t.id 557 558 self._index += 1 559 if (self._index >= self._num_tokens or 560 self.tokens[self._index].id != tokLPAREN): 561 return ("ident", name) 562 563 params = [] 564 depth = 1 565 self._index += 1 566 j = self._index 567 while self._index < self._num_tokens: 568 id = self.tokens[self._index].id 569 if id == tokLPAREN: 570 depth += 1 571 elif depth == 1 and (id == tokCOMMA or id == tokRPAREN): 572 k = self._index 573 param = self.tokens[j:k] 574 params.append(param) 575 if id == tokRPAREN: 576 break 577 j = self._index + 1 578 elif id == tokRPAREN: 579 depth -= 1 580 self._index += 1 581 582 if self._index >= self._num_tokens: 583 return None 584 585 self._index += 1 586 return ("call", (name, params)) 587 588 # Implements the "precedence climbing" algorithm from 589 # http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm. 590 # The "classic" algorithm would be fine if we were using a tool to 591 # generate the parser, but we're not. Dijkstra's "shunting yard" 592 # algorithm hasn't been necessary yet. 593 594 def parseExpression(self, minPrecedence): 595 if self._index >= self._num_tokens: 596 return None 597 598 node = self.parsePrimary() 599 while (self.token() and self.isBinary(self.token()) and 600 self.precedence(self.token()) >= minPrecedence): 601 op = self.token() 602 self.nextToken() 603 rhs = self.parseExpression(self.precedence(op) + 1) 604 node = (op.id, node, rhs) 605 606 return node 607 608 def parsePrimary(self): 609 op = self.token() 610 if self.isUnary(op): 611 self.nextToken() 612 return (op.id, self.parseExpression(self.precedence(op))) 613 614 primary = None 615 if op.id == tokLPAREN: 616 self.nextToken() 617 primary = self.parseExpression(0) 618 self.expectId(tokRPAREN) 619 elif op.id == "?": 620 self.nextToken() 621 primary = self.parseExpression(0) 622 self.expectId(":") 623 elif op.id == '+' or op.id == '-' or op.kind == TokenKind.LITERAL: 624 primary = self.is_number() 625 # Checking for 'defined' needs to come first now because 'defined' is 626 # recognized as IDENTIFIER. 627 elif op.id == tokDEFINED: 628 primary = self.is_defined() 629 elif op.kind == TokenKind.IDENTIFIER: 630 primary = self.is_call_or_ident() 631 else: 632 self.throw(BadExpectedToken, 633 "didn't expect to see a %s in factor" % ( 634 self.tokens[self._index].id)) 635 return primary 636 637 def isBinary(self, token): 638 return token.id in self.binaries 639 640 def isUnary(self, token): 641 return token.id in self.unaries 642 643 def precedence(self, token): 644 return self.precedences.get(token.id) 645 646 def token(self): 647 if self._index >= self._num_tokens: 648 return None 649 return self.tokens[self._index] 650 651 def nextToken(self): 652 self._index += 1 653 if self._index >= self._num_tokens: 654 return None 655 return self.tokens[self._index] 656 657 def dump_node(self, e): 658 op = e[0] 659 line = "(" + op 660 if op == "int": 661 line += " %d)" % e[1] 662 elif op == "oct": 663 line += " 0%o)" % e[1] 664 elif op == "hex": 665 line += " 0x%x)" % e[1] 666 elif op == "ident": 667 line += " %s)" % e[1] 668 elif op == "defined": 669 line += " %s)" % e[1] 670 elif op == "call": 671 arg = e[1] 672 line += " %s [" % arg[0] 673 prefix = "" 674 for param in arg[1]: 675 par = "" 676 for tok in param: 677 par += str(tok) 678 line += "%s%s" % (prefix, par) 679 prefix = "," 680 line += "])" 681 elif op in CppExpr.unaries: 682 line += " %s)" % self.dump_node(e[1]) 683 elif op in CppExpr.binaries: 684 line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2])) 685 else: 686 line += " ?%s)" % repr(e[1]) 687 688 return line 689 690 def __repr__(self): 691 return self.dump_node(self.expr) 692 693 def source_node(self, e): 694 op = e[0] 695 if op == "int": 696 return "%d" % e[1] 697 if op == "hex": 698 return "0x%x" % e[1] 699 if op == "oct": 700 return "0%o" % e[1] 701 if op == "ident": 702 # XXX: should try to expand 703 return e[1] 704 if op == "defined": 705 return "defined(%s)" % e[1] 706 707 prec = CppExpr.precedences.get(op, 1000) 708 arg = e[1] 709 if op in CppExpr.unaries: 710 arg_src = self.source_node(arg) 711 arg_op = arg[0] 712 arg_prec = CppExpr.precedences.get(arg_op, 1000) 713 if arg_prec < prec: 714 return "!(" + arg_src + ")" 715 else: 716 return "!" + arg_src 717 if op in CppExpr.binaries: 718 arg2 = e[2] 719 arg1_op = arg[0] 720 arg2_op = arg2[0] 721 arg1_src = self.source_node(arg) 722 arg2_src = self.source_node(arg2) 723 if CppExpr.precedences.get(arg1_op, 1000) < prec: 724 arg1_src = "(%s)" % arg1_src 725 if CppExpr.precedences.get(arg2_op, 1000) < prec: 726 arg2_src = "(%s)" % arg2_src 727 728 return "%s %s %s" % (arg1_src, op, arg2_src) 729 return "???" 730 731 def __str__(self): 732 return self.source_node(self.expr) 733 734 @staticmethod 735 def int_node(e): 736 if e[0] in ["int", "oct", "hex"]: 737 return e[1] 738 else: 739 return None 740 741 def toInt(self): 742 return self.int_node(self.expr) 743 744 def optimize_node(self, e, macros=None): 745 if macros is None: 746 macros = {} 747 op = e[0] 748 749 if op == "defined": 750 op, name = e 751 if macros.has_key(name): 752 if macros[name] == kCppUndefinedMacro: 753 return ("int", 0) 754 else: 755 try: 756 value = int(macros[name]) 757 return ("int", value) 758 except ValueError: 759 return ("defined", macros[name]) 760 761 if kernel_remove_config_macros and name.startswith("CONFIG_"): 762 return ("int", 0) 763 764 return e 765 766 elif op == "ident": 767 op, name = e 768 if macros.has_key(name): 769 try: 770 value = int(macros[name]) 771 expanded = ("int", value) 772 except ValueError: 773 expanded = ("ident", macros[name]) 774 return self.optimize_node(expanded, macros) 775 return e 776 777 elif op == "!": 778 op, v = e 779 v = self.optimize_node(v, macros) 780 if v[0] == "int": 781 if v[1] == 0: 782 return ("int", 1) 783 else: 784 return ("int", 0) 785 return ('!', v) 786 787 elif op == "&&": 788 op, l, r = e 789 l = self.optimize_node(l, macros) 790 r = self.optimize_node(r, macros) 791 li = self.int_node(l) 792 ri = self.int_node(r) 793 if li is not None: 794 if li == 0: 795 return ("int", 0) 796 else: 797 return r 798 elif ri is not None: 799 if ri == 0: 800 return ("int", 0) 801 else: 802 return l 803 return (op, l, r) 804 805 elif op == "||": 806 op, l, r = e 807 l = self.optimize_node(l, macros) 808 r = self.optimize_node(r, macros) 809 li = self.int_node(l) 810 ri = self.int_node(r) 811 if li is not None: 812 if li == 0: 813 return r 814 else: 815 return ("int", 1) 816 elif ri is not None: 817 if ri == 0: 818 return l 819 else: 820 return ("int", 1) 821 return (op, l, r) 822 823 else: 824 return e 825 826 def optimize(self, macros=None): 827 if macros is None: 828 macros = {} 829 self.expr = self.optimize_node(self.expr, macros) 830 831 832def test_cpp_expr(expr, expected): 833 e = CppExpr(CppStringTokenizer(expr).tokens) 834 s1 = repr(e) 835 if s1 != expected: 836 print ("[FAIL]: expression '%s' generates '%s', should be " 837 "'%s'" % (expr, s1, expected)) 838 global failure_count 839 failure_count += 1 840 841 842def test_cpp_expr_optim(expr, expected, macros=None): 843 if macros is None: 844 macros = {} 845 e = CppExpr(CppStringTokenizer(expr).tokens) 846 e.optimize(macros) 847 s1 = repr(e) 848 if s1 != expected: 849 print ("[FAIL]: optimized expression '%s' generates '%s' with " 850 "macros %s, should be '%s'" % (expr, s1, macros, expected)) 851 global failure_count 852 failure_count += 1 853 854 855def test_cpp_expr_source(expr, expected): 856 e = CppExpr(CppStringTokenizer(expr).tokens) 857 s1 = str(e) 858 if s1 != expected: 859 print ("[FAIL]: source expression '%s' generates '%s', should " 860 "be '%s'" % (expr, s1, expected)) 861 global failure_count 862 failure_count += 1 863 864 865def test_CppExpr(): 866 test_cpp_expr("0", "(int 0)") 867 test_cpp_expr("1", "(int 1)") 868 test_cpp_expr("-5", "(int -5)") 869 test_cpp_expr("+1", "(int 1)") 870 test_cpp_expr("0U", "(int 0)") 871 test_cpp_expr("015", "(oct 015)") 872 test_cpp_expr("015l", "(oct 015)") 873 test_cpp_expr("0x3e", "(hex 0x3e)") 874 test_cpp_expr("(0)", "(int 0)") 875 test_cpp_expr("1 && 1", "(&& (int 1) (int 1))") 876 test_cpp_expr("1 && 0", "(&& (int 1) (int 0))") 877 test_cpp_expr("EXAMPLE", "(ident EXAMPLE)") 878 test_cpp_expr("EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))") 879 test_cpp_expr("defined(EXAMPLE)", "(defined EXAMPLE)") 880 test_cpp_expr("defined ( EXAMPLE ) ", "(defined EXAMPLE)") 881 test_cpp_expr("!defined(EXAMPLE)", "(! (defined EXAMPLE))") 882 test_cpp_expr("defined(ABC) || defined(BINGO)", 883 "(|| (defined ABC) (defined BINGO))") 884 test_cpp_expr("FOO(BAR,5)", "(call FOO [BAR,5])") 885 test_cpp_expr("A == 1 || defined(B)", 886 "(|| (== (ident A) (int 1)) (defined B))") 887 888 test_cpp_expr_optim("0", "(int 0)") 889 test_cpp_expr_optim("1", "(int 1)") 890 test_cpp_expr_optim("1 && 1", "(int 1)") 891 test_cpp_expr_optim("1 && +1", "(int 1)") 892 test_cpp_expr_optim("0x1 && 01", "(oct 01)") 893 test_cpp_expr_optim("1 && 0", "(int 0)") 894 test_cpp_expr_optim("0 && 1", "(int 0)") 895 test_cpp_expr_optim("0 && 0", "(int 0)") 896 test_cpp_expr_optim("1 || 1", "(int 1)") 897 test_cpp_expr_optim("1 || 0", "(int 1)") 898 test_cpp_expr_optim("0 || 1", "(int 1)") 899 test_cpp_expr_optim("0 || 0", "(int 0)") 900 test_cpp_expr_optim("A", "(ident A)") 901 test_cpp_expr_optim("A", "(int 1)", {"A": 1}) 902 test_cpp_expr_optim("A || B", "(int 1)", {"A": 1}) 903 test_cpp_expr_optim("A || B", "(int 1)", {"B": 1}) 904 test_cpp_expr_optim("A && B", "(ident B)", {"A": 1}) 905 test_cpp_expr_optim("A && B", "(ident A)", {"B": 1}) 906 test_cpp_expr_optim("A && B", "(&& (ident A) (ident B))") 907 test_cpp_expr_optim("EXAMPLE", "(ident EXAMPLE)") 908 test_cpp_expr_optim("EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))") 909 test_cpp_expr_optim("defined(EXAMPLE)", "(defined EXAMPLE)") 910 test_cpp_expr_optim("defined(EXAMPLE)", "(defined XOWOE)", 911 {"EXAMPLE": "XOWOE"}) 912 test_cpp_expr_optim("defined(EXAMPLE)", "(int 0)", 913 {"EXAMPLE": kCppUndefinedMacro}) 914 test_cpp_expr_optim("!defined(EXAMPLE)", "(! (defined EXAMPLE))") 915 test_cpp_expr_optim("!defined(EXAMPLE)", "(! (defined XOWOE))", 916 {"EXAMPLE": "XOWOE"}) 917 test_cpp_expr_optim("!defined(EXAMPLE)", "(int 1)", 918 {"EXAMPLE": kCppUndefinedMacro}) 919 test_cpp_expr_optim("defined(A) || defined(B)", 920 "(|| (defined A) (defined B))") 921 test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", {"A": "1"}) 922 test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", {"B": "1"}) 923 test_cpp_expr_optim("defined(A) || defined(B)", "(defined A)", 924 {"B": kCppUndefinedMacro}) 925 test_cpp_expr_optim("defined(A) || defined(B)", "(int 0)", 926 {"A": kCppUndefinedMacro, "B": kCppUndefinedMacro}) 927 test_cpp_expr_optim("defined(A) && defined(B)", 928 "(&& (defined A) (defined B))") 929 test_cpp_expr_optim("defined(A) && defined(B)", 930 "(defined B)", {"A": "1"}) 931 test_cpp_expr_optim("defined(A) && defined(B)", 932 "(defined A)", {"B": "1"}) 933 test_cpp_expr_optim("defined(A) && defined(B)", "(int 0)", 934 {"B": kCppUndefinedMacro}) 935 test_cpp_expr_optim("defined(A) && defined(B)", 936 "(int 0)", {"A": kCppUndefinedMacro}) 937 test_cpp_expr_optim("A == 1 || defined(B)", 938 "(|| (== (ident A) (int 1)) (defined B))") 939 test_cpp_expr_optim( 940 "defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)", 941 "(|| (! (defined __GLIBC__)) (< (ident __GLIBC__) (int 2)))", 942 {"__KERNEL__": kCppUndefinedMacro}) 943 944 test_cpp_expr_source("0", "0") 945 test_cpp_expr_source("1", "1") 946 test_cpp_expr_source("1 && 1", "1 && 1") 947 test_cpp_expr_source("1 && 0", "1 && 0") 948 test_cpp_expr_source("0 && 1", "0 && 1") 949 test_cpp_expr_source("0 && 0", "0 && 0") 950 test_cpp_expr_source("1 || 1", "1 || 1") 951 test_cpp_expr_source("1 || 0", "1 || 0") 952 test_cpp_expr_source("0 || 1", "0 || 1") 953 test_cpp_expr_source("0 || 0", "0 || 0") 954 test_cpp_expr_source("EXAMPLE", "EXAMPLE") 955 test_cpp_expr_source("EXAMPLE - 3", "EXAMPLE - 3") 956 test_cpp_expr_source("defined(EXAMPLE)", "defined(EXAMPLE)") 957 test_cpp_expr_source("defined EXAMPLE", "defined(EXAMPLE)") 958 test_cpp_expr_source("A == 1 || defined(B)", "A == 1 || defined(B)") 959 960 961################################################################################ 962################################################################################ 963##### ##### 964##### C P P B L O C K ##### 965##### ##### 966################################################################################ 967################################################################################ 968 969 970class Block(object): 971 """A class used to model a block of input source text. 972 973 There are two block types: 974 - directive blocks: contain the tokens of a single pre-processor 975 directive (e.g. #if) 976 - text blocks, contain the tokens of non-directive blocks 977 978 The cpp parser class below will transform an input source file into a list 979 of Block objects (grouped in a BlockList object for convenience) 980 """ 981 982 def __init__(self, tokens, directive=None, lineno=0, identifier=None): 983 """Initialize a new block, if 'directive' is None, it is a text block. 984 985 NOTE: This automatically converts '#ifdef MACRO' into 986 '#if defined(MACRO)' and '#ifndef MACRO' into '#if !defined(MACRO)'. 987 """ 988 989 if directive == "ifdef": 990 tok = Token() 991 tok.id = tokDEFINED 992 tokens = [tok] + tokens 993 directive = "if" 994 995 elif directive == "ifndef": 996 tok1 = Token() 997 tok2 = Token() 998 tok1.id = tokNOT 999 tok2.id = tokDEFINED 1000 tokens = [tok1, tok2] + tokens 1001 directive = "if" 1002 1003 self.tokens = tokens 1004 self.directive = directive 1005 self.define_id = identifier 1006 if lineno > 0: 1007 self.lineno = lineno 1008 else: 1009 self.lineno = self.tokens[0].location.line 1010 1011 if self.isIf(): 1012 self.expr = CppExpr(self.tokens) 1013 1014 def isDirective(self): 1015 """Return True iff this is a directive block.""" 1016 return self.directive is not None 1017 1018 def isConditional(self): 1019 """Return True iff this is a conditional directive block.""" 1020 return self.directive in ["if", "ifdef", "ifndef", "else", "elif", 1021 "endif"] 1022 1023 def isDefine(self): 1024 """Return the macro name in a #define directive, or None otherwise.""" 1025 if self.directive != "define": 1026 return None 1027 return self.define_id 1028 1029 def isIf(self): 1030 """Return True iff this is an #if-like directive block.""" 1031 return self.directive in ["if", "ifdef", "ifndef", "elif"] 1032 1033 def isEndif(self): 1034 """Return True iff this is an #endif directive block.""" 1035 return self.directive == "endif" 1036 1037 def isInclude(self): 1038 """Check whether this is a #include directive. 1039 1040 If true, returns the corresponding file name (with brackets or 1041 double-qoutes). None otherwise. 1042 """ 1043 1044 if self.directive != "include": 1045 return None 1046 return ''.join([str(x) for x in self.tokens]) 1047 1048 @staticmethod 1049 def format_blocks(tokens, indent=0): 1050 """Return the formatted lines of strings with proper indentation.""" 1051 newline = True 1052 result = [] 1053 buf = '' 1054 i = 0 1055 while i < len(tokens): 1056 t = tokens[i] 1057 if t.id == '{': 1058 buf += ' {' 1059 result.append(strip_space(buf)) 1060 indent += 2 1061 buf = '' 1062 newline = True 1063 elif t.id == '}': 1064 indent -= 2 1065 if not newline: 1066 result.append(strip_space(buf)) 1067 # Look ahead to determine if it's the end of line. 1068 if (i + 1 < len(tokens) and 1069 (tokens[i+1].id == ';' or 1070 tokens[i+1].id in ['else', '__attribute__', 1071 '__attribute', '__packed'] or 1072 tokens[i+1].kind == TokenKind.IDENTIFIER)): 1073 buf = ' ' * indent + '}' 1074 newline = False 1075 else: 1076 result.append(' ' * indent + '}') 1077 buf = '' 1078 newline = True 1079 elif t.id == ';': 1080 result.append(strip_space(buf) + ';') 1081 buf = '' 1082 newline = True 1083 # We prefer a new line for each constant in enum. 1084 elif t.id == ',' and t.cursor.kind == CursorKind.ENUM_DECL: 1085 result.append(strip_space(buf) + ',') 1086 buf = '' 1087 newline = True 1088 else: 1089 if newline: 1090 buf += ' ' * indent + str(t) 1091 else: 1092 buf += ' ' + str(t) 1093 newline = False 1094 i += 1 1095 1096 if buf: 1097 result.append(strip_space(buf)) 1098 1099 return result, indent 1100 1101 def writeWithWarning(self, out, warning, left_count, repeat_count, indent): 1102 """Dump the current block with warnings.""" 1103 # removeWhiteSpace() will sometimes creates non-directive blocks 1104 # without any tokens. These come from blocks that only contained 1105 # empty lines and spaces. They should not be printed in the final 1106 # output, and then should not be counted for this operation. 1107 # 1108 if self.directive is None and not self.tokens: 1109 return left_count, indent 1110 1111 if self.directive: 1112 out.write(str(self) + '\n') 1113 left_count -= 1 1114 if left_count == 0: 1115 out.write(warning) 1116 left_count = repeat_count 1117 1118 else: 1119 lines, indent = self.format_blocks(self.tokens, indent) 1120 for line in lines: 1121 out.write(line + '\n') 1122 left_count -= 1 1123 if left_count == 0: 1124 out.write(warning) 1125 left_count = repeat_count 1126 1127 return left_count, indent 1128 1129 def __repr__(self): 1130 """Generate the representation of a given block.""" 1131 if self.directive: 1132 result = "#%s " % self.directive 1133 if self.isIf(): 1134 result += repr(self.expr) 1135 else: 1136 for tok in self.tokens: 1137 result += repr(tok) 1138 else: 1139 result = "" 1140 for tok in self.tokens: 1141 result += repr(tok) 1142 1143 return result 1144 1145 def __str__(self): 1146 """Generate the string representation of a given block.""" 1147 if self.directive: 1148 # "#if" 1149 if self.directive == "if": 1150 # small optimization to re-generate #ifdef and #ifndef 1151 e = self.expr.expr 1152 op = e[0] 1153 if op == "defined": 1154 result = "#ifdef %s" % e[1] 1155 elif op == "!" and e[1][0] == "defined": 1156 result = "#ifndef %s" % e[1][1] 1157 else: 1158 result = "#if " + str(self.expr) 1159 1160 # "#define" 1161 elif self.isDefine(): 1162 result = "#%s %s" % (self.directive, self.define_id) 1163 if self.tokens: 1164 result += " " 1165 expr = strip_space(' '.join([tok.id for tok in self.tokens])) 1166 # remove the space between name and '(' in function call 1167 result += re.sub(r'(\w+) \(', r'\1(', expr) 1168 1169 # "#error" 1170 # Concatenating tokens with a space separator, because they may 1171 # not be quoted and broken into several tokens 1172 elif self.directive == "error": 1173 result = "#error %s" % ' '.join([tok.id for tok in self.tokens]) 1174 1175 else: 1176 result = "#%s" % self.directive 1177 if self.tokens: 1178 result += " " 1179 result += ''.join([tok.id for tok in self.tokens]) 1180 else: 1181 lines, _ = self.format_blocks(self.tokens) 1182 result = '\n'.join(lines) 1183 1184 return result 1185 1186 1187class BlockList(object): 1188 """A convenience class used to hold and process a list of blocks. 1189 1190 It calls the cpp parser to get the blocks. 1191 """ 1192 1193 def __init__(self, blocks): 1194 self.blocks = blocks 1195 1196 def __len__(self): 1197 return len(self.blocks) 1198 1199 def __getitem__(self, n): 1200 return self.blocks[n] 1201 1202 def __repr__(self): 1203 return repr(self.blocks) 1204 1205 def __str__(self): 1206 result = '\n'.join([str(b) for b in self.blocks]) 1207 return result 1208 1209 def dump(self): 1210 """Dump all the blocks in current BlockList.""" 1211 print '##### BEGIN #####' 1212 for i, b in enumerate(self.blocks): 1213 print '### BLOCK %d ###' % i 1214 print b 1215 print '##### END #####' 1216 1217 def optimizeIf01(self): 1218 """Remove the code between #if 0 .. #endif in a BlockList.""" 1219 self.blocks = optimize_if01(self.blocks) 1220 1221 def optimizeMacros(self, macros): 1222 """Remove known defined and undefined macros from a BlockList.""" 1223 for b in self.blocks: 1224 if b.isIf(): 1225 b.expr.optimize(macros) 1226 1227 def removeMacroDefines(self, macros): 1228 """Remove known macro definitions from a BlockList.""" 1229 self.blocks = remove_macro_defines(self.blocks, macros) 1230 1231 def optimizeAll(self, macros): 1232 self.optimizeMacros(macros) 1233 self.optimizeIf01() 1234 return 1235 1236 def findIncludes(self): 1237 """Return the list of included files in a BlockList.""" 1238 result = [] 1239 for b in self.blocks: 1240 i = b.isInclude() 1241 if i: 1242 result.append(i) 1243 return result 1244 1245 def write(self, out): 1246 out.write(str(self)) 1247 1248 def writeWithWarning(self, out, warning, repeat_count): 1249 left_count = repeat_count 1250 indent = 0 1251 for b in self.blocks: 1252 left_count, indent = b.writeWithWarning(out, warning, left_count, 1253 repeat_count, indent) 1254 1255 def removeVarsAndFuncs(self, knownStatics=None): 1256 """Remove variable and function declarations. 1257 1258 All extern and static declarations corresponding to variable and 1259 function declarations are removed. We only accept typedefs and 1260 enum/structs/union declarations. 1261 1262 However, we keep the definitions corresponding to the set of known 1263 static inline functions in the set 'knownStatics', which is useful 1264 for optimized byteorder swap functions and stuff like that. 1265 """ 1266 1267 # NOTE: It's also removing function-like macros, such as __SYSCALL(...) 1268 # in uapi/asm-generic/unistd.h, or KEY_FIELD(...) in linux/bcache.h. 1269 # It could be problematic when we have function-like macros but without 1270 # '}' following them. It will skip all the tokens/blocks until seeing a 1271 # '}' as the function end. Fortunately we don't have such cases in the 1272 # current kernel headers. 1273 1274 # state = 0 => normal (i.e. LN + spaces) 1275 # state = 1 => typedef/struct encountered, ends with ";" 1276 # state = 2 => var declaration encountered, ends with ";" 1277 # state = 3 => func declaration encountered, ends with "}" 1278 1279 if knownStatics is None: 1280 knownStatics = set() 1281 state = 0 1282 depth = 0 1283 blocks2 = [] 1284 skipTokens = False 1285 for b in self.blocks: 1286 if b.isDirective(): 1287 blocks2.append(b) 1288 else: 1289 n = len(b.tokens) 1290 i = 0 1291 if skipTokens: 1292 first = n 1293 else: 1294 first = 0 1295 while i < n: 1296 tok = b.tokens[i] 1297 tokid = tok.id 1298 # If we are not looking for the start of a new 1299 # type/var/func, then skip over tokens until 1300 # we find our terminator, managing the depth of 1301 # accolades as we go. 1302 if state > 0: 1303 terminator = False 1304 if tokid == '{': 1305 depth += 1 1306 elif tokid == '}': 1307 if depth > 0: 1308 depth -= 1 1309 if (depth == 0) and (state == 3): 1310 terminator = True 1311 elif tokid == ';' and depth == 0: 1312 terminator = True 1313 1314 if terminator: 1315 # we found the terminator 1316 state = 0 1317 if skipTokens: 1318 skipTokens = False 1319 first = i + 1 1320 1321 i += 1 1322 continue 1323 1324 # Is it a new type definition, then start recording it 1325 if tok.id in ['struct', 'typedef', 'enum', 'union', 1326 '__extension__']: 1327 state = 1 1328 i += 1 1329 continue 1330 1331 # Is it a variable or function definition. If so, first 1332 # try to determine which type it is, and also extract 1333 # its name. 1334 # 1335 # We're going to parse the next tokens of the same block 1336 # until we find a semi-column or a left parenthesis. 1337 # 1338 # The semi-column corresponds to a variable definition, 1339 # the left-parenthesis to a function definition. 1340 # 1341 # We also assume that the var/func name is the last 1342 # identifier before the terminator. 1343 # 1344 j = i + 1 1345 ident = "" 1346 while j < n: 1347 tokid = b.tokens[j].id 1348 if tokid == '(': # a function declaration 1349 state = 3 1350 break 1351 elif tokid == ';': # a variable declaration 1352 state = 2 1353 break 1354 if b.tokens[j].kind == TokenKind.IDENTIFIER: 1355 ident = b.tokens[j].id 1356 j += 1 1357 1358 if j >= n: 1359 # This can only happen when the declaration 1360 # does not end on the current block (e.g. with 1361 # a directive mixed inside it. 1362 # 1363 # We will treat it as malformed because 1364 # it's very hard to recover from this case 1365 # without making our parser much more 1366 # complex. 1367 # 1368 logging.debug("### skip unterminated static '%s'", 1369 ident) 1370 break 1371 1372 if ident in knownStatics: 1373 logging.debug("### keep var/func '%s': %s", ident, 1374 repr(b.tokens[i:j])) 1375 else: 1376 # We're going to skip the tokens for this declaration 1377 logging.debug("### skip var/func '%s': %s", ident, 1378 repr(b.tokens[i:j])) 1379 if i > first: 1380 blocks2.append(Block(b.tokens[first:i])) 1381 skipTokens = True 1382 first = n 1383 1384 i += 1 1385 1386 if i > first: 1387 # print "### final '%s'" % repr(b.tokens[first:i]) 1388 blocks2.append(Block(b.tokens[first:i])) 1389 1390 self.blocks = blocks2 1391 1392 def replaceTokens(self, replacements): 1393 """Replace tokens according to the given dict.""" 1394 for b in self.blocks: 1395 made_change = False 1396 if b.isInclude() is None: 1397 for tok in b.tokens: 1398 if tok.kind == TokenKind.IDENTIFIER: 1399 if tok.id in replacements: 1400 tok.id = replacements[tok.id] 1401 made_change = True 1402 1403 if b.isDefine() and b.define_id in replacements: 1404 b.define_id = replacements[b.define_id] 1405 made_change = True 1406 1407 if made_change and b.isIf(): 1408 # Keep 'expr' in sync with 'tokens'. 1409 b.expr = CppExpr(b.tokens) 1410 1411 1412def strip_space(s): 1413 """Strip out redundant space in a given string.""" 1414 1415 # NOTE: It ought to be more clever to not destroy spaces in string tokens. 1416 replacements = {' . ': '.', 1417 ' [': '[', 1418 '[ ': '[', 1419 ' ]': ']', 1420 '( ': '(', 1421 ' )': ')', 1422 ' ,': ',', 1423 '# ': '#', 1424 ' ;': ';', 1425 '~ ': '~', 1426 ' -> ': '->'} 1427 result = s 1428 for r in replacements: 1429 result = result.replace(r, replacements[r]) 1430 1431 # Remove the space between function name and the parenthesis. 1432 result = re.sub(r'(\w+) \(', r'\1(', result) 1433 return result 1434 1435 1436class BlockParser(object): 1437 """A class that converts an input source file into a BlockList object.""" 1438 1439 def __init__(self, tokzer=None): 1440 """Initialize a block parser. 1441 1442 The input source is provided through a Tokenizer object. 1443 """ 1444 self._tokzer = tokzer 1445 self._parsed = False 1446 1447 @property 1448 def parsed(self): 1449 return self._parsed 1450 1451 @staticmethod 1452 def _short_extent(extent): 1453 return '%d:%d - %d:%d' % (extent.start.line, extent.start.column, 1454 extent.end.line, extent.end.column) 1455 1456 def getBlocks(self, tokzer=None): 1457 """Return all the blocks parsed.""" 1458 1459 def consume_extent(i, tokens, extent=None, detect_change=False): 1460 """Return tokens that belong to the given extent. 1461 1462 It parses all the tokens that follow tokens[i], until getting out 1463 of the extent. When detect_change is True, it may terminate early 1464 when detecting preprocessing directives inside the extent. 1465 """ 1466 1467 result = [] 1468 if extent is None: 1469 extent = tokens[i].cursor.extent 1470 1471 while i < len(tokens) and tokens[i].location in extent: 1472 t = tokens[i] 1473 if debugBlockParser: 1474 print ' ' * 2, t.id, t.kind, t.cursor.kind 1475 if (detect_change and t.cursor.extent != extent and 1476 t.cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE): 1477 break 1478 result.append(t) 1479 i += 1 1480 return (i, result) 1481 1482 def consume_line(i, tokens): 1483 """Return tokens that follow tokens[i] in the same line.""" 1484 result = [] 1485 line = tokens[i].location.line 1486 while i < len(tokens) and tokens[i].location.line == line: 1487 if tokens[i].cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE: 1488 break 1489 result.append(tokens[i]) 1490 i += 1 1491 return (i, result) 1492 1493 if tokzer is None: 1494 tokzer = self._tokzer 1495 tokens = tokzer.tokens 1496 1497 blocks = [] 1498 buf = [] 1499 i = 0 1500 1501 while i < len(tokens): 1502 t = tokens[i] 1503 cursor = t.cursor 1504 1505 if debugBlockParser: 1506 print ("%d: Processing [%s], kind=[%s], cursor=[%s], " 1507 "extent=[%s]" % (t.location.line, t.spelling, t.kind, 1508 cursor.kind, 1509 self._short_extent(cursor.extent))) 1510 1511 if cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE: 1512 if buf: 1513 blocks.append(Block(buf)) 1514 buf = [] 1515 1516 j = i 1517 if j + 1 >= len(tokens): 1518 raise BadExpectedToken("### BAD TOKEN at %s" % (t.location)) 1519 directive = tokens[j+1].id 1520 1521 if directive == 'define': 1522 if i+2 >= len(tokens): 1523 raise BadExpectedToken("### BAD TOKEN at %s" % 1524 (tokens[i].location)) 1525 1526 # Skip '#' and 'define'. 1527 extent = tokens[i].cursor.extent 1528 i += 2 1529 id = '' 1530 # We need to separate the id from the remaining of 1531 # the line, especially for the function-like macro. 1532 if (i + 1 < len(tokens) and tokens[i+1].id == '(' and 1533 (tokens[i].location.column + len(tokens[i].spelling) == 1534 tokens[i+1].location.column)): 1535 while i < len(tokens): 1536 id += tokens[i].id 1537 if tokens[i].spelling == ')': 1538 i += 1 1539 break 1540 i += 1 1541 else: 1542 id += tokens[i].id 1543 # Advance to the next token that follows the macro id 1544 i += 1 1545 1546 (i, ret) = consume_extent(i, tokens, extent=extent) 1547 blocks.append(Block(ret, directive=directive, 1548 lineno=t.location.line, identifier=id)) 1549 1550 else: 1551 (i, ret) = consume_extent(i, tokens) 1552 blocks.append(Block(ret[2:], directive=directive, 1553 lineno=t.location.line)) 1554 1555 elif cursor.kind == CursorKind.INCLUSION_DIRECTIVE: 1556 if buf: 1557 blocks.append(Block(buf)) 1558 buf = [] 1559 directive = tokens[i+1].id 1560 (i, ret) = consume_extent(i, tokens) 1561 1562 blocks.append(Block(ret[2:], directive=directive, 1563 lineno=t.location.line)) 1564 1565 elif cursor.kind == CursorKind.VAR_DECL: 1566 if buf: 1567 blocks.append(Block(buf)) 1568 buf = [] 1569 1570 (i, ret) = consume_extent(i, tokens, detect_change=True) 1571 buf += ret 1572 1573 elif cursor.kind == CursorKind.FUNCTION_DECL: 1574 if buf: 1575 blocks.append(Block(buf)) 1576 buf = [] 1577 1578 (i, ret) = consume_extent(i, tokens, detect_change=True) 1579 buf += ret 1580 1581 else: 1582 (i, ret) = consume_line(i, tokens) 1583 buf += ret 1584 1585 if buf: 1586 blocks.append(Block(buf)) 1587 1588 # _parsed=True indicates a successful parsing, although may result an 1589 # empty BlockList. 1590 self._parsed = True 1591 1592 return BlockList(blocks) 1593 1594 def parse(self, tokzer): 1595 return self.getBlocks(tokzer) 1596 1597 def parseFile(self, path): 1598 return self.getBlocks(CppFileTokenizer(path)) 1599 1600 1601def test_block_parsing(lines, expected): 1602 """Helper method to test the correctness of BlockParser.parse.""" 1603 blocks = BlockParser().parse(CppStringTokenizer('\n'.join(lines))) 1604 if len(blocks) != len(expected): 1605 raise BadExpectedToken("BlockParser.parse() returned '%s' expecting " 1606 "'%s'" % (str(blocks), repr(expected))) 1607 for n in range(len(blocks)): 1608 if str(blocks[n]) != expected[n]: 1609 raise BadExpectedToken("BlockParser.parse()[%d] is '%s', " 1610 "expecting '%s'" % (n, str(blocks[n]), 1611 expected[n])) 1612 1613 1614def test_BlockParser(): 1615 test_block_parsing(["#error hello"], ["#error hello"]) 1616 test_block_parsing(["foo", "", "bar"], ["foo bar"]) 1617 1618 # We currently cannot handle the following case with libclang properly. 1619 # Fortunately it doesn't appear in current headers. 1620 # test_block_parsing(["foo", " # ", "bar"], ["foo", "bar"]) 1621 1622 test_block_parsing(["foo", 1623 " # /* ahah */ if defined(__KERNEL__) /* more */", 1624 "bar", "#endif"], 1625 ["foo", "#ifdef __KERNEL__", "bar", "#endif"]) 1626 1627 1628################################################################################ 1629################################################################################ 1630##### ##### 1631##### B L O C K L I S T O P T I M I Z A T I O N ##### 1632##### ##### 1633################################################################################ 1634################################################################################ 1635 1636 1637def remove_macro_defines(blocks, excludedMacros=None): 1638 """Remove macro definitions like #define <macroName> ....""" 1639 if excludedMacros is None: 1640 excludedMacros = set() 1641 result = [] 1642 for b in blocks: 1643 macroName = b.isDefine() 1644 if macroName is None or macroName not in excludedMacros: 1645 result.append(b) 1646 1647 return result 1648 1649 1650def find_matching_endif(blocks, i): 1651 """Traverse the blocks to find out the matching #endif.""" 1652 n = len(blocks) 1653 depth = 1 1654 while i < n: 1655 if blocks[i].isDirective(): 1656 dir_ = blocks[i].directive 1657 if dir_ in ["if", "ifndef", "ifdef"]: 1658 depth += 1 1659 elif depth == 1 and dir_ in ["else", "elif"]: 1660 return i 1661 elif dir_ == "endif": 1662 depth -= 1 1663 if depth == 0: 1664 return i 1665 i += 1 1666 return i 1667 1668 1669def optimize_if01(blocks): 1670 """Remove the code between #if 0 .. #endif in a list of CppBlocks.""" 1671 i = 0 1672 n = len(blocks) 1673 result = [] 1674 while i < n: 1675 j = i 1676 while j < n and not blocks[j].isIf(): 1677 j += 1 1678 if j > i: 1679 logging.debug("appending lines %d to %d", blocks[i].lineno, 1680 blocks[j-1].lineno) 1681 result += blocks[i:j] 1682 if j >= n: 1683 break 1684 expr = blocks[j].expr 1685 r = expr.toInt() 1686 if r is None: 1687 result.append(blocks[j]) 1688 i = j + 1 1689 continue 1690 1691 if r == 0: 1692 # if 0 => skip everything until the corresponding #endif 1693 j = find_matching_endif(blocks, j + 1) 1694 if j >= n: 1695 # unterminated #if 0, finish here 1696 break 1697 dir_ = blocks[j].directive 1698 if dir_ == "endif": 1699 logging.debug("remove 'if 0' .. 'endif' (lines %d to %d)", 1700 blocks[i].lineno, blocks[j].lineno) 1701 i = j + 1 1702 elif dir_ == "else": 1703 # convert 'else' into 'if 1' 1704 logging.debug("convert 'if 0' .. 'else' into 'if 1' (lines %d " 1705 "to %d)", blocks[i].lineno, blocks[j-1].lineno) 1706 blocks[j].directive = "if" 1707 blocks[j].expr = CppExpr(CppStringTokenizer("1").tokens) 1708 i = j 1709 elif dir_ == "elif": 1710 # convert 'elif' into 'if' 1711 logging.debug("convert 'if 0' .. 'elif' into 'if'") 1712 blocks[j].directive = "if" 1713 i = j 1714 continue 1715 1716 # if 1 => find corresponding endif and remove/transform them 1717 k = find_matching_endif(blocks, j + 1) 1718 if k >= n: 1719 # unterminated #if 1, finish here 1720 logging.debug("unterminated 'if 1'") 1721 result += blocks[j+1:k] 1722 break 1723 1724 dir_ = blocks[k].directive 1725 if dir_ == "endif": 1726 logging.debug("convert 'if 1' .. 'endif' (lines %d to %d)", 1727 blocks[j].lineno, blocks[k].lineno) 1728 result += optimize_if01(blocks[j+1:k]) 1729 i = k + 1 1730 elif dir_ == "else": 1731 # convert 'else' into 'if 0' 1732 logging.debug("convert 'if 1' .. 'else' (lines %d to %d)", 1733 blocks[j].lineno, blocks[k].lineno) 1734 result += optimize_if01(blocks[j+1:k]) 1735 blocks[k].directive = "if" 1736 blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens) 1737 i = k 1738 elif dir_ == "elif": 1739 # convert 'elif' into 'if 0' 1740 logging.debug("convert 'if 1' .. 'elif' (lines %d to %d)", 1741 blocks[j].lineno, blocks[k].lineno) 1742 result += optimize_if01(blocks[j+1:k]) 1743 blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens) 1744 i = k 1745 return result 1746 1747 1748def test_optimizeAll(): 1749 text = """\ 1750#if 1 1751#define GOOD_1 1752#endif 1753#if 0 1754#define BAD_2 1755#define BAD_3 1756#endif 1757 1758#if 1 1759#define GOOD_2 1760#else 1761#define BAD_4 1762#endif 1763 1764#if 0 1765#define BAD_5 1766#else 1767#define GOOD_3 1768#endif 1769 1770#if defined(__KERNEL__) 1771#define BAD_KERNEL 1772#endif 1773 1774#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) 1775#define X 1776#endif 1777 1778#ifndef SIGRTMAX 1779#define SIGRTMAX 123 1780#endif /* SIGRTMAX */ 1781 1782#if 0 1783#if 1 1784#define BAD_6 1785#endif 1786#endif\ 1787""" 1788 1789 expected = """\ 1790#define GOOD_1 1791#define GOOD_2 1792#define GOOD_3 1793#if !defined(__GLIBC__) || __GLIBC__ < 2 1794#define X 1795#endif 1796#ifndef __SIGRTMAX 1797#define __SIGRTMAX 123 1798#endif\ 1799""" 1800 1801 out = utils.StringOutput() 1802 blocks = BlockParser().parse(CppStringTokenizer(text)) 1803 blocks.replaceTokens(kernel_token_replacements) 1804 blocks.optimizeAll({"__KERNEL__": kCppUndefinedMacro}) 1805 blocks.write(out) 1806 if out.get() != expected: 1807 print "[FAIL]: macro optimization failed\n" 1808 print "<<<< expecting '", 1809 print expected, 1810 print "'\n>>>> result '", 1811 print out.get(), 1812 print "'\n----" 1813 global failure_count 1814 failure_count += 1 1815 1816 1817def runUnitTests(): 1818 """Always run all unit tests for this program.""" 1819 test_CppTokenizer() 1820 test_CppExpr() 1821 test_optimizeAll() 1822 test_BlockParser() 1823 1824 1825failure_count = 0 1826runUnitTests() 1827if failure_count != 0: 1828 utils.panic("Unit tests failed in cpp.py.\n") 1829