1#!/usr/bin/env python3 2"""A glorified C pre-processor parser.""" 3 4import ctypes 5import logging 6import os 7import re 8import site 9import unittest 10import utils 11 12top = os.getenv('ANDROID_BUILD_TOP') 13if top is None: 14 utils.panic('ANDROID_BUILD_TOP not set.\n') 15 16# Set up the env vars for libclang. 17site.addsitedir(os.path.join(top, 'prebuilts/clang/host/linux-x86/clang-stable/lib/python3/site-packages/')) 18 19import clang.cindex 20from clang.cindex import conf 21from clang.cindex import Cursor 22from clang.cindex import CursorKind 23from clang.cindex import SourceLocation 24from clang.cindex import SourceRange 25from clang.cindex import TokenGroup 26from clang.cindex import TokenKind 27from clang.cindex import TranslationUnit 28 29# Set up LD_LIBRARY_PATH to include libclang.so, libLLVM.so, and etc. 30# Note that setting LD_LIBRARY_PATH with os.putenv() sometimes doesn't help. 31clang.cindex.Config.set_library_file(os.path.join(top, 'prebuilts/clang/host/linux-x86/clang-stable/lib/libclang.so')) 32 33from defaults import * 34 35 36debugBlockParser = False 37debugCppExpr = False 38debugOptimIf01 = False 39 40############################################################################### 41############################################################################### 42##### ##### 43##### C P P T O K E N S ##### 44##### ##### 45############################################################################### 46############################################################################### 47 48# the list of supported C-preprocessor tokens 49# plus a couple of C tokens as well 50tokEOF = "\0" 51tokLN = "\n" 52tokSTRINGIFY = "#" 53tokCONCAT = "##" 54tokLOGICAND = "&&" 55tokLOGICOR = "||" 56tokSHL = "<<" 57tokSHR = ">>" 58tokEQUAL = "==" 59tokNEQUAL = "!=" 60tokLT = "<" 61tokLTE = "<=" 62tokGT = ">" 63tokGTE = ">=" 64tokELLIPSIS = "..." 65tokSPACE = " " 66tokDEFINED = "defined" 67tokLPAREN = "(" 68tokRPAREN = ")" 69tokNOT = "!" 
tokPLUS = "+"
tokMINUS = "-"
tokMULTIPLY = "*"
tokDIVIDE = "/"
tokMODULUS = "%"
tokBINAND = "&"
tokBINOR = "|"
tokBINXOR = "^"
tokCOMMA = ","
tokLBRACE = "{"
tokRBRACE = "}"
tokARROW = "->"
tokINCREMENT = "++"
tokDECREMENT = "--"
# Placeholder names for token categories rather than literal spellings.
tokNUMBER = "<number>"
tokIDENT = "<ident>"
tokSTRING = "<string>"


class Token(clang.cindex.Token):
    """A class that represents one token after parsing.

    It inherits the class in libclang, with an extra id property to hold the
    new spelling of the token. The spelling property in the base class is
    defined as read-only. New names after macro instantiation are saved in
    their ids now. It also facilitates the renaming of directive optimizations
    like replacing 'ifndef X' with 'if !defined(X)'.

    It also overrides the cursor property of the base class. Because the one
    in libclang always queries based on a single token, which usually doesn't
    hold useful information. The cursor in this class can be set by calling
    CppTokenizer.getTokensWithCursors(). Otherwise it returns the one in the
    base class.
    """

    def __init__(self, tu=None, group=None, int_data=None, ptr_data=None,
                 cursor=None):
        """Create a token, optionally bound to libclang token data.

        Args:
            tu: the translation unit the token belongs to, or None for a
                synthetic token (e.g. the 'defined' inserted for #ifdef).
            group: the TokenGroup that keeps the token memory alive.
            int_data: raw libclang token data; skipped when None.
            ptr_data: raw libclang token data.
            cursor: a pre-computed cursor for this token, or None.
        """
        clang.cindex.Token.__init__(self)
        self._id = None
        self._tu = tu
        self._group = group
        self._cursor = cursor
        # self.int_data and self.ptr_data are from the base class. But
        # self.int_data doesn't accept a None value.
        if int_data is not None:
            self.int_data = int_data
        self.ptr_data = ptr_data

    @property
    def id(self):
        """Name of the token: the overridden id, else the clang spelling."""
        if self._id is None:
            return self.spelling
        return self._id

    @id.setter
    def id(self, new_id):
        """Setting name of the token."""
        self._id = new_id

    @property
    def cursor(self):
        """Cursor of the token.

        Returns the cursor supplied by the tokenizer when there is one.
        Otherwise the base-class property is evaluated for this token and
        the result cached. A synthetic token that has no translation unit
        cannot be queried through libclang, so None is returned for it.
        """
        if self._cursor is None and self._tu is not None:
            # Evaluate the base-class property for *this* instance. Storing
            # clang.cindex.Token.cursor itself would hand callers a
            # 'property' object, which has no 'kind', 'extent', etc.
            self._cursor = clang.cindex.Token.cursor.fget(self)
        return self._cursor

    @cursor.setter
    def cursor(self, new_cursor):
        self._cursor = new_cursor

    def __repr__(self):
        if self.id == 'defined':
            return self.id
        elif self.kind == TokenKind.IDENTIFIER:
            return "(ident %s)" % self.id

        return self.id

    def __str__(self):
        return self.id


class BadExpectedToken(Exception):
    """An exception that will be raised for unexpected tokens."""
    pass


class UnparseableStruct(Exception):
    """An exception that will be raised for structs that cannot be parsed."""
    pass


# The __contains__ function in libclang SourceRange class contains a bug. It
# gives wrong result when dealing with single line range.
165# Bug filed with upstream: 166# http://llvm.org/bugs/show_bug.cgi?id=22243, http://reviews.llvm.org/D7277 167def SourceRange__contains__(self, other): 168 """Determine if a given location is inside the range.""" 169 if not isinstance(other, SourceLocation): 170 return False 171 if other.file is None and self.start.file is None: 172 pass 173 elif (self.start.file.name != other.file.name or 174 other.file.name != self.end.file.name): 175 # same file name 176 return False 177 # same file, in between lines 178 if self.start.line < other.line < self.end.line: 179 return True 180 # same file, same line 181 elif self.start.line == other.line == self.end.line: 182 if self.start.column <= other.column <= self.end.column: 183 return True 184 elif self.start.line == other.line: 185 # same file first line 186 if self.start.column <= other.column: 187 return True 188 elif other.line == self.end.line: 189 # same file last line 190 if other.column <= self.end.column: 191 return True 192 return False 193 194 195SourceRange.__contains__ = SourceRange__contains__ 196 197 198################################################################################ 199################################################################################ 200##### ##### 201##### C P P T O K E N I Z E R ##### 202##### ##### 203################################################################################ 204################################################################################ 205 206 207class CppTokenizer(object): 208 """A tokenizer that converts some input text into a list of tokens. 209 210 It calls libclang's tokenizer to get the parsed tokens. In addition, it 211 updates the cursor property in each token after parsing, by calling 212 getTokensWithCursors(). 
213 """ 214 215 clang_flags = ['-E', '-x', 'c'] 216 options = TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD 217 218 def __init__(self): 219 """Initialize a new CppTokenizer object.""" 220 self._indexer = clang.cindex.Index.create() 221 self._tu = None 222 self._index = 0 223 self.tokens = None 224 225 def _getTokensWithCursors(self): 226 """Helper method to return all tokens with their cursors. 227 228 The cursor property in a clang Token doesn't provide enough 229 information. Because it is queried based on single token each time 230 without any context, i.e. via calling conf.lib.clang_annotateTokens() 231 with only one token given. So we often see 'INVALID_FILE' in one 232 token's cursor. In this function it passes all the available tokens 233 to get more informative cursors. 234 """ 235 236 tokens_memory = ctypes.POINTER(clang.cindex.Token)() 237 tokens_count = ctypes.c_uint() 238 239 conf.lib.clang_tokenize(self._tu, self._tu.cursor.extent, 240 ctypes.byref(tokens_memory), 241 ctypes.byref(tokens_count)) 242 243 count = int(tokens_count.value) 244 245 # If we get no tokens, no memory was allocated. Be sure not to return 246 # anything and potentially call a destructor on nothing. 247 if count < 1: 248 return 249 250 cursors = (Cursor * count)() 251 cursors_memory = ctypes.cast(cursors, ctypes.POINTER(Cursor)) 252 253 conf.lib.clang_annotateTokens(self._tu, tokens_memory, count, 254 cursors_memory) 255 256 tokens_array = ctypes.cast( 257 tokens_memory, 258 ctypes.POINTER(clang.cindex.Token * count)).contents 259 token_group = TokenGroup(self._tu, tokens_memory, tokens_count) 260 261 tokens = [] 262 for i in range(0, count): 263 token = Token(self._tu, token_group, 264 int_data=tokens_array[i].int_data, 265 ptr_data=tokens_array[i].ptr_data, 266 cursor=cursors[i]) 267 # We only want non-comment tokens. 
268 if token.kind != TokenKind.COMMENT: 269 tokens.append(token) 270 271 return tokens 272 273 def parseString(self, lines): 274 """Parse a list of text lines into a BlockList object.""" 275 file_ = 'no-filename-available.c' 276 self._tu = self._indexer.parse(file_, self.clang_flags, 277 unsaved_files=[(file_, lines)], 278 options=self.options) 279 self.tokens = self._getTokensWithCursors() 280 281 def parseFile(self, file_): 282 """Parse a file into a BlockList object.""" 283 self._tu = self._indexer.parse(file_, self.clang_flags, 284 options=self.options) 285 self.tokens = self._getTokensWithCursors() 286 287 def nextToken(self): 288 """Return next token from the list.""" 289 if self._index < len(self.tokens): 290 t = self.tokens[self._index] 291 self._index += 1 292 return t 293 else: 294 return None 295 296 297class CppStringTokenizer(CppTokenizer): 298 """A CppTokenizer derived class that accepts a string of text as input.""" 299 300 def __init__(self, line): 301 CppTokenizer.__init__(self) 302 self.parseString(line) 303 304 305class CppFileTokenizer(CppTokenizer): 306 """A CppTokenizer derived class that accepts a file as input.""" 307 308 def __init__(self, file_): 309 CppTokenizer.__init__(self) 310 self.parseFile(file_) 311 312 313# Unit testing 314# 315class CppTokenizerTests(unittest.TestCase): 316 """CppTokenizer tests.""" 317 318 def get_tokens(self, token_string, line_col=False): 319 tokens = CppStringTokenizer(token_string) 320 token_list = [] 321 while True: 322 token = tokens.nextToken() 323 if not token: 324 break 325 if line_col: 326 token_list.append((token.id, token.location.line, 327 token.location.column)) 328 else: 329 token_list.append(token.id) 330 return token_list 331 332 def test_hash(self): 333 self.assertEqual(self.get_tokens("#an/example && (01923_xy)"), 334 ["#", "an", "/", "example", tokLOGICAND, tokLPAREN, 335 "01923_xy", tokRPAREN]) 336 337 def test_parens(self): 338 self.assertEqual(self.get_tokens("FOO(BAR) && defined(BAZ)"), 
339 ["FOO", tokLPAREN, "BAR", tokRPAREN, tokLOGICAND, 340 "defined", tokLPAREN, "BAZ", tokRPAREN]) 341 342 def test_comment(self): 343 self.assertEqual(self.get_tokens("/*\n#\n*/"), []) 344 345 def test_line_cross(self): 346 self.assertEqual(self.get_tokens("first\nsecond"), ["first", "second"]) 347 348 def test_line_cross_line_col(self): 349 self.assertEqual(self.get_tokens("first second\n third", True), 350 [("first", 1, 1), ("second", 1, 7), ("third", 2, 3)]) 351 352 def test_comment_line_col(self): 353 self.assertEqual(self.get_tokens("boo /* what the\nhell */", True), 354 [("boo", 1, 1)]) 355 356 def test_escapes(self): 357 self.assertEqual(self.get_tokens("an \\\n example", True), 358 [("an", 1, 1), ("example", 2, 2)]) 359 360 361################################################################################ 362################################################################################ 363##### ##### 364##### C P P E X P R E S S I O N S ##### 365##### ##### 366################################################################################ 367################################################################################ 368 369 370class CppExpr(object): 371 """A class that models the condition of #if directives into an expr tree. 372 373 Each node in the tree is of the form (op, arg) or (op, arg1, arg2) where 374 "op" is a string describing the operation 375 """ 376 377 unaries = ["!", "~"] 378 binaries = ["+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%", 379 "&", "|", "^", "<<", ">>", "==", "!=", "?", ":"] 380 precedences = { 381 "?": 1, ":": 1, 382 "||": 2, 383 "&&": 3, 384 "|": 4, 385 "^": 5, 386 "&": 6, 387 "==": 7, "!=": 7, 388 "<": 8, "<=": 8, ">": 8, ">=": 8, 389 "<<": 9, ">>": 9, 390 "+": 10, "-": 10, 391 "*": 11, "/": 11, "%": 11, 392 "!": 12, "~": 12 393 } 394 395 def __init__(self, tokens): 396 """Initialize a CppExpr. 
'tokens' must be a CppToken list.""" 397 self.tokens = tokens 398 self._num_tokens = len(tokens) 399 self._index = 0 400 401 if debugCppExpr: 402 print("CppExpr: trying to parse %s" % repr(tokens)) 403 self.expr = self.parseExpression(0) 404 if debugCppExpr: 405 print("CppExpr: got " + repr(self.expr)) 406 if self._index != self._num_tokens: 407 self.throw(BadExpectedToken, "crap at end of input (%d != %d): %s" 408 % (self._index, self._num_tokens, repr(tokens))) 409 410 def throw(self, exception, msg): 411 if self._index < self._num_tokens: 412 tok = self.tokens[self._index] 413 print("%d:%d: %s" % (tok.location.line, tok.location.column, msg)) 414 else: 415 print("EOF: %s" % msg) 416 raise exception(msg) 417 418 def expectId(self, id): 419 """Check that a given token id is at the current position.""" 420 token = self.tokens[self._index] 421 if self._index >= self._num_tokens or token.id != id: 422 self.throw(BadExpectedToken, 423 "### expecting '%s' in expression, got '%s'" % ( 424 id, token.id)) 425 self._index += 1 426 427 def is_decimal(self): 428 token = self.tokens[self._index].id 429 if token[-1] in "ULul": 430 token = token[:-1] 431 try: 432 val = int(token, 10) 433 self._index += 1 434 return ('int', val) 435 except ValueError: 436 return None 437 438 def is_octal(self): 439 token = self.tokens[self._index].id 440 if token[-1] in "ULul": 441 token = token[:-1] 442 if len(token) < 2 or token[0] != '0': 443 return None 444 try: 445 val = int(token, 8) 446 self._index += 1 447 return ('oct', val) 448 except ValueError: 449 return None 450 451 def is_hexadecimal(self): 452 token = self.tokens[self._index].id 453 if token[-1] in "ULul": 454 token = token[:-1] 455 if len(token) < 3 or (token[:2] != '0x' and token[:2] != '0X'): 456 return None 457 try: 458 val = int(token, 16) 459 self._index += 1 460 return ('hex', val) 461 except ValueError: 462 return None 463 464 def is_integer(self): 465 if self.tokens[self._index].kind != TokenKind.LITERAL: 466 return None 
467 468 c = self.is_hexadecimal() 469 if c: 470 return c 471 472 c = self.is_octal() 473 if c: 474 return c 475 476 c = self.is_decimal() 477 if c: 478 return c 479 480 return None 481 482 def is_number(self): 483 t = self.tokens[self._index] 484 if t.id == tokMINUS and self._index + 1 < self._num_tokens: 485 self._index += 1 486 c = self.is_integer() 487 if c: 488 op, val = c 489 return (op, -val) 490 if t.id == tokPLUS and self._index + 1 < self._num_tokens: 491 self._index += 1 492 c = self.is_integer() 493 if c: 494 return c 495 496 return self.is_integer() 497 498 def is_defined(self): 499 t = self.tokens[self._index] 500 if t.id != tokDEFINED: 501 return None 502 503 # We have the defined keyword, check the rest. 504 self._index += 1 505 used_parens = False 506 if (self._index < self._num_tokens and 507 self.tokens[self._index].id == tokLPAREN): 508 used_parens = True 509 self._index += 1 510 511 if self._index >= self._num_tokens: 512 self.throw(BadExpectedToken, 513 "### 'defined' must be followed by macro name or left " 514 "paren") 515 516 t = self.tokens[self._index] 517 if t.kind != TokenKind.IDENTIFIER: 518 self.throw(BadExpectedToken, 519 "### 'defined' must be followed by macro name") 520 521 self._index += 1 522 if used_parens: 523 self.expectId(tokRPAREN) 524 525 return ("defined", t.id) 526 527 def is_call_or_ident(self): 528 if self._index >= self._num_tokens: 529 return None 530 531 t = self.tokens[self._index] 532 if t.kind != TokenKind.IDENTIFIER: 533 return None 534 535 name = t.id 536 537 self._index += 1 538 if (self._index >= self._num_tokens or 539 self.tokens[self._index].id != tokLPAREN): 540 return ("ident", name) 541 542 params = [] 543 depth = 1 544 self._index += 1 545 j = self._index 546 while self._index < self._num_tokens: 547 id = self.tokens[self._index].id 548 if id == tokLPAREN: 549 depth += 1 550 elif depth == 1 and (id == tokCOMMA or id == tokRPAREN): 551 k = self._index 552 param = self.tokens[j:k] 553 params.append(param) 
554 if id == tokRPAREN: 555 break 556 j = self._index + 1 557 elif id == tokRPAREN: 558 depth -= 1 559 self._index += 1 560 561 if self._index >= self._num_tokens: 562 return None 563 564 self._index += 1 565 return ("call", (name, params)) 566 567 # Implements the "precedence climbing" algorithm from 568 # http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm. 569 # The "classic" algorithm would be fine if we were using a tool to 570 # generate the parser, but we're not. Dijkstra's "shunting yard" 571 # algorithm hasn't been necessary yet. 572 573 def parseExpression(self, minPrecedence): 574 if self._index >= self._num_tokens: 575 return None 576 577 node = self.parsePrimary() 578 while (self.token() and self.isBinary(self.token()) and 579 self.precedence(self.token()) >= minPrecedence): 580 op = self.token() 581 self.nextToken() 582 rhs = self.parseExpression(self.precedence(op) + 1) 583 node = (op.id, node, rhs) 584 585 return node 586 587 def parsePrimary(self): 588 op = self.token() 589 if self.isUnary(op): 590 self.nextToken() 591 return (op.id, self.parseExpression(self.precedence(op))) 592 593 primary = None 594 if op.id == tokLPAREN: 595 self.nextToken() 596 primary = self.parseExpression(0) 597 self.expectId(tokRPAREN) 598 elif op.id == "?": 599 self.nextToken() 600 primary = self.parseExpression(0) 601 self.expectId(":") 602 elif op.id == '+' or op.id == '-' or op.kind == TokenKind.LITERAL: 603 primary = self.is_number() 604 # Checking for 'defined' needs to come first now because 'defined' is 605 # recognized as IDENTIFIER. 
606 elif op.id == tokDEFINED: 607 primary = self.is_defined() 608 elif op.kind == TokenKind.IDENTIFIER: 609 primary = self.is_call_or_ident() 610 else: 611 self.throw(BadExpectedToken, 612 "didn't expect to see a %s in factor" % ( 613 self.tokens[self._index].id)) 614 return primary 615 616 def isBinary(self, token): 617 return token.id in self.binaries 618 619 def isUnary(self, token): 620 return token.id in self.unaries 621 622 def precedence(self, token): 623 return self.precedences.get(token.id) 624 625 def token(self): 626 if self._index >= self._num_tokens: 627 return None 628 return self.tokens[self._index] 629 630 def nextToken(self): 631 self._index += 1 632 if self._index >= self._num_tokens: 633 return None 634 return self.tokens[self._index] 635 636 def dump_node(self, e): 637 op = e[0] 638 line = "(" + op 639 if op == "int": 640 line += " %d)" % e[1] 641 elif op == "oct": 642 line += " 0%o)" % e[1] 643 elif op == "hex": 644 line += " 0x%x)" % e[1] 645 elif op == "ident": 646 line += " %s)" % e[1] 647 elif op == "defined": 648 line += " %s)" % e[1] 649 elif op == "call": 650 arg = e[1] 651 line += " %s [" % arg[0] 652 prefix = "" 653 for param in arg[1]: 654 par = "" 655 for tok in param: 656 par += str(tok) 657 line += "%s%s" % (prefix, par) 658 prefix = "," 659 line += "])" 660 elif op in CppExpr.unaries: 661 line += " %s)" % self.dump_node(e[1]) 662 elif op in CppExpr.binaries: 663 line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2])) 664 else: 665 line += " ?%s)" % repr(e[1]) 666 667 return line 668 669 def __repr__(self): 670 return self.dump_node(self.expr) 671 672 def source_node(self, e): 673 op = e[0] 674 if op == "int": 675 return "%d" % e[1] 676 if op == "hex": 677 return "0x%x" % e[1] 678 if op == "oct": 679 return "0%o" % e[1] 680 if op == "ident": 681 # XXX: should try to expand 682 return e[1] 683 if op == "defined": 684 return "defined(%s)" % e[1] 685 686 prec = CppExpr.precedences.get(op, 1000) 687 arg = e[1] 688 if op in 
CppExpr.unaries: 689 arg_src = self.source_node(arg) 690 arg_op = arg[0] 691 arg_prec = CppExpr.precedences.get(arg_op, 1000) 692 if arg_prec < prec: 693 return "!(" + arg_src + ")" 694 else: 695 return "!" + arg_src 696 if op in CppExpr.binaries: 697 arg2 = e[2] 698 arg1_op = arg[0] 699 arg2_op = arg2[0] 700 arg1_src = self.source_node(arg) 701 arg2_src = self.source_node(arg2) 702 if CppExpr.precedences.get(arg1_op, 1000) < prec: 703 arg1_src = "(%s)" % arg1_src 704 if CppExpr.precedences.get(arg2_op, 1000) < prec: 705 arg2_src = "(%s)" % arg2_src 706 707 return "%s %s %s" % (arg1_src, op, arg2_src) 708 return "???" 709 710 def __str__(self): 711 return self.source_node(self.expr) 712 713 @staticmethod 714 def int_node(e): 715 if e[0] in ["int", "oct", "hex"]: 716 return e[1] 717 else: 718 return None 719 720 def toInt(self): 721 return self.int_node(self.expr) 722 723 def optimize_node(self, e, macros=None): 724 if macros is None: 725 macros = {} 726 op = e[0] 727 728 if op == "defined": 729 op, name = e 730 if name in macros: 731 if macros[name] == kCppUndefinedMacro: 732 return ("int", 0) 733 else: 734 try: 735 value = int(macros[name]) 736 return ("int", value) 737 except ValueError: 738 return ("defined", macros[name]) 739 740 if kernel_remove_config_macros and name.startswith("CONFIG_"): 741 return ("int", 0) 742 743 return e 744 745 elif op == "ident": 746 op, name = e 747 if name in macros: 748 try: 749 value = int(macros[name]) 750 expanded = ("int", value) 751 except ValueError: 752 expanded = ("ident", macros[name]) 753 return self.optimize_node(expanded, macros) 754 return e 755 756 elif op == "!": 757 op, v = e 758 v = self.optimize_node(v, macros) 759 if v[0] == "int": 760 if v[1] == 0: 761 return ("int", 1) 762 else: 763 return ("int", 0) 764 return ('!', v) 765 766 elif op == "&&": 767 op, l, r = e 768 l = self.optimize_node(l, macros) 769 r = self.optimize_node(r, macros) 770 li = self.int_node(l) 771 ri = self.int_node(r) 772 if li is not None: 
773 if li == 0: 774 return ("int", 0) 775 else: 776 return r 777 elif ri is not None: 778 if ri == 0: 779 return ("int", 0) 780 else: 781 return l 782 return (op, l, r) 783 784 elif op == "||": 785 op, l, r = e 786 l = self.optimize_node(l, macros) 787 r = self.optimize_node(r, macros) 788 li = self.int_node(l) 789 ri = self.int_node(r) 790 if li is not None: 791 if li == 0: 792 return r 793 else: 794 return ("int", 1) 795 elif ri is not None: 796 if ri == 0: 797 return l 798 else: 799 return ("int", 1) 800 return (op, l, r) 801 802 else: 803 return e 804 805 def optimize(self, macros=None): 806 if macros is None: 807 macros = {} 808 self.expr = self.optimize_node(self.expr, macros) 809 810class CppExprTest(unittest.TestCase): 811 """CppExpr unit tests.""" 812 813 def get_expr(self, expr): 814 return repr(CppExpr(CppStringTokenizer(expr).tokens)) 815 816 def test_cpp_expr(self): 817 self.assertEqual(self.get_expr("0"), "(int 0)") 818 self.assertEqual(self.get_expr("1"), "(int 1)") 819 self.assertEqual(self.get_expr("-5"), "(int -5)") 820 self.assertEqual(self.get_expr("+1"), "(int 1)") 821 self.assertEqual(self.get_expr("0U"), "(int 0)") 822 self.assertEqual(self.get_expr("015"), "(oct 015)") 823 self.assertEqual(self.get_expr("015l"), "(oct 015)") 824 self.assertEqual(self.get_expr("0x3e"), "(hex 0x3e)") 825 self.assertEqual(self.get_expr("(0)"), "(int 0)") 826 self.assertEqual(self.get_expr("1 && 1"), "(&& (int 1) (int 1))") 827 self.assertEqual(self.get_expr("1 && 0"), "(&& (int 1) (int 0))") 828 self.assertEqual(self.get_expr("EXAMPLE"), "(ident EXAMPLE)") 829 self.assertEqual(self.get_expr("EXAMPLE - 3"), 830 "(- (ident EXAMPLE) (int 3))") 831 self.assertEqual(self.get_expr("defined(EXAMPLE)"), 832 "(defined EXAMPLE)") 833 self.assertEqual(self.get_expr("defined ( EXAMPLE ) "), 834 "(defined EXAMPLE)") 835 self.assertEqual(self.get_expr("!defined(EXAMPLE)"), 836 "(! 
(defined EXAMPLE))") 837 self.assertEqual(self.get_expr("defined(ABC) || defined(BINGO)"), 838 "(|| (defined ABC) (defined BINGO))") 839 self.assertEqual(self.get_expr("FOO(BAR,5)"), "(call FOO [BAR,5])") 840 self.assertEqual(self.get_expr("A == 1 || defined(B)"), 841 "(|| (== (ident A) (int 1)) (defined B))") 842 843 def get_expr_optimize(self, expr, macros=None): 844 if macros is None: 845 macros = {} 846 e = CppExpr(CppStringTokenizer(expr).tokens) 847 e.optimize(macros) 848 return repr(e) 849 850 def test_cpp_expr_optimize(self): 851 self.assertEqual(self.get_expr_optimize("0"), "(int 0)") 852 self.assertEqual(self.get_expr_optimize("1"), "(int 1)") 853 self.assertEqual(self.get_expr_optimize("1 && 1"), "(int 1)") 854 self.assertEqual(self.get_expr_optimize("1 && +1"), "(int 1)") 855 self.assertEqual(self.get_expr_optimize("0x1 && 01"), "(oct 01)") 856 self.assertEqual(self.get_expr_optimize("1 && 0"), "(int 0)") 857 self.assertEqual(self.get_expr_optimize("0 && 1"), "(int 0)") 858 self.assertEqual(self.get_expr_optimize("0 && 0"), "(int 0)") 859 self.assertEqual(self.get_expr_optimize("1 || 1"), "(int 1)") 860 self.assertEqual(self.get_expr_optimize("1 || 0"), "(int 1)") 861 self.assertEqual(self.get_expr_optimize("0 || 1"), "(int 1)") 862 self.assertEqual(self.get_expr_optimize("0 || 0"), "(int 0)") 863 self.assertEqual(self.get_expr_optimize("A"), "(ident A)") 864 self.assertEqual(self.get_expr_optimize("A", {"A": 1}), "(int 1)") 865 self.assertEqual(self.get_expr_optimize("A || B", {"A": 1}), "(int 1)") 866 self.assertEqual(self.get_expr_optimize("A || B", {"B": 1}), "(int 1)") 867 self.assertEqual(self.get_expr_optimize("A && B", {"A": 1}), "(ident B)") 868 self.assertEqual(self.get_expr_optimize("A && B", {"B": 1}), "(ident A)") 869 self.assertEqual(self.get_expr_optimize("A && B"), "(&& (ident A) (ident B))") 870 self.assertEqual(self.get_expr_optimize("EXAMPLE"), "(ident EXAMPLE)") 871 self.assertEqual(self.get_expr_optimize("EXAMPLE - 3"), "(- (ident 
EXAMPLE) (int 3))") 872 self.assertEqual(self.get_expr_optimize("defined(EXAMPLE)"), "(defined EXAMPLE)") 873 self.assertEqual(self.get_expr_optimize("defined(EXAMPLE)", 874 {"EXAMPLE": "XOWOE"}), 875 "(defined XOWOE)") 876 self.assertEqual(self.get_expr_optimize("defined(EXAMPLE)", 877 {"EXAMPLE": kCppUndefinedMacro}), 878 "(int 0)") 879 self.assertEqual(self.get_expr_optimize("!defined(EXAMPLE)"), "(! (defined EXAMPLE))") 880 self.assertEqual(self.get_expr_optimize("!defined(EXAMPLE)", 881 {"EXAMPLE": "XOWOE"}), 882 "(! (defined XOWOE))") 883 self.assertEqual(self.get_expr_optimize("!defined(EXAMPLE)", 884 {"EXAMPLE": kCppUndefinedMacro}), 885 "(int 1)") 886 self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)"), 887 "(|| (defined A) (defined B))") 888 self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)", 889 {"A": "1"}), 890 "(int 1)") 891 self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)", 892 {"B": "1"}), 893 "(int 1)") 894 self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)", 895 {"B": kCppUndefinedMacro}), 896 "(defined A)") 897 self.assertEqual(self.get_expr_optimize("defined(A) || defined(B)", 898 {"A": kCppUndefinedMacro, 899 "B": kCppUndefinedMacro}), 900 "(int 0)") 901 self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)"), 902 "(&& (defined A) (defined B))") 903 self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)", 904 {"A": "1"}), 905 "(defined B)") 906 self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)", 907 {"B": "1"}), 908 "(defined A)") 909 self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)", 910 {"B": kCppUndefinedMacro}), 911 "(int 0)") 912 self.assertEqual(self.get_expr_optimize("defined(A) && defined(B)", 913 {"A": kCppUndefinedMacro}), 914 "(int 0)") 915 self.assertEqual(self.get_expr_optimize("A == 1 || defined(B)"), 916 "(|| (== (ident A) (int 1)) (defined B))") 917 self.assertEqual(self.get_expr_optimize( 918 
class Block(object):
    """A class used to model a block of input source text.

    There are two block types:
      - directive blocks: contain the tokens of a single pre-processor
        directive (e.g. #if)
      - text blocks, contain the tokens of non-directive blocks

    The cpp parser class below will transform an input source file into a list
    of Block objects (grouped in a BlockList object for convenience)

    Attributes set by the constructor:
      tokens:    list of tokens making up the block (without '#' and the
                 directive keyword for directive blocks).
      directive: directive name without '#', or None for a text block.
      define_id: macro name (including any parameter list) for '#define'.
      lineno:    source line of the block, 0 when it cannot be determined.
      expr:      CppExpr built from the tokens; only present on #if/#elif.
    """

    def __init__(self, tokens, directive=None, lineno=0, identifier=None):
        """Initialize a new block, if 'directive' is None, it is a text block.

        NOTE: This automatically converts '#ifdef MACRO' into
        '#if defined(MACRO)' and '#ifndef MACRO' into '#if !defined(MACRO)'.

        Args:
          tokens: list of tokens belonging to the block.
          directive: directive name, or None for a text block.
          lineno: line number of the block; when not positive, the line of
            the first token is used instead.
          identifier: macro name for a '#define' directive.
        """

        if directive == "ifdef":
            tok = Token()
            tok.id = tokDEFINED
            tokens = [tok] + tokens
            directive = "if"

        elif directive == "ifndef":
            tok1 = Token()
            tok2 = Token()
            tok1.id = tokNOT
            tok2.id = tokDEFINED
            tokens = [tok1, tok2] + tokens
            directive = "if"

        self.tokens = tokens
        self.directive = directive
        self.define_id = identifier
        if lineno > 0:
            self.lineno = lineno
        elif self.tokens:
            self.lineno = self.tokens[0].location.line
        else:
            # A token-less block carries no location at all; write() already
            # treats such blocks as legal, so do not crash while building one.
            self.lineno = 0

        if self.isIf():
            self.expr = CppExpr(self.tokens)

    def isDirective(self):
        """Return True iff this is a directive block."""
        return self.directive is not None

    def isConditional(self):
        """Return True iff this is a conditional directive block."""
        return self.directive in ("if", "ifdef", "ifndef", "else", "elif",
                                  "endif")

    def isDefine(self):
        """Return the macro name in a #define directive, or None otherwise."""
        if self.directive != "define":
            return None
        return self.define_id

    def isIf(self):
        """Return True iff this is an #if-like directive block."""
        return self.directive in ("if", "ifdef", "ifndef", "elif")

    def isEndif(self):
        """Return True iff this is an #endif directive block."""
        return self.directive == "endif"

    def isInclude(self):
        """Check whether this is a #include directive.

        If true, returns the corresponding file name (with brackets or
        double-quotes). None otherwise.
        """

        if self.directive != "include":
            return None
        return ''.join(str(x) for x in self.tokens)

    @staticmethod
    def format_blocks(tokens, indent=0):
        """Return the formatted lines of strings with proper indentation.

        Args:
          tokens: tokens of a text block.
          indent: indentation (in spaces) in effect before the first token.

        Returns:
          A (lines, indent) tuple: the list of formatted lines and the
          indentation in effect after the last token, so that the caller can
          carry it over to the next text block.
        """
        newline = True
        result = []
        buf = ''
        for i, t in enumerate(tokens):
            if t.id == '{':
                buf += ' {'
                result.append(strip_space(buf))
                # Do not indent if this is extern "C" {
                if i < 2 or tokens[i-2].id != 'extern' or tokens[i-1].id != '"C"':
                    indent += 2
                buf = ''
                newline = True
            elif t.id == '}':
                if indent >= 2:
                    indent -= 2
                if not newline:
                    result.append(strip_space(buf))
                # Look ahead to determine if it's the end of line.
                if (i + 1 < len(tokens) and
                    (tokens[i+1].id == ';' or
                     tokens[i+1].id in ['else', '__attribute__',
                                        '__attribute', '__packed'] or
                     tokens[i+1].kind == TokenKind.IDENTIFIER)):
                    # Something follows the brace on the same line.
                    buf = ' ' * indent + '}'
                    newline = False
                else:
                    result.append(' ' * indent + '}')
                    buf = ''
                    newline = True
            elif t.id == ';':
                result.append(strip_space(buf) + ';')
                buf = ''
                newline = True
            # We prefer a new line for each constant in enum.
            elif t.id == ',' and t.cursor.kind == CursorKind.ENUM_DECL:
                result.append(strip_space(buf) + ',')
                buf = ''
                newline = True
            else:
                if newline:
                    buf += ' ' * indent + str(t)
                else:
                    buf += ' ' + str(t)
                newline = False

        if buf:
            result.append(strip_space(buf))

        return result, indent

    def write(self, out, indent):
        """Dump the current block.

        Args:
          out: object with a write(str) method receiving the text.
          indent: indentation in effect before this block.

        Returns:
          The indentation in effect after this block.
        """
        # removeWhiteSpace() will sometimes creates non-directive blocks
        # without any tokens. These come from blocks that only contained
        # empty lines and spaces. They should not be printed in the final
        # output, and then should not be counted for this operation.
        #
        if self.directive is None and not self.tokens:
            return indent

        if self.directive:
            out.write(str(self) + '\n')
        else:
            lines, indent = self.format_blocks(self.tokens, indent)
            for line in lines:
                out.write(line + '\n')

        return indent

    def __repr__(self):
        """Generate the representation of a given block."""
        if self.directive:
            result = "#%s " % self.directive
            if self.isIf():
                result += repr(self.expr)
            else:
                for tok in self.tokens:
                    result += repr(tok)
        else:
            result = ""
            for tok in self.tokens:
                result += repr(tok)

        return result

    def __str__(self):
        """Generate the string representation of a given block."""
        if self.directive:
            # "#if"
            if self.directive == "if":
                # small optimization to re-generate #ifdef and #ifndef
                e = self.expr.expr
                op = e[0]
                if op == "defined":
                    result = "#ifdef %s" % e[1]
                elif op == "!" and e[1][0] == "defined":
                    result = "#ifndef %s" % e[1][1]
                else:
                    result = "#if " + str(self.expr)

            # "#define"
            elif self.isDefine():
                result = "#%s %s" % (self.directive, self.define_id)
                if self.tokens:
                    result += " "
                    expr = strip_space(' '.join(tok.id for tok in self.tokens))
                    # remove the space between name and '(' in function call
                    result += re.sub(r'(\w+) \(', r'\1(', expr)

            # "#error"
            # Concatenating tokens with a space separator, because they may
            # not be quoted and broken into several tokens
            elif self.directive == "error":
                result = "#error %s" % ' '.join(tok.id for tok in self.tokens)

            else:
                # Every other directive (including #elif) is printed from its
                # raw tokens, not from a parsed expression.
                result = "#%s" % self.directive
                if self.tokens:
                    result += " "
                    result += ''.join(tok.id for tok in self.tokens)
        else:
            lines, _ = self.format_blocks(self.tokens)
            result = '\n'.join(lines)

        return result
    def removeStructs(self, structs):
        """Remove the definitions of the given top-level structs.

        Args:
          structs: mapping from struct name to either a header path or a
            falsy value. Every 'struct NAME {' ... '};' whose NAME is a key
            is deleted; when the value is truthy, '#include <VALUE>' is
            added to the block list in its place.

        Raises:
          UnparseableStruct: the end of a struct to remove was not found
            before running out of text blocks.
        """
        extra_includes = set()
        block_num = 0
        num_blocks = len(self.blocks)
        while block_num < num_blocks:
            b = self.blocks[block_num]
            block_num += 1
            # Have to look in each block for a top-level struct definition.
            if b.directive:
                continue
            num_tokens = len(b.tokens)
            # A struct definition usually looks like:
            #   struct
            #   ident
            #   {
            #   }
            #   ;
            # However, the structure might be spread across multiple blocks
            # if the structure looks like this:
            #   struct ident
            #   {
            #   #ifdef VARIABLE
            #     pid_t pid;
            #   #endif
            #   };
            # So the total number of tokens in the block might be less than
            # five but assume at least three.
            if num_tokens < 3:
                continue

            # This is a simple struct finder, it might fail if a top-level
            # structure has an #if type directives that confuses the algorithm
            # for finding the end of the structure. Or if there is another
            # structure definition embedded in the structure.
            i = 0
            while i < num_tokens - 2:
                if (b.tokens[i].kind != TokenKind.KEYWORD or
                    b.tokens[i].id != "struct"):
                    i += 1
                    continue
                if (b.tokens[i + 1].kind == TokenKind.IDENTIFIER and
                    b.tokens[i + 2].kind == TokenKind.PUNCTUATION and
                    b.tokens[i + 2].id == "{" and b.tokens[i + 1].id in structs):
                    # Remember the replacement include, if any. The header
                    # path is the value stored in 'structs' for this name:
                    #  #include <VALUE>
                    struct_token = b.tokens[i + 1]
                    if struct_token.id in structs and structs[struct_token.id]:
                        extra_includes.add("<%s>" % structs[struct_token.id])

                    # Search forward for the end of the structure.
                    # Very simple search, look for } and ; tokens.
                    # If we hit the end of the block, we'll need to start
                    # looking at the next block.
                    j = i + 3
                    depth = 1
                    struct_removed = False
                    while not struct_removed:
                        while j < num_tokens:
                            if b.tokens[j].kind == TokenKind.PUNCTUATION:
                                if b.tokens[j].id == '{':
                                    depth += 1
                                elif b.tokens[j].id == '}':
                                    depth -= 1
                                elif b.tokens[j].id == ';' and depth == 0:
                                    # Drop tokens i..j (the whole struct).
                                    b.tokens = b.tokens[0:i] + b.tokens[j + 1:num_tokens]
                                    num_tokens = len(b.tokens)
                                    struct_removed = True
                                    break
                            j += 1
                        if not struct_removed:
                            # The struct runs past this block: cut the block
                            # at the 'struct' keyword and keep searching.
                            b.tokens = b.tokens[0:i]

                            # Skip directive blocks.
                            start_block = block_num
                            while block_num < num_blocks:
                                if not self.blocks[block_num].directive:
                                    break
                                block_num += 1
                            if block_num >= num_blocks:
                                # Unparsable struct, error out.
                                raise UnparseableStruct("Cannot remove struct %s: %s" % (struct_token.id, struct_token.location))
                            # The skipped directive blocks are deleted from
                            # the list; the next text block becomes current.
                            self.blocks = self.blocks[0:start_block] + self.blocks[block_num:num_blocks]
                            num_blocks = len(self.blocks)
                            b = self.blocks[start_block]
                            block_num = start_block + 1
                            num_tokens = len(b.tokens)
                            # Resume at the start of the new block; 'depth'
                            # is deliberately carried over.
                            i = 0
                            j = 0
                    # The token now at index i is new, so do not advance.
                    continue
                i += 1

        # Every include is inserted at index 2, so when the list already has
        # two or more blocks the includes end up in reverse sorted order.
        for extra_include in sorted(extra_includes):
            replacement = CppStringTokenizer(extra_include)
            self.blocks.insert(2, Block(replacement.tokens, directive='include'))
LN + spaces) 1335 # state = OTHER_DECL => typedef/struct encountered, ends with ";" 1336 # state = VAR_DECL => var declaration encountered, ends with ";" 1337 # state = FUNC_DECL => func declaration encountered, ends with "}" 1338 NORMAL = 0 1339 OTHER_DECL = 1 1340 VAR_DECL = 2 1341 FUNC_DECL = 3 1342 1343 state = NORMAL 1344 depth = 0 1345 blocksToKeep = [] 1346 blocksInProgress = [] 1347 blocksOfDirectives = [] 1348 ident = "" 1349 state_token = "" 1350 macros = set() 1351 for block in self.blocks: 1352 if block.isDirective(): 1353 # Record all macros. 1354 if block.directive == 'define': 1355 macro_name = block.define_id 1356 paren_index = macro_name.find('(') 1357 if paren_index == -1: 1358 macros.add(macro_name) 1359 else: 1360 macros.add(macro_name[0:paren_index]) 1361 blocksInProgress.append(block) 1362 # If this is in a function/variable declaration, we might need 1363 # to emit the directives alone, so save them separately. 1364 blocksOfDirectives.append(block) 1365 continue 1366 1367 numTokens = len(block.tokens) 1368 lastTerminatorIndex = 0 1369 i = 0 1370 while i < numTokens: 1371 token_id = block.tokens[i].id 1372 terminator = False 1373 if token_id == '{': 1374 depth += 1 1375 if (i >= 2 and block.tokens[i-2].id == 'extern' and 1376 block.tokens[i-1].id == '"C"'): 1377 # For an extern "C" { pretend as though this is depth 0. 1378 depth -= 1 1379 elif token_id == '}': 1380 if depth > 0: 1381 depth -= 1 1382 if depth == 0: 1383 if state == OTHER_DECL: 1384 # Loop through until we hit the ';' 1385 i += 1 1386 while i < numTokens: 1387 if block.tokens[i].id == ';': 1388 token_id = ';' 1389 break 1390 i += 1 1391 # If we didn't hit the ';', just consider this the 1392 # terminator any way. 
1393 terminator = True 1394 elif depth == 0: 1395 if token_id == ';': 1396 if state == NORMAL: 1397 blocksToKeep.extend(blocksInProgress) 1398 blocksInProgress = [] 1399 blocksOfDirectives = [] 1400 state = FUNC_DECL 1401 terminator = True 1402 elif (state == NORMAL and token_id == '(' and i >= 1 and 1403 block.tokens[i-1].kind == TokenKind.IDENTIFIER and 1404 block.tokens[i-1].id in macros): 1405 # This is a plain macro being expanded in the header 1406 # which needs to be removed. 1407 blocksToKeep.extend(blocksInProgress) 1408 if lastTerminatorIndex < i - 1: 1409 blocksToKeep.append(Block(block.tokens[lastTerminatorIndex:i-1])) 1410 blocksInProgress = [] 1411 blocksOfDirectives = [] 1412 1413 # Skip until we see the terminating ')' 1414 i += 1 1415 paren_depth = 1 1416 while i < numTokens: 1417 if block.tokens[i].id == ')': 1418 paren_depth -= 1 1419 if paren_depth == 0: 1420 break 1421 elif block.tokens[i].id == '(': 1422 paren_depth += 1 1423 i += 1 1424 lastTerminatorIndex = i + 1 1425 elif (state != FUNC_DECL and token_id == '(' and 1426 state_token != 'typedef'): 1427 blocksToKeep.extend(blocksInProgress) 1428 blocksInProgress = [] 1429 blocksOfDirectives = [] 1430 state = VAR_DECL 1431 elif state == NORMAL and token_id in ['struct', 'typedef', 1432 'enum', 'union', 1433 '__extension__']: 1434 state = OTHER_DECL 1435 state_token = token_id 1436 elif block.tokens[i].kind == TokenKind.IDENTIFIER: 1437 if state != VAR_DECL or ident == "": 1438 ident = token_id 1439 1440 if terminator: 1441 if state != VAR_DECL and state != FUNC_DECL or ident in keep: 1442 blocksInProgress.append(Block(block.tokens[lastTerminatorIndex:i+1])) 1443 blocksToKeep.extend(blocksInProgress) 1444 else: 1445 # Only keep the directives found. 
def strip_space(s):
    """Strip out redundant space in a given string.

    The spaces that joining tokens with ' ' puts around punctuation are
    removed, as is the space between a name and its opening parenthesis.
    Text inside double- or single-quoted literals is returned untouched, so
    the contents of string and character tokens are never altered.

    Args:
      s: the string to clean up.

    Returns:
      The cleaned-up string.
    """

    # Applied in this order, each to the result of the previous one.
    replacements = ((' . ', '.'),
                    (' [', '['),
                    ('[ ', '['),
                    (' ]', ']'),
                    ('( ', '('),
                    (' )', ')'),
                    (' ,', ','),
                    ('# ', '#'),
                    (' ;', ';'),
                    ('~ ', '~'),
                    (' -> ', '->'))

    def squeeze(code):
        for old, new in replacements:
            code = code.replace(old, new)
        # Remove the space between function name and the parenthesis.
        return re.sub(r'(\w+) \(', r'\1(', code)

    # Split on quoted literals (backslash escapes honoured). Because the
    # pattern is a capturing group, re.split() keeps the literals in the
    # result at the odd indices; only the even pieces are code.
    pieces = re.split(r'''("(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*')''', s)
    return ''.join(piece if index % 2 else squeeze(piece)
                   for index, piece in enumerate(pieces))
1546 """ 1547 1548 result = [] 1549 if extent is None: 1550 extent = tokens[i].cursor.extent 1551 1552 while i < len(tokens) and tokens[i].location in extent: 1553 t = tokens[i] 1554 if debugBlockParser: 1555 print(' ' * 2, t.id, t.kind, t.cursor.kind) 1556 if (detect_change and t.cursor.extent != extent and 1557 t.cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE): 1558 break 1559 result.append(t) 1560 i += 1 1561 return (i, result) 1562 1563 def consume_line(i, tokens): 1564 """Return tokens that follow tokens[i] in the same line.""" 1565 result = [] 1566 line = tokens[i].location.line 1567 while i < len(tokens) and tokens[i].location.line == line: 1568 if tokens[i].cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE: 1569 break 1570 result.append(tokens[i]) 1571 i += 1 1572 return (i, result) 1573 1574 if tokzer is None: 1575 tokzer = self._tokzer 1576 tokens = tokzer.tokens 1577 1578 blocks = [] 1579 buf = [] 1580 i = 0 1581 1582 while i < len(tokens): 1583 t = tokens[i] 1584 cursor = t.cursor 1585 1586 if debugBlockParser: 1587 print ("%d: Processing [%s], kind=[%s], cursor=[%s], " 1588 "extent=[%s]" % (t.location.line, t.spelling, t.kind, 1589 cursor.kind, 1590 self._short_extent(cursor.extent))) 1591 1592 if cursor.kind == CursorKind.PREPROCESSING_DIRECTIVE: 1593 if buf: 1594 blocks.append(Block(buf)) 1595 buf = [] 1596 1597 j = i 1598 if j + 1 >= len(tokens): 1599 raise BadExpectedToken("### BAD TOKEN at %s" % (t.location)) 1600 directive = tokens[j+1].id 1601 1602 if directive == 'define': 1603 if i+2 >= len(tokens): 1604 raise BadExpectedToken("### BAD TOKEN at %s" % 1605 (tokens[i].location)) 1606 1607 # Skip '#' and 'define'. 1608 extent = tokens[i].cursor.extent 1609 i += 2 1610 id = '' 1611 # We need to separate the id from the remaining of 1612 # the line, especially for the function-like macro. 
1613 if (i + 1 < len(tokens) and tokens[i+1].id == '(' and 1614 (tokens[i].location.column + len(tokens[i].spelling) == 1615 tokens[i+1].location.column)): 1616 while i < len(tokens): 1617 id += tokens[i].id 1618 if tokens[i].spelling == ')': 1619 i += 1 1620 break 1621 i += 1 1622 else: 1623 id += tokens[i].id 1624 # Advance to the next token that follows the macro id 1625 i += 1 1626 1627 (i, ret) = consume_extent(i, tokens, extent=extent) 1628 blocks.append(Block(ret, directive=directive, 1629 lineno=t.location.line, identifier=id)) 1630 1631 else: 1632 (i, ret) = consume_extent(i, tokens) 1633 blocks.append(Block(ret[2:], directive=directive, 1634 lineno=t.location.line)) 1635 1636 elif cursor.kind == CursorKind.INCLUSION_DIRECTIVE: 1637 if buf: 1638 blocks.append(Block(buf)) 1639 buf = [] 1640 directive = tokens[i+1].id 1641 (i, ret) = consume_extent(i, tokens) 1642 1643 blocks.append(Block(ret[2:], directive=directive, 1644 lineno=t.location.line)) 1645 1646 elif cursor.kind == CursorKind.VAR_DECL: 1647 if buf: 1648 blocks.append(Block(buf)) 1649 buf = [] 1650 1651 (i, ret) = consume_extent(i, tokens, detect_change=True) 1652 buf += ret 1653 1654 elif cursor.kind == CursorKind.FUNCTION_DECL: 1655 if buf: 1656 blocks.append(Block(buf)) 1657 buf = [] 1658 1659 (i, ret) = consume_extent(i, tokens, detect_change=True) 1660 buf += ret 1661 1662 else: 1663 (i, ret) = consume_line(i, tokens) 1664 buf += ret 1665 1666 if buf: 1667 blocks.append(Block(buf)) 1668 1669 # _parsed=True indicates a successful parsing, although may result an 1670 # empty BlockList. 
def find_matching_endif(blocks, i):
    """Traverse the blocks to find out the matching #endif.

    Args:
      blocks: list of Block objects.
      i: index of the first block following the opening #if/#elif.

    Returns:
      The index of the first '#else', '#elif' or '#endif' that belongs to
      the opening directive (nested conditionals are skipped), or
      len(blocks) when there is none.
    """
    n = len(blocks)
    depth = 1
    while i < n:
        if blocks[i].isDirective():
            dir_ = blocks[i].directive
            if dir_ in ("if", "ifndef", "ifdef"):
                depth += 1
            elif depth == 1 and dir_ in ("else", "elif"):
                return i
            elif dir_ == "endif":
                depth -= 1
                if depth == 0:
                    return i
        i += 1
    return i


def optimize_if01(blocks):
    """Remove the code between #if 0 .. #endif in a list of CppBlocks.

    Conditionals whose expression evaluates to a constant are resolved:
    dead branches are dropped and the directives around live branches are
    removed or rewritten. Conditionals with a non-constant expression are
    kept as they are.

    Args:
      blocks: list of Block objects. Directive blocks in it may be modified
        in place (their directive, expr or tokens).

    Returns:
      A new list with the blocks that remain.
    """
    i = 0
    n = len(blocks)
    result = []
    while i < n:
        # Copy everything up to the next #if/#elif verbatim.
        j = i
        while j < n and not blocks[j].isIf():
            j += 1
        if j > i:
            logging.debug("appending lines %d to %d", blocks[i].lineno,
                          blocks[j-1].lineno)
            result += blocks[i:j]
        if j >= n:
            break
        expr = blocks[j].expr
        r = expr.toInt()
        if r is None:
            # Not a constant expression, keep the directive.
            result.append(blocks[j])
            i = j + 1
            continue

        if r == 0:
            # if 0 => skip everything until the corresponding #endif
            start_dir = blocks[j].directive
            j = find_matching_endif(blocks, j + 1)
            if j >= n:
                # unterminated #if 0, finish here
                break
            dir_ = blocks[j].directive
            if dir_ == "endif":
                logging.debug("remove 'if 0' .. 'endif' (lines %d to %d)",
                              blocks[i].lineno, blocks[j].lineno)
                if start_dir == "elif":
                    # Put an endif since we started with an elif.
                    result += blocks[j:j+1]
                i = j + 1
            elif dir_ == "else":
                # convert 'else' into 'if 1'
                logging.debug("convert 'if 0' .. 'else' into 'if 1' (lines %d "
                              "to %d)", blocks[i].lineno, blocks[j-1].lineno)
                if start_dir == "elif":
                    blocks[j].directive = "elif"
                else:
                    blocks[j].directive = "if"
                blocks[j].expr = CppExpr(CppStringTokenizer("1").tokens)
                i = j
            elif dir_ == "elif":
                # convert 'elif' into 'if'
                logging.debug("convert 'if 0' .. 'elif' into 'if'")
                if start_dir != "elif":
                    # The removed branch opened the conditional, so the
                    # next one has to open it now.
                    blocks[j].directive = "if"
                i = j
            continue

        # if 1 => find corresponding endif and remove/transform them
        k = find_matching_endif(blocks, j + 1)
        if k >= n:
            # unterminated #if 1, finish here
            logging.debug("unterminated 'if 1'")
            result += blocks[j+1:k]
            break

        start_dir = blocks[j].directive
        dir_ = blocks[k].directive
        if dir_ == "endif":
            logging.debug("convert 'if 1' .. 'endif' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            if start_dir == "elif":
                # Add the elif in to the results and convert it to an elif 1.
                blocks[j].tokens = CppStringTokenizer("1").tokens
                result += blocks[j:j+1]
            result += optimize_if01(blocks[j+1:k])
            if start_dir == "elif":
                # Add the endif in to the results.
                result += blocks[k:k+1]
            i = k + 1
        elif dir_ == "else":
            # convert 'else' into 'if 0'
            logging.debug("convert 'if 1' .. 'else' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            if start_dir == "elif":
                # Add the elif in to the results and convert it to an elif 1.
                blocks[j].tokens = CppStringTokenizer("1").tokens
                result += blocks[j:j+1]
            result += optimize_if01(blocks[j+1:k])
            if start_dir == "elif":
                blocks[k].directive = "elif"
            else:
                blocks[k].directive = "if"
            blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens)
            i = k
        elif dir_ == "elif":
            # convert 'elif' into 'if 0'
            logging.debug("convert 'if 1' .. 'elif' (lines %d to %d)",
                          blocks[j].lineno, blocks[k].lineno)
            if start_dir == "elif":
                # The taken branch is itself an elif of an enclosing chain:
                # keep it (as 'elif 1') so its body is not merged into the
                # preceding branch.
                blocks[j].tokens = CppStringTokenizer("1").tokens
                result += blocks[j:j+1]
            result += optimize_if01(blocks[j+1:k])
            if start_dir != "elif":
                # The opening 'if 1' is dropped together with its endif, so
                # the dead elif must look like an 'if 0'; otherwise the next
                # pass would emit an endif that closes nothing.
                blocks[k].directive = "if"
            blocks[k].expr = CppExpr(CppStringTokenizer("0").tokens)
            i = k
    return result
= """\ 1931#if defined(something) 1932#define EXISTS 1933#elif 1 1934#define GOOD 1935#else 1936#define BAD 1937#endif 1938""" 1939 expected = """\ 1940#ifdef something 1941#define EXISTS 1942#elif 1 1943#define GOOD 1944#endif 1945""" 1946 self.assertEqual(self.parse(text), expected) 1947 1948 def test_if_elif1_else_macro(self): 1949 text = """\ 1950#if defined(something) 1951#define EXISTS 1952#elif defined(WILL_BE_ONE) 1953#define GOOD 1954#else 1955#define BAD 1956#endif 1957""" 1958 expected = """\ 1959#ifdef something 1960#define EXISTS 1961#elif 1 1962#define GOOD 1963#endif 1964""" 1965 self.assertEqual(self.parse(text, {"WILL_BE_ONE": "1"}), expected) 1966 1967 1968 def test_if_elif1_else_macro(self): 1969 text = """\ 1970#if defined(something) 1971#define EXISTS 1972#elif defined(WILL_BE_ONE) 1973#define GOOD 1974#else 1975#define BAD 1976#endif 1977""" 1978 expected = """\ 1979#ifdef something 1980#define EXISTS 1981#elif 1 1982#define GOOD 1983#endif 1984""" 1985 self.assertEqual(self.parse(text, {"WILL_BE_ONE": "1"}), expected) 1986 1987 def test_macro_set_to_undefined_single(self): 1988 text = """\ 1989#if defined(__KERNEL__) 1990#define BAD_KERNEL 1991#endif 1992""" 1993 expected = "" 1994 macros = {"__KERNEL__": kCppUndefinedMacro} 1995 self.assertEqual(self.parse(text, macros), expected) 1996 1997 def test_macro_set_to_undefined_if(self): 1998 text = """\ 1999#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) 2000#define CHECK 2001#endif 2002""" 2003 expected = """\ 2004#if !defined(__GLIBC__) || __GLIBC__ < 2 2005#define CHECK 2006#endif 2007""" 2008 macros = {"__KERNEL__": kCppUndefinedMacro} 2009 self.assertEqual(self.parse(text, macros), expected) 2010 2011 def test_endif_comment_removed(self): 2012 text = """\ 2013#ifndef SIGRTMAX 2014#define SIGRTMAX 123 2015#endif /* SIGRTMAX */ 2016""" 2017 expected = """\ 2018#ifndef SIGRTMAX 2019#define SIGRTMAX 123 2020#endif 2021""" 2022 self.assertEqual(self.parse(text), expected) 2023 
class RemoveStructsTests(unittest.TestCase):
    """BlockList.removeStructs() unit tests.

    The 'structs' argument maps a struct name to the header that replaces
    it, or to None when the struct is removed without a replacement include.
    """

    def parse(self, text, structs):
        out = utils.StringOutput()
        blocks = BlockParser().parse(CppStringTokenizer(text))
        blocks.removeStructs(structs)
        blocks.write(out)
        return out.get()

    def test_remove_struct_from_start(self):
        text = """\
struct remove {
  int val1;
  int val2;
};
struct something {
  struct timeval val1;
  struct timeval val2;
};
"""
        expected = """\
struct something {
  struct timeval val1;
  struct timeval val2;
};
"""
        self.assertEqual(self.parse(text, {"remove": None}), expected)

    def test_remove_struct_from_end(self):
        text = """\
struct something {
  struct timeval val1;
  struct timeval val2;
};
struct remove {
  int val1;
  int val2;
};
"""
        expected = """\
struct something {
  struct timeval val1;
  struct timeval val2;
};
"""
        self.assertEqual(self.parse(text, {"remove": None}), expected)

    def test_remove_minimal_struct(self):
        text = """\
struct remove {
};
"""
        expected = ""
        self.assertEqual(self.parse(text, {"remove": None}), expected)

    def test_remove_struct_with_struct_fields(self):
        text = """\
struct something {
  struct remove val1;
  struct remove val2;
};
struct remove {
  int val1;
  struct something val3;
  int val2;
};
"""
        expected = """\
struct something {
  struct remove val1;
  struct remove val2;
};
"""
        self.assertEqual(self.parse(text, {"remove": None}), expected)

    def test_remove_consecutive_structs(self):
        text = """\
struct keep1 {
  struct timeval val1;
  struct timeval val2;
};
struct remove1 {
  int val1;
  int val2;
};
struct remove2 {
  int val1;
  int val2;
  int val3;
};
struct keep2 {
  struct timeval val1;
  struct timeval val2;
};
"""
        expected = """\
struct keep1 {
  struct timeval val1;
  struct timeval val2;
};
struct keep2 {
  struct timeval val1;
  struct timeval val2;
};
"""
        self.assertEqual(self.parse(text, {"remove1": None, "remove2": None}), expected)

    def test_remove_multiple_structs(self):
        text = """\
struct keep1 {
  int val;
};
struct remove1 {
  int val1;
  int val2;
};
struct keep2 {
  int val;
};
struct remove2 {
  struct timeval val1;
  struct timeval val2;
};
struct keep3 {
  int val;
};
"""
        expected = """\
struct keep1 {
  int val;
};
struct keep2 {
  int val;
};
struct keep3 {
  int val;
};
"""
        self.assertEqual(self.parse(text, {"remove1": None, "remove2": None}), expected)

    def test_remove_struct_with_inline_structs(self):
        text = """\
struct remove {
  int val1;
  int val2;
  struct {
    int val1;
    struct {
      int val1;
    } level2;
  } level1;
};
struct something {
  struct timeval val1;
  struct timeval val2;
};
"""
        expected = """\
struct something {
  struct timeval val1;
  struct timeval val2;
};
"""
        self.assertEqual(self.parse(text, {"remove": None}), expected)

    def test_remove_struct_across_blocks(self):
        text = """\
struct remove {
  int val1;
  int val2;
#ifdef PARAMETER1
  PARAMETER1
#endif
#ifdef PARAMETER2
  PARAMETER2
#endif
};
struct something {
  struct timeval val1;
  struct timeval val2;
};
"""
        expected = """\
struct something {
  struct timeval val1;
  struct timeval val2;
};
"""
        self.assertEqual(self.parse(text, {"remove": None}), expected)

    def test_remove_struct_across_blocks_multiple_structs(self):
        text = """\
struct remove1 {
  int val1;
  int val2;
#ifdef PARAMETER1
  PARAMETER1
#endif
#ifdef PARAMETER2
  PARAMETER2
#endif
};
struct remove2 {
};
struct something {
  struct timeval val1;
  struct timeval val2;
};
"""
        expected = """\
struct something {
  struct timeval val1;
  struct timeval val2;
};
"""
        self.assertEqual(self.parse(text, {"remove1": None, "remove2": None}), expected)

    def test_remove_multiple_struct_and_add_includes(self):
        text = """\
struct remove1 {
  int val1;
  int val2;
};
struct remove2 {
  struct timeval val1;
  struct timeval val2;
};
"""
        expected = """\
#include <bits/remove1.h>
#include <bits/remove2.h>
"""
        # The include path is the value stored for the struct name.
        self.assertEqual(self.parse(text, {"remove1": "bits/remove1.h",
                                           "remove2": "bits/remove2.h"}), expected)
function() 2307{ 2308} 2309""" 2310 expected = """\ 2311static inline __u64 function() { 2312} 2313""" 2314 self.assertEqual(self.parse(text, set(["function"])), expected) 2315 2316 def test_var_removed(self): 2317 text = "__u64 variable;" 2318 expected = "" 2319 self.assertEqual(self.parse(text), expected) 2320 2321 def test_var_kept(self): 2322 text = "__u64 variable;" 2323 expected = "__u64 variable;\n" 2324 self.assertEqual(self.parse(text, set(["variable"])), expected) 2325 2326 def test_keep_function_typedef(self): 2327 text = "typedef void somefunction_t(void);" 2328 expected = "typedef void somefunction_t(void);\n" 2329 self.assertEqual(self.parse(text), expected) 2330 2331 def test_struct_keep_attribute(self): 2332 text = """\ 2333struct something_s { 2334 __u32 s1; 2335 __u32 s2; 2336} __attribute__((packed)); 2337""" 2338 expected = """\ 2339struct something_s { 2340 __u32 s1; 2341 __u32 s2; 2342} __attribute__((packed)); 2343""" 2344 self.assertEqual(self.parse(text), expected) 2345 2346 def test_function_keep_attribute_structs(self): 2347 text = """\ 2348static __inline__ struct some_struct1 * function(struct some_struct2 * e) { 2349} 2350""" 2351 expected = """\ 2352static __inline__ struct some_struct1 * function(struct some_struct2 * e) { 2353} 2354""" 2355 self.assertEqual(self.parse(text, set(["function"])), expected) 2356 2357 def test_struct_after_struct(self): 2358 text = """\ 2359struct first { 2360}; 2361 2362struct second { 2363 unsigned short s1; 2364#define SOMETHING 8 2365 unsigned short s2; 2366}; 2367""" 2368 expected = """\ 2369struct first { 2370}; 2371struct second { 2372 unsigned short s1; 2373#define SOMETHING 8 2374 unsigned short s2; 2375}; 2376""" 2377 self.assertEqual(self.parse(text), expected) 2378 2379 def test_other_not_removed(self): 2380 text = """\ 2381typedef union { 2382 __u64 tu1; 2383 __u64 tu2; 2384} typedef_name; 2385 2386union { 2387 __u64 u1; 2388 __u64 u2; 2389}; 2390 2391struct { 2392 __u64 s1; 2393 __u64 s2; 
2394}; 2395 2396enum { 2397 ENUM1 = 0, 2398 ENUM2, 2399}; 2400 2401__extension__ typedef __signed__ long long __s64; 2402""" 2403 expected = """\ 2404typedef union { 2405 __u64 tu1; 2406 __u64 tu2; 2407} typedef_name; 2408union { 2409 __u64 u1; 2410 __u64 u2; 2411}; 2412struct { 2413 __u64 s1; 2414 __u64 s2; 2415}; 2416enum { 2417 ENUM1 = 0, 2418 ENUM2, 2419}; 2420__extension__ typedef __signed__ long long __s64; 2421""" 2422 2423 self.assertEqual(self.parse(text), expected) 2424 2425 def test_semicolon_after_function(self): 2426 text = """\ 2427static inline __u64 function() 2428{ 2429}; 2430 2431struct should_see { 2432 __u32 field; 2433}; 2434""" 2435 expected = """\ 2436struct should_see { 2437 __u32 field; 2438}; 2439""" 2440 self.assertEqual(self.parse(text), expected) 2441 2442 def test_define_in_middle_keep(self): 2443 text = """\ 2444enum { 2445 ENUM0 = 0x10, 2446 ENUM1 = 0x20, 2447#define SOMETHING SOMETHING_ELSE 2448 ENUM2 = 0x40, 2449}; 2450""" 2451 expected = """\ 2452enum { 2453 ENUM0 = 0x10, 2454 ENUM1 = 0x20, 2455#define SOMETHING SOMETHING_ELSE 2456 ENUM2 = 0x40, 2457}; 2458""" 2459 self.assertEqual(self.parse(text), expected) 2460 2461 def test_define_in_middle_remove(self): 2462 text = """\ 2463static inline function() { 2464#define SOMETHING1 SOMETHING_ELSE1 2465 i = 0; 2466 { 2467 i = 1; 2468 } 2469#define SOMETHING2 SOMETHING_ELSE2 2470} 2471""" 2472 expected = """\ 2473#define SOMETHING1 SOMETHING_ELSE1 2474#define SOMETHING2 SOMETHING_ELSE2 2475""" 2476 self.assertEqual(self.parse(text), expected) 2477 2478 def test_define_in_middle_force_keep(self): 2479 text = """\ 2480static inline function() { 2481#define SOMETHING1 SOMETHING_ELSE1 2482 i = 0; 2483 { 2484 i = 1; 2485 } 2486#define SOMETHING2 SOMETHING_ELSE2 2487} 2488""" 2489 expected = """\ 2490static inline function() { 2491#define SOMETHING1 SOMETHING_ELSE1 2492 i = 0; 2493 { 2494 i = 1; 2495 } 2496#define SOMETHING2 SOMETHING_ELSE2 2497} 2498""" 2499 self.assertEqual(self.parse(text, 
set(["function"])), expected) 2500 2501 def test_define_before_remove(self): 2502 text = """\ 2503#define SHOULD_BE_KEPT NOTHING1 2504#define ANOTHER_TO_KEEP NOTHING2 2505static inline function() { 2506#define SOMETHING1 SOMETHING_ELSE1 2507 i = 0; 2508 { 2509 i = 1; 2510 } 2511#define SOMETHING2 SOMETHING_ELSE2 2512} 2513""" 2514 expected = """\ 2515#define SHOULD_BE_KEPT NOTHING1 2516#define ANOTHER_TO_KEEP NOTHING2 2517#define SOMETHING1 SOMETHING_ELSE1 2518#define SOMETHING2 SOMETHING_ELSE2 2519""" 2520 self.assertEqual(self.parse(text), expected) 2521 2522 def test_extern_C(self): 2523 text = """\ 2524#if defined(__cplusplus) 2525extern "C" { 2526#endif 2527 2528struct something { 2529}; 2530 2531#if defined(__cplusplus) 2532} 2533#endif 2534""" 2535 expected = """\ 2536#ifdef __cplusplus 2537extern "C" { 2538#endif 2539struct something { 2540}; 2541#ifdef __cplusplus 2542} 2543#endif 2544""" 2545 self.assertEqual(self.parse(text), expected) 2546 2547 def test_macro_definition_removed(self): 2548 text = """\ 2549#define MACRO_FUNCTION_NO_PARAMS static inline some_func() {} 2550MACRO_FUNCTION_NO_PARAMS() 2551 2552#define MACRO_FUNCTION_PARAMS(a) static inline some_func() { a; } 2553MACRO_FUNCTION_PARAMS(a = 1) 2554 2555something that should still be kept 2556MACRO_FUNCTION_PARAMS(b) 2557""" 2558 expected = """\ 2559#define MACRO_FUNCTION_NO_PARAMS static inline some_func() { } 2560#define MACRO_FUNCTION_PARAMS(a) static inline some_func() { a; } 2561something that should still be kept 2562""" 2563 self.assertEqual(self.parse(text), expected) 2564 2565 def test_verify_timeval_itemerval(self): 2566 text = """\ 2567struct __kernel_old_timeval { 2568 struct something val; 2569}; 2570struct __kernel_old_itimerval { 2571 struct __kernel_old_timeval val; 2572}; 2573struct fields { 2574 struct __kernel_old_timeval timeval; 2575 struct __kernel_old_itimerval itimerval; 2576}; 2577""" 2578 expected = """\ 2579struct fields { 2580 struct timeval timeval; 2581 struct 
itimerval itimerval; 2582}; 2583""" 2584 self.assertEqual(self.parse(text), expected) 2585 2586 def test_token_replacement(self): 2587 text = """\ 2588#define SIGRTMIN 32 2589#define SIGRTMAX _NSIG 2590#define SIGRTMAX(a,class) some_func(a, class) 2591""" 2592 expected = """\ 2593#define __SIGRTMIN 32 2594#define __SIGRTMAX _KERNEL__NSIG 2595#define __SIGRTMAX(a,__linux_class) some_func(a, __linux_class) 2596""" 2597 self.assertEqual(self.parse(text), expected) 2598 2599 2600if __name__ == '__main__': 2601 unittest.main() 2602