1# a glorified C pre-processor parser 2 3import sys, re, string 4from utils import * 5from defaults import * 6 7debugTokens = False 8debugDirectiveTokenizer = False 9debugLineParsing = False 10debugCppExpr = False 11debugOptimIf01 = False 12 13##################################################################################### 14##################################################################################### 15##### ##### 16##### C P P T O K E N S ##### 17##### ##### 18##################################################################################### 19##################################################################################### 20 21# the list of supported C-preprocessor tokens 22# plus a couple of C tokens as well 23tokEOF = "\0" 24tokLN = "\n" 25tokSTRINGIFY = "#" 26tokCONCAT = "##" 27tokLOGICAND = "&&" 28tokLOGICOR = "||" 29tokSHL = "<<" 30tokSHR = ">>" 31tokEQUAL = "==" 32tokNEQUAL = "!=" 33tokLT = "<" 34tokLTE = "<=" 35tokGT = ">" 36tokGTE = ">=" 37tokELLIPSIS = "..." 38tokSPACE = " " 39tokDEFINED = "defined" 40tokLPAREN = "(" 41tokRPAREN = ")" 42tokNOT = "!" 43tokPLUS = "+" 44tokMINUS = "-" 45tokMULTIPLY = "*" 46tokDIVIDE = "/" 47tokMODULUS = "%" 48tokBINAND = "&" 49tokBINOR = "|" 50tokBINXOR = "^" 51tokCOMMA = "," 52tokLBRACE = "{" 53tokRBRACE = "}" 54tokARROW = "->" 55tokINCREMENT = "++" 56tokDECREMENT = "--" 57tokNUMBER = "<number>" 58tokIDENT = "<ident>" 59tokSTRING = "<string>" 60 61class Token: 62 """a simple class to hold information about a given token. 63 each token has a position in the source code, as well as 64 an 'id' and a 'value'. the id is a string that identifies 65 the token's class, while the value is the string of the 66 original token itself. 67 68 for example, the tokenizer concatenates a series of spaces 69 and tabs as a single tokSPACE id, whose value if the original 70 spaces+tabs sequence.""" 71 72 def __init__(self): 73 self.id = None 74 self.value = None 75 self.lineno = 0 76 self.colno = 0 77 78 def set(self,id,val=None): 79 self.id = id 80 if val: 81 self.value = val 82 else: 83 self.value = id 84 return None 85 86 def copyFrom(self,src): 87 self.id = src.id 88 self.value = src.value 89 self.lineno = src.lineno 90 self.colno = src.colno 91 92 def __repr__(self): 93 if self.id == tokIDENT: 94 return "(ident %s)" % self.value 95 if self.id == tokNUMBER: 96 return "(number %s)" % self.value 97 if self.id == tokSTRING: 98 return "(string '%s')" % self.value 99 if self.id == tokLN: 100 return "<LN>" 101 if self.id == tokEOF: 102 return "<EOF>" 103 if self.id == tokSPACE and self.value == "\\": 104 # this corresponds to a trailing \ that was transformed into a tokSPACE 105 return "<\\>" 106 107 return self.id 108 109 def __str__(self): 110 if self.id == tokIDENT: 111 return self.value 112 if self.id == tokNUMBER: 113 return self.value 114 if self.id == tokSTRING: 115 return self.value 116 if self.id == tokEOF: 117 return "<EOF>" 118 if self.id == tokSPACE: 119 if self.value == "\\": # trailing \ 120 return "\\\n" 121 else: 122 return self.value 123 124 return self.id 125 126class BadExpectedToken(Exception): 127 def __init__(self,msg): 128 print msg 129 130 131##################################################################################### 132##################################################################################### 133##### ##### 134##### C P P T O K E N I Z E R ##### 135##### ##### 136##################################################################################### 137##################################################################################### 138 139# list of long symbols, i.e. those that take more than one characters 140cppLongSymbols = [ tokCONCAT, tokLOGICAND, tokLOGICOR, tokSHL, tokSHR, tokELLIPSIS, tokEQUAL,\ 141 tokNEQUAL, tokLTE, tokGTE, tokARROW, tokINCREMENT, tokDECREMENT ] 142 143class CppTokenizer: 144 """an abstract class used to convert some input text into a list 145 of tokens. real implementations follow and differ in the format 146 of the input text only""" 147 148 def __init__(self): 149 """initialize a new CppTokenizer object""" 150 self.eof = False # end of file reached ? 151 self.text = None # content of current line, with final \n stripped 152 self.line = 0 # number of current line 153 self.pos = 0 # current character position in current line 154 self.len = 0 # length of current line text 155 self.held = Token() 156 157 def setLineText(self,line): 158 """set the content of the (next) current line. should be called 159 by fillLineText() in derived classes""" 160 self.text = line 161 self.len = len(line) 162 self.pos = 0 163 164 def fillLineText(self): 165 """refresh the content of 'line' with a new line of input""" 166 # to be overriden 167 self.eof = True 168 169 def markPos(self,tok): 170 """mark the position of the current token in the source file""" 171 if self.eof or self.pos > self.len: 172 tok.lineno = self.line + 1 173 tok.colno = 0 174 else: 175 tok.lineno = self.line 176 tok.colno = self.pos 177 178 def peekChar(self): 179 """return the current token under the cursor without moving it""" 180 if self.eof: 181 return tokEOF 182 183 if self.pos > self.len: 184 self.pos = 0 185 self.line += 1 186 self.fillLineText() 187 if self.eof: 188 return tokEOF 189 190 if self.pos == self.len: 191 return tokLN 192 else: 193 return self.text[self.pos] 194 195 def peekNChar(self,n): 196 """try to peek the next n chars on the same line""" 197 if self.pos + n > self.len: 198 return None 199 return self.text[self.pos:self.pos+n] 200 201 def skipChar(self): 202 """increment the token cursor position""" 203 if not self.eof: 204 self.pos += 1 205 206 def skipNChars(self,n): 207 if self.pos + n <= self.len: 208 self.pos += n 209 else: 210 while n > 0: 211 self.skipChar() 212 n -= 1 213 214 def nextChar(self): 215 """retrieve the token at the current cursor position, then skip it""" 216 result = self.peekChar() 217 self.skipChar() 218 return result 219 220 def getEscape(self): 221 # try to get all characters after a backslash (\) 222 result = self.nextChar() 223 if result == "0": 224 # octal number ? 225 num = self.peekNChar(3) 226 if num != None: 227 isOctal = True 228 for d in num: 229 if not d in "01234567": 230 isOctal = False 231 break 232 if isOctal: 233 result += num 234 self.skipNChars(3) 235 elif result == "x" or result == "X": 236 # hex number ? 237 num = self.peekNChar(2) 238 if num != None: 239 isHex = True 240 for d in num: 241 if not d in "012345678abcdefABCDEF": 242 isHex = False 243 break 244 if isHex: 245 result += num 246 self.skipNChars(2) 247 elif result == "u" or result == "U": 248 # unicode char ? 249 num = self.peekNChar(4) 250 if num != None: 251 isHex = True 252 for d in num: 253 if not d in "012345678abcdefABCDEF": 254 isHex = False 255 break 256 if isHex: 257 result += num 258 self.skipNChars(4) 259 260 return result 261 262 def nextRealToken(self,tok): 263 """return next CPP token, used internally by nextToken()""" 264 c = self.nextChar() 265 if c == tokEOF or c == tokLN: 266 return tok.set(c) 267 268 if c == '/': 269 c = self.peekChar() 270 if c == '/': # C++ comment line 271 self.skipChar() 272 while 1: 273 c = self.nextChar() 274 if c == tokEOF or c == tokLN: 275 break 276 return tok.set(tokLN) 277 if c == '*': # C comment start 278 self.skipChar() 279 value = "/*" 280 prev_c = None 281 while 1: 282 c = self.nextChar() 283 if c == tokEOF: 284 return tok.set(tokEOF,value) 285 if c == '/' and prev_c == '*': 286 break 287 prev_c = c 288 value += c 289 290 value += "/" 291 return tok.set(tokSPACE,value) 292 c = '/' 293 294 if c.isspace(): 295 while 1: 296 c2 = self.peekChar() 297 if c2 == tokLN or not c2.isspace(): 298 break 299 c += c2 300 self.skipChar() 301 return tok.set(tokSPACE,c) 302 303 if c == '\\': 304 if debugTokens: 305 print "nextRealToken: \\ found, next token is '%s'" % repr(self.peekChar()) 306 if self.peekChar() == tokLN: # trailing \ 307 # eat the tokLN 308 self.skipChar() 309 # we replace a trailing \ by a tokSPACE whose value is 310 # simply "\\". this allows us to detect them later when 311 # needed. 312 return tok.set(tokSPACE,"\\") 313 else: 314 # treat as a single token here ? 315 c +=self.getEscape() 316 return tok.set(c) 317 318 if c == "'": # chars 319 c2 = self.nextChar() 320 c += c2 321 if c2 == '\\': 322 c += self.getEscape() 323 324 while 1: 325 c2 = self.nextChar() 326 if c2 == tokEOF: 327 break 328 c += c2 329 if c2 == "'": 330 break 331 332 return tok.set(tokSTRING, c) 333 334 if c == '"': # strings 335 quote = 0 336 while 1: 337 c2 = self.nextChar() 338 if c2 == tokEOF: 339 return tok.set(tokSTRING,c) 340 341 c += c2 342 if not quote: 343 if c2 == '"': 344 return tok.set(tokSTRING,c) 345 if c2 == "\\": 346 quote = 1 347 else: 348 quote = 0 349 350 if c >= "0" and c <= "9": # integers ? 351 while 1: 352 c2 = self.peekChar() 353 if c2 == tokLN or (not c2.isalnum() and c2 != "_"): 354 break 355 c += c2 356 self.skipChar() 357 return tok.set(tokNUMBER,c) 358 359 if c.isalnum() or c == "_": # identifiers ? 360 while 1: 361 c2 = self.peekChar() 362 if c2 == tokLN or (not c2.isalnum() and c2 != "_"): 363 break 364 c += c2 365 self.skipChar() 366 if c == tokDEFINED: 367 return tok.set(tokDEFINED) 368 else: 369 return tok.set(tokIDENT,c) 370 371 # check special symbols 372 for sk in cppLongSymbols: 373 if c == sk[0]: 374 sklen = len(sk[1:]) 375 if self.pos + sklen <= self.len and \ 376 self.text[self.pos:self.pos+sklen] == sk[1:]: 377 self.pos += sklen 378 return tok.set(sk) 379 380 return tok.set(c) 381 382 def nextToken(self,tok): 383 """return the next token from the input text. this function 384 really updates 'tok', and does not return a new one""" 385 self.markPos(tok) 386 self.nextRealToken(tok) 387 388 def getToken(self): 389 tok = Token() 390 self.nextToken(tok) 391 if debugTokens: 392 print "getTokens: %s" % repr(tok) 393 return tok 394 395 def toTokenList(self): 396 """convert the input text of a CppTokenizer into a direct 397 list of token objects. tokEOF is stripped from the result""" 398 result = [] 399 while 1: 400 tok = Token() 401 self.nextToken(tok) 402 if tok.id == tokEOF: 403 break 404 result.append(tok) 405 return result 406 407class CppLineTokenizer(CppTokenizer): 408 """a CppTokenizer derived class that accepts a single line of text as input""" 409 def __init__(self,line,lineno=1): 410 CppTokenizer.__init__(self) 411 self.line = lineno 412 self.setLineText(line) 413 414 415class CppLinesTokenizer(CppTokenizer): 416 """a CppTokenizer derived class that accepts a list of texdt lines as input. 417 the lines must not have a trailing \n""" 418 def __init__(self,lines=[],lineno=1): 419 """initialize a CppLinesTokenizer. you can later add lines using addLines()""" 420 CppTokenizer.__init__(self) 421 self.line = lineno 422 self.lines = lines 423 self.index = 0 424 self.count = len(lines) 425 426 if self.count > 0: 427 self.fillLineText() 428 else: 429 self.eof = True 430 431 def addLine(self,line): 432 """add a line to a CppLinesTokenizer. this can be done after tokenization 433 happens""" 434 if self.count == 0: 435 self.setLineText(line) 436 self.index = 1 437 self.lines.append(line) 438 self.count += 1 439 self.eof = False 440 441 def fillLineText(self): 442 if self.index < self.count: 443 self.setLineText(self.lines[self.index]) 444 self.index += 1 445 else: 446 self.eof = True 447 448 449class CppFileTokenizer(CppTokenizer): 450 def __init__(self,file,lineno=1): 451 CppTokenizer.__init__(self) 452 self.file = file 453 self.line = lineno 454 455 def fillLineText(self): 456 line = self.file.readline() 457 if len(line) > 0: 458 if line[-1] == '\n': 459 line = line[:-1] 460 if len(line) > 0 and line[-1] == "\r": 461 line = line[:-1] 462 self.setLineText(line) 463 else: 464 self.eof = True 465 466# Unit testing 467# 468class CppTokenizerTester: 469 """a class used to test CppTokenizer classes""" 470 def __init__(self,tokenizer=None): 471 self.tokenizer = tokenizer 472 self.token = Token() 473 474 def setTokenizer(self,tokenizer): 475 self.tokenizer = tokenizer 476 477 def expect(self,id): 478 self.tokenizer.nextToken(self.token) 479 tokid = self.token.id 480 if tokid == id: 481 return 482 if self.token.value == id and (tokid == tokIDENT or tokid == tokNUMBER): 483 return 484 raise BadExpectedToken, "### BAD TOKEN: '%s' expecting '%s'" % (self.token.id,id) 485 486 def expectToken(self,id,line,col): 487 self.expect(id) 488 if self.token.lineno != line: 489 raise BadExpectedToken, "### BAD LINENO: token '%s' got '%d' expecting '%d'" % (id,self.token.lineno,line) 490 if self.token.colno != col: 491 raise BadExpectedToken, "### BAD COLNO: '%d' expecting '%d'" % (self.token.colno,col) 492 493 def expectTokenVal(self,id,value,line,col): 494 self.expectToken(id,line,col) 495 if self.token.value != value: 496 raise BadExpectedToken, "### BAD VALUE: '%s' expecting '%s'" % (self.token.value,value) 497 498 def expectList(self,list): 499 for item in list: 500 self.expect(item) 501 502def test_CppTokenizer(): 503 tester = CppTokenizerTester() 504 505 tester.setTokenizer( CppLineTokenizer("#an/example && (01923_xy)") ) 506 tester.expectList( ["#", "an", "/", "example", tokSPACE, tokLOGICAND, tokSPACE, tokLPAREN, "01923_xy", \ 507 tokRPAREN, tokLN, tokEOF] ) 508 509 tester.setTokenizer( CppLineTokenizer("FOO(BAR) && defined(BAZ)") ) 510 tester.expectList( ["FOO", tokLPAREN, "BAR", tokRPAREN, tokSPACE, tokLOGICAND, tokSPACE, 511 tokDEFINED, tokLPAREN, "BAZ", tokRPAREN, tokLN, tokEOF] ) 512 513 tester.setTokenizer( CppLinesTokenizer( ["/*", "#", "*/"] ) ) 514 tester.expectList( [ tokSPACE, tokLN, tokEOF ] ) 515 516 tester.setTokenizer( CppLinesTokenizer( ["first", "second"] ) ) 517 tester.expectList( [ "first", tokLN, "second", tokLN, tokEOF ] ) 518 519 tester.setTokenizer( CppLinesTokenizer( ["first second", " third"] ) ) 520 tester.expectToken( "first", 1, 0 ) 521 tester.expectToken( tokSPACE, 1, 5 ) 522 tester.expectToken( "second", 1, 6 ) 523 tester.expectToken( tokLN, 1, 12 ) 524 tester.expectToken( tokSPACE, 2, 0 ) 525 tester.expectToken( "third", 2, 2 ) 526 527 tester.setTokenizer( CppLinesTokenizer( [ "boo /* what the", "hell */" ] ) ) 528 tester.expectList( [ "boo", tokSPACE ] ) 529 tester.expectTokenVal( tokSPACE, "/* what the\nhell */", 1, 4 ) 530 tester.expectList( [ tokLN, tokEOF ] ) 531 532 tester.setTokenizer( CppLinesTokenizer( [ "an \\", " example" ] ) ) 533 tester.expectToken( "an", 1, 0 ) 534 tester.expectToken( tokSPACE, 1, 2 ) 535 tester.expectTokenVal( tokSPACE, "\\", 1, 3 ) 536 tester.expectToken( tokSPACE, 2, 0 ) 537 tester.expectToken( "example", 2, 1 ) 538 tester.expectToken( tokLN, 2, 8 ) 539 540 return True 541 542 543##################################################################################### 544##################################################################################### 545##### ##### 546##### C P P E X P R E S S I O N S ##### 547##### ##### 548##################################################################################### 549##################################################################################### 550 551class CppExpr: 552 """a class that models the condition of #if directives into 553 an expression tree. each node in the tree is of the form (op,arg) or (op,arg1,arg2) 554 where "op" is a string describing the operation""" 555 556 unaries = [ "!", "~" ] 557 binaries = [ "+", "-", "<", "<=", ">=", ">", "&&", "||", "*", "/", "%", "&", "|", "^", "<<", ">>", "==", "!=", "?", ":" ] 558 precedences = { 559 "?": 1, ":": 1, 560 "||": 2, 561 "&&": 3, 562 "|": 4, 563 "^": 5, 564 "&": 6, 565 "==": 7, "!=": 7, 566 "<": 8, "<=": 8, ">": 8, ">=": 8, 567 "<<": 9, ">>": 9, 568 "+": 10, "-": 10, 569 "*": 11, "/": 11, "%": 11, 570 "!": 12, "~": 12 571 } 572 573 re_cpp_constant = re.compile(r"((\d|\w|_)+)") 574 575 def __init__(self, tokens): 576 """initialize a CppExpr. 'tokens' must be a CppToken list""" 577 self.tok = tokens 578 self.n = len(tokens) 579 self.i = 0 580 if debugCppExpr: 581 print "CppExpr: trying to parse %s" % repr(tokens) 582 self.expr = self.parseExpression(0) 583 if debugCppExpr: 584 print "CppExpr: got " + repr(self.expr) 585 if self.i != self.n: 586 print 'crap at end of input (%d != %d): %s' % (self.i, self.n, repr(tokens)) 587 raise 588 589 590 def throw(self, exception, msg): 591 if self.i < self.n: 592 tok = self.tok[self.i] 593 print "%d:%d: %s" % (tok.lineno,tok.colno,msg) 594 else: 595 print "EOF: %s" % msg 596 raise exception(msg) 597 598 599 def skip_spaces(self): 600 """skip spaces in input token list""" 601 while self.i < self.n: 602 t = self.tok[self.i] 603 if t.id != tokSPACE and t.id != tokLN: 604 break 605 self.i += 1 606 607 608 def expectId(self, id): 609 """check that a given token id is at the current position, then skip over it""" 610 self.skip_spaces() 611 if self.i >= self.n or self.tok[self.i].id != id: 612 self.throw(BadExpectedToken,self.i,"### expecting '%s' in expression, got '%s'" % (id, self.tok[self.i].id)) 613 self.i += 1 614 615 616 def expectIdent(self): 617 self.skip_spaces() 618 if self.i >= self.n or self.tok[self.i].id != tokIDENT: 619 self.throw(BadExpectedToken, self.i,"### expecting identifier in expression, got '%s'" % (id, self.tok[self.i].id)) 620 self.i += 1 621 622 623 def is_decimal(self): 624 v = self.tok[self.i].value[:] 625 while len(v) > 0 and v[-1] in "ULul": 626 v = v[:-1] 627 for digit in v: 628 if not digit.isdigit(): 629 return None 630 631 self.i += 1 632 return ("int", string.atoi(v)) 633 634 635 def is_hexadecimal(self): 636 v = self.tok[self.i].value[:] 637 while len(v) > 0 and v[-1] in "ULul": 638 v = v[:-1] 639 if len(v) > 2 and (v[0:2] == "0x" or v[0:2] == "0X"): 640 for digit in v[2:]: 641 if not digit in "0123456789abcdefABCDEF": 642 return None 643 644 # for a hex expression tuple, the argument 645 # is the value as an integer 646 self.i += 1 647 return ("hex", int(v[2:], 16)) 648 649 return None 650 651 652 def is_integer(self): 653 if self.tok[self.i].id != tokNUMBER: 654 return None 655 656 c = self.is_decimal() 657 if c: return c 658 659 c = self.is_hexadecimal() 660 if c: return c 661 662 return None 663 664 665 def is_number(self): 666 t = self.tok[self.i] 667 if t.id == tokMINUS and self.i+1 < self.n: 668 self.i += 1 669 c = self.is_integer() 670 if c: 671 op, val = c 672 return (op, -val) 673 if t.id == tokPLUS and self.i+1 < self.n: 674 c = self.is_integer() 675 if c: return c 676 677 return self.is_integer() 678 679 680 def is_defined(self): 681 t = self.tok[self.i] 682 if t.id != tokDEFINED: 683 return None 684 685 # we have the defined keyword, check the rest 686 self.i += 1 687 self.skip_spaces() 688 used_parens = 0 689 if self.i < self.n and self.tok[self.i].id == tokLPAREN: 690 used_parens = 1 691 self.i += 1 692 self.skip_spaces() 693 694 if self.i >= self.n: 695 self.throw(CppConstantExpected,i,"### 'defined' must be followed by macro name or left paren") 696 697 t = self.tok[self.i] 698 if t.id != tokIDENT: 699 self.throw(CppConstantExpected,i,"### 'defined' must be followed by macro name") 700 701 self.i += 1 702 if used_parens: 703 self.expectId(tokRPAREN) 704 705 return ("defined", t.value) 706 707 708 def is_call_or_ident(self): 709 self.skip_spaces() 710 if self.i >= self.n: 711 return None 712 713 t = self.tok[self.i] 714 if t.id != tokIDENT: 715 return None 716 717 name = t.value 718 719 self.i += 1 720 self.skip_spaces() 721 if self.i >= self.n or self.tok[self.i].id != tokLPAREN: 722 return ("ident", name) 723 724 params = [] 725 depth = 1 726 self.i += 1 727 j = self.i 728 while self.i < self.n: 729 id = self.tok[self.i].id 730 if id == tokLPAREN: 731 depth += 1 732 elif depth == 1 and (id == tokCOMMA or id == tokRPAREN): 733 while j < self.i and self.tok[j].id == tokSPACE: 734 j += 1 735 k = self.i 736 while k > j and self.tok[k-1].id == tokSPACE: 737 k -= 1 738 param = self.tok[j:k] 739 params.append(param) 740 if id == tokRPAREN: 741 break 742 j = self.i+1 743 elif id == tokRPAREN: 744 depth -= 1 745 self.i += 1 746 747 if self.i >= self.n: 748 return None 749 750 self.i += 1 751 return ("call", (name, params)) 752 753 754 # Implements the "precedence climbing" algorithm from http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm. 755 # The "classic" algorithm would be fine if we were using a tool to generate the parser, but we're not. 756 # Dijkstra's "shunting yard" algorithm hasn't been necessary yet. 757 def parseExpression(self, minPrecedence): 758 self.skip_spaces() 759 if self.i >= self.n: 760 return None 761 762 node = self.parsePrimary() 763 while self.token() != None and self.isBinary(self.token()) and self.precedence(self.token()) >= minPrecedence: 764 op = self.token() 765 self.nextToken() 766 rhs = self.parseExpression(self.precedence(op) + 1) 767 node = (op.id, node, rhs) 768 769 return node 770 771 772 def parsePrimary(self): 773 op = self.token() 774 if self.isUnary(op): 775 self.nextToken() 776 return (op.id, self.parseExpression(self.precedence(op))) 777 778 primary = None 779 if op.id == tokLPAREN: 780 self.nextToken() 781 primary = self.parseExpression(0) 782 self.expectId(tokRPAREN) 783 elif op.id == "?": 784 self.nextToken() 785 primary = self.parseExpression(0) 786 self.expectId(":") 787 elif op.id == tokNUMBER: 788 primary = self.is_number() 789 elif op.id == tokIDENT: 790 primary = self.is_call_or_ident() 791 elif op.id == tokDEFINED: 792 primary = self.is_defined() 793 else: 794 self.throw(BadExpectedToken, "didn't expect to see a %s in factor" % (self.tok[self.i].id)) 795 796 self.skip_spaces() 797 798 return primary; 799 800 801 def isBinary(self, token): 802 return token.id in self.binaries 803 804 805 def isUnary(self, token): 806 return token.id in self.unaries 807 808 809 def precedence(self, token): 810 return self.precedences.get(token.id) 811 812 813 def token(self): 814 if self.i >= self.n: 815 return None 816 return self.tok[self.i] 817 818 819 def nextToken(self): 820 self.i += 1 821 self.skip_spaces() 822 if self.i >= self.n: 823 return None 824 return self.tok[self.i] 825 826 827 def dump_node(self, e): 828 op = e[0] 829 line = "(" + op 830 if op == "int": 831 line += " %d)" % e[1] 832 elif op == "hex": 833 line += " 0x%x)" % e[1] 834 elif op == "ident": 835 line += " %s)" % e[1] 836 elif op == "defined": 837 line += " %s)" % e[1] 838 elif op == "call": 839 arg = e[1] 840 line += " %s [" % arg[0] 841 prefix = "" 842 for param in arg[1]: 843 par = "" 844 for tok in param: 845 par += str(tok) 846 line += "%s%s" % (prefix, par) 847 prefix = "," 848 line += "])" 849 elif op in CppExpr.unaries: 850 line += " %s)" % self.dump_node(e[1]) 851 elif op in CppExpr.binaries: 852 line += " %s %s)" % (self.dump_node(e[1]), self.dump_node(e[2])) 853 else: 854 line += " ?%s)" % repr(e[1]) 855 856 return line 857 858 def __repr__(self): 859 return self.dump_node(self.expr) 860 861 def source_node(self, e): 862 op = e[0] 863 if op == "int": 864 return "%d" % e[1] 865 if op == "hex": 866 return "0x%x" % e[1] 867 if op == "ident": 868 # XXX: should try to expand 869 return e[1] 870 if op == "defined": 871 return "defined(%s)" % e[1] 872 873 prec = CppExpr.precedences.get(op,1000) 874 arg = e[1] 875 if op in CppExpr.unaries: 876 arg_src = self.source_node(arg) 877 arg_op = arg[0] 878 arg_prec = CppExpr.precedences.get(arg[0],1000) 879 if arg_prec < prec: 880 return "!(" + arg_src + ")" 881 else: 882 return "!" + arg_src 883 if op in CppExpr.binaries: 884 arg2 = e[2] 885 arg1_op = arg[0] 886 arg2_op = arg2[0] 887 arg1_src = self.source_node(arg) 888 arg2_src = self.source_node(arg2) 889 if CppExpr.precedences.get(arg1_op,1000) < prec: 890 arg1_src = "(%s)" % arg1_src 891 if CppExpr.precedences.get(arg2_op,1000) < prec: 892 arg2_src = "(%s)" % arg2_src 893 894 return "%s %s %s" % (arg1_src, op, arg2_src) 895 return "???" 896 897 def __str__(self): 898 return self.source_node(self.expr) 899 900 def int_node(self,e): 901 if e[0] == "int": 902 return e[1] 903 elif e[1] == "hex": 904 return int(e[1],16) 905 else: 906 return None 907 908 def toInt(self): 909 return self.int_node(self.expr) 910 911 def optimize_node(self, e, macros={}): 912 op = e[0] 913 if op == "defined": 914 op, name = e 915 if macros.has_key(name): 916 if macros[name] == kCppUndefinedMacro: 917 return ("int", 0) 918 else: 919 try: 920 value = int(macros[name]) 921 return ("int", value) 922 except: 923 return ("defined", macros[name]) 924 925 if kernel_remove_config_macros and name.startswith("CONFIG_"): 926 return ("int", 0) 927 928 return e 929 930 elif op == "ident": 931 op, name = e 932 if macros.has_key(name): 933 try: 934 value = int(macros[name]) 935 expanded = ("int", value) 936 except: 937 expanded = ("ident", macros[name]) 938 return self.optimize_node(expanded, macros) 939 return e 940 941 elif op == "!": 942 op, v = e 943 v = self.optimize_node(v, macros) 944 if v[0] == "int": 945 if v[1] == 0: 946 return ("int", 1) 947 else: 948 return ("int", 0) 949 return ('!', v) 950 951 elif op == "&&": 952 op, l, r = e 953 l = self.optimize_node(l, macros) 954 r = self.optimize_node(r, macros) 955 li = self.int_node(l) 956 ri = self.int_node(r) 957 if li != None: 958 if li == 0: 959 return ("int", 0) 960 else: 961 return r 962 elif ri != None: 963 if ri == 0: 964 return ("int", 0) 965 else: 966 return l 967 return (op, l, r) 968 969 elif op == "||": 970 op, l, r = e 971 l = self.optimize_node(l, macros) 972 r = self.optimize_node(r, macros) 973 li = self.int_node(l) 974 ri = self.int_node(r) 975 if li != None: 976 if li == 0: 977 return r 978 else: 979 return ("int", 1) 980 elif ri != None: 981 if ri == 0: 982 return l 983 else: 984 return ("int", 1) 985 return (op, l, r) 986 987 else: 988 return e 989 990 def optimize(self,macros={}): 991 self.expr = self.optimize_node(self.expr, macros) 992 993 def is_equal_node(self,e1,e2): 994 if e1[0] != e2[0] or len(e1) != len(e2): 995 return False 996 997 op = e1[0] 998 if op == "int" or op == "hex" or op == "!" or op == "defined": 999 return e1[0] == e2[0] 1000 1001 return self.is_equal_node(e1[1],e2[1]) and self.is_equal_node(e1[2],e2[2]) 1002 1003 def is_equal(self,other): 1004 return self.is_equal_node(self.expr,other.expr) 1005 1006def test_cpp_expr(expr, expected): 1007 e = CppExpr( CppLineTokenizer( expr ).toTokenList() ) 1008 s1 = repr(e) 1009 if s1 != expected: 1010 print "[FAIL]: expression '%s' generates '%s', should be '%s'" % (expr, s1, expected) 1011 global failure_count 1012 failure_count += 1 1013 1014def test_cpp_expr_optim(expr, expected, macros={}): 1015 e = CppExpr( CppLineTokenizer( expr ).toTokenList() ) 1016 e.optimize(macros) 1017 s1 = repr(e) 1018 if s1 != expected: 1019 print "[FAIL]: optimized expression '%s' generates '%s' with macros %s, should be '%s'" % (expr, s1, macros, expected) 1020 global failure_count 1021 failure_count += 1 1022 1023def test_cpp_expr_source(expr, expected): 1024 e = CppExpr( CppLineTokenizer( expr ).toTokenList() ) 1025 s1 = str(e) 1026 if s1 != expected: 1027 print "[FAIL]: source expression '%s' generates '%s', should be '%s'" % (expr, s1, expected) 1028 global failure_count 1029 failure_count += 1 1030 1031def test_CppExpr(): 1032 test_cpp_expr("0", "(int 0)") 1033 test_cpp_expr("1", "(int 1)") 1034 test_cpp_expr("(0)", "(int 0)") 1035 test_cpp_expr("1 && 1", "(&& (int 1) (int 1))") 1036 test_cpp_expr("1 && 0", "(&& (int 1) (int 0))") 1037 test_cpp_expr("EXAMPLE", "(ident EXAMPLE)") 1038 test_cpp_expr("EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))") 1039 test_cpp_expr("defined(EXAMPLE)", "(defined EXAMPLE)") 1040 test_cpp_expr("defined ( EXAMPLE ) ", "(defined EXAMPLE)") 1041 test_cpp_expr("!defined(EXAMPLE)", "(! (defined EXAMPLE))") 1042 test_cpp_expr("defined(ABC) || defined(BINGO)", "(|| (defined ABC) (defined BINGO))") 1043 test_cpp_expr("FOO(BAR)", "(call FOO [BAR])") 1044 test_cpp_expr("A == 1 || defined(B)", "(|| (== (ident A) (int 1)) (defined B))") 1045 1046 test_cpp_expr_optim("0", "(int 0)") 1047 test_cpp_expr_optim("1", "(int 1)") 1048 test_cpp_expr_optim("1 && 1", "(int 1)") 1049 test_cpp_expr_optim("1 && 0", "(int 0)") 1050 test_cpp_expr_optim("0 && 1", "(int 0)") 1051 test_cpp_expr_optim("0 && 0", "(int 0)") 1052 test_cpp_expr_optim("1 || 1", "(int 1)") 1053 test_cpp_expr_optim("1 || 0", "(int 1)") 1054 test_cpp_expr_optim("0 || 1", "(int 1)") 1055 test_cpp_expr_optim("0 || 0", "(int 0)") 1056 test_cpp_expr_optim("A", "(ident A)") 1057 test_cpp_expr_optim("A", "(int 1)", { "A": 1 }) 1058 test_cpp_expr_optim("A || B", "(int 1)", { "A": 1 }) 1059 test_cpp_expr_optim("A || B", "(int 1)", { "B": 1 }) 1060 test_cpp_expr_optim("A && B", "(ident B)", { "A": 1 }) 1061 test_cpp_expr_optim("A && B", "(ident A)", { "B": 1 }) 1062 test_cpp_expr_optim("A && B", "(&& (ident A) (ident B))") 1063 test_cpp_expr_optim("EXAMPLE", "(ident EXAMPLE)") 1064 test_cpp_expr_optim("EXAMPLE - 3", "(- (ident EXAMPLE) (int 3))") 1065 test_cpp_expr_optim("defined(EXAMPLE)", "(defined EXAMPLE)") 1066 test_cpp_expr_optim("defined(EXAMPLE)", "(defined XOWOE)", { "EXAMPLE": "XOWOE" }) 1067 test_cpp_expr_optim("defined(EXAMPLE)", "(int 0)", { "EXAMPLE": kCppUndefinedMacro}) 1068 test_cpp_expr_optim("!defined(EXAMPLE)", "(! (defined EXAMPLE))") 1069 test_cpp_expr_optim("!defined(EXAMPLE)", "(! (defined XOWOE))", { "EXAMPLE" : "XOWOE" }) 1070 test_cpp_expr_optim("!defined(EXAMPLE)", "(int 1)", { "EXAMPLE" : kCppUndefinedMacro }) 1071 test_cpp_expr_optim("defined(A) || defined(B)", "(|| (defined A) (defined B))") 1072 test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", { "A" : "1" }) 1073 test_cpp_expr_optim("defined(A) || defined(B)", "(int 1)", { "B" : "1" }) 1074 test_cpp_expr_optim("defined(A) || defined(B)", "(defined A)", { "B" : kCppUndefinedMacro }) 1075 test_cpp_expr_optim("defined(A) || defined(B)", "(int 0)", { "A" : kCppUndefinedMacro, "B" : kCppUndefinedMacro }) 1076 test_cpp_expr_optim("defined(A) && defined(B)", "(&& (defined A) (defined B))") 1077 test_cpp_expr_optim("defined(A) && defined(B)", "(defined B)", { "A" : "1" }) 1078 test_cpp_expr_optim("defined(A) && defined(B)", "(defined A)", { "B" : "1" }) 1079 test_cpp_expr_optim("defined(A) && defined(B)", "(int 0)", { "B" : kCppUndefinedMacro }) 1080 test_cpp_expr_optim("defined(A) && defined(B)", "(int 0)", { "A" : kCppUndefinedMacro }) 1081 test_cpp_expr_optim("A == 1 || defined(B)", "(|| (== (ident A) (int 1)) (defined B))" ) 1082 test_cpp_expr_optim("defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)", "(|| (! (defined __GLIBC__)) (< (ident __GLIBC__) (int 2)))", { "__KERNEL__": kCppUndefinedMacro }) 1083 1084 test_cpp_expr_source("0", "0") 1085 test_cpp_expr_source("1", "1") 1086 test_cpp_expr_source("1 && 1", "1 && 1") 1087 test_cpp_expr_source("1 && 0", "1 && 0") 1088 test_cpp_expr_source("0 && 1", "0 && 1") 1089 test_cpp_expr_source("0 && 0", "0 && 0") 1090 test_cpp_expr_source("1 || 1", "1 || 1") 1091 test_cpp_expr_source("1 || 0", "1 || 0") 1092 test_cpp_expr_source("0 || 1", "0 || 1") 1093 test_cpp_expr_source("0 || 0", "0 || 0") 1094 test_cpp_expr_source("EXAMPLE", "EXAMPLE") 1095 test_cpp_expr_source("EXAMPLE - 3", "EXAMPLE - 3") 1096 test_cpp_expr_source("defined(EXAMPLE)", "defined(EXAMPLE)") 1097 test_cpp_expr_source("defined EXAMPLE", "defined(EXAMPLE)") 1098 test_cpp_expr_source("A == 1 || defined(B)", "A == 1 || defined(B)") 1099 1100 1101##################################################################################### 1102##################################################################################### 1103##### ##### 1104##### C P P B L O C K ##### 1105##### ##### 1106##################################################################################### 1107##################################################################################### 1108 1109class Block: 1110 """a class used to model a block of input source text. there are two block types: 1111 - directive blocks: contain the tokens of a single pre-processor directive (e.g. #if) 1112 - text blocks, contain the tokens of non-directive blocks 1113 1114 the cpp parser class below will transform an input source file into a list of Block 1115 objects (grouped in a BlockList object for convenience)""" 1116 1117 def __init__(self,tokens,directive=None,lineno=0): 1118 """initialize a new block, if 'directive' is None, this is a text block 1119 NOTE: this automatically converts '#ifdef MACRO' into '#if defined(MACRO)' 1120 and '#ifndef MACRO' into '#if !defined(MACRO)'""" 1121 if directive == "ifdef": 1122 tok = Token() 1123 tok.set(tokDEFINED) 1124 tokens = [ tok ] + tokens 1125 directive = "if" 1126 1127 elif directive == "ifndef": 1128 tok1 = Token() 1129 tok2 = Token() 1130 tok1.set(tokNOT) 1131 tok2.set(tokDEFINED) 1132 tokens = [ tok1, tok2 ] + tokens 1133 directive = "if" 1134 1135 self.tokens = tokens 1136 self.directive = directive 1137 if lineno > 0: 1138 self.lineno = lineno 1139 else: 1140 self.lineno = self.tokens[0].lineno 1141 1142 if self.isIf(): 1143 self.expr = CppExpr( self.tokens ) 1144 1145 def isDirective(self): 1146 """returns True iff this is a directive block""" 1147 return self.directive != None 1148 1149 def isConditional(self): 1150 """returns True iff this is a conditional directive block""" 1151 return self.directive in ["if","ifdef","ifndef","else","elif","endif"] 1152 1153 def isDefine(self): 1154 """returns the macro name in a #define directive, or None otherwise""" 1155 if self.directive != "define": 1156 return None 1157 1158 return self.tokens[0].value 1159 1160 def isIf(self): 1161 """returns True iff this is an #if-like directive block""" 1162 return self.directive in ["if","ifdef","ifndef","elif"] 1163 1164 def isInclude(self): 1165 """checks whether this is a #include directive. if true, then returns the 1166 corresponding file name (with brackets or double-qoutes). None otherwise""" 1167 if self.directive != "include": 1168 return None 1169 1170 if self.tokens[0].id == tokSTRING: 1171 # a double-quote include, that's easy 1172 return self.tokens[0].value 1173 1174 # we only want the bracket part, not any comments or junk after it 1175 if self.tokens[0].id == "<": 1176 i = 0 1177 tok = self.tokens 1178 n = len(tok) 1179 while i < n and tok[i].id != ">": 1180 i += 1 1181 1182 if i >= n: 1183 return None 1184 1185 return string.join([ str(x) for x in tok[:i+1] ],"") 1186 1187 else: 1188 return None 1189 1190 def removeWhiteSpace(self): 1191 # Remove trailing whitespace and empty lines 1192 # All whitespace is also contracted to a single space 1193 if self.directive != None: 1194 return 1195 1196 tokens = [] 1197 line = 0 # index of line start 1198 space = -1 # index of first space, or -1 1199 ii = 0 1200 nn = len(self.tokens) 1201 while ii < nn: 1202 tok = self.tokens[ii] 1203 1204 # If we find a space, record its position if this is the first 1205 # one the line start or the previous character. Don't append 1206 # anything to tokens array yet though. 1207 if tok.id == tokSPACE: 1208 if space < 0: 1209 space = ii 1210 ii += 1 1211 continue 1212 1213 # If this is a line space, ignore the spaces we found previously 1214 # on the line, and remove empty lines. 1215 if tok.id == tokLN: 1216 old_line = line 1217 old_space = space 1218 ii += 1 1219 line = ii 1220 space = -1 1221 if old_space == old_line: # line only contains spaces 1222 continue 1223 if ii-1 == old_line: # line is empty 1224 continue 1225 tokens.append(tok) 1226 continue 1227 1228 # Other token, append any space range if any, converting each 1229 # one to a single space character, then append the token. 1230 if space >= 0: 1231 jj = space 1232 space = -1 1233 while jj < ii: 1234 tok2 = self.tokens[jj] 1235 tok2.value = " " 1236 tokens.append(tok2) 1237 jj += 1 1238 1239 tokens.append(tok) 1240 ii += 1 1241 1242 self.tokens = tokens 1243 1244 def writeWithWarning(self,out,warning,left_count,repeat_count): 1245 # removeWhiteSpace() will sometimes creates non-directive blocks 1246 # without any tokens. These come from blocks that only contained 1247 # empty lines and spaces. They should not be printed in the final 1248 # output, and then should not be counted for this operation. 1249 # 1250 if not self.directive and self.tokens == []: 1251 return left_count 1252 1253 if self.directive: 1254 out.write(str(self).rstrip() + "\n") 1255 left_count -= 1 1256 if left_count == 0: 1257 out.write(warning) 1258 left_count = repeat_count 1259 1260 else: 1261 for tok in self.tokens: 1262 out.write(str(tok)) 1263 if tok.id == tokLN: 1264 left_count -= 1 1265 if left_count == 0: 1266 out.write(warning) 1267 left_count = repeat_count 1268 1269 return left_count 1270 1271 1272 def __repr__(self): 1273 """generate the representation of a given block""" 1274 if self.directive: 1275 result = "#%s " % self.directive 1276 if self.isIf(): 1277 result += repr(self.expr) 1278 else: 1279 for tok in self.tokens: 1280 result += repr(tok) 1281 else: 1282 result = "" 1283 for tok in self.tokens: 1284 result += repr(tok) 1285 1286 return result 1287 1288 def __str__(self): 1289 """generate the string representation of a given block""" 1290 if self.directive: 1291 if self.directive == "if": 1292 # small optimization to re-generate #ifdef and #ifndef 1293 e = self.expr.expr 1294 op = e[0] 1295 if op == "defined": 1296 result = "#ifdef %s" % e[1] 1297 elif op == "!" and e[1][0] == "defined": 1298 result = "#ifndef %s" % e[1][1] 1299 else: 1300 result = "#if " + str(self.expr) 1301 else: 1302 result = "#%s" % self.directive 1303 if len(self.tokens): 1304 result += " " 1305 for tok in self.tokens: 1306 result += str(tok) 1307 else: 1308 result = "" 1309 for tok in self.tokens: 1310 result += str(tok) 1311 1312 return result 1313 1314class BlockList: 1315 """a convenience class used to hold and process a list of blocks returned by 1316 the cpp parser""" 1317 def __init__(self,blocks): 1318 self.blocks = blocks 1319 1320 def __len__(self): 1321 return len(self.blocks) 1322 1323 def __getitem__(self,n): 1324 return self.blocks[n] 1325 1326 def __repr__(self): 1327 return repr(self.blocks) 1328 1329 def __str__(self): 1330 result = "" 1331 for b in self.blocks: 1332 result += str(b) 1333 if b.isDirective(): 1334 result = result.rstrip() + '\n' 1335 return result 1336 1337 def optimizeIf01(self): 1338 """remove the code between #if 0 .. #endif in a BlockList""" 1339 self.blocks = optimize_if01(self.blocks) 1340 1341 def optimizeMacros(self, macros): 1342 """remove known defined and undefined macros from a BlockList""" 1343 for b in self.blocks: 1344 if b.isIf(): 1345 b.expr.optimize(macros) 1346 1347 def removeMacroDefines(self,macros): 1348 """remove known macro definitions from a BlockList""" 1349 self.blocks = remove_macro_defines(self.blocks,macros) 1350 1351 def removeWhiteSpace(self): 1352 for b in self.blocks: 1353 b.removeWhiteSpace() 1354 1355 def optimizeAll(self,macros): 1356 self.optimizeMacros(macros) 1357 self.optimizeIf01() 1358 return 1359 1360 def findIncludes(self): 1361 """return the list of included files in a BlockList""" 1362 result = [] 1363 for b in self.blocks: 1364 i = b.isInclude() 1365 if i: 1366 result.append(i) 1367 1368 return result 1369 1370 1371 def write(self,out): 1372 out.write(str(self)) 1373 1374 def writeWithWarning(self,out,warning,repeat_count): 1375 left_count = repeat_count 1376 for b in self.blocks: 1377 left_count = b.writeWithWarning(out,warning,left_count,repeat_count) 1378 1379 def removeComments(self): 1380 for b in self.blocks: 1381 for tok in b.tokens: 1382 if tok.id == tokSPACE: 1383 tok.value = " " 1384 1385 def removeVarsAndFuncs(self,knownStatics=set()): 1386 """remove all extern and static declarations corresponding 1387 to variable and function declarations. we only accept typedefs 1388 and enum/structs/union declarations. 1389 1390 however, we keep the definitions corresponding to the set 1391 of known static inline functions in the set 'knownStatics', 1392 which is useful for optimized byteorder swap functions and 1393 stuff like that. 1394 """ 1395 # state = 0 => normal (i.e. LN + spaces) 1396 # state = 1 => typedef/struct encountered, ends with ";" 1397 # state = 2 => var declaration encountered, ends with ";" 1398 # state = 3 => func declaration encountered, ends with "}" 1399 state = 0 1400 depth = 0 1401 blocks2 = [] 1402 skipTokens = False 1403 for b in self.blocks: 1404 if b.isDirective(): 1405 blocks2.append(b) 1406 else: 1407 n = len(b.tokens) 1408 i = 0 1409 if skipTokens: 1410 first = n 1411 else: 1412 first = 0 1413 while i < n: 1414 tok = b.tokens[i] 1415 tokid = tok.id 1416 # If we are not looking for the start of a new 1417 # type/var/func, then skip over tokens until 1418 # we find our terminator, managing the depth of 1419 # accolades as we go. 1420 if state > 0: 1421 terminator = False 1422 if tokid == '{': 1423 depth += 1 1424 elif tokid == '}': 1425 if depth > 0: 1426 depth -= 1 1427 if (depth == 0) and (state == 3): 1428 terminator = True 1429 elif tokid == ';' and depth == 0: 1430 terminator = True 1431 1432 if terminator: 1433 # we found the terminator 1434 state = 0 1435 if skipTokens: 1436 skipTokens = False 1437 first = i+1 1438 1439 i = i+1 1440 continue 1441 1442 # We are looking for the start of a new type/func/var 1443 # ignore whitespace 1444 if tokid in [tokLN, tokSPACE]: 1445 i = i+1 1446 continue 1447 1448 # Is it a new type definition, then start recording it 1449 if tok.value in [ 'struct', 'typedef', 'enum', 'union', '__extension__' ]: 1450 state = 1 1451 i = i+1 1452 continue 1453 1454 # Is it a variable or function definition. If so, first 1455 # try to determine which type it is, and also extract 1456 # its name. 1457 # 1458 # We're going to parse the next tokens of the same block 1459 # until we find a semi-column or a left parenthesis. 1460 # 1461 # The semi-column corresponds to a variable definition, 1462 # the left-parenthesis to a function definition. 1463 # 1464 # We also assume that the var/func name is the last 1465 # identifier before the terminator. 1466 # 1467 j = i+1 1468 ident = "" 1469 while j < n: 1470 tokid = b.tokens[j].id 1471 if tokid == '(': # a function declaration 1472 state = 3 1473 break 1474 elif tokid == ';': # a variable declaration 1475 state = 2 1476 break 1477 if tokid == tokIDENT: 1478 ident = b.tokens[j].value 1479 j += 1 1480 1481 if j >= n: 1482 # This can only happen when the declaration 1483 # does not end on the current block (e.g. with 1484 # a directive mixed inside it. 1485 # 1486 # We will treat it as malformed because 1487 # it's very hard to recover from this case 1488 # without making our parser much more 1489 # complex. 1490 # 1491 #print "### skip unterminated static '%s'" % ident 1492 break 1493 1494 if ident in knownStatics: 1495 #print "### keep var/func '%s': %s" % (ident,repr(b.tokens[i:j])) 1496 pass 1497 else: 1498 # We're going to skip the tokens for this declaration 1499 #print "### skip variable /func'%s': %s" % (ident,repr(b.tokens[i:j])) 1500 if i > first: 1501 blocks2.append( Block(b.tokens[first:i])) 1502 skipTokens = True 1503 first = n 1504 1505 i = i+1 1506 1507 if i > first: 1508 #print "### final '%s'" % repr(b.tokens[first:i]) 1509 blocks2.append( Block(b.tokens[first:i]) ) 1510 1511 self.blocks = blocks2 1512 1513 def insertDisclaimer(self,disclaimer="/* auto-generated file, DO NOT EDIT */"): 1514 """insert your standard issue disclaimer that this is an 1515 auto-generated file, etc..""" 1516 tokens = CppLineTokenizer( disclaimer ).toTokenList() 1517 tokens = tokens[:-1] # remove trailing tokLN 1518 self.blocks = [ Block(tokens) ] + self.blocks 1519 1520 def replaceTokens(self,replacements): 1521 """replace tokens according to the given dict""" 1522 for b in self.blocks: 1523 made_change = False 1524 if b.isInclude() == None: 1525 for tok in b.tokens: 1526 if tok.id == tokIDENT: 1527 if tok.value in replacements: 1528 tok.value = replacements[tok.value] 1529 made_change = True 1530 1531 if made_change and b.isIf(): 1532 # Keep 'expr' in sync with 'tokens'. 1533 b.expr = CppExpr(b.tokens) 1534 1535class BlockParser: 1536 """a class used to convert an input source file into a BlockList object""" 1537 1538 def __init__(self,tokzer=None): 1539 """initialize a block parser. the input source is provided through a Tokenizer 1540 object""" 1541 self.reset(tokzer) 1542 1543 def reset(self,tokzer): 1544 self.state = 1 1545 self.tokzer = tokzer 1546 1547 def getBlocks(self,tokzer=None): 1548 """tokenize and parse the input source, return a BlockList object 1549 NOTE: empty and line-numbering directives are ignored and removed 1550 from the result. as a consequence, it is possible to have 1551 two successive text blocks in the result""" 1552 # state 0 => in source code 1553 # state 1 => in source code, after a LN 1554 # state 2 => in source code, after LN then some space 1555 state = 1 1556 lastLN = 0 1557 current = [] 1558 blocks = [] 1559 1560 if tokzer == None: 1561 tokzer = self.tokzer 1562 1563 while 1: 1564 tok = tokzer.getToken() 1565 if tok.id == tokEOF: 1566 break 1567 1568 if tok.id == tokLN: 1569 state = 1 1570 current.append(tok) 1571 lastLN = len(current) 1572 1573 elif tok.id == tokSPACE: 1574 if state == 1: 1575 state = 2 1576 current.append(tok) 1577 1578 elif tok.id == "#": 1579 if state > 0: 1580 # this is the start of a directive 1581 1582 if lastLN > 0: 1583 # record previous tokens as text block 1584 block = Block(current[:lastLN]) 1585 blocks.append(block) 1586 lastLN = 0 1587 1588 current = [] 1589 1590 # skip spaces after the # 1591 while 1: 1592 tok = tokzer.getToken() 1593 if tok.id != tokSPACE: 1594 break 1595 1596 if tok.id != tokIDENT: 1597 # empty or line-numbering, ignore it 1598 if tok.id != tokLN and tok.id != tokEOF: 1599 while 1: 1600 tok = tokzer.getToken() 1601 if tok.id == tokLN or tok.id == tokEOF: 1602 break 1603 continue 1604 1605 directive = tok.value 1606 lineno = tok.lineno 1607 1608 # skip spaces 1609 tok = tokzer.getToken() 1610 while tok.id == tokSPACE: 1611 tok = tokzer.getToken() 1612 1613 # then record tokens until LN 1614 dirtokens = [] 1615 while tok.id != tokLN and tok.id != tokEOF: 1616 dirtokens.append(tok) 1617 tok = tokzer.getToken() 1618 1619 block = Block(dirtokens,directive,lineno) 1620 blocks.append(block) 1621 state = 1 1622 1623 else: 1624 state = 0 1625 current.append(tok) 1626 1627 if len(current) > 0: 1628 block = Block(current) 1629 blocks.append(block) 1630 1631 return BlockList(blocks) 1632 1633 def parse(self,tokzer): 1634 return self.getBlocks( tokzer ) 1635 1636 def parseLines(self,lines): 1637 """parse a list of text lines into a BlockList object""" 1638 return self.getBlocks( CppLinesTokenizer(lines) ) 1639 1640 def parseFile(self,path): 1641 """parse a file into a BlockList object""" 1642 file = open(path, "rt") 1643 result = self.getBlocks( CppFileTokenizer(file) ) 1644 file.close() 1645 return result 1646 1647 1648def test_block_parsing(lines,expected): 1649 blocks = BlockParser().parse( CppLinesTokenizer(lines) ) 1650 if len(blocks) != len(expected): 1651 raise BadExpectedToken, "parser.buildBlocks returned '%s' expecting '%s'" \ 1652 % (str(blocks), repr(expected)) 1653 for n in range(len(blocks)): 1654 if str(blocks[n]) != expected[n]: 1655 raise BadExpectedToken, "parser.buildBlocks()[%d] is '%s', expecting '%s'" \ 1656 % (n, str(blocks[n]), expected[n]) 1657 #for block in blocks: 1658 # print block 1659 1660def test_BlockParser(): 1661 test_block_parsing(["#error hello"],["#error hello"]) 1662 test_block_parsing([ "foo", "", "bar" ], [ "foo\n\nbar\n" ]) 1663 test_block_parsing([ "foo", " # ", "bar" ], [ "foo\n","bar\n" ]) 1664 test_block_parsing(\ 1665 [ "foo", " # ", " # /* ahah */ if defined(__KERNEL__) ", "bar", "#endif" ], 1666 [ "foo\n", "#ifdef __KERNEL__", "bar\n", "#endif" ] ) 1667 1668 1669##################################################################################### 1670##################################################################################### 1671##### ##### 1672##### B L O C K L I S T O P T I M I Z A T I O N ##### 1673##### ##### 1674##################################################################################### 1675##################################################################################### 1676 1677def remove_macro_defines( blocks, excludedMacros=set() ): 1678 """remove macro definitions like #define <macroName> ....""" 1679 result = [] 1680 for b in blocks: 1681 macroName = b.isDefine() 1682 if macroName == None or not macroName in excludedMacros: 1683 result.append(b) 1684 1685 return result 1686 1687def find_matching_endif( blocks, i ): 1688 n = len(blocks) 1689 depth = 1 1690 while i < n: 1691 if blocks[i].isDirective(): 1692 dir = blocks[i].directive 1693 if dir in [ "if", "ifndef", "ifdef" ]: 1694 depth += 1 1695 elif depth == 1 and dir in [ "else", "elif" ]: 1696 return i 1697 elif dir == "endif": 1698 depth -= 1 1699 if depth == 0: 1700 return i 1701 i += 1 1702 return i 1703 1704def optimize_if01( blocks ): 1705 """remove the code between #if 0 .. #endif in a list of CppBlocks""" 1706 i = 0 1707 n = len(blocks) 1708 result = [] 1709 while i < n: 1710 j = i 1711 while j < n and not blocks[j].isIf(): 1712 j += 1 1713 if j > i: 1714 D2("appending lines %d to %d" % (blocks[i].lineno, blocks[j-1].lineno)) 1715 result += blocks[i:j] 1716 if j >= n: 1717 break 1718 expr = blocks[j].expr 1719 r = expr.toInt() 1720 if r == None: 1721 result.append(blocks[j]) 1722 i = j + 1 1723 continue 1724 1725 if r == 0: 1726 # if 0 => skip everything until the corresponding #endif 1727 j = find_matching_endif( blocks, j+1 ) 1728 if j >= n: 1729 # unterminated #if 0, finish here 1730 break 1731 dir = blocks[j].directive 1732 if dir == "endif": 1733 D2("remove 'if 0' .. 'endif' (lines %d to %d)" % (blocks[i].lineno, blocks[j].lineno)) 1734 i = j + 1 1735 elif dir == "else": 1736 # convert 'else' into 'if 1' 1737 D2("convert 'if 0' .. 'else' into 'if 1' (lines %d to %d)" % (blocks[i].lineno, blocks[j-1].lineno)) 1738 blocks[j].directive = "if" 1739 blocks[j].expr = CppExpr( CppLineTokenizer("1").toTokenList() ) 1740 i = j 1741 elif dir == "elif": 1742 # convert 'elif' into 'if' 1743 D2("convert 'if 0' .. 'elif' into 'if'") 1744 blocks[j].directive = "if" 1745 i = j 1746 continue 1747 1748 # if 1 => find corresponding endif and remove/transform them 1749 k = find_matching_endif( blocks, j+1 ) 1750 if k >= n: 1751 # unterminated #if 1, finish here 1752 D2("unterminated 'if 1'") 1753 result += blocks[j+1:k] 1754 break 1755 1756 dir = blocks[k].directive 1757 if dir == "endif": 1758 D2("convert 'if 1' .. 'endif' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno)) 1759 result += optimize_if01(blocks[j+1:k]) 1760 i = k+1 1761 elif dir == "else": 1762 # convert 'else' into 'if 0' 1763 D2("convert 'if 1' .. 'else' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno)) 1764 result += optimize_if01(blocks[j+1:k]) 1765 blocks[k].directive = "if" 1766 blocks[k].expr = CppExpr( CppLineTokenizer("0").toTokenList() ) 1767 i = k 1768 elif dir == "elif": 1769 # convert 'elif' into 'if 0' 1770 D2("convert 'if 1' .. 'elif' (lines %d to %d)" % (blocks[j].lineno, blocks[k].lineno)) 1771 result += optimize_if01(blocks[j+1:k]) 1772 blocks[k].expr = CppExpr( CppLineTokenizer("0").toTokenList() ) 1773 i = k 1774 return result 1775 1776def test_optimizeAll(): 1777 text = """\ 1778#if 1 1779#define GOOD_1 1780#endif 1781#if 0 1782#define BAD_2 1783#define BAD_3 1784#endif 1785 1786#if 1 1787#define GOOD_2 1788#else 1789#define BAD_4 1790#endif 1791 1792#if 0 1793#define BAD_5 1794#else 1795#define GOOD_3 1796#endif 1797 1798#if defined(__KERNEL__) 1799#define BAD_KERNEL 1800#endif 1801 1802#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) 1803#define X 1804#endif 1805 1806#ifndef SIGRTMAX 1807#define SIGRTMAX 123 1808#endif /* SIGRTMAX */ 1809 1810#if 0 1811#if 1 1812#define BAD_6 1813#endif 1814#endif\ 1815""" 1816 1817 expected = """\ 1818#define GOOD_1 1819 1820#define GOOD_2 1821 1822#define GOOD_3 1823 1824 1825#if !defined(__GLIBC__) || __GLIBC__ < 2 1826#define X 1827#endif 1828 1829#ifndef __SIGRTMAX 1830#define __SIGRTMAX 123 1831#endif 1832 1833""" 1834 1835 out = StringOutput() 1836 lines = string.split(text, '\n') 1837 list = BlockParser().parse( CppLinesTokenizer(lines) ) 1838 #D_setlevel(2) 1839 list.replaceTokens( kernel_token_replacements ) 1840 list.optimizeAll( {"__KERNEL__":kCppUndefinedMacro} ) 1841 list.write(out) 1842 if out.get() != expected: 1843 print "[FAIL]: macro optimization failed\n" 1844 print "<<<< expecting '", 1845 print expected, 1846 print "'\n>>>> result '" 1847 print out.get(), 1848 print "'\n----" 1849 global failure_count 1850 failure_count += 1 1851 1852 1853# -- Always run the unit tests. 1854 1855def runUnitTests(): 1856 """run all unit tests for this program""" 1857 test_CppTokenizer() 1858 test_CppExpr() 1859 test_optimizeAll() 1860 test_BlockParser() 1861 1862failure_count = 0 1863runUnitTests() 1864if failure_count != 0: 1865 sys.exit(1) 1866