#------------------------------------------------------------------------------
# pycparser: c_parser.py
#
# CParser class: Parser and AST builder for the C language
#
# Eli Bendersky [https://eli.thegreenplace.net/]
# License: BSD
#------------------------------------------------------------------------------
import re

from .ply import yacc

from . import c_ast
from .c_lexer import CLexer
from .plyparser import PLYParser, Coord, ParseError, parameterized, template
from .ast_transforms import fix_switch_cases


@template
class CParser(PLYParser):
    def __init__(
            self,
            lex_optimize=True,
            lexer=CLexer,
            lextab='pycparser.lextab',
            yacc_optimize=True,
            yacctab='pycparser.yacctab',
            yacc_debug=False,
            taboutputdir=''):
        """ Create a new CParser.

            Some arguments for controlling the debug/optimization
            level of the parser are provided. The defaults are
            tuned for release/performance mode.
            The simple rules for using them are:
            *) When tweaking CParser/CLexer, set these to False
            *) When releasing a stable parser, set to True

            lex_optimize:
                Set to False when you're modifying the lexer.
                Otherwise, changes in the lexer won't be used, if
                some lextab.py file exists.
                When releasing with a stable lexer, set to True
                to save the re-generation of the lexer table on
                each run.

            lexer:
                Set this parameter to define the lexer to use if
                you're not using the default CLexer.

            lextab:
                Points to the lex table that's used for optimized
                mode. Only if you're modifying the lexer and want
                some tests to avoid re-generating the table, make
                this point to a local lex table file (that's been
                earlier generated with lex_optimize=True)

            yacc_optimize:
                Set to False when you're modifying the parser.
                Otherwise, changes in the parser won't be used, if
                some parsetab.py file exists.
                When releasing with a stable parser, set to True
                to save the re-generation of the parser table on
                each run.

            yacctab:
                Points to the yacc table that's used for optimized
                mode. Only if you're modifying the parser, make
                this point to a local yacc table file

            yacc_debug:
                Generate a parser.out file that explains how yacc
                built the parsing table from the grammar.

            taboutputdir:
                Set this parameter to control the location of generated
                lextab and yacctab files.
        """
        # The lexer calls back into the parser: for error reporting, for
        # scope tracking on '{' / '}', and for deciding whether an
        # identifier is a typedef-name (TYPEID) in the current scope.
        self.clex = lexer(
            error_func=self._lex_error_func,
            on_lbrace_func=self._lex_on_lbrace_func,
            on_rbrace_func=self._lex_on_rbrace_func,
            type_lookup_func=self._lex_type_lookup_func)

        self.clex.build(
            optimize=lex_optimize,
            lextab=lextab,
            outputdir=taboutputdir)
        self.tokens = self.clex.tokens

        # For each rule listed here, a '<rule>_opt' grammar rule (matching
        # either the rule or 'empty') is generated via
        # PLYParser._create_opt_rule.
        rules_with_opt = [
            'abstract_declarator',
            'assignment_expression',
            'declaration_list',
            'declaration_specifiers_no_type',
            'designation',
            'expression',
            'identifier_list',
            'init_declarator_list',
            'id_init_declarator_list',
            'initializer_list',
            'parameter_type_list',
            'block_item_list',
            'type_qualifier_list',
            'struct_declarator_list'
        ]

        for rule in rules_with_opt:
            self._create_opt_rule(rule)

        # NOTE: yacc derives the grammar from the docstrings of the p_*
        # methods on this class -- those docstrings are behavior, not
        # documentation.
        self.cparser = yacc.yacc(
            module=self,
            start='translation_unit_or_empty',
            debug=yacc_debug,
            optimize=yacc_optimize,
            tabmodule=yacctab,
            outputdir=taboutputdir)

        # Stack of scopes for keeping track of symbols. _scope_stack[-1] is
        # the current (topmost) scope. Each scope is a dictionary that
        # specifies whether a name is a type. If _scope_stack[n][name] is
        # True, 'name' is currently a type in the scope. If it's False,
        # 'name' is used in the scope but not as a type (for instance, if we
        # saw: int name;
        # If 'name' is not a key in _scope_stack[n] then 'name' was not defined
        # in this scope at all.
        self._scope_stack = [dict()]

        # Keeps track of the last token given to yacc (the lookahead token)
        self._last_yielded_token = None

    def parse(self, text, filename='', debuglevel=0):
        """ Parses C code and returns an AST.

            text:
                A string containing the C source code

            filename:
                Name of the file being parsed (for meaningful
                error messages)

            debuglevel:
                Debug level to yacc
        """
        self.clex.filename = filename
        self.clex.reset_lineno()
        # Reset per-parse state so one CParser instance can parse multiple
        # inputs: a single fresh global scope, and no lookahead token yet.
        self._scope_stack = [dict()]
        self._last_yielded_token = None
        return self.cparser.parse(
            input=text,
            lexer=self.clex,
            debug=debuglevel)

    ######################--   PRIVATE   --######################

    def _push_scope(self):
        # Called when the lexer sees '{': enter a new innermost scope.
        self._scope_stack.append(dict())

    def _pop_scope(self):
        # Called when the lexer sees '}'. The global scope (index 0) is
        # never popped.
        assert len(self._scope_stack) > 1
        self._scope_stack.pop()

    def _add_typedef_name(self, name, coord):
        """ Add a new typedef name (ie a TYPEID) to the current scope
        """
        # A name already recorded as False (non-typedef) in this scope may
        # not be redeclared as a typedef here.
        if not self._scope_stack[-1].get(name, True):
            self._parse_error(
                "Typedef %r previously declared as non-typedef "
                "in this scope" % name, coord)
        self._scope_stack[-1][name] = True

    def _add_identifier(self, name, coord):
        """ Add a new object, function, or enum member name (ie an ID) to the
            current scope
        """
        # Symmetric to _add_typedef_name: a typedef name may not be
        # redeclared as a plain identifier within the same scope.
        if self._scope_stack[-1].get(name, False):
            self._parse_error(
                "Non-typedef %r previously declared as typedef "
                "in this scope" % name, coord)
        self._scope_stack[-1][name] = False

    def _is_type_in_scope(self, name):
        """ Is *name* a typedef-name in the current scope?
        """
        for scope in reversed(self._scope_stack):
            # If name is an identifier in this scope it shadows typedefs in
            # higher scopes.
            in_scope = scope.get(name)
            if in_scope is not None: return in_scope
        return False

    def _lex_error_func(self, msg, line, column):
        # Lexer error callback: surface lexer errors as parse errors.
        self._parse_error(msg, self._coord(line, column))

    def _lex_on_lbrace_func(self):
        self._push_scope()

    def _lex_on_rbrace_func(self):
        self._pop_scope()

    def _lex_type_lookup_func(self, name):
        """ Looks up types that were previously defined with
            typedef.
            Passed to the lexer for recognizing identifiers that
            are types.
        """
        is_type = self._is_type_in_scope(name)
        return is_type

    def _get_yacc_lookahead_token(self):
        """ We need access to yacc's lookahead token in certain cases.
            This is the last token yacc requested from the lexer, so we
            ask the lexer.
        """
        return self.clex.last_token

    # To understand what's going on here, read sections A.8.5 and
    # A.8.6 of K&R2 very carefully.
    #
    # A C type consists of a basic type declaration, with a list
    # of modifiers. For example:
    #
    # int *c[5];
    #
    # The basic declaration here is 'int c', and the pointer and
    # the array are the modifiers.
    #
    # Basic declarations are represented by TypeDecl (from module c_ast) and the
    # modifiers are FuncDecl, PtrDecl and ArrayDecl.
    #
    # The standard states that whenever a new modifier is parsed, it should be
    # added to the end of the list of modifiers. For example:
    #
    # K&R2 A.8.6.2: Array Declarators
    #
    #   In a declaration T D where D has the form
    #       D1 [constant-expression-opt]
    #   and the type of the identifier in the declaration T D1 is
    #   "type-modifier T", the type of the
    #   identifier of D is "type-modifier array of T"
    #
    # This is what this method does. The declarator it receives
    # can be a list of declarators ending with TypeDecl. It
    # tacks the modifier to the end of this list, just before
    # the TypeDecl.
246 # 247 # Additionally, the modifier may be a list itself. This is 248 # useful for pointers, that can come as a chain from the rule 249 # p_pointer. In this case, the whole modifier list is spliced 250 # into the new location. 251 def _type_modify_decl(self, decl, modifier): 252 """ Tacks a type modifier on a declarator, and returns 253 the modified declarator. 254 255 Note: the declarator and modifier may be modified 256 """ 257 #~ print '****' 258 #~ decl.show(offset=3) 259 #~ modifier.show(offset=3) 260 #~ print '****' 261 262 modifier_head = modifier 263 modifier_tail = modifier 264 265 # The modifier may be a nested list. Reach its tail. 266 # 267 while modifier_tail.type: 268 modifier_tail = modifier_tail.type 269 270 # If the decl is a basic type, just tack the modifier onto 271 # it 272 # 273 if isinstance(decl, c_ast.TypeDecl): 274 modifier_tail.type = decl 275 return modifier 276 else: 277 # Otherwise, the decl is a list of modifiers. Reach 278 # its tail and splice the modifier onto the tail, 279 # pointing to the underlying basic type. 280 # 281 decl_tail = decl 282 283 while not isinstance(decl_tail.type, c_ast.TypeDecl): 284 decl_tail = decl_tail.type 285 286 modifier_tail.type = decl_tail.type 287 decl_tail.type = modifier_head 288 return decl 289 290 # Due to the order in which declarators are constructed, 291 # they have to be fixed in order to look like a normal AST. 292 # 293 # When a declaration arrives from syntax construction, it has 294 # these problems: 295 # * The innermost TypeDecl has no type (because the basic 296 # type is only known at the uppermost declaration level) 297 # * The declaration has no variable name, since that is saved 298 # in the innermost TypeDecl 299 # * The typename of the declaration is a list of type 300 # specifiers, and not a node. Here, basic identifier types 301 # should be separated from more complex types like enums 302 # and structs. 303 # 304 # This method fixes these problems. 
    #
    def _fix_decl_name_type(self, decl, typename):
        """ Fixes a declaration. Modifies decl.
        """
        # Reach the underlying basic type
        #
        type = decl
        while not isinstance(type, c_ast.TypeDecl):
            type = type.type

        decl.name = type.declname
        type.quals = decl.quals

        # The typename is a list of types. If any type in this
        # list isn't an IdentifierType, it must be the only
        # type in the list (it's illegal to declare "int enum ..")
        # If all the types are basic, they're collected in the
        # IdentifierType holder.
        #
        for tn in typename:
            if not isinstance(tn, c_ast.IdentifierType):
                if len(typename) > 1:
                    self._parse_error(
                        "Invalid multiple types specified", tn.coord)
                else:
                    type.type = tn
                    return decl

        if not typename:
            # Functions default to returning int
            #
            if not isinstance(decl.type, c_ast.FuncDecl):
                self._parse_error(
                    "Missing type in declaration", decl.coord)
            type.type = c_ast.IdentifierType(
                ['int'],
                coord=decl.coord)
        else:
            # At this point, we know that typename is a list of IdentifierType
            # nodes. Concatenate all the names into a single list.
            #
            type.type = c_ast.IdentifierType(
                [name for id in typename for name in id.names],
                coord=typename[0].coord)
        return decl

    def _add_declaration_specifier(self, declspec, newspec, kind, append=False):
        """ Declaration specifiers are represented by a dictionary
            with the entries:
            * qual: a list of type qualifiers
            * storage: a list of storage type qualifiers
            * type: a list of type specifiers
            * function: a list of function specifiers

            This method is given a declaration specifier, and a
            new specifier of a given kind.
            If `append` is True, the new specifier is added to the end of
            the specifiers list, otherwise it's added at the beginning.
            Returns the declaration specifier, with the new
            specifier incorporated.
        """
        # declspec may be None (first specifier seen): start a fresh dict.
        spec = declspec or dict(qual=[], storage=[], type=[], function=[])

        if append:
            spec[kind].append(newspec)
        else:
            spec[kind].insert(0, newspec)

        return spec

    def _build_declarations(self, spec, decls, typedef_namespace=False):
        """ Builds a list of declarations all sharing the given specifiers.
            If typedef_namespace is true, each declared name is added
            to the "typedef namespace", which also includes objects,
            functions, and enum constants.
        """
        is_typedef = 'typedef' in spec['storage']
        declarations = []

        # Bit-fields are allowed to be unnamed.
        #
        if decls[0].get('bitsize') is not None:
            pass

        # When redeclaring typedef names as identifiers in inner scopes, a
        # problem can occur where the identifier gets grouped into
        # spec['type'], leaving decl as None.  This can only occur for the
        # first declarator.
        #
        elif decls[0]['decl'] is None:
            if len(spec['type']) < 2 or len(spec['type'][-1].names) != 1 or \
                    not self._is_type_in_scope(spec['type'][-1].names[0]):
                coord = '?'
                for t in spec['type']:
                    if hasattr(t, 'coord'):
                        coord = t.coord
                        break
                self._parse_error('Invalid declaration', coord)

            # Make this look as if it came from "direct_declarator:ID"
            decls[0]['decl'] = c_ast.TypeDecl(
                declname=spec['type'][-1].names[0],
                type=None,
                quals=None,
                coord=spec['type'][-1].coord)
            # Remove the "new" type's name from the end of spec['type']
            del spec['type'][-1]

        # A similar problem can occur where the declaration ends up looking
        # like an abstract declarator.  Give it a name if this is the case.
        #
        elif not isinstance(decls[0]['decl'],
                (c_ast.Struct, c_ast.Union, c_ast.IdentifierType)):
            decls_0_tail = decls[0]['decl']
            while not isinstance(decls_0_tail, c_ast.TypeDecl):
                decls_0_tail = decls_0_tail.type
            if decls_0_tail.declname is None:
                decls_0_tail.declname = spec['type'][-1].names[0]
                del spec['type'][-1]

        for decl in decls:
            assert decl['decl'] is not None
            if is_typedef:
                declaration = c_ast.Typedef(
                    name=None,
                    quals=spec['qual'],
                    storage=spec['storage'],
                    type=decl['decl'],
                    coord=decl['decl'].coord)
            else:
                declaration = c_ast.Decl(
                    name=None,
                    quals=spec['qual'],
                    storage=spec['storage'],
                    funcspec=spec['function'],
                    type=decl['decl'],
                    init=decl.get('init'),
                    bitsize=decl.get('bitsize'),
                    coord=decl['decl'].coord)

            if isinstance(declaration.type,
                    (c_ast.Struct, c_ast.Union, c_ast.IdentifierType)):
                fixed_decl = declaration
            else:
                fixed_decl = self._fix_decl_name_type(declaration, spec['type'])

            # Add the type name defined by typedef to a
            # symbol table (for usage in the lexer)
            #
            if typedef_namespace:
                if is_typedef:
                    self._add_typedef_name(fixed_decl.name, fixed_decl.coord)
                else:
                    self._add_identifier(fixed_decl.name, fixed_decl.coord)

            declarations.append(fixed_decl)

        return declarations

    def _build_function_definition(self, spec, decl, param_decls, body):
        """ Builds a function definition.
        """
        assert 'typedef' not in spec['storage']

        declaration = self._build_declarations(
            spec=spec,
            decls=[dict(decl=decl, init=None)],
            typedef_namespace=True)[0]

        return c_ast.FuncDef(
            decl=declaration,
            param_decls=param_decls,
            body=body,
            coord=decl.coord)

    def _select_struct_union_class(self, token):
        """ Given a token (either STRUCT or UNION), selects the
            appropriate AST class.
        """
        if token == 'struct':
            return c_ast.Struct
        else:
            return c_ast.Union

    ##
    ## Precedence and associativity of operators
    ##
    precedence = (
        ('left', 'LOR'),
        ('left', 'LAND'),
        ('left', 'OR'),
        ('left', 'XOR'),
        ('left', 'AND'),
        ('left', 'EQ', 'NE'),
        ('left', 'GT', 'GE', 'LT', 'LE'),
        ('left', 'RSHIFT', 'LSHIFT'),
        ('left', 'PLUS', 'MINUS'),
        ('left', 'TIMES', 'DIVIDE', 'MOD')
    )

    ##
    ## Grammar productions
    ## Implementation of the BNF defined in K&R2 A.13
    ##

    # Wrapper around a translation unit, to allow for empty input.
    # Not strictly part of the C99 Grammar, but useful in practice.
    #
    def p_translation_unit_or_empty(self, p):
        """ translation_unit_or_empty   : translation_unit
                                        | empty
        """
        if p[1] is None:
            p[0] = c_ast.FileAST([])
        else:
            p[0] = c_ast.FileAST(p[1])

    def p_translation_unit_1(self, p):
        """ translation_unit    : external_declaration
        """
        # Note: external_declaration is already a list
        #
        p[0] = p[1]

    def p_translation_unit_2(self, p):
        """ translation_unit    : translation_unit external_declaration
        """
        p[1].extend(p[2])
        p[0] = p[1]

    # Declarations always come as lists (because they can be
    # several in one line), so we wrap the function definition
    # into a list as well, to make the return value of
    # external_declaration homogeneous.
539 # 540 def p_external_declaration_1(self, p): 541 """ external_declaration : function_definition 542 """ 543 p[0] = [p[1]] 544 545 def p_external_declaration_2(self, p): 546 """ external_declaration : declaration 547 """ 548 p[0] = p[1] 549 550 def p_external_declaration_3(self, p): 551 """ external_declaration : pp_directive 552 | pppragma_directive 553 """ 554 p[0] = [p[1]] 555 556 def p_external_declaration_4(self, p): 557 """ external_declaration : SEMI 558 """ 559 p[0] = [] 560 561 def p_pp_directive(self, p): 562 """ pp_directive : PPHASH 563 """ 564 self._parse_error('Directives not supported yet', 565 self._token_coord(p, 1)) 566 567 def p_pppragma_directive(self, p): 568 """ pppragma_directive : PPPRAGMA 569 | PPPRAGMA PPPRAGMASTR 570 """ 571 if len(p) == 3: 572 p[0] = c_ast.Pragma(p[2], self._token_coord(p, 2)) 573 else: 574 p[0] = c_ast.Pragma("", self._token_coord(p, 1)) 575 576 # In function definitions, the declarator can be followed by 577 # a declaration list, for old "K&R style" function definitios. 
578 # 579 def p_function_definition_1(self, p): 580 """ function_definition : id_declarator declaration_list_opt compound_statement 581 """ 582 # no declaration specifiers - 'int' becomes the default type 583 spec = dict( 584 qual=[], 585 storage=[], 586 type=[c_ast.IdentifierType(['int'], 587 coord=self._token_coord(p, 1))], 588 function=[]) 589 590 p[0] = self._build_function_definition( 591 spec=spec, 592 decl=p[1], 593 param_decls=p[2], 594 body=p[3]) 595 596 def p_function_definition_2(self, p): 597 """ function_definition : declaration_specifiers id_declarator declaration_list_opt compound_statement 598 """ 599 spec = p[1] 600 601 p[0] = self._build_function_definition( 602 spec=spec, 603 decl=p[2], 604 param_decls=p[3], 605 body=p[4]) 606 607 def p_statement(self, p): 608 """ statement : labeled_statement 609 | expression_statement 610 | compound_statement 611 | selection_statement 612 | iteration_statement 613 | jump_statement 614 | pppragma_directive 615 """ 616 p[0] = p[1] 617 618 # A pragma is generally considered a decorator rather than an actual statement. 619 # Still, for the purposes of analyzing an abstract syntax tree of C code, 620 # pragma's should not be ignored and were previously treated as a statement. 621 # This presents a problem for constructs that take a statement such as labeled_statements, 622 # selection_statements, and iteration_statements, causing a misleading structure 623 # in the AST. For example, consider the following C code. 624 # 625 # for (int i = 0; i < 3; i++) 626 # #pragma omp critical 627 # sum += 1; 628 # 629 # This code will compile and execute "sum += 1;" as the body of the for loop. 
630 # Previous implementations of PyCParser would render the AST for this 631 # block of code as follows: 632 # 633 # For: 634 # DeclList: 635 # Decl: i, [], [], [] 636 # TypeDecl: i, [] 637 # IdentifierType: ['int'] 638 # Constant: int, 0 639 # BinaryOp: < 640 # ID: i 641 # Constant: int, 3 642 # UnaryOp: p++ 643 # ID: i 644 # Pragma: omp critical 645 # Assignment: += 646 # ID: sum 647 # Constant: int, 1 648 # 649 # This AST misleadingly takes the Pragma as the body of the loop and the 650 # assignment then becomes a sibling of the loop. 651 # 652 # To solve edge cases like these, the pragmacomp_or_statement rule groups 653 # a pragma and its following statement (which would otherwise be orphaned) 654 # using a compound block, effectively turning the above code into: 655 # 656 # for (int i = 0; i < 3; i++) { 657 # #pragma omp critical 658 # sum += 1; 659 # } 660 def p_pragmacomp_or_statement(self, p): 661 """ pragmacomp_or_statement : pppragma_directive statement 662 | statement 663 """ 664 if isinstance(p[1], c_ast.Pragma) and len(p) == 3: 665 p[0] = c_ast.Compound( 666 block_items=[p[1], p[2]], 667 coord=self._token_coord(p, 1)) 668 else: 669 p[0] = p[1] 670 671 # In C, declarations can come several in a line: 672 # int x, *px, romulo = 5; 673 # 674 # However, for the AST, we will split them to separate Decl 675 # nodes. 676 # 677 # This rule splits its declarations and always returns a list 678 # of Decl nodes, even if it's one element long. 679 # 680 def p_decl_body(self, p): 681 """ decl_body : declaration_specifiers init_declarator_list_opt 682 | declaration_specifiers_no_type id_init_declarator_list_opt 683 """ 684 spec = p[1] 685 686 # p[2] (init_declarator_list_opt) is either a list or None 687 # 688 if p[2] is None: 689 # By the standard, you must have at least one declarator unless 690 # declaring a structure tag, a union tag, or the members of an 691 # enumeration. 
692 # 693 ty = spec['type'] 694 s_u_or_e = (c_ast.Struct, c_ast.Union, c_ast.Enum) 695 if len(ty) == 1 and isinstance(ty[0], s_u_or_e): 696 decls = [c_ast.Decl( 697 name=None, 698 quals=spec['qual'], 699 storage=spec['storage'], 700 funcspec=spec['function'], 701 type=ty[0], 702 init=None, 703 bitsize=None, 704 coord=ty[0].coord)] 705 706 # However, this case can also occur on redeclared identifiers in 707 # an inner scope. The trouble is that the redeclared type's name 708 # gets grouped into declaration_specifiers; _build_declarations 709 # compensates for this. 710 # 711 else: 712 decls = self._build_declarations( 713 spec=spec, 714 decls=[dict(decl=None, init=None)], 715 typedef_namespace=True) 716 717 else: 718 decls = self._build_declarations( 719 spec=spec, 720 decls=p[2], 721 typedef_namespace=True) 722 723 p[0] = decls 724 725 # The declaration has been split to a decl_body sub-rule and 726 # SEMI, because having them in a single rule created a problem 727 # for defining typedefs. 728 # 729 # If a typedef line was directly followed by a line using the 730 # type defined with the typedef, the type would not be 731 # recognized. This is because to reduce the declaration rule, 732 # the parser's lookahead asked for the token after SEMI, which 733 # was the type from the next line, and the lexer had no chance 734 # to see the updated type symbol table. 735 # 736 # Splitting solves this problem, because after seeing SEMI, 737 # the parser reduces decl_body, which actually adds the new 738 # type into the table to be seen by the lexer before the next 739 # line is reached. 
    def p_declaration(self, p):
        """ declaration : decl_body SEMI
        """
        p[0] = p[1]

    # Since each declaration is a list of declarations, this
    # rule will combine all the declarations and return a single
    # list
    #
    def p_declaration_list(self, p):
        """ declaration_list    : declaration
                                | declaration_list declaration
        """
        p[0] = p[1] if len(p) == 2 else p[1] + p[2]

    # To know when declaration-specifiers end and declarators begin,
    # we require declaration-specifiers to have at least one
    # type-specifier, and disallow typedef-names after we've seen any
    # type-specifier. These are both required by the spec.
    #
    def p_declaration_specifiers_no_type_1(self, p):
        """ declaration_specifiers_no_type  : type_qualifier declaration_specifiers_no_type_opt
        """
        p[0] = self._add_declaration_specifier(p[2], p[1], 'qual')

    def p_declaration_specifiers_no_type_2(self, p):
        """ declaration_specifiers_no_type  : storage_class_specifier declaration_specifiers_no_type_opt
        """
        p[0] = self._add_declaration_specifier(p[2], p[1], 'storage')

    def p_declaration_specifiers_no_type_3(self, p):
        """ declaration_specifiers_no_type  : function_specifier declaration_specifiers_no_type_opt
        """
        p[0] = self._add_declaration_specifier(p[2], p[1], 'function')


    def p_declaration_specifiers_1(self, p):
        """ declaration_specifiers  : declaration_specifiers type_qualifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'qual', append=True)

    def p_declaration_specifiers_2(self, p):
        """ declaration_specifiers  : declaration_specifiers storage_class_specifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'storage', append=True)

    def p_declaration_specifiers_3(self, p):
        """ declaration_specifiers  : declaration_specifiers function_specifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'function', append=True)

    def p_declaration_specifiers_4(self, p):
        """ declaration_specifiers  : declaration_specifiers type_specifier_no_typeid
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)

    def p_declaration_specifiers_5(self, p):
        """ declaration_specifiers  : type_specifier
        """
        p[0] = self._add_declaration_specifier(None, p[1], 'type')

    def p_declaration_specifiers_6(self, p):
        """ declaration_specifiers  : declaration_specifiers_no_type type_specifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)


    def p_storage_class_specifier(self, p):
        """ storage_class_specifier : AUTO
                                    | REGISTER
                                    | STATIC
                                    | EXTERN
                                    | TYPEDEF
        """
        p[0] = p[1]

    def p_function_specifier(self, p):
        """ function_specifier  : INLINE
        """
        p[0] = p[1]

    def p_type_specifier_no_typeid(self, p):
        """ type_specifier_no_typeid  : VOID
                                      | _BOOL
                                      | CHAR
                                      | SHORT
                                      | INT
                                      | LONG
                                      | FLOAT
                                      | DOUBLE
                                      | _COMPLEX
                                      | SIGNED
                                      | UNSIGNED
                                      | __INT128
        """
        p[0] = c_ast.IdentifierType([p[1]], coord=self._token_coord(p, 1))

    def p_type_specifier(self, p):
        """ type_specifier  : typedef_name
                            | enum_specifier
                            | struct_or_union_specifier
                            | type_specifier_no_typeid
        """
        p[0] = p[1]

    def p_type_qualifier(self, p):
        """ type_qualifier  : CONST
                            | RESTRICT
                            | VOLATILE
        """
        p[0] = p[1]

    def p_init_declarator_list(self, p):
        """ init_declarator_list    : init_declarator
                                    | init_declarator_list COMMA init_declarator
        """
        p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]

    # Returns a {decl=<declarator> : init=<initializer>} dictionary
    # If there's no initializer, uses None
    #
    def p_init_declarator(self, p):
        """ init_declarator : declarator
                            | declarator EQUALS initializer
        """
        p[0] = dict(decl=p[1], init=(p[3] if len(p) > 2 else None))

    def p_id_init_declarator_list(self, p):
        """ id_init_declarator_list    : id_init_declarator
                                       | id_init_declarator_list COMMA init_declarator
        """
        p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]

    def p_id_init_declarator(self, p):
        """ id_init_declarator : id_declarator
                               | id_declarator EQUALS initializer
        """
        p[0] = dict(decl=p[1], init=(p[3] if len(p) > 2 else None))

    # Require at least one type specifier in a specifier-qualifier-list
    #
    def p_specifier_qualifier_list_1(self, p):
        """ specifier_qualifier_list    : specifier_qualifier_list type_specifier_no_typeid
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)

    def p_specifier_qualifier_list_2(self, p):
        """ specifier_qualifier_list    : specifier_qualifier_list type_qualifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'qual', append=True)

    def p_specifier_qualifier_list_3(self, p):
        """ specifier_qualifier_list  : type_specifier
        """
        p[0] = self._add_declaration_specifier(None, p[1], 'type')

    def p_specifier_qualifier_list_4(self, p):
        """ specifier_qualifier_list  : type_qualifier_list type_specifier
        """
        spec = dict(qual=p[1], storage=[], type=[], function=[])
        p[0] = self._add_declaration_specifier(spec, p[2], 'type', append=True)

    # TYPEID is allowed here (and in other struct/enum related tag names), because
    # struct/enum tags reside in their own namespace and can be named the same as types
    #
    def p_struct_or_union_specifier_1(self, p):
        """ struct_or_union_specifier   : struct_or_union ID
                                        | struct_or_union TYPEID
        """
        klass = self._select_struct_union_class(p[1])
        # None means no list of members
        p[0] = klass(
            name=p[2],
            decls=None,
            coord=self._token_coord(p, 2))

    def p_struct_or_union_specifier_2(self, p):
        """ struct_or_union_specifier : struct_or_union brace_open struct_declaration_list brace_close
                                      | struct_or_union brace_open brace_close
        """
        klass = self._select_struct_union_class(p[1])
        if len(p) == 4:
            # Empty sequence means an empty list of members
            p[0] = klass(
                name=None,
                decls=[],
                coord=self._token_coord(p, 2))
        else:
            p[0] = klass(
                name=None,
                decls=p[3],
                coord=self._token_coord(p, 2))


    def p_struct_or_union_specifier_3(self, p):
        """ struct_or_union_specifier   : struct_or_union ID brace_open struct_declaration_list brace_close
                                        | struct_or_union ID brace_open brace_close
                                        | struct_or_union TYPEID brace_open struct_declaration_list brace_close
                                        | struct_or_union TYPEID brace_open brace_close
        """
        klass = self._select_struct_union_class(p[1])
        if len(p) == 5:
            # Empty sequence means an empty list of members
            p[0] = klass(
                name=p[2],
                decls=[],
                coord=self._token_coord(p, 2))
        else:
            p[0] = klass(
                name=p[2],
                decls=p[4],
                coord=self._token_coord(p, 2))

    def p_struct_or_union(self, p):
        """ struct_or_union : STRUCT
                            | UNION
        """
        p[0] = p[1]

    # Combine all declarations into a single list
    #
    def p_struct_declaration_list(self, p):
        """ struct_declaration_list     : struct_declaration
                                        | struct_declaration_list struct_declaration
        """
        # A struct_declaration may be None (e.g. a stray SEMI); treat it as
        # an empty list of members.
        if len(p) == 2:
            p[0] = p[1] or []
        else:
            p[0] = p[1] + (p[2] or [])

    def p_struct_declaration_1(self, p):
        """ struct_declaration : specifier_qualifier_list struct_declarator_list_opt SEMI
        """
        spec = p[1]
        assert 'typedef' not in spec['storage']

        if p[2] is not None:
            decls = self._build_declarations(
                spec=spec,
                decls=p[2])

        elif len(spec['type']) == 1:
            # Anonymous struct/union, gcc extension, C1x feature.
            # Although the standard only allows structs/unions here, I see no
            # reason to disallow other types since some compilers have typedefs
            # here, and pycparser isn't about rejecting all invalid code.
            #
            node = spec['type'][0]
            if isinstance(node, c_ast.Node):
                decl_type = node
            else:
                decl_type = c_ast.IdentifierType(node)

            decls = self._build_declarations(
                spec=spec,
                decls=[dict(decl=decl_type)])

        else:
            # Structure/union members can have the same names as typedefs.
            # The trouble is that the member's name gets grouped into
            # specifier_qualifier_list; _build_declarations compensates.
            #
            decls = self._build_declarations(
                spec=spec,
                decls=[dict(decl=None, init=None)])

        p[0] = decls

    def p_struct_declaration_2(self, p):
        """ struct_declaration : SEMI
        """
        p[0] = None

    def p_struct_declaration_3(self, p):
        """ struct_declaration : pppragma_directive
        """
        p[0] = [p[1]]

    def p_struct_declarator_list(self, p):
        """ struct_declarator_list  : struct_declarator
                                    | struct_declarator_list COMMA struct_declarator
        """
        p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]

    # struct_declarator passes up a dict with the keys: decl (for
    # the underlying declarator) and bitsize (for the bitsize)
    #
    def p_struct_declarator_1(self, p):
        """ struct_declarator : declarator
        """
        p[0] = {'decl': p[1], 'bitsize': None}

    def p_struct_declarator_2(self, p):
        """ struct_declarator   : declarator COLON constant_expression
                                | COLON constant_expression
        """
        if len(p) > 3:
            p[0] = {'decl': p[1], 'bitsize': p[3]}
        else:
            # Unnamed bit-field: synthesize an anonymous TypeDecl.
            p[0] = {'decl': c_ast.TypeDecl(None, None, None), 'bitsize': p[2]}

    def p_enum_specifier_1(self, p):
        """ enum_specifier  : ENUM ID
                            | ENUM TYPEID
        """
        p[0] = c_ast.Enum(p[2], None, self._token_coord(p, 1))

    def p_enum_specifier_2(self, p):
        """ enum_specifier  : ENUM brace_open enumerator_list brace_close
        """
        p[0] = c_ast.Enum(None, p[3], self._token_coord(p, 1))

    def p_enum_specifier_3(self, p):
        """ enum_specifier  : ENUM ID brace_open enumerator_list brace_close
                            | ENUM TYPEID brace_open enumerator_list brace_close
        """
        p[0] = c_ast.Enum(p[2], p[4], self._token_coord(p, 1))

    def p_enumerator_list(self, p):
        """ enumerator_list : enumerator
                            | enumerator_list COMMA
                            | enumerator_list COMMA enumerator
        """
        if len(p) == 2:
            p[0] = c_ast.EnumeratorList([p[1]], p[1].coord)
        elif len(p) == 3:
            # Trailing comma after the last enumerator: nothing to add.
            p[0] = p[1]
        else:
            p[1].enumerators.append(p[3])
            p[0] = p[1]

    def p_enumerator(self, p):
        """ enumerator  : ID
                        | ID EQUALS constant_expression
        """
        if len(p) == 2:
            enumerator = c_ast.Enumerator(
                p[1], None,
                self._token_coord(p, 1))
        else:
            enumerator = c_ast.Enumerator(
                p[1], p[3],
                self._token_coord(p, 1))
        # Enum members live in the ordinary identifier namespace.
        self._add_identifier(enumerator.name, enumerator.coord)

        p[0] = enumerator

    def p_declarator(self, p):
        """ declarator  : id_declarator
                        | typeid_declarator
        """
        p[0] = p[1]

    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
    def p_xxx_declarator_1(self, p):
        """ xxx_declarator  : direct_xxx_declarator
        """
        p[0] = p[1]

    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
    def p_xxx_declarator_2(self, p):
        """ xxx_declarator  : pointer direct_xxx_declarator
        """
        p[0] = self._type_modify_decl(p[2], p[1])

    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
    def p_direct_xxx_declarator_1(self, p):
        """ direct_xxx_declarator   : yyy
        """
        p[0] = c_ast.TypeDecl(
            declname=p[1],
            type=None,
            quals=None,
            coord=self._token_coord(p, 1))

    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'))
    def p_direct_xxx_declarator_2(self, p):
        """ direct_xxx_declarator   : LPAREN xxx_declarator RPAREN
        """
        p[0] = p[2]

@parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID')) 1122 def p_direct_xxx_declarator_3(self, p): 1123 """ direct_xxx_declarator : direct_xxx_declarator LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET 1124 """ 1125 quals = (p[3] if len(p) > 5 else []) or [] 1126 # Accept dimension qualifiers 1127 # Per C99 6.7.5.3 p7 1128 arr = c_ast.ArrayDecl( 1129 type=None, 1130 dim=p[4] if len(p) > 5 else p[3], 1131 dim_quals=quals, 1132 coord=p[1].coord) 1133 1134 p[0] = self._type_modify_decl(decl=p[1], modifier=arr) 1135 1136 @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID')) 1137 def p_direct_xxx_declarator_4(self, p): 1138 """ direct_xxx_declarator : direct_xxx_declarator LBRACKET STATIC type_qualifier_list_opt assignment_expression RBRACKET 1139 | direct_xxx_declarator LBRACKET type_qualifier_list STATIC assignment_expression RBRACKET 1140 """ 1141 # Using slice notation for PLY objects doesn't work in Python 3 for the 1142 # version of PLY embedded with pycparser; see PLY Google Code issue 30. 1143 # Work around that here by listing the two elements separately. 
1144 listed_quals = [item if isinstance(item, list) else [item] 1145 for item in [p[3],p[4]]] 1146 dim_quals = [qual for sublist in listed_quals for qual in sublist 1147 if qual is not None] 1148 arr = c_ast.ArrayDecl( 1149 type=None, 1150 dim=p[5], 1151 dim_quals=dim_quals, 1152 coord=p[1].coord) 1153 1154 p[0] = self._type_modify_decl(decl=p[1], modifier=arr) 1155 1156 # Special for VLAs 1157 # 1158 @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID')) 1159 def p_direct_xxx_declarator_5(self, p): 1160 """ direct_xxx_declarator : direct_xxx_declarator LBRACKET type_qualifier_list_opt TIMES RBRACKET 1161 """ 1162 arr = c_ast.ArrayDecl( 1163 type=None, 1164 dim=c_ast.ID(p[4], self._token_coord(p, 4)), 1165 dim_quals=p[3] if p[3] != None else [], 1166 coord=p[1].coord) 1167 1168 p[0] = self._type_modify_decl(decl=p[1], modifier=arr) 1169 1170 @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID')) 1171 def p_direct_xxx_declarator_6(self, p): 1172 """ direct_xxx_declarator : direct_xxx_declarator LPAREN parameter_type_list RPAREN 1173 | direct_xxx_declarator LPAREN identifier_list_opt RPAREN 1174 """ 1175 func = c_ast.FuncDecl( 1176 args=p[3], 1177 type=None, 1178 coord=p[1].coord) 1179 1180 # To see why _get_yacc_lookahead_token is needed, consider: 1181 # typedef char TT; 1182 # void foo(int TT) { TT = 10; } 1183 # Outside the function, TT is a typedef, but inside (starting and 1184 # ending with the braces) it's a parameter. The trouble begins with 1185 # yacc's lookahead token. We don't know if we're declaring or 1186 # defining a function until we see LBRACE, but if we wait for yacc to 1187 # trigger a rule on that token, then TT will have already been read 1188 # and incorrectly interpreted as TYPEID. We need to add the 1189 # parameters to the scope the moment the lexer sees LBRACE. 
1190 # 1191 if self._get_yacc_lookahead_token().type == "LBRACE": 1192 if func.args is not None: 1193 for param in func.args.params: 1194 if isinstance(param, c_ast.EllipsisParam): break 1195 self._add_identifier(param.name, param.coord) 1196 1197 p[0] = self._type_modify_decl(decl=p[1], modifier=func) 1198 1199 def p_pointer(self, p): 1200 """ pointer : TIMES type_qualifier_list_opt 1201 | TIMES type_qualifier_list_opt pointer 1202 """ 1203 coord = self._token_coord(p, 1) 1204 # Pointer decls nest from inside out. This is important when different 1205 # levels have different qualifiers. For example: 1206 # 1207 # char * const * p; 1208 # 1209 # Means "pointer to const pointer to char" 1210 # 1211 # While: 1212 # 1213 # char ** const p; 1214 # 1215 # Means "const pointer to pointer to char" 1216 # 1217 # So when we construct PtrDecl nestings, the leftmost pointer goes in 1218 # as the most nested type. 1219 nested_type = c_ast.PtrDecl(quals=p[2] or [], type=None, coord=coord) 1220 if len(p) > 3: 1221 tail_type = p[3] 1222 while tail_type.type is not None: 1223 tail_type = tail_type.type 1224 tail_type.type = nested_type 1225 p[0] = p[3] 1226 else: 1227 p[0] = nested_type 1228 1229 def p_type_qualifier_list(self, p): 1230 """ type_qualifier_list : type_qualifier 1231 | type_qualifier_list type_qualifier 1232 """ 1233 p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]] 1234 1235 def p_parameter_type_list(self, p): 1236 """ parameter_type_list : parameter_list 1237 | parameter_list COMMA ELLIPSIS 1238 """ 1239 if len(p) > 2: 1240 p[1].params.append(c_ast.EllipsisParam(self._token_coord(p, 3))) 1241 1242 p[0] = p[1] 1243 1244 def p_parameter_list(self, p): 1245 """ parameter_list : parameter_declaration 1246 | parameter_list COMMA parameter_declaration 1247 """ 1248 if len(p) == 2: # single parameter 1249 p[0] = c_ast.ParamList([p[1]], p[1].coord) 1250 else: 1251 p[1].params.append(p[3]) 1252 p[0] = p[1] 1253 1254 # From ISO/IEC 9899:TC2, 6.7.5.3.11: 1255 # "If, in a 
parameter declaration, an identifier can be treated either 1256 # as a typedef name or as a parameter name, it shall be taken as a 1257 # typedef name." 1258 # 1259 # Inside a parameter declaration, once we've reduced declaration specifiers, 1260 # if we shift in an LPAREN and see a TYPEID, it could be either an abstract 1261 # declarator or a declarator nested inside parens. This rule tells us to 1262 # always treat it as an abstract declarator. Therefore, we only accept 1263 # `id_declarator`s and `typeid_noparen_declarator`s. 1264 def p_parameter_declaration_1(self, p): 1265 """ parameter_declaration : declaration_specifiers id_declarator 1266 | declaration_specifiers typeid_noparen_declarator 1267 """ 1268 spec = p[1] 1269 if not spec['type']: 1270 spec['type'] = [c_ast.IdentifierType(['int'], 1271 coord=self._token_coord(p, 1))] 1272 p[0] = self._build_declarations( 1273 spec=spec, 1274 decls=[dict(decl=p[2])])[0] 1275 1276 def p_parameter_declaration_2(self, p): 1277 """ parameter_declaration : declaration_specifiers abstract_declarator_opt 1278 """ 1279 spec = p[1] 1280 if not spec['type']: 1281 spec['type'] = [c_ast.IdentifierType(['int'], 1282 coord=self._token_coord(p, 1))] 1283 1284 # Parameters can have the same names as typedefs. The trouble is that 1285 # the parameter's name gets grouped into declaration_specifiers, making 1286 # it look like an old-style declaration; compensate. 
1287 # 1288 if len(spec['type']) > 1 and len(spec['type'][-1].names) == 1 and \ 1289 self._is_type_in_scope(spec['type'][-1].names[0]): 1290 decl = self._build_declarations( 1291 spec=spec, 1292 decls=[dict(decl=p[2], init=None)])[0] 1293 1294 # This truly is an old-style parameter declaration 1295 # 1296 else: 1297 decl = c_ast.Typename( 1298 name='', 1299 quals=spec['qual'], 1300 type=p[2] or c_ast.TypeDecl(None, None, None), 1301 coord=self._token_coord(p, 2)) 1302 typename = spec['type'] 1303 decl = self._fix_decl_name_type(decl, typename) 1304 1305 p[0] = decl 1306 1307 def p_identifier_list(self, p): 1308 """ identifier_list : identifier 1309 | identifier_list COMMA identifier 1310 """ 1311 if len(p) == 2: # single parameter 1312 p[0] = c_ast.ParamList([p[1]], p[1].coord) 1313 else: 1314 p[1].params.append(p[3]) 1315 p[0] = p[1] 1316 1317 def p_initializer_1(self, p): 1318 """ initializer : assignment_expression 1319 """ 1320 p[0] = p[1] 1321 1322 def p_initializer_2(self, p): 1323 """ initializer : brace_open initializer_list_opt brace_close 1324 | brace_open initializer_list COMMA brace_close 1325 """ 1326 if p[2] is None: 1327 p[0] = c_ast.InitList([], self._token_coord(p, 1)) 1328 else: 1329 p[0] = p[2] 1330 1331 def p_initializer_list(self, p): 1332 """ initializer_list : designation_opt initializer 1333 | initializer_list COMMA designation_opt initializer 1334 """ 1335 if len(p) == 3: # single initializer 1336 init = p[2] if p[1] is None else c_ast.NamedInitializer(p[1], p[2]) 1337 p[0] = c_ast.InitList([init], p[2].coord) 1338 else: 1339 init = p[4] if p[3] is None else c_ast.NamedInitializer(p[3], p[4]) 1340 p[1].exprs.append(init) 1341 p[0] = p[1] 1342 1343 def p_designation(self, p): 1344 """ designation : designator_list EQUALS 1345 """ 1346 p[0] = p[1] 1347 1348 # Designators are represented as a list of nodes, in the order in which 1349 # they're written in the code. 
1350 # 1351 def p_designator_list(self, p): 1352 """ designator_list : designator 1353 | designator_list designator 1354 """ 1355 p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]] 1356 1357 def p_designator(self, p): 1358 """ designator : LBRACKET constant_expression RBRACKET 1359 | PERIOD identifier 1360 """ 1361 p[0] = p[2] 1362 1363 def p_type_name(self, p): 1364 """ type_name : specifier_qualifier_list abstract_declarator_opt 1365 """ 1366 typename = c_ast.Typename( 1367 name='', 1368 quals=p[1]['qual'], 1369 type=p[2] or c_ast.TypeDecl(None, None, None), 1370 coord=self._token_coord(p, 2)) 1371 1372 p[0] = self._fix_decl_name_type(typename, p[1]['type']) 1373 1374 def p_abstract_declarator_1(self, p): 1375 """ abstract_declarator : pointer 1376 """ 1377 dummytype = c_ast.TypeDecl(None, None, None) 1378 p[0] = self._type_modify_decl( 1379 decl=dummytype, 1380 modifier=p[1]) 1381 1382 def p_abstract_declarator_2(self, p): 1383 """ abstract_declarator : pointer direct_abstract_declarator 1384 """ 1385 p[0] = self._type_modify_decl(p[2], p[1]) 1386 1387 def p_abstract_declarator_3(self, p): 1388 """ abstract_declarator : direct_abstract_declarator 1389 """ 1390 p[0] = p[1] 1391 1392 # Creating and using direct_abstract_declarator_opt here 1393 # instead of listing both direct_abstract_declarator and the 1394 # lack of it in the beginning of _1 and _2 caused two 1395 # shift/reduce errors. 
1396 # 1397 def p_direct_abstract_declarator_1(self, p): 1398 """ direct_abstract_declarator : LPAREN abstract_declarator RPAREN """ 1399 p[0] = p[2] 1400 1401 def p_direct_abstract_declarator_2(self, p): 1402 """ direct_abstract_declarator : direct_abstract_declarator LBRACKET assignment_expression_opt RBRACKET 1403 """ 1404 arr = c_ast.ArrayDecl( 1405 type=None, 1406 dim=p[3], 1407 dim_quals=[], 1408 coord=p[1].coord) 1409 1410 p[0] = self._type_modify_decl(decl=p[1], modifier=arr) 1411 1412 def p_direct_abstract_declarator_3(self, p): 1413 """ direct_abstract_declarator : LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET 1414 """ 1415 quals = (p[2] if len(p) > 4 else []) or [] 1416 p[0] = c_ast.ArrayDecl( 1417 type=c_ast.TypeDecl(None, None, None), 1418 dim=p[3] if len(p) > 4 else p[2], 1419 dim_quals=quals, 1420 coord=self._token_coord(p, 1)) 1421 1422 def p_direct_abstract_declarator_4(self, p): 1423 """ direct_abstract_declarator : direct_abstract_declarator LBRACKET TIMES RBRACKET 1424 """ 1425 arr = c_ast.ArrayDecl( 1426 type=None, 1427 dim=c_ast.ID(p[3], self._token_coord(p, 3)), 1428 dim_quals=[], 1429 coord=p[1].coord) 1430 1431 p[0] = self._type_modify_decl(decl=p[1], modifier=arr) 1432 1433 def p_direct_abstract_declarator_5(self, p): 1434 """ direct_abstract_declarator : LBRACKET TIMES RBRACKET 1435 """ 1436 p[0] = c_ast.ArrayDecl( 1437 type=c_ast.TypeDecl(None, None, None), 1438 dim=c_ast.ID(p[3], self._token_coord(p, 3)), 1439 dim_quals=[], 1440 coord=self._token_coord(p, 1)) 1441 1442 def p_direct_abstract_declarator_6(self, p): 1443 """ direct_abstract_declarator : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN 1444 """ 1445 func = c_ast.FuncDecl( 1446 args=p[3], 1447 type=None, 1448 coord=p[1].coord) 1449 1450 p[0] = self._type_modify_decl(decl=p[1], modifier=func) 1451 1452 def p_direct_abstract_declarator_7(self, p): 1453 """ direct_abstract_declarator : LPAREN parameter_type_list_opt RPAREN 1454 """ 1455 
p[0] = c_ast.FuncDecl( 1456 args=p[2], 1457 type=c_ast.TypeDecl(None, None, None), 1458 coord=self._token_coord(p, 1)) 1459 1460 # declaration is a list, statement isn't. To make it consistent, block_item 1461 # will always be a list 1462 # 1463 def p_block_item(self, p): 1464 """ block_item : declaration 1465 | statement 1466 """ 1467 p[0] = p[1] if isinstance(p[1], list) else [p[1]] 1468 1469 # Since we made block_item a list, this just combines lists 1470 # 1471 def p_block_item_list(self, p): 1472 """ block_item_list : block_item 1473 | block_item_list block_item 1474 """ 1475 # Empty block items (plain ';') produce [None], so ignore them 1476 p[0] = p[1] if (len(p) == 2 or p[2] == [None]) else p[1] + p[2] 1477 1478 def p_compound_statement_1(self, p): 1479 """ compound_statement : brace_open block_item_list_opt brace_close """ 1480 p[0] = c_ast.Compound( 1481 block_items=p[2], 1482 coord=self._token_coord(p, 1)) 1483 1484 def p_labeled_statement_1(self, p): 1485 """ labeled_statement : ID COLON pragmacomp_or_statement """ 1486 p[0] = c_ast.Label(p[1], p[3], self._token_coord(p, 1)) 1487 1488 def p_labeled_statement_2(self, p): 1489 """ labeled_statement : CASE constant_expression COLON pragmacomp_or_statement """ 1490 p[0] = c_ast.Case(p[2], [p[4]], self._token_coord(p, 1)) 1491 1492 def p_labeled_statement_3(self, p): 1493 """ labeled_statement : DEFAULT COLON pragmacomp_or_statement """ 1494 p[0] = c_ast.Default([p[3]], self._token_coord(p, 1)) 1495 1496 def p_selection_statement_1(self, p): 1497 """ selection_statement : IF LPAREN expression RPAREN pragmacomp_or_statement """ 1498 p[0] = c_ast.If(p[3], p[5], None, self._token_coord(p, 1)) 1499 1500 def p_selection_statement_2(self, p): 1501 """ selection_statement : IF LPAREN expression RPAREN statement ELSE pragmacomp_or_statement """ 1502 p[0] = c_ast.If(p[3], p[5], p[7], self._token_coord(p, 1)) 1503 1504 def p_selection_statement_3(self, p): 1505 """ selection_statement : SWITCH LPAREN expression RPAREN 
pragmacomp_or_statement """ 1506 p[0] = fix_switch_cases( 1507 c_ast.Switch(p[3], p[5], self._token_coord(p, 1))) 1508 1509 def p_iteration_statement_1(self, p): 1510 """ iteration_statement : WHILE LPAREN expression RPAREN pragmacomp_or_statement """ 1511 p[0] = c_ast.While(p[3], p[5], self._token_coord(p, 1)) 1512 1513 def p_iteration_statement_2(self, p): 1514 """ iteration_statement : DO pragmacomp_or_statement WHILE LPAREN expression RPAREN SEMI """ 1515 p[0] = c_ast.DoWhile(p[5], p[2], self._token_coord(p, 1)) 1516 1517 def p_iteration_statement_3(self, p): 1518 """ iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN pragmacomp_or_statement """ 1519 p[0] = c_ast.For(p[3], p[5], p[7], p[9], self._token_coord(p, 1)) 1520 1521 def p_iteration_statement_4(self, p): 1522 """ iteration_statement : FOR LPAREN declaration expression_opt SEMI expression_opt RPAREN pragmacomp_or_statement """ 1523 p[0] = c_ast.For(c_ast.DeclList(p[3], self._token_coord(p, 1)), 1524 p[4], p[6], p[8], self._token_coord(p, 1)) 1525 1526 def p_jump_statement_1(self, p): 1527 """ jump_statement : GOTO ID SEMI """ 1528 p[0] = c_ast.Goto(p[2], self._token_coord(p, 1)) 1529 1530 def p_jump_statement_2(self, p): 1531 """ jump_statement : BREAK SEMI """ 1532 p[0] = c_ast.Break(self._token_coord(p, 1)) 1533 1534 def p_jump_statement_3(self, p): 1535 """ jump_statement : CONTINUE SEMI """ 1536 p[0] = c_ast.Continue(self._token_coord(p, 1)) 1537 1538 def p_jump_statement_4(self, p): 1539 """ jump_statement : RETURN expression SEMI 1540 | RETURN SEMI 1541 """ 1542 p[0] = c_ast.Return(p[2] if len(p) == 4 else None, self._token_coord(p, 1)) 1543 1544 def p_expression_statement(self, p): 1545 """ expression_statement : expression_opt SEMI """ 1546 if p[1] is None: 1547 p[0] = c_ast.EmptyStatement(self._token_coord(p, 2)) 1548 else: 1549 p[0] = p[1] 1550 1551 def p_expression(self, p): 1552 """ expression : assignment_expression 1553 | expression COMMA 
assignment_expression 1554 """ 1555 if len(p) == 2: 1556 p[0] = p[1] 1557 else: 1558 if not isinstance(p[1], c_ast.ExprList): 1559 p[1] = c_ast.ExprList([p[1]], p[1].coord) 1560 1561 p[1].exprs.append(p[3]) 1562 p[0] = p[1] 1563 1564 def p_typedef_name(self, p): 1565 """ typedef_name : TYPEID """ 1566 p[0] = c_ast.IdentifierType([p[1]], coord=self._token_coord(p, 1)) 1567 1568 def p_assignment_expression(self, p): 1569 """ assignment_expression : conditional_expression 1570 | unary_expression assignment_operator assignment_expression 1571 """ 1572 if len(p) == 2: 1573 p[0] = p[1] 1574 else: 1575 p[0] = c_ast.Assignment(p[2], p[1], p[3], p[1].coord) 1576 1577 # K&R2 defines these as many separate rules, to encode 1578 # precedence and associativity. Why work hard ? I'll just use 1579 # the built in precedence/associativity specification feature 1580 # of PLY. (see precedence declaration above) 1581 # 1582 def p_assignment_operator(self, p): 1583 """ assignment_operator : EQUALS 1584 | XOREQUAL 1585 | TIMESEQUAL 1586 | DIVEQUAL 1587 | MODEQUAL 1588 | PLUSEQUAL 1589 | MINUSEQUAL 1590 | LSHIFTEQUAL 1591 | RSHIFTEQUAL 1592 | ANDEQUAL 1593 | OREQUAL 1594 """ 1595 p[0] = p[1] 1596 1597 def p_constant_expression(self, p): 1598 """ constant_expression : conditional_expression """ 1599 p[0] = p[1] 1600 1601 def p_conditional_expression(self, p): 1602 """ conditional_expression : binary_expression 1603 | binary_expression CONDOP expression COLON conditional_expression 1604 """ 1605 if len(p) == 2: 1606 p[0] = p[1] 1607 else: 1608 p[0] = c_ast.TernaryOp(p[1], p[3], p[5], p[1].coord) 1609 1610 def p_binary_expression(self, p): 1611 """ binary_expression : cast_expression 1612 | binary_expression TIMES binary_expression 1613 | binary_expression DIVIDE binary_expression 1614 | binary_expression MOD binary_expression 1615 | binary_expression PLUS binary_expression 1616 | binary_expression MINUS binary_expression 1617 | binary_expression RSHIFT binary_expression 1618 | 
binary_expression LSHIFT binary_expression 1619 | binary_expression LT binary_expression 1620 | binary_expression LE binary_expression 1621 | binary_expression GE binary_expression 1622 | binary_expression GT binary_expression 1623 | binary_expression EQ binary_expression 1624 | binary_expression NE binary_expression 1625 | binary_expression AND binary_expression 1626 | binary_expression OR binary_expression 1627 | binary_expression XOR binary_expression 1628 | binary_expression LAND binary_expression 1629 | binary_expression LOR binary_expression 1630 """ 1631 if len(p) == 2: 1632 p[0] = p[1] 1633 else: 1634 p[0] = c_ast.BinaryOp(p[2], p[1], p[3], p[1].coord) 1635 1636 def p_cast_expression_1(self, p): 1637 """ cast_expression : unary_expression """ 1638 p[0] = p[1] 1639 1640 def p_cast_expression_2(self, p): 1641 """ cast_expression : LPAREN type_name RPAREN cast_expression """ 1642 p[0] = c_ast.Cast(p[2], p[4], self._token_coord(p, 1)) 1643 1644 def p_unary_expression_1(self, p): 1645 """ unary_expression : postfix_expression """ 1646 p[0] = p[1] 1647 1648 def p_unary_expression_2(self, p): 1649 """ unary_expression : PLUSPLUS unary_expression 1650 | MINUSMINUS unary_expression 1651 | unary_operator cast_expression 1652 """ 1653 p[0] = c_ast.UnaryOp(p[1], p[2], p[2].coord) 1654 1655 def p_unary_expression_3(self, p): 1656 """ unary_expression : SIZEOF unary_expression 1657 | SIZEOF LPAREN type_name RPAREN 1658 """ 1659 p[0] = c_ast.UnaryOp( 1660 p[1], 1661 p[2] if len(p) == 3 else p[3], 1662 self._token_coord(p, 1)) 1663 1664 def p_unary_operator(self, p): 1665 """ unary_operator : AND 1666 | TIMES 1667 | PLUS 1668 | MINUS 1669 | NOT 1670 | LNOT 1671 """ 1672 p[0] = p[1] 1673 1674 def p_postfix_expression_1(self, p): 1675 """ postfix_expression : primary_expression """ 1676 p[0] = p[1] 1677 1678 def p_postfix_expression_2(self, p): 1679 """ postfix_expression : postfix_expression LBRACKET expression RBRACKET """ 1680 p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord) 
1681 1682 def p_postfix_expression_3(self, p): 1683 """ postfix_expression : postfix_expression LPAREN argument_expression_list RPAREN 1684 | postfix_expression LPAREN RPAREN 1685 """ 1686 p[0] = c_ast.FuncCall(p[1], p[3] if len(p) == 5 else None, p[1].coord) 1687 1688 def p_postfix_expression_4(self, p): 1689 """ postfix_expression : postfix_expression PERIOD ID 1690 | postfix_expression PERIOD TYPEID 1691 | postfix_expression ARROW ID 1692 | postfix_expression ARROW TYPEID 1693 """ 1694 field = c_ast.ID(p[3], self._token_coord(p, 3)) 1695 p[0] = c_ast.StructRef(p[1], p[2], field, p[1].coord) 1696 1697 def p_postfix_expression_5(self, p): 1698 """ postfix_expression : postfix_expression PLUSPLUS 1699 | postfix_expression MINUSMINUS 1700 """ 1701 p[0] = c_ast.UnaryOp('p' + p[2], p[1], p[1].coord) 1702 1703 def p_postfix_expression_6(self, p): 1704 """ postfix_expression : LPAREN type_name RPAREN brace_open initializer_list brace_close 1705 | LPAREN type_name RPAREN brace_open initializer_list COMMA brace_close 1706 """ 1707 p[0] = c_ast.CompoundLiteral(p[2], p[5]) 1708 1709 def p_primary_expression_1(self, p): 1710 """ primary_expression : identifier """ 1711 p[0] = p[1] 1712 1713 def p_primary_expression_2(self, p): 1714 """ primary_expression : constant """ 1715 p[0] = p[1] 1716 1717 def p_primary_expression_3(self, p): 1718 """ primary_expression : unified_string_literal 1719 | unified_wstring_literal 1720 """ 1721 p[0] = p[1] 1722 1723 def p_primary_expression_4(self, p): 1724 """ primary_expression : LPAREN expression RPAREN """ 1725 p[0] = p[2] 1726 1727 def p_primary_expression_5(self, p): 1728 """ primary_expression : OFFSETOF LPAREN type_name COMMA offsetof_member_designator RPAREN 1729 """ 1730 coord = self._token_coord(p, 1) 1731 p[0] = c_ast.FuncCall(c_ast.ID(p[1], coord), 1732 c_ast.ExprList([p[3], p[5]], coord), 1733 coord) 1734 1735 def p_offsetof_member_designator(self, p): 1736 """ offsetof_member_designator : identifier 1737 | 
offsetof_member_designator PERIOD identifier 1738 | offsetof_member_designator LBRACKET expression RBRACKET 1739 """ 1740 if len(p) == 2: 1741 p[0] = p[1] 1742 elif len(p) == 4: 1743 p[0] = c_ast.StructRef(p[1], p[2], p[3], p[1].coord) 1744 elif len(p) == 5: 1745 p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord) 1746 else: 1747 raise NotImplementedError("Unexpected parsing state. len(p): %u" % len(p)) 1748 1749 def p_argument_expression_list(self, p): 1750 """ argument_expression_list : assignment_expression 1751 | argument_expression_list COMMA assignment_expression 1752 """ 1753 if len(p) == 2: # single expr 1754 p[0] = c_ast.ExprList([p[1]], p[1].coord) 1755 else: 1756 p[1].exprs.append(p[3]) 1757 p[0] = p[1] 1758 1759 def p_identifier(self, p): 1760 """ identifier : ID """ 1761 p[0] = c_ast.ID(p[1], self._token_coord(p, 1)) 1762 1763 def p_constant_1(self, p): 1764 """ constant : INT_CONST_DEC 1765 | INT_CONST_OCT 1766 | INT_CONST_HEX 1767 | INT_CONST_BIN 1768 | INT_CONST_CHAR 1769 """ 1770 uCount = 0 1771 lCount = 0 1772 for x in p[1][-3:]: 1773 if x in ('l', 'L'): 1774 lCount += 1 1775 elif x in ('u', 'U'): 1776 uCount += 1 1777 t = '' 1778 if uCount > 1: 1779 raise ValueError('Constant cannot have more than one u/U suffix.') 1780 elif lCount > 2: 1781 raise ValueError('Constant cannot have more than two l/L suffix.') 1782 prefix = 'unsigned ' * uCount + 'long ' * lCount 1783 p[0] = c_ast.Constant( 1784 prefix + 'int', p[1], self._token_coord(p, 1)) 1785 1786 def p_constant_2(self, p): 1787 """ constant : FLOAT_CONST 1788 | HEX_FLOAT_CONST 1789 """ 1790 if 'x' in p[1].lower(): 1791 t = 'float' 1792 else: 1793 if p[1][-1] in ('f', 'F'): 1794 t = 'float' 1795 elif p[1][-1] in ('l', 'L'): 1796 t = 'long double' 1797 else: 1798 t = 'double' 1799 1800 p[0] = c_ast.Constant( 1801 t, p[1], self._token_coord(p, 1)) 1802 1803 def p_constant_3(self, p): 1804 """ constant : CHAR_CONST 1805 | WCHAR_CONST 1806 """ 1807 p[0] = c_ast.Constant( 1808 'char', p[1], 
self._token_coord(p, 1)) 1809 1810 # The "unified" string and wstring literal rules are for supporting 1811 # concatenation of adjacent string literals. 1812 # I.e. "hello " "world" is seen by the C compiler as a single string literal 1813 # with the value "hello world" 1814 # 1815 def p_unified_string_literal(self, p): 1816 """ unified_string_literal : STRING_LITERAL 1817 | unified_string_literal STRING_LITERAL 1818 """ 1819 if len(p) == 2: # single literal 1820 p[0] = c_ast.Constant( 1821 'string', p[1], self._token_coord(p, 1)) 1822 else: 1823 p[1].value = p[1].value[:-1] + p[2][1:] 1824 p[0] = p[1] 1825 1826 def p_unified_wstring_literal(self, p): 1827 """ unified_wstring_literal : WSTRING_LITERAL 1828 | unified_wstring_literal WSTRING_LITERAL 1829 """ 1830 if len(p) == 2: # single literal 1831 p[0] = c_ast.Constant( 1832 'string', p[1], self._token_coord(p, 1)) 1833 else: 1834 p[1].value = p[1].value.rstrip()[:-1] + p[2][2:] 1835 p[0] = p[1] 1836 1837 def p_brace_open(self, p): 1838 """ brace_open : LBRACE 1839 """ 1840 p[0] = p[1] 1841 p.set_lineno(0, p.lineno(1)) 1842 1843 def p_brace_close(self, p): 1844 """ brace_close : RBRACE 1845 """ 1846 p[0] = p[1] 1847 p.set_lineno(0, p.lineno(1)) 1848 1849 def p_empty(self, p): 1850 'empty : ' 1851 p[0] = None 1852 1853 def p_error(self, p): 1854 # If error recovery is added here in the future, make sure 1855 # _get_yacc_lookahead_token still works! 1856 # 1857 if p: 1858 self._parse_error( 1859 'before: %s' % p.value, 1860 self._coord(lineno=p.lineno, 1861 column=self.clex.find_tok_column(p))) 1862 else: 1863 self._parse_error('At end of input', self.clex.filename) 1864