// t033backtracking: a trimmed-down C declaration grammar used to exercise
// backtracking (syntactic predicates), memoization and dynamic scopes.
// Most of the expression/statement rules are intentionally commented out.
grammar t033backtracking;
options {
    language=Cpp;
    backtrack=true;
    memoize=true;
    k=2;
}

// Global dynamic scope: each rule that declares "scope Symbols;" pushes a new
// instance holding the type names defined at that nesting level.
scope Symbols {
    types;
}

@lexer::includes
{
#include "UserTestTraits.hpp"
}
@lexer::namespace
{ Antlr3Test }

@parser::includes {
#include "UserTestTraits.hpp"
}
@parser::namespace
{ Antlr3Test }

// NOTE(review): the options above select language=Cpp, but every embedded
// action below (this @members section, the @init blocks, the type_id
// predicate and the direct_declarator action) is written in Python syntax.
// Actions are emitted into the generated parser, so these presumably need
// porting to C++ before the generated code can compile -- TODO confirm
// against the C++ target's scope/action conventions.
@members {
    def isTypeName(self, name):
        for scope in reversed(self.Symbols_stack):
            if name in scope.types:
                return True

        return False

}

translation_unit
scope Symbols; // entire file is a scope
@init {
  $Symbols::types = set()
}
    :   external_declaration+
    ;

/** Either a function definition or any other kind of C decl/def.
 *  The LL(*) analysis algorithm fails to deal with this due to
 *  recursion in the declarator rules.  I'm putting in a
 *  manual predicate here so that we don't backtrack over
 *  the entire function.  Further, you get a better error
 *  as errors within the function itself don't make it fail
 *  to predict that it's a function.  Weird errors previously.
 *  Remember: the goal is to avoid backtrack like the plague
 *  because it makes debugging, actions, and errors harder.
 *
 *  Note that k=1 results in a much smaller predictor for the
 *  fixed lookahead; k=2 made a few extra thousand lines. ;)
 *  I'll have to optimize that in the future.
 */
external_declaration
options {k=1;}
    :   ( declaration_specifiers? declarator declaration* '{' )=> function_definition
    |   declaration
    ;

// Only the function header is matched here; the body alternatives are
// commented out, so the '{' required by the predicate in external_declaration
// is not consumed by this rule.
function_definition
scope Symbols; // put parameters and locals into same scope for now
@init {
  $Symbols::types = set()
}
    :   declaration_specifiers? declarator
//      (   declaration+ compound_statement // K&R style
//      |   compound_statement              // ANSI style
//      )
    ;

// Rule-local scope: isTypedef tells direct_declarator whether the identifier
// being declared should be recorded as a type name.
declaration
scope {
  isTypedef;
}
@init {
  $declaration::isTypedef = False
}
    :   'typedef' declaration_specifiers? {$declaration::isTypedef = True}
        init_declarator_list ';' // special case, looking for typedef
    |   declaration_specifiers init_declarator_list? ';'
    ;

declaration_specifiers
    :   (   storage_class_specifier
        |   type_specifier
        |   type_qualifier
        )+
    ;

init_declarator_list
    :   init_declarator (',' init_declarator)*
    ;

init_declarator
    :   declarator //('=' initializer)?
    ;

storage_class_specifier
    :   'extern'
    |   'static'
    |   'auto'
    |   'register'
    ;

type_specifier
    :   'void'
    |   'char'
    |   'short'
    |   'int'
    |   'long'
    |   'float'
    |   'double'
    |   'signed'
    |   'unsigned'
//  |   struct_or_union_specifier
//  |   enum_specifier
    |   type_id
    ;

// An IDENTIFIER counts as a type only when the semantic predicate finds its
// text in one of the active Symbols scopes.
type_id
    :   {self.isTypeName(self.input.LT(1).getText())}? IDENTIFIER
//      {System.out.println($IDENTIFIER.text+" is a type");}
    ;

// struct_or_union_specifier
// options {k=3;}
// scope Symbols; // structs are scopes
// @init {
//   $Symbols::types = set()
// }
//     : struct_or_union IDENTIFIER? '{' struct_declaration_list '}'
//     | struct_or_union IDENTIFIER
//     ;

// struct_or_union
//     : 'struct'
//     | 'union'
//     ;

// struct_declaration_list
//     : struct_declaration+
//     ;

// struct_declaration
//     : specifier_qualifier_list struct_declarator_list ';'
//     ;

// specifier_qualifier_list
//     : ( type_qualifier | type_specifier )+
//     ;

// struct_declarator_list
//     : struct_declarator (',' struct_declarator)*
//     ;

// struct_declarator
//     : declarator (':' constant_expression)?
//     | ':' constant_expression
//     ;

// enum_specifier
// options {k=3;}
//     : 'enum' '{' enumerator_list '}'
//     | 'enum' IDENTIFIER '{' enumerator_list '}'
//     | 'enum' IDENTIFIER
//     ;

// enumerator_list
//     : enumerator (',' enumerator)*
//     ;

// enumerator
//     : IDENTIFIER ('=' constant_expression)?
//     ;

type_qualifier
    :   'const'
    |   'volatile'
    ;

declarator
    :   pointer? direct_declarator
    |   pointer
    ;

// When the enclosing declaration is a typedef, the declared identifier is
// added to the current Symbols scope so type_id can recognise it later.
direct_declarator
    :   (   IDENTIFIER
            {
            if $declaration and $declaration::isTypedef:
                $Symbols::types.add($IDENTIFIER.text)
                print("define type "+$IDENTIFIER.text)
            }
        |   '(' declarator ')'
        )
        declarator_suffix*
    ;

declarator_suffix
    :   /*'[' constant_expression ']'
    |*/ '[' ']'
//  |   '(' parameter_type_list ')'
//  |   '(' identifier_list ')'
    |   '(' ')'
    ;

pointer
    :   '*' type_qualifier+ pointer?
    |   '*' pointer
    |   '*'
    ;

// parameter_type_list
//     : parameter_list (',' '...')?
//     ;

// parameter_list
//     : parameter_declaration (',' parameter_declaration)*
//     ;

// parameter_declaration
//     : declaration_specifiers (declarator|abstract_declarator)*
//     ;

// identifier_list
//     : IDENTIFIER (',' IDENTIFIER)*
//     ;

// type_name
//     : specifier_qualifier_list abstract_declarator?
//     ;

// abstract_declarator
//     : pointer direct_abstract_declarator?
//     | direct_abstract_declarator
//     ;

// direct_abstract_declarator
//     : ( '(' abstract_declarator ')' | abstract_declarator_suffix ) abstract_declarator_suffix*
//     ;

// abstract_declarator_suffix
//     : '[' ']'
//     | '[' constant_expression ']'
//     | '(' ')'
//     | '(' parameter_type_list ')'
//     ;

// initializer
//     : assignment_expression
//     | '{' initializer_list ','? '}'
//     ;

// initializer_list
//     : initializer (',' initializer)*
//     ;

// // E x p r e s s i o n s

// argument_expression_list
//     : assignment_expression (',' assignment_expression)*
//     ;

// additive_expression
//     : (multiplicative_expression) ('+' multiplicative_expression | '-' multiplicative_expression)*
//     ;

// multiplicative_expression
//     : (cast_expression) ('*' cast_expression | '/' cast_expression | '%' cast_expression)*
//     ;

// cast_expression
//     : '(' type_name ')' cast_expression
//     | unary_expression
//     ;

// unary_expression
//     : postfix_expression
//     | '++' unary_expression
//     | '--' unary_expression
//     | unary_operator cast_expression
//     | 'sizeof' unary_expression
//     | 'sizeof' '(' type_name ')'
//     ;

// postfix_expression
//     : primary_expression
//       ( '[' expression ']'
//       | '(' ')'
//       | '(' argument_expression_list ')'
//       | '.' IDENTIFIER
//       | '*' IDENTIFIER
//       | '->' IDENTIFIER
//       | '++'
//       | '--'
//       )*
//     ;

// unary_operator
//     : '&'
//     | '*'
//     | '+'
//     | '-'
//     | '~'
//     | '!'
//     ;

// primary_expression
//     : IDENTIFIER
//     | constant
//     | '(' expression ')'
//     ;

// constant
//     : HEX_LITERAL
//     | OCTAL_LITERAL
//     | DECIMAL_LITERAL
//     | CHARACTER_LITERAL
//     | STRING_LITERAL
//     | FLOATING_POINT_LITERAL
//     ;

// /////

// expression
//     : assignment_expression (',' assignment_expression)*
//     ;

// constant_expression
//     : conditional_expression
//     ;

// assignment_expression
//     : lvalue assignment_operator assignment_expression
//     | conditional_expression
//     ;

// lvalue
//     : unary_expression
//     ;

// assignment_operator
//     : '='
//     | '*='
//     | '/='
//     | '%='
//     | '+='
//     | '-='
//     | '<<='
//     | '>>='
//     | '&='
//     | '^='
//     | '|='
//     ;

// conditional_expression
//     : logical_or_expression ('?' expression ':' conditional_expression)?
//     ;

// logical_or_expression
//     : logical_and_expression ('||' logical_and_expression)*
//     ;

// logical_and_expression
//     : inclusive_or_expression ('&&' inclusive_or_expression)*
//     ;

// inclusive_or_expression
//     : exclusive_or_expression ('|' exclusive_or_expression)*
//     ;

// exclusive_or_expression
//     : and_expression ('^' and_expression)*
//     ;

// and_expression
//     : equality_expression ('&' equality_expression)*
//     ;
// equality_expression
//     : relational_expression (('=='|'!=') relational_expression)*
//     ;

// relational_expression
//     : shift_expression (('<'|'>'|'<='|'>=') shift_expression)*
//     ;

// shift_expression
//     : additive_expression (('<<'|'>>') additive_expression)*
//     ;

// // S t a t e m e n t s

// statement
//     : labeled_statement
//     | compound_statement
//     | expression_statement
//     | selection_statement
//     | iteration_statement
//     | jump_statement
//     ;

// labeled_statement
//     : IDENTIFIER ':' statement
//     | 'case' constant_expression ':' statement
//     | 'default' ':' statement
//     ;

// compound_statement
// scope Symbols; // blocks have a scope of symbols
// @init {
//   $Symbols::types = {}
// }
//     : '{' declaration* statement_list? '}'
//     ;

// statement_list
//     : statement+
//     ;

// expression_statement
//     : ';'
//     | expression ';'
//     ;

// selection_statement
//     : 'if' '(' expression ')' statement (options {k=1; backtrack=false;}:'else' statement)?
//     | 'switch' '(' expression ')' statement
//     ;

// iteration_statement
//     : 'while' '(' expression ')' statement
//     | 'do' statement 'while' '(' expression ')' ';'
//     | 'for' '(' expression_statement expression_statement expression? ')' statement
//     ;

// jump_statement
//     : 'goto' IDENTIFIER ';'
//     | 'continue' ';'
//     | 'break' ';'
//     | 'return' ';'
//     | 'return' expression ';'
//     ;

// L e x e r   r u l e s

IDENTIFIER
    :   LETTER (LETTER|'0'..'9')*
    ;

fragment
LETTER
    :   '$'
    |   'A'..'Z'
    |   'a'..'z'
    |   '_'
    ;

CHARACTER_LITERAL
    :   '\'' ( EscapeSequence | ~('\''|'\\') ) '\''
    ;

STRING_LITERAL
    :   '"' ( EscapeSequence | ~('\\'|'"') )* '"'
    ;

HEX_LITERAL : '0' ('x'|'X') HexDigit+ IntegerTypeSuffix? ;

DECIMAL_LITERAL : ('0' | '1'..'9' '0'..'9'*) IntegerTypeSuffix? ;

OCTAL_LITERAL : '0' ('0'..'7')+ IntegerTypeSuffix? ;

fragment
HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ;

fragment
IntegerTypeSuffix
    :   ('u'|'U')? ('l'|'L')
    |   ('u'|'U')  ('l'|'L')?
    ;

FLOATING_POINT_LITERAL
    :   ('0'..'9')+ '.' ('0'..'9')* Exponent? FloatTypeSuffix?
    |   '.' ('0'..'9')+ Exponent? FloatTypeSuffix?
    |   ('0'..'9')+ Exponent FloatTypeSuffix?
    |   ('0'..'9')+ Exponent? FloatTypeSuffix
    ;

fragment
Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;

fragment
FloatTypeSuffix : ('f'|'F'|'d'|'D') ;

fragment
EscapeSequence
    :   '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
    |   OctalEscape
    ;

fragment
OctalEscape
    :   '\\' ('0'..'3') ('0'..'7') ('0'..'7')
    |   '\\' ('0'..'7') ('0'..'7')
    |   '\\' ('0'..'7')
    ;

// Not referenced by any other rule in this grammar (EscapeSequence does not
// include it); kept as-is.
fragment
UnicodeEscape
    :   '\\' 'u' HexDigit HexDigit HexDigit HexDigit
    ;

WS  :  (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=HIDDEN;}
    ;

COMMENT
    :   '/*' ( options {greedy=false;} : . )* '*/' {$channel=HIDDEN;}
    ;

// The line terminator is optional so that a comment on the final line of the
// input (no trailing newline before EOF) still matches.
LINE_COMMENT
    :   '//' ~('\n'|'\r')* ('\r'? '\n')? {$channel=HIDDEN;}
    ;

// ignore #line info for now
// As with LINE_COMMENT, the terminator is optional to tolerate a directive on
// the last line of input without a trailing newline.
LINE_COMMAND
    :   '#' ~('\n'|'\r')* ('\r'? '\n')? {$channel=HIDDEN;}
    ;