1# Grammar for Python
2
3# NOTE WELL: You should also follow all the steps listed at
4# https://devguide.python.org/grammar/
5
6# Start symbols for the grammar:
7#       single_input is a single interactive statement;
8#       file_input is a module or sequence of commands read from an input file;
9#       eval_input is the input for the eval() functions.
10#       func_type_input is a PEP 484 Python 2 function type comment
11# NB: compound_stmt in single_input is followed by extra NEWLINE!
12# NB: due to the way TYPE_COMMENT is tokenized it will always be followed by a NEWLINE
# Entry points of the grammar (the start symbols described in the comment above).
# single_input: a lone NEWLINE, one simple_stmt, or a compound_stmt followed by
# one extra NEWLINE.
13single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
# file_input: any mix of NEWLINE tokens and statements, closed by ENDMARKER.
14file_input: (NEWLINE | stmt)* ENDMARKER
# eval_input: a testlist, then any number of NEWLINE tokens, then ENDMARKER.
15eval_input: testlist NEWLINE* ENDMARKER
16
# A single decorator line: '@', a namedexpr_test, and the terminating NEWLINE.
17decorator: '@' namedexpr_test NEWLINE
# One or more decorator lines in a row.
18decorators: decorator+
# A decorator run must be followed by a class, function, or async function definition.
19decorated: decorators (classdef | funcdef | async_funcdef)
20
# An async function definition is the ASYNC token followed by an ordinary funcdef.
21async_funcdef: ASYNC funcdef
# 'def' NAME parameters, an optional "'->' test", ':', an optional TYPE_COMMENT,
# then the body. The body uses func_body_suite rather than the plain suite rule.
22funcdef: 'def' NAME parameters ['->' test] ':' [TYPE_COMMENT] func_body_suite
23
# Parenthesized parameter list; the typedargslist inside is optional, so '()' is accepted.
24parameters: '(' [typedargslist] ')'
25
26# The following definition for typedargslist is equivalent to this set of rules:
27#
28#     arguments = argument (',' [TYPE_COMMENT] argument)*
29#     argument = tfpdef ['=' test]
30#     kwargs = '**' tfpdef [','] [TYPE_COMMENT]
31#     args = '*' [tfpdef]
32#     kwonly_kwargs = (',' [TYPE_COMMENT] argument)* (TYPE_COMMENT | [',' [TYPE_COMMENT] [kwargs]])
33#     args_kwonly_kwargs = args kwonly_kwargs | kwargs
34#     poskeyword_args_kwonly_kwargs = arguments ( TYPE_COMMENT | [',' [TYPE_COMMENT] [args_kwonly_kwargs]])
35#     typedargslist_no_posonly  = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs
36#     typedargslist = (arguments ',' [TYPE_COMMENT] '/' [',' [[TYPE_COMMENT] typedargslist_no_posonly]])|(typedargslist_no_posonly)
37#
38# It needs to be fully expanded to allow our LL(1) parser to work on it.
39
# typedargslist: the contents of the parentheses in the 'parameters' rule,
# written out in full (the comment above gives the equivalent compact rule set).
# The outer group has two branches:
#   1. one or more "tfpdef ['=' test]" entries, then ',' [TYPE_COMMENT] '/',
#      optionally followed by ',' and a further list shaped like branch 2;
#   2. a list with no '/': "tfpdef ['=' test]" entries, and/or a '*' [tfpdef]
#      section, and/or a final '**' tfpdef.
# Every ',' in the rule may be followed by an optional TYPE_COMMENT.
40typedargslist: (
41  (tfpdef ['=' test] (',' [TYPE_COMMENT] tfpdef ['=' test])* ',' [TYPE_COMMENT] '/' [',' [ [TYPE_COMMENT] tfpdef ['=' test] (
42        ',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [
43        '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]])
44      | '**' tfpdef [','] [TYPE_COMMENT]]])
45  | '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]])
46  | '**' tfpdef [','] [TYPE_COMMENT]]] )
47|  (tfpdef ['=' test] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] [
48   '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]])
49  | '**' tfpdef [','] [TYPE_COMMENT]]])
50  | '*' [tfpdef] (',' [TYPE_COMMENT] tfpdef ['=' test])* (TYPE_COMMENT | [',' [TYPE_COMMENT] ['**' tfpdef [','] [TYPE_COMMENT]]])
51  | '**' tfpdef [','] [TYPE_COMMENT])
52)
# One typed parameter: a NAME, optionally followed by ':' and a test.
53tfpdef: NAME [':' test]
54
55# The following definition for varargslist is equivalent to this set of rules:
56#
57#     arguments = argument (',' argument )*
58#     argument = vfpdef ['=' test]
59#     kwargs = '**' vfpdef [',']
60#     args = '*' [vfpdef]
61#     kwonly_kwargs = (',' argument )* [',' [kwargs]]
62#     args_kwonly_kwargs = args kwonly_kwargs | kwargs
63#     poskeyword_args_kwonly_kwargs = arguments [',' [args_kwonly_kwargs]]
64#     vararglist_no_posonly = poskeyword_args_kwonly_kwargs | args_kwonly_kwargs
65#     varargslist = arguments ',' '/' [','[(vararglist_no_posonly)]] | (vararglist_no_posonly)
66#
67# It needs to be fully expanded to allow our LL(1) parser to work on it.
68
# varargslist: the parameter list accepted by lambdef / lambdef_nocond, written
# out in full (the comment above gives the equivalent compact rule set).
# It has the same two-branch shape as typedargslist (with '/' and without '/'),
# but is built from vfpdef and contains no TYPE_COMMENT positions.
69varargslist: vfpdef ['=' test ](',' vfpdef ['=' test])* ',' '/' [',' [ (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
70        '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
71      | '**' vfpdef [',']]]
72  | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
73  | '**' vfpdef [',']) ]] | (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
74        '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
75      | '**' vfpdef [',']]]
76  | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
77  | '**' vfpdef [',']
78)
# One untyped parameter: just a NAME (unlike tfpdef, no ':' test part).
79vfpdef: NAME
80
# A statement is either a simple_stmt or a compound_stmt.
81stmt: simple_stmt | compound_stmt
# One or more small_stmt separated by ';', with an optional trailing ';',
# terminated by NEWLINE.
82simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
# The statement kinds that may appear as one element of a simple_stmt.
83small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
84             import_stmt | global_stmt | nonlocal_stmt | assert_stmt)
# expr_stmt starts with a testlist_star_expr and continues with one of:
#   - an annassign part,
#   - an augassign operator followed by a yield_expr or testlist,
#   - an optional chain of one or more "'=' (yield_expr|testlist_star_expr)"
#     parts, after which a TYPE_COMMENT may appear.
# When the optional chain is absent the statement is the bare testlist_star_expr.
85expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
86                     [('=' (yield_expr|testlist_star_expr))+ [TYPE_COMMENT]] )
# ':' test, optionally followed by '=' and a right-hand side.
87annassign: ':' test ['=' (yield_expr|testlist_star_expr)]
# Comma-separated test / star_expr items; a trailing ',' is allowed.
88testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
# The augmented-assignment operator tokens.
89augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
90            '<<=' | '>>=' | '**=' | '//=')
91# For normal and annotated assignments, additional restrictions enforced by the interpreter
# 'del' followed by an exprlist.
92del_stmt: 'del' exprlist
# The keyword 'pass' on its own.
93pass_stmt: 'pass'
# flow_stmt groups the break, continue, return, raise and yield statements.
94flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
95break_stmt: 'break'
96continue_stmt: 'continue'
# 'return' with an optional testlist_star_expr.
97return_stmt: 'return' [testlist_star_expr]
# A yield statement is exactly a yield_expr.
98yield_stmt: yield_expr
# 'raise' alone, 'raise' test, or 'raise' test 'from' test.
99raise_stmt: 'raise' [test ['from' test]]
# An import statement is either the 'import ...' form or the 'from ... import ...' form.
100import_stmt: import_name | import_from
# 'import' followed by a comma-separated list of dotted names (each with optional 'as').
101import_name: 'import' dotted_as_names
102# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
# After 'from': either any number of '.'/'...' tokens followed by a dotted_name,
# or one or more '.'/'...' tokens alone. After 'import': '*', a parenthesized
# import_as_names, or a bare import_as_names.
103import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
104              'import' ('*' | '(' import_as_names ')' | import_as_names))
# A single NAME with an optional 'as' NAME.
105import_as_name: NAME ['as' NAME]
# A dotted_name with an optional 'as' NAME.
106dotted_as_name: dotted_name ['as' NAME]
# Comma-separated import_as_name items; a trailing ',' is allowed here.
107import_as_names: import_as_name (',' import_as_name)* [',']
# Comma-separated dotted_as_name items; this rule has no trailing-',' option.
108dotted_as_names: dotted_as_name (',' dotted_as_name)*
# One or more NAME tokens joined by '.'.
109dotted_name: NAME ('.' NAME)*
# 'global' followed by one or more comma-separated NAME tokens.
110global_stmt: 'global' NAME (',' NAME)*
# 'nonlocal' followed by one or more comma-separated NAME tokens.
111nonlocal_stmt: 'nonlocal' NAME (',' NAME)*
# 'assert' test, optionally followed by ',' and a second test.
112assert_stmt: 'assert' test [',' test]
113
# The statement kinds that make up compound_stmt.
114compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
# ASYNC followed by a funcdef, with_stmt or for_stmt.
115async_stmt: ASYNC (funcdef | with_stmt | for_stmt)
# 'if' clause, any number of 'elif' clauses, and an optional 'else' clause.
# The conditions are namedexpr_test, so a ':=' form is accepted there.
116if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite]
# 'while' loop with an optional 'else' clause; the condition is a namedexpr_test.
117while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite]
# 'for' exprlist 'in' testlist ':', an optional TYPE_COMMENT, the body suite,
# and an optional 'else' clause.
118for_stmt: 'for' exprlist 'in' testlist ':' [TYPE_COMMENT] suite ['else' ':' suite]
# 'try' ':' suite followed by either
#   - one or more except clauses, then optional 'else' and optional 'finally' clauses, or
#   - a 'finally' clause alone.
119try_stmt: ('try' ':' suite
120           ((except_clause ':' suite)+
121            ['else' ':' suite]
122            ['finally' ':' suite] |
123           'finally' ':' suite))
# 'with' followed by one or more comma-separated with_item entries, ':',
# an optional TYPE_COMMENT, and the body suite.
124with_stmt: 'with' with_item (',' with_item)*  ':' [TYPE_COMMENT] suite
# A test with an optional 'as' expr target.
125with_item: test ['as' expr]
126# NB compile.c makes sure that the default except clause is last
# 'except' alone, 'except' test, or 'except' test 'as' NAME.
127except_clause: 'except' [test ['as' NAME]]
# A body: either a simple_stmt on the same line, or NEWLINE INDENT followed by
# one or more statements and a closing DEDENT.
128suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
129
# A test, optionally followed by ':=' and a second test.
130namedexpr_test: test [':=' test]
# Either an or_test with an optional "'if' or_test 'else' test" tail, or a lambdef.
131test: or_test ['if' or_test 'else' test] | lambdef
# Like test but without the 'if' ... 'else' tail; lambdas use lambdef_nocond here.
132test_nocond: or_test | lambdef_nocond
# 'lambda', an optional varargslist, ':', and a test as the body.
133lambdef: 'lambda' [varargslist] ':' test
# Same as lambdef except that the body is a test_nocond.
134lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
# One or more and_test operands joined by 'or'.
135or_test: and_test ('or' and_test)*
# One or more not_test operands joined by 'and'.
136and_test: not_test ('and' not_test)*
# Any number of leading 'not' keywords (via recursion) in front of a comparison.
137not_test: 'not' not_test | comparison
# An expr followed by any number of (comp_op expr) pairs.
138comparison: expr (comp_op expr)*
139# <> isn't actually a valid comparison operator in Python. It's here for the
140# sake of a __future__ import described in PEP 401 (which really works :-)
# Comparison operators; 'not' 'in' and 'is' 'not' are each two tokens.
141comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
# '*' followed by an expr.
142star_expr: '*' expr
# The rules from expr down to term share one shape: a first operand followed by
# any number of (operator operand) pairs, where the operand is the next rule down.
143expr: xor_expr ('|' xor_expr)*
144xor_expr: and_expr ('^' and_expr)*
145and_expr: shift_expr ('&' shift_expr)*
146shift_expr: arith_expr (('<<'|'>>') arith_expr)*
147arith_expr: term (('+'|'-') term)*
148term: factor (('*'|'@'|'/'|'%'|'//') factor)*
# Any number of prefix '+', '-' or '~' operators (via recursion) in front of a power.
149factor: ('+'|'-'|'~') factor | power
# An atom_expr optionally followed by '**' and a factor as the right operand.
150power: atom_expr ['**' factor]
# An optional AWAIT token, an atom, and any number of trailers.
151atom_expr: [AWAIT] atom trailer*
# atom alternatives: a parenthesized form (optionally holding a yield_expr or
# testlist_comp), a bracketed form (optional testlist_comp), a braced form
# (optional dictorsetmaker), NAME, NUMBER, one or more adjacent STRING tokens,
# '...', or one of the keywords 'None', 'True', 'False'.
152atom: ('(' [yield_expr|testlist_comp] ')' |
153       '[' [testlist_comp] ']' |
154       '{' [dictorsetmaker] '}' |
155       NAME | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False')
# A first namedexpr_test or star_expr, followed by either a comp_for or by
# further comma-separated items with an optional trailing ','.
156testlist_comp: (namedexpr_test|star_expr) ( comp_for | (',' (namedexpr_test|star_expr))* [','] )
# A trailer is a parenthesized optional arglist, a bracketed subscriptlist, or '.' NAME.
157trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
# Comma-separated subscript items; a trailing ',' is allowed.
158subscriptlist: subscript (',' subscript)* [',']
# Either a single test, or a ':' form with optional tests on each side and an optional sliceop.
159subscript: test | [test] ':' [test] [sliceop]
# A second ':' with an optional test after it.
160sliceop: ':' [test]
# Comma-separated expr / star_expr items; a trailing ',' is allowed.
161exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
# Comma-separated test items; a trailing ',' is allowed.
162testlist: test (',' test)* [',']
# Contents of a braced atom. Two alternatives with the same overall shape:
#   - entries of the form "test ':' test" or "'**' expr";
#   - entries of the form test or star_expr.
# In both, the first entry is followed either by a comp_for or by further
# comma-separated entries with an optional trailing ','.
163dictorsetmaker: ( ((test ':' test | '**' expr)
164                   (comp_for | (',' (test ':' test | '**' expr))* [','])) |
165                  ((test | star_expr)
166                   (comp_for | (',' (test | star_expr))* [','])) )
167
# 'class' NAME, an optional parenthesized group (whose arglist is itself optional,
# so '()' is accepted), ':', and the body suite.
168classdef: 'class' NAME ['(' [arglist] ')'] ':' suite
169
# One or more comma-separated argument items; a trailing ',' is allowed.
170arglist: argument (',' argument)*  [',']
171
172# The reason that keywords are test nodes instead of NAME is that using NAME
173# results in an ambiguity. ast.c makes sure it's a NAME.
174# "test '=' test" is really "keyword '=' test", but we have no such token.
175# These need to be in a single rule to avoid grammar that is ambiguous
176# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
177# we explicitly match '*' here, too, to give it proper precedence.
178# Illegal combinations and orderings are blocked in ast.c:
179# multiple (test comp_for) arguments are blocked; keyword unpackings
180# that precede iterable unpackings are blocked; etc.
# Alternatives, in order: a test optionally followed by comp_for; test ':=' test;
# test '=' test (the keyword form described in the comment above); '**' test; '*' test.
181argument: ( test [comp_for] |
182            test ':=' test |
183            test '=' test |
184            '**' test |
185            '*' test )
186
# A continuation of a comprehension: another 'for' clause or an 'if' clause.
187comp_iter: comp_for | comp_if
# 'for' exprlist 'in' or_test, optionally followed by a further comp_iter.
# The iterable is an or_test, not a full test.
188sync_comp_for: 'for' exprlist 'in' or_test [comp_iter]
# A sync_comp_for with an optional leading ASYNC token.
189comp_for: [ASYNC] sync_comp_for
# 'if' test_nocond, optionally followed by a further comp_iter.
190comp_if: 'if' test_nocond [comp_iter]
191
192# not used in grammar, but may appear in "node" passed from Parser to Compiler
# encoding_decl is a single NAME token.
193encoding_decl: NAME
194
# 'yield' with an optional yield_arg.
195yield_expr: 'yield' [yield_arg]
# Either 'from' test, or a testlist_star_expr.
196yield_arg: 'from' test | testlist_star_expr
197
198# the TYPE_COMMENT in suites is only parsed for funcdefs,
199# but can't go elsewhere due to ambiguity
# Same shape as the suite rule, except that an optional "TYPE_COMMENT NEWLINE"
# may appear between the first NEWLINE and INDENT.
200func_body_suite: simple_stmt | NEWLINE [TYPE_COMMENT NEWLINE] INDENT stmt+ DEDENT
201
# Start symbol for function type comments: a func_type, then any number of
# NEWLINE tokens, then ENDMARKER.
202func_type_input: func_type NEWLINE* ENDMARKER
# A parenthesized optional typelist, then '->' and a test.
203func_type: '(' [typelist] ')' '->' test
204# typelist is a modified typedargslist (see above)
# Either comma-separated test items optionally followed by ',' and a
# '*' [test] section and/or '**' test, or a list that starts directly with
# '*' [test] or '**' test.
205typelist: (test (',' test)* [','
206       ['*' [test] (',' test)* [',' '**' test] | '**' test]]
207     |  '*' [test] (',' test)* [',' '**' test] | '**' test)
208