1 /*
2  * This file includes functions to transform a concrete syntax tree (CST) to
3  * an abstract syntax tree (AST). The main function is PyAST_FromNode().
4  *
5  */
6 #include "Python.h"
7 #include "Python-ast.h"
8 #include "node.h"
9 #include "ast.h"
10 #include "token.h"
11 #include "pythonrun.h"
12 
13 #include <assert.h>
14 #include <stdbool.h>
15 
16 static int validate_stmts(asdl_seq *);
17 static int validate_exprs(asdl_seq *, expr_context_ty, int);
18 static int validate_nonempty_seq(asdl_seq *, const char *, const char *);
19 static int validate_stmt(stmt_ty);
20 static int validate_expr(expr_ty, expr_context_ty);
21 
22 static int
validate_comprehension(asdl_seq * gens)23 validate_comprehension(asdl_seq *gens)
24 {
25     int i;
26     if (!asdl_seq_LEN(gens)) {
27         PyErr_SetString(PyExc_ValueError, "comprehension with no generators");
28         return 0;
29     }
30     for (i = 0; i < asdl_seq_LEN(gens); i++) {
31         comprehension_ty comp = asdl_seq_GET(gens, i);
32         if (!validate_expr(comp->target, Store) ||
33             !validate_expr(comp->iter, Load) ||
34             !validate_exprs(comp->ifs, Load, 0))
35             return 0;
36     }
37     return 1;
38 }
39 
40 static int
validate_slice(slice_ty slice)41 validate_slice(slice_ty slice)
42 {
43     switch (slice->kind) {
44     case Slice_kind:
45         return (!slice->v.Slice.lower || validate_expr(slice->v.Slice.lower, Load)) &&
46             (!slice->v.Slice.upper || validate_expr(slice->v.Slice.upper, Load)) &&
47             (!slice->v.Slice.step || validate_expr(slice->v.Slice.step, Load));
48     case ExtSlice_kind: {
49         int i;
50         if (!validate_nonempty_seq(slice->v.ExtSlice.dims, "dims", "ExtSlice"))
51             return 0;
52         for (i = 0; i < asdl_seq_LEN(slice->v.ExtSlice.dims); i++)
53             if (!validate_slice(asdl_seq_GET(slice->v.ExtSlice.dims, i)))
54                 return 0;
55         return 1;
56     }
57     case Index_kind:
58         return validate_expr(slice->v.Index.value, Load);
59     default:
60         PyErr_SetString(PyExc_SystemError, "unknown slice node");
61         return 0;
62     }
63 }
64 
65 static int
validate_keywords(asdl_seq * keywords)66 validate_keywords(asdl_seq *keywords)
67 {
68     int i;
69     for (i = 0; i < asdl_seq_LEN(keywords); i++)
70         if (!validate_expr(((keyword_ty)asdl_seq_GET(keywords, i))->value, Load))
71             return 0;
72     return 1;
73 }
74 
75 static int
validate_args(asdl_seq * args)76 validate_args(asdl_seq *args)
77 {
78     int i;
79     for (i = 0; i < asdl_seq_LEN(args); i++) {
80         arg_ty arg = asdl_seq_GET(args, i);
81         if (arg->annotation && !validate_expr(arg->annotation, Load))
82             return 0;
83     }
84     return 1;
85 }
86 
87 static const char *
expr_context_name(expr_context_ty ctx)88 expr_context_name(expr_context_ty ctx)
89 {
90     switch (ctx) {
91     case Load:
92         return "Load";
93     case Store:
94         return "Store";
95     case Del:
96         return "Del";
97     case AugLoad:
98         return "AugLoad";
99     case AugStore:
100         return "AugStore";
101     case Param:
102         return "Param";
103     default:
104         Py_UNREACHABLE();
105     }
106 }
107 
108 static int
validate_arguments(arguments_ty args)109 validate_arguments(arguments_ty args)
110 {
111     if (!validate_args(args->args))
112         return 0;
113     if (args->vararg && args->vararg->annotation
114         && !validate_expr(args->vararg->annotation, Load)) {
115             return 0;
116     }
117     if (!validate_args(args->kwonlyargs))
118         return 0;
119     if (args->kwarg && args->kwarg->annotation
120         && !validate_expr(args->kwarg->annotation, Load)) {
121             return 0;
122     }
123     if (asdl_seq_LEN(args->defaults) > asdl_seq_LEN(args->args)) {
124         PyErr_SetString(PyExc_ValueError, "more positional defaults than args on arguments");
125         return 0;
126     }
127     if (asdl_seq_LEN(args->kw_defaults) != asdl_seq_LEN(args->kwonlyargs)) {
128         PyErr_SetString(PyExc_ValueError, "length of kwonlyargs is not the same as "
129                         "kw_defaults on arguments");
130         return 0;
131     }
132     return validate_exprs(args->defaults, Load, 0) && validate_exprs(args->kw_defaults, Load, 1);
133 }
134 
135 static int
validate_constant(PyObject * value)136 validate_constant(PyObject *value)
137 {
138     if (value == Py_None || value == Py_Ellipsis)
139         return 1;
140 
141     if (PyLong_CheckExact(value)
142             || PyFloat_CheckExact(value)
143             || PyComplex_CheckExact(value)
144             || PyBool_Check(value)
145             || PyUnicode_CheckExact(value)
146             || PyBytes_CheckExact(value))
147         return 1;
148 
149     if (PyTuple_CheckExact(value) || PyFrozenSet_CheckExact(value)) {
150         PyObject *it;
151 
152         it = PyObject_GetIter(value);
153         if (it == NULL)
154             return 0;
155 
156         while (1) {
157             PyObject *item = PyIter_Next(it);
158             if (item == NULL) {
159                 if (PyErr_Occurred()) {
160                     Py_DECREF(it);
161                     return 0;
162                 }
163                 break;
164             }
165 
166             if (!validate_constant(item)) {
167                 Py_DECREF(it);
168                 Py_DECREF(item);
169                 return 0;
170             }
171             Py_DECREF(item);
172         }
173 
174         Py_DECREF(it);
175         return 1;
176     }
177 
178     return 0;
179 }
180 
181 static int
validate_expr(expr_ty exp,expr_context_ty ctx)182 validate_expr(expr_ty exp, expr_context_ty ctx)
183 {
184     int check_ctx = 1;
185     expr_context_ty actual_ctx;
186 
187     /* First check expression context. */
188     switch (exp->kind) {
189     case Attribute_kind:
190         actual_ctx = exp->v.Attribute.ctx;
191         break;
192     case Subscript_kind:
193         actual_ctx = exp->v.Subscript.ctx;
194         break;
195     case Starred_kind:
196         actual_ctx = exp->v.Starred.ctx;
197         break;
198     case Name_kind:
199         actual_ctx = exp->v.Name.ctx;
200         break;
201     case List_kind:
202         actual_ctx = exp->v.List.ctx;
203         break;
204     case Tuple_kind:
205         actual_ctx = exp->v.Tuple.ctx;
206         break;
207     default:
208         if (ctx != Load) {
209             PyErr_Format(PyExc_ValueError, "expression which can't be "
210                          "assigned to in %s context", expr_context_name(ctx));
211             return 0;
212         }
213         check_ctx = 0;
214         /* set actual_ctx to prevent gcc warning */
215         actual_ctx = 0;
216     }
217     if (check_ctx && actual_ctx != ctx) {
218         PyErr_Format(PyExc_ValueError, "expression must have %s context but has %s instead",
219                      expr_context_name(ctx), expr_context_name(actual_ctx));
220         return 0;
221     }
222 
223     /* Now validate expression. */
224     switch (exp->kind) {
225     case BoolOp_kind:
226         if (asdl_seq_LEN(exp->v.BoolOp.values) < 2) {
227             PyErr_SetString(PyExc_ValueError, "BoolOp with less than 2 values");
228             return 0;
229         }
230         return validate_exprs(exp->v.BoolOp.values, Load, 0);
231     case BinOp_kind:
232         return validate_expr(exp->v.BinOp.left, Load) &&
233             validate_expr(exp->v.BinOp.right, Load);
234     case UnaryOp_kind:
235         return validate_expr(exp->v.UnaryOp.operand, Load);
236     case Lambda_kind:
237         return validate_arguments(exp->v.Lambda.args) &&
238             validate_expr(exp->v.Lambda.body, Load);
239     case IfExp_kind:
240         return validate_expr(exp->v.IfExp.test, Load) &&
241             validate_expr(exp->v.IfExp.body, Load) &&
242             validate_expr(exp->v.IfExp.orelse, Load);
243     case Dict_kind:
244         if (asdl_seq_LEN(exp->v.Dict.keys) != asdl_seq_LEN(exp->v.Dict.values)) {
245             PyErr_SetString(PyExc_ValueError,
246                             "Dict doesn't have the same number of keys as values");
247             return 0;
248         }
249         /* null_ok=1 for keys expressions to allow dict unpacking to work in
250            dict literals, i.e. ``{**{a:b}}`` */
251         return validate_exprs(exp->v.Dict.keys, Load, /*null_ok=*/ 1) &&
252             validate_exprs(exp->v.Dict.values, Load, /*null_ok=*/ 0);
253     case Set_kind:
254         return validate_exprs(exp->v.Set.elts, Load, 0);
255 #define COMP(NAME) \
256         case NAME ## _kind: \
257             return validate_comprehension(exp->v.NAME.generators) && \
258                 validate_expr(exp->v.NAME.elt, Load);
259     COMP(ListComp)
260     COMP(SetComp)
261     COMP(GeneratorExp)
262 #undef COMP
263     case DictComp_kind:
264         return validate_comprehension(exp->v.DictComp.generators) &&
265             validate_expr(exp->v.DictComp.key, Load) &&
266             validate_expr(exp->v.DictComp.value, Load);
267     case Yield_kind:
268         return !exp->v.Yield.value || validate_expr(exp->v.Yield.value, Load);
269     case YieldFrom_kind:
270         return validate_expr(exp->v.YieldFrom.value, Load);
271     case Await_kind:
272         return validate_expr(exp->v.Await.value, Load);
273     case Compare_kind:
274         if (!asdl_seq_LEN(exp->v.Compare.comparators)) {
275             PyErr_SetString(PyExc_ValueError, "Compare with no comparators");
276             return 0;
277         }
278         if (asdl_seq_LEN(exp->v.Compare.comparators) !=
279             asdl_seq_LEN(exp->v.Compare.ops)) {
280             PyErr_SetString(PyExc_ValueError, "Compare has a different number "
281                             "of comparators and operands");
282             return 0;
283         }
284         return validate_exprs(exp->v.Compare.comparators, Load, 0) &&
285             validate_expr(exp->v.Compare.left, Load);
286     case Call_kind:
287         return validate_expr(exp->v.Call.func, Load) &&
288             validate_exprs(exp->v.Call.args, Load, 0) &&
289             validate_keywords(exp->v.Call.keywords);
290     case Constant_kind:
291         if (!validate_constant(exp->v.Constant.value)) {
292             PyErr_Format(PyExc_TypeError,
293                          "got an invalid type in Constant: %s",
294                          Py_TYPE(exp->v.Constant.value)->tp_name);
295             return 0;
296         }
297         return 1;
298     case Num_kind: {
299         PyObject *n = exp->v.Num.n;
300         if (!PyLong_CheckExact(n) && !PyFloat_CheckExact(n) &&
301             !PyComplex_CheckExact(n)) {
302             PyErr_SetString(PyExc_TypeError, "non-numeric type in Num");
303             return 0;
304         }
305         return 1;
306     }
307     case Str_kind: {
308         PyObject *s = exp->v.Str.s;
309         if (!PyUnicode_CheckExact(s)) {
310             PyErr_SetString(PyExc_TypeError, "non-string type in Str");
311             return 0;
312         }
313         return 1;
314     }
315     case JoinedStr_kind:
316         return validate_exprs(exp->v.JoinedStr.values, Load, 0);
317     case FormattedValue_kind:
318         if (validate_expr(exp->v.FormattedValue.value, Load) == 0)
319             return 0;
320         if (exp->v.FormattedValue.format_spec)
321             return validate_expr(exp->v.FormattedValue.format_spec, Load);
322         return 1;
323     case Bytes_kind: {
324         PyObject *b = exp->v.Bytes.s;
325         if (!PyBytes_CheckExact(b)) {
326             PyErr_SetString(PyExc_TypeError, "non-bytes type in Bytes");
327             return 0;
328         }
329         return 1;
330     }
331     case Attribute_kind:
332         return validate_expr(exp->v.Attribute.value, Load);
333     case Subscript_kind:
334         return validate_slice(exp->v.Subscript.slice) &&
335             validate_expr(exp->v.Subscript.value, Load);
336     case Starred_kind:
337         return validate_expr(exp->v.Starred.value, ctx);
338     case List_kind:
339         return validate_exprs(exp->v.List.elts, ctx, 0);
340     case Tuple_kind:
341         return validate_exprs(exp->v.Tuple.elts, ctx, 0);
342     /* These last cases don't have any checking. */
343     case Name_kind:
344     case NameConstant_kind:
345     case Ellipsis_kind:
346         return 1;
347     default:
348         PyErr_SetString(PyExc_SystemError, "unexpected expression");
349         return 0;
350     }
351 }
352 
353 static int
validate_nonempty_seq(asdl_seq * seq,const char * what,const char * owner)354 validate_nonempty_seq(asdl_seq *seq, const char *what, const char *owner)
355 {
356     if (asdl_seq_LEN(seq))
357         return 1;
358     PyErr_Format(PyExc_ValueError, "empty %s on %s", what, owner);
359     return 0;
360 }
361 
362 static int
validate_assignlist(asdl_seq * targets,expr_context_ty ctx)363 validate_assignlist(asdl_seq *targets, expr_context_ty ctx)
364 {
365     return validate_nonempty_seq(targets, "targets", ctx == Del ? "Delete" : "Assign") &&
366         validate_exprs(targets, ctx, 0);
367 }
368 
369 static int
validate_body(asdl_seq * body,const char * owner)370 validate_body(asdl_seq *body, const char *owner)
371 {
372     return validate_nonempty_seq(body, "body", owner) && validate_stmts(body);
373 }
374 
375 static int
validate_stmt(stmt_ty stmt)376 validate_stmt(stmt_ty stmt)
377 {
378     int i;
379     switch (stmt->kind) {
380     case FunctionDef_kind:
381         return validate_body(stmt->v.FunctionDef.body, "FunctionDef") &&
382             validate_arguments(stmt->v.FunctionDef.args) &&
383             validate_exprs(stmt->v.FunctionDef.decorator_list, Load, 0) &&
384             (!stmt->v.FunctionDef.returns ||
385              validate_expr(stmt->v.FunctionDef.returns, Load));
386     case ClassDef_kind:
387         return validate_body(stmt->v.ClassDef.body, "ClassDef") &&
388             validate_exprs(stmt->v.ClassDef.bases, Load, 0) &&
389             validate_keywords(stmt->v.ClassDef.keywords) &&
390             validate_exprs(stmt->v.ClassDef.decorator_list, Load, 0);
391     case Return_kind:
392         return !stmt->v.Return.value || validate_expr(stmt->v.Return.value, Load);
393     case Delete_kind:
394         return validate_assignlist(stmt->v.Delete.targets, Del);
395     case Assign_kind:
396         return validate_assignlist(stmt->v.Assign.targets, Store) &&
397             validate_expr(stmt->v.Assign.value, Load);
398     case AugAssign_kind:
399         return validate_expr(stmt->v.AugAssign.target, Store) &&
400             validate_expr(stmt->v.AugAssign.value, Load);
401     case AnnAssign_kind:
402         if (stmt->v.AnnAssign.target->kind != Name_kind &&
403             stmt->v.AnnAssign.simple) {
404             PyErr_SetString(PyExc_TypeError,
405                             "AnnAssign with simple non-Name target");
406             return 0;
407         }
408         return validate_expr(stmt->v.AnnAssign.target, Store) &&
409                (!stmt->v.AnnAssign.value ||
410                 validate_expr(stmt->v.AnnAssign.value, Load)) &&
411                validate_expr(stmt->v.AnnAssign.annotation, Load);
412     case For_kind:
413         return validate_expr(stmt->v.For.target, Store) &&
414             validate_expr(stmt->v.For.iter, Load) &&
415             validate_body(stmt->v.For.body, "For") &&
416             validate_stmts(stmt->v.For.orelse);
417     case AsyncFor_kind:
418         return validate_expr(stmt->v.AsyncFor.target, Store) &&
419             validate_expr(stmt->v.AsyncFor.iter, Load) &&
420             validate_body(stmt->v.AsyncFor.body, "AsyncFor") &&
421             validate_stmts(stmt->v.AsyncFor.orelse);
422     case While_kind:
423         return validate_expr(stmt->v.While.test, Load) &&
424             validate_body(stmt->v.While.body, "While") &&
425             validate_stmts(stmt->v.While.orelse);
426     case If_kind:
427         return validate_expr(stmt->v.If.test, Load) &&
428             validate_body(stmt->v.If.body, "If") &&
429             validate_stmts(stmt->v.If.orelse);
430     case With_kind:
431         if (!validate_nonempty_seq(stmt->v.With.items, "items", "With"))
432             return 0;
433         for (i = 0; i < asdl_seq_LEN(stmt->v.With.items); i++) {
434             withitem_ty item = asdl_seq_GET(stmt->v.With.items, i);
435             if (!validate_expr(item->context_expr, Load) ||
436                 (item->optional_vars && !validate_expr(item->optional_vars, Store)))
437                 return 0;
438         }
439         return validate_body(stmt->v.With.body, "With");
440     case AsyncWith_kind:
441         if (!validate_nonempty_seq(stmt->v.AsyncWith.items, "items", "AsyncWith"))
442             return 0;
443         for (i = 0; i < asdl_seq_LEN(stmt->v.AsyncWith.items); i++) {
444             withitem_ty item = asdl_seq_GET(stmt->v.AsyncWith.items, i);
445             if (!validate_expr(item->context_expr, Load) ||
446                 (item->optional_vars && !validate_expr(item->optional_vars, Store)))
447                 return 0;
448         }
449         return validate_body(stmt->v.AsyncWith.body, "AsyncWith");
450     case Raise_kind:
451         if (stmt->v.Raise.exc) {
452             return validate_expr(stmt->v.Raise.exc, Load) &&
453                 (!stmt->v.Raise.cause || validate_expr(stmt->v.Raise.cause, Load));
454         }
455         if (stmt->v.Raise.cause) {
456             PyErr_SetString(PyExc_ValueError, "Raise with cause but no exception");
457             return 0;
458         }
459         return 1;
460     case Try_kind:
461         if (!validate_body(stmt->v.Try.body, "Try"))
462             return 0;
463         if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
464             !asdl_seq_LEN(stmt->v.Try.finalbody)) {
465             PyErr_SetString(PyExc_ValueError, "Try has neither except handlers nor finalbody");
466             return 0;
467         }
468         if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
469             asdl_seq_LEN(stmt->v.Try.orelse)) {
470             PyErr_SetString(PyExc_ValueError, "Try has orelse but no except handlers");
471             return 0;
472         }
473         for (i = 0; i < asdl_seq_LEN(stmt->v.Try.handlers); i++) {
474             excepthandler_ty handler = asdl_seq_GET(stmt->v.Try.handlers, i);
475             if ((handler->v.ExceptHandler.type &&
476                  !validate_expr(handler->v.ExceptHandler.type, Load)) ||
477                 !validate_body(handler->v.ExceptHandler.body, "ExceptHandler"))
478                 return 0;
479         }
480         return (!asdl_seq_LEN(stmt->v.Try.finalbody) ||
481                 validate_stmts(stmt->v.Try.finalbody)) &&
482             (!asdl_seq_LEN(stmt->v.Try.orelse) ||
483              validate_stmts(stmt->v.Try.orelse));
484     case Assert_kind:
485         return validate_expr(stmt->v.Assert.test, Load) &&
486             (!stmt->v.Assert.msg || validate_expr(stmt->v.Assert.msg, Load));
487     case Import_kind:
488         return validate_nonempty_seq(stmt->v.Import.names, "names", "Import");
489     case ImportFrom_kind:
490         if (stmt->v.ImportFrom.level < 0) {
491             PyErr_SetString(PyExc_ValueError, "Negative ImportFrom level");
492             return 0;
493         }
494         return validate_nonempty_seq(stmt->v.ImportFrom.names, "names", "ImportFrom");
495     case Global_kind:
496         return validate_nonempty_seq(stmt->v.Global.names, "names", "Global");
497     case Nonlocal_kind:
498         return validate_nonempty_seq(stmt->v.Nonlocal.names, "names", "Nonlocal");
499     case Expr_kind:
500         return validate_expr(stmt->v.Expr.value, Load);
501     case AsyncFunctionDef_kind:
502         return validate_body(stmt->v.AsyncFunctionDef.body, "AsyncFunctionDef") &&
503             validate_arguments(stmt->v.AsyncFunctionDef.args) &&
504             validate_exprs(stmt->v.AsyncFunctionDef.decorator_list, Load, 0) &&
505             (!stmt->v.AsyncFunctionDef.returns ||
506              validate_expr(stmt->v.AsyncFunctionDef.returns, Load));
507     case Pass_kind:
508     case Break_kind:
509     case Continue_kind:
510         return 1;
511     default:
512         PyErr_SetString(PyExc_SystemError, "unexpected statement");
513         return 0;
514     }
515 }
516 
517 static int
validate_stmts(asdl_seq * seq)518 validate_stmts(asdl_seq *seq)
519 {
520     int i;
521     for (i = 0; i < asdl_seq_LEN(seq); i++) {
522         stmt_ty stmt = asdl_seq_GET(seq, i);
523         if (stmt) {
524             if (!validate_stmt(stmt))
525                 return 0;
526         }
527         else {
528             PyErr_SetString(PyExc_ValueError,
529                             "None disallowed in statement list");
530             return 0;
531         }
532     }
533     return 1;
534 }
535 
536 static int
validate_exprs(asdl_seq * exprs,expr_context_ty ctx,int null_ok)537 validate_exprs(asdl_seq *exprs, expr_context_ty ctx, int null_ok)
538 {
539     int i;
540     for (i = 0; i < asdl_seq_LEN(exprs); i++) {
541         expr_ty expr = asdl_seq_GET(exprs, i);
542         if (expr) {
543             if (!validate_expr(expr, ctx))
544                 return 0;
545         }
546         else if (!null_ok) {
547             PyErr_SetString(PyExc_ValueError,
548                             "None disallowed in expression list");
549             return 0;
550         }
551 
552     }
553     return 1;
554 }
555 
556 int
PyAST_Validate(mod_ty mod)557 PyAST_Validate(mod_ty mod)
558 {
559     int res = 0;
560 
561     switch (mod->kind) {
562     case Module_kind:
563         res = validate_stmts(mod->v.Module.body);
564         break;
565     case Interactive_kind:
566         res = validate_stmts(mod->v.Interactive.body);
567         break;
568     case Expression_kind:
569         res = validate_expr(mod->v.Expression.body, Load);
570         break;
571     case Suite_kind:
572         PyErr_SetString(PyExc_ValueError, "Suite is not valid in the CPython compiler");
573         break;
574     default:
575         PyErr_SetString(PyExc_SystemError, "impossible module node");
576         res = 0;
577         break;
578     }
579     return res;
580 }
581 
582 /* This is done here, so defines like "test" don't interfere with AST use above. */
583 #include "grammar.h"
584 #include "parsetok.h"
585 #include "graminit.h"
586 
587 /* Data structure used internally */
588 struct compiling {
589     PyArena *c_arena; /* Arena for allocating memory. */
590     PyObject *c_filename; /* filename */
591     PyObject *c_normalize; /* Normalization function from unicodedata. */
592 };
593 
594 static asdl_seq *seq_for_testlist(struct compiling *, const node *);
595 static expr_ty ast_for_expr(struct compiling *, const node *);
596 static stmt_ty ast_for_stmt(struct compiling *, const node *);
597 static asdl_seq *ast_for_suite(struct compiling *c, const node *n);
598 static asdl_seq *ast_for_exprlist(struct compiling *, const node *,
599                                   expr_context_ty);
600 static expr_ty ast_for_testlist(struct compiling *, const node *);
601 static stmt_ty ast_for_classdef(struct compiling *, const node *, asdl_seq *);
602 
603 static stmt_ty ast_for_with_stmt(struct compiling *, const node *, bool);
604 static stmt_ty ast_for_for_stmt(struct compiling *, const node *, bool);
605 
606 /* Note different signature for ast_for_call */
607 static expr_ty ast_for_call(struct compiling *, const node *, expr_ty, bool);
608 
609 static PyObject *parsenumber(struct compiling *, const char *);
610 static expr_ty parsestrplus(struct compiling *, const node *n);
611 
612 #define COMP_GENEXP   0
613 #define COMP_LISTCOMP 1
614 #define COMP_SETCOMP  2
615 
616 static int
init_normalization(struct compiling * c)617 init_normalization(struct compiling *c)
618 {
619     PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
620     if (!m)
621         return 0;
622     c->c_normalize = PyObject_GetAttrString(m, "normalize");
623     Py_DECREF(m);
624     if (!c->c_normalize)
625         return 0;
626     return 1;
627 }
628 
629 static identifier
new_identifier(const char * n,struct compiling * c)630 new_identifier(const char *n, struct compiling *c)
631 {
632     PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
633     if (!id)
634         return NULL;
635     /* PyUnicode_DecodeUTF8 should always return a ready string. */
636     assert(PyUnicode_IS_READY(id));
637     /* Check whether there are non-ASCII characters in the
638        identifier; if so, normalize to NFKC. */
639     if (!PyUnicode_IS_ASCII(id)) {
640         PyObject *id2;
641         _Py_IDENTIFIER(NFKC);
642         if (!c->c_normalize && !init_normalization(c)) {
643             Py_DECREF(id);
644             return NULL;
645         }
646         PyObject *form = _PyUnicode_FromId(&PyId_NFKC);
647         if (form == NULL) {
648             Py_DECREF(id);
649             return NULL;
650         }
651         PyObject *args[2] = {form, id};
652         id2 = _PyObject_FastCall(c->c_normalize, args, 2);
653         Py_DECREF(id);
654         if (!id2)
655             return NULL;
656         if (!PyUnicode_Check(id2)) {
657             PyErr_Format(PyExc_TypeError,
658                          "unicodedata.normalize() must return a string, not "
659                          "%.200s",
660                          Py_TYPE(id2)->tp_name);
661             Py_DECREF(id2);
662             return NULL;
663         }
664         id = id2;
665     }
666     PyUnicode_InternInPlace(&id);
667     if (PyArena_AddPyObject(c->c_arena, id) < 0) {
668         Py_DECREF(id);
669         return NULL;
670     }
671     return id;
672 }
673 
674 #define NEW_IDENTIFIER(n) new_identifier(STR(n), c)
675 
676 static int
ast_error(struct compiling * c,const node * n,const char * errmsg)677 ast_error(struct compiling *c, const node *n, const char *errmsg)
678 {
679     PyObject *value, *errstr, *loc, *tmp;
680 
681     loc = PyErr_ProgramTextObject(c->c_filename, LINENO(n));
682     if (!loc) {
683         Py_INCREF(Py_None);
684         loc = Py_None;
685     }
686     tmp = Py_BuildValue("(OiiN)", c->c_filename, LINENO(n), n->n_col_offset, loc);
687     if (!tmp)
688         return 0;
689     errstr = PyUnicode_FromString(errmsg);
690     if (!errstr) {
691         Py_DECREF(tmp);
692         return 0;
693     }
694     value = PyTuple_Pack(2, errstr, tmp);
695     Py_DECREF(errstr);
696     Py_DECREF(tmp);
697     if (value) {
698         PyErr_SetObject(PyExc_SyntaxError, value);
699         Py_DECREF(value);
700     }
701     return 0;
702 }
703 
704 /* num_stmts() returns number of contained statements.
705 
706    Use this routine to determine how big a sequence is needed for
707    the statements in a parse tree.  Its raison d'etre is this bit of
708    grammar:
709 
710    stmt: simple_stmt | compound_stmt
711    simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
712 
713    A simple_stmt can contain multiple small_stmt elements joined
714    by semicolons.  If the arg is a simple_stmt, the number of
715    small_stmt elements is returned.
716 */
717 
718 static int
num_stmts(const node * n)719 num_stmts(const node *n)
720 {
721     int i, l;
722     node *ch;
723 
724     switch (TYPE(n)) {
725         case single_input:
726             if (TYPE(CHILD(n, 0)) == NEWLINE)
727                 return 0;
728             else
729                 return num_stmts(CHILD(n, 0));
730         case file_input:
731             l = 0;
732             for (i = 0; i < NCH(n); i++) {
733                 ch = CHILD(n, i);
734                 if (TYPE(ch) == stmt)
735                     l += num_stmts(ch);
736             }
737             return l;
738         case stmt:
739             return num_stmts(CHILD(n, 0));
740         case compound_stmt:
741             return 1;
742         case simple_stmt:
743             return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */
744         case suite:
745             if (NCH(n) == 1)
746                 return num_stmts(CHILD(n, 0));
747             else {
748                 l = 0;
749                 for (i = 2; i < (NCH(n) - 1); i++)
750                     l += num_stmts(CHILD(n, i));
751                 return l;
752             }
753         default: {
754             char buf[128];
755 
756             sprintf(buf, "Non-statement found: %d %d",
757                     TYPE(n), NCH(n));
758             Py_FatalError(buf);
759         }
760     }
761     Py_UNREACHABLE();
762 }
763 
764 /* Transform the CST rooted at node * to the appropriate AST
765 */
766 
767 mod_ty
PyAST_FromNodeObject(const node * n,PyCompilerFlags * flags,PyObject * filename,PyArena * arena)768 PyAST_FromNodeObject(const node *n, PyCompilerFlags *flags,
769                      PyObject *filename, PyArena *arena)
770 {
771     int i, j, k, num;
772     asdl_seq *stmts = NULL;
773     stmt_ty s;
774     node *ch;
775     struct compiling c;
776     mod_ty res = NULL;
777 
778     c.c_arena = arena;
779     /* borrowed reference */
780     c.c_filename = filename;
781     c.c_normalize = NULL;
782 
783     if (TYPE(n) == encoding_decl)
784         n = CHILD(n, 0);
785 
786     k = 0;
787     switch (TYPE(n)) {
788         case file_input:
789             stmts = _Py_asdl_seq_new(num_stmts(n), arena);
790             if (!stmts)
791                 goto out;
792             for (i = 0; i < NCH(n) - 1; i++) {
793                 ch = CHILD(n, i);
794                 if (TYPE(ch) == NEWLINE)
795                     continue;
796                 REQ(ch, stmt);
797                 num = num_stmts(ch);
798                 if (num == 1) {
799                     s = ast_for_stmt(&c, ch);
800                     if (!s)
801                         goto out;
802                     asdl_seq_SET(stmts, k++, s);
803                 }
804                 else {
805                     ch = CHILD(ch, 0);
806                     REQ(ch, simple_stmt);
807                     for (j = 0; j < num; j++) {
808                         s = ast_for_stmt(&c, CHILD(ch, j * 2));
809                         if (!s)
810                             goto out;
811                         asdl_seq_SET(stmts, k++, s);
812                     }
813                 }
814             }
815             res = Module(stmts, arena);
816             break;
817         case eval_input: {
818             expr_ty testlist_ast;
819 
820             /* XXX Why not comp_for here? */
821             testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
822             if (!testlist_ast)
823                 goto out;
824             res = Expression(testlist_ast, arena);
825             break;
826         }
827         case single_input:
828             if (TYPE(CHILD(n, 0)) == NEWLINE) {
829                 stmts = _Py_asdl_seq_new(1, arena);
830                 if (!stmts)
831                     goto out;
832                 asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
833                                             arena));
834                 if (!asdl_seq_GET(stmts, 0))
835                     goto out;
836                 res = Interactive(stmts, arena);
837             }
838             else {
839                 n = CHILD(n, 0);
840                 num = num_stmts(n);
841                 stmts = _Py_asdl_seq_new(num, arena);
842                 if (!stmts)
843                     goto out;
844                 if (num == 1) {
845                     s = ast_for_stmt(&c, n);
846                     if (!s)
847                         goto out;
848                     asdl_seq_SET(stmts, 0, s);
849                 }
850                 else {
851                     /* Only a simple_stmt can contain multiple statements. */
852                     REQ(n, simple_stmt);
853                     for (i = 0; i < NCH(n); i += 2) {
854                         if (TYPE(CHILD(n, i)) == NEWLINE)
855                             break;
856                         s = ast_for_stmt(&c, CHILD(n, i));
857                         if (!s)
858                             goto out;
859                         asdl_seq_SET(stmts, i / 2, s);
860                     }
861                 }
862 
863                 res = Interactive(stmts, arena);
864             }
865             break;
866         default:
867             PyErr_Format(PyExc_SystemError,
868                          "invalid node %d for PyAST_FromNode", TYPE(n));
869             goto out;
870     }
871  out:
872     if (c.c_normalize) {
873         Py_DECREF(c.c_normalize);
874     }
875     return res;
876 }
877 
878 mod_ty
PyAST_FromNode(const node * n,PyCompilerFlags * flags,const char * filename_str,PyArena * arena)879 PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename_str,
880                PyArena *arena)
881 {
882     mod_ty mod;
883     PyObject *filename;
884     filename = PyUnicode_DecodeFSDefault(filename_str);
885     if (filename == NULL)
886         return NULL;
887     mod = PyAST_FromNodeObject(n, flags, filename, arena);
888     Py_DECREF(filename);
889     return mod;
890 
891 }
892 
893 /* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
894 */
895 
896 static operator_ty
get_operator(const node * n)897 get_operator(const node *n)
898 {
899     switch (TYPE(n)) {
900         case VBAR:
901             return BitOr;
902         case CIRCUMFLEX:
903             return BitXor;
904         case AMPER:
905             return BitAnd;
906         case LEFTSHIFT:
907             return LShift;
908         case RIGHTSHIFT:
909             return RShift;
910         case PLUS:
911             return Add;
912         case MINUS:
913             return Sub;
914         case STAR:
915             return Mult;
916         case AT:
917             return MatMult;
918         case SLASH:
919             return Div;
920         case DOUBLESLASH:
921             return FloorDiv;
922         case PERCENT:
923             return Mod;
924         default:
925             return (operator_ty)0;
926     }
927 }
928 
929 static const char * const FORBIDDEN[] = {
930     "None",
931     "True",
932     "False",
933     NULL,
934 };
935 
936 static int
forbidden_name(struct compiling * c,identifier name,const node * n,int full_checks)937 forbidden_name(struct compiling *c, identifier name, const node *n,
938                int full_checks)
939 {
940     assert(PyUnicode_Check(name));
941     if (_PyUnicode_EqualToASCIIString(name, "__debug__")) {
942         ast_error(c, n, "assignment to keyword");
943         return 1;
944     }
945     if (full_checks) {
946         const char * const *p;
947         for (p = FORBIDDEN; *p; p++) {
948             if (_PyUnicode_EqualToASCIIString(name, *p)) {
949                 ast_error(c, n, "assignment to keyword");
950                 return 1;
951             }
952         }
953     }
954     return 0;
955 }
956 
957 /* Set the context ctx for expr_ty e, recursively traversing e.
958 
959    Only sets context for expr kinds that "can appear in assignment context"
960    (according to ../Parser/Python.asdl).  For other expr kinds, it sets
961    an appropriate syntax error and returns false.
962 */
963 
964 static int
set_context(struct compiling * c,expr_ty e,expr_context_ty ctx,const node * n)965 set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)
966 {
967     asdl_seq *s = NULL;
968     /* If a particular expression type can't be used for assign / delete,
969        set expr_name to its name and an error message will be generated.
970     */
971     const char* expr_name = NULL;
972 
973     /* The ast defines augmented store and load contexts, but the
974        implementation here doesn't actually use them.  The code may be
975        a little more complex than necessary as a result.  It also means
976        that expressions in an augmented assignment have a Store context.
977        Consider restructuring so that augmented assignment uses
978        set_context(), too.
979     */
980     assert(ctx != AugStore && ctx != AugLoad);
981 
982     switch (e->kind) {
983         case Attribute_kind:
984             e->v.Attribute.ctx = ctx;
985             if (ctx == Store && forbidden_name(c, e->v.Attribute.attr, n, 1))
986                 return 0;
987             break;
988         case Subscript_kind:
989             e->v.Subscript.ctx = ctx;
990             break;
991         case Starred_kind:
992             e->v.Starred.ctx = ctx;
993             if (!set_context(c, e->v.Starred.value, ctx, n))
994                 return 0;
995             break;
996         case Name_kind:
997             if (ctx == Store) {
998                 if (forbidden_name(c, e->v.Name.id, n, 0))
999                     return 0; /* forbidden_name() calls ast_error() */
1000             }
1001             e->v.Name.ctx = ctx;
1002             break;
1003         case List_kind:
1004             e->v.List.ctx = ctx;
1005             s = e->v.List.elts;
1006             break;
1007         case Tuple_kind:
1008             e->v.Tuple.ctx = ctx;
1009             s = e->v.Tuple.elts;
1010             break;
1011         case Lambda_kind:
1012             expr_name = "lambda";
1013             break;
1014         case Call_kind:
1015             expr_name = "function call";
1016             break;
1017         case BoolOp_kind:
1018         case BinOp_kind:
1019         case UnaryOp_kind:
1020             expr_name = "operator";
1021             break;
1022         case GeneratorExp_kind:
1023             expr_name = "generator expression";
1024             break;
1025         case Yield_kind:
1026         case YieldFrom_kind:
1027             expr_name = "yield expression";
1028             break;
1029         case Await_kind:
1030             expr_name = "await expression";
1031             break;
1032         case ListComp_kind:
1033             expr_name = "list comprehension";
1034             break;
1035         case SetComp_kind:
1036             expr_name = "set comprehension";
1037             break;
1038         case DictComp_kind:
1039             expr_name = "dict comprehension";
1040             break;
1041         case Dict_kind:
1042         case Set_kind:
1043         case Num_kind:
1044         case Str_kind:
1045         case Bytes_kind:
1046         case JoinedStr_kind:
1047         case FormattedValue_kind:
1048             expr_name = "literal";
1049             break;
1050         case NameConstant_kind:
1051             expr_name = "keyword";
1052             break;
1053         case Ellipsis_kind:
1054             expr_name = "Ellipsis";
1055             break;
1056         case Compare_kind:
1057             expr_name = "comparison";
1058             break;
1059         case IfExp_kind:
1060             expr_name = "conditional expression";
1061             break;
1062         default:
1063             PyErr_Format(PyExc_SystemError,
1064                          "unexpected expression in assignment %d (line %d)",
1065                          e->kind, e->lineno);
1066             return 0;
1067     }
1068     /* Check for error string set by switch */
1069     if (expr_name) {
1070         char buf[300];
1071         PyOS_snprintf(buf, sizeof(buf),
1072                       "can't %s %s",
1073                       ctx == Store ? "assign to" : "delete",
1074                       expr_name);
1075         return ast_error(c, n, buf);
1076     }
1077 
1078     /* If the LHS is a list or tuple, we need to set the assignment
1079        context for all the contained elements.
1080     */
1081     if (s) {
1082         int i;
1083 
1084         for (i = 0; i < asdl_seq_LEN(s); i++) {
1085             if (!set_context(c, (expr_ty)asdl_seq_GET(s, i), ctx, n))
1086                 return 0;
1087         }
1088     }
1089     return 1;
1090 }
1091 
1092 static operator_ty
ast_for_augassign(struct compiling * c,const node * n)1093 ast_for_augassign(struct compiling *c, const node *n)
1094 {
1095     REQ(n, augassign);
1096     n = CHILD(n, 0);
1097     switch (STR(n)[0]) {
1098         case '+':
1099             return Add;
1100         case '-':
1101             return Sub;
1102         case '/':
1103             if (STR(n)[1] == '/')
1104                 return FloorDiv;
1105             else
1106                 return Div;
1107         case '%':
1108             return Mod;
1109         case '<':
1110             return LShift;
1111         case '>':
1112             return RShift;
1113         case '&':
1114             return BitAnd;
1115         case '^':
1116             return BitXor;
1117         case '|':
1118             return BitOr;
1119         case '*':
1120             if (STR(n)[1] == '*')
1121                 return Pow;
1122             else
1123                 return Mult;
1124         case '@':
1125             return MatMult;
1126         default:
1127             PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n));
1128             return (operator_ty)0;
1129     }
1130 }
1131 
1132 static cmpop_ty
ast_for_comp_op(struct compiling * c,const node * n)1133 ast_for_comp_op(struct compiling *c, const node *n)
1134 {
1135     /* comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'
1136                |'is' 'not'
1137     */
1138     REQ(n, comp_op);
1139     if (NCH(n) == 1) {
1140         n = CHILD(n, 0);
1141         switch (TYPE(n)) {
1142             case LESS:
1143                 return Lt;
1144             case GREATER:
1145                 return Gt;
1146             case EQEQUAL:                       /* == */
1147                 return Eq;
1148             case LESSEQUAL:
1149                 return LtE;
1150             case GREATEREQUAL:
1151                 return GtE;
1152             case NOTEQUAL:
1153                 return NotEq;
1154             case NAME:
1155                 if (strcmp(STR(n), "in") == 0)
1156                     return In;
1157                 if (strcmp(STR(n), "is") == 0)
1158                     return Is;
1159                 /* fall through */
1160             default:
1161                 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s",
1162                              STR(n));
1163                 return (cmpop_ty)0;
1164         }
1165     }
1166     else if (NCH(n) == 2) {
1167         /* handle "not in" and "is not" */
1168         switch (TYPE(CHILD(n, 0))) {
1169             case NAME:
1170                 if (strcmp(STR(CHILD(n, 1)), "in") == 0)
1171                     return NotIn;
1172                 if (strcmp(STR(CHILD(n, 0)), "is") == 0)
1173                     return IsNot;
1174                 /* fall through */
1175             default:
1176                 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s",
1177                              STR(CHILD(n, 0)), STR(CHILD(n, 1)));
1178                 return (cmpop_ty)0;
1179         }
1180     }
1181     PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children",
1182                  NCH(n));
1183     return (cmpop_ty)0;
1184 }
1185 
1186 static asdl_seq *
seq_for_testlist(struct compiling * c,const node * n)1187 seq_for_testlist(struct compiling *c, const node *n)
1188 {
1189     /* testlist: test (',' test)* [',']
1190        testlist_star_expr: test|star_expr (',' test|star_expr)* [',']
1191     */
1192     asdl_seq *seq;
1193     expr_ty expression;
1194     int i;
1195     assert(TYPE(n) == testlist || TYPE(n) == testlist_star_expr || TYPE(n) == testlist_comp);
1196 
1197     seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1198     if (!seq)
1199         return NULL;
1200 
1201     for (i = 0; i < NCH(n); i += 2) {
1202         const node *ch = CHILD(n, i);
1203         assert(TYPE(ch) == test || TYPE(ch) == test_nocond || TYPE(ch) == star_expr);
1204 
1205         expression = ast_for_expr(c, ch);
1206         if (!expression)
1207             return NULL;
1208 
1209         assert(i / 2 < seq->size);
1210         asdl_seq_SET(seq, i / 2, expression);
1211     }
1212     return seq;
1213 }
1214 
1215 static arg_ty
ast_for_arg(struct compiling * c,const node * n)1216 ast_for_arg(struct compiling *c, const node *n)
1217 {
1218     identifier name;
1219     expr_ty annotation = NULL;
1220     node *ch;
1221     arg_ty ret;
1222 
1223     assert(TYPE(n) == tfpdef || TYPE(n) == vfpdef);
1224     ch = CHILD(n, 0);
1225     name = NEW_IDENTIFIER(ch);
1226     if (!name)
1227         return NULL;
1228     if (forbidden_name(c, name, ch, 0))
1229         return NULL;
1230 
1231     if (NCH(n) == 3 && TYPE(CHILD(n, 1)) == COLON) {
1232         annotation = ast_for_expr(c, CHILD(n, 2));
1233         if (!annotation)
1234             return NULL;
1235     }
1236 
1237     ret = arg(name, annotation, LINENO(n), n->n_col_offset, c->c_arena);
1238     if (!ret)
1239         return NULL;
1240     return ret;
1241 }
1242 
1243 /* returns -1 if failed to handle keyword only arguments
1244    returns new position to keep processing if successful
1245                (',' tfpdef ['=' test])*
1246                      ^^^
1247    start pointing here
1248  */
1249 static int
handle_keywordonly_args(struct compiling * c,const node * n,int start,asdl_seq * kwonlyargs,asdl_seq * kwdefaults)1250 handle_keywordonly_args(struct compiling *c, const node *n, int start,
1251                         asdl_seq *kwonlyargs, asdl_seq *kwdefaults)
1252 {
1253     PyObject *argname;
1254     node *ch;
1255     expr_ty expression, annotation;
1256     arg_ty arg;
1257     int i = start;
1258     int j = 0; /* index for kwdefaults and kwonlyargs */
1259 
1260     if (kwonlyargs == NULL) {
1261         ast_error(c, CHILD(n, start), "named arguments must follow bare *");
1262         return -1;
1263     }
1264     assert(kwdefaults != NULL);
1265     while (i < NCH(n)) {
1266         ch = CHILD(n, i);
1267         switch (TYPE(ch)) {
1268             case vfpdef:
1269             case tfpdef:
1270                 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
1271                     expression = ast_for_expr(c, CHILD(n, i + 2));
1272                     if (!expression)
1273                         goto error;
1274                     asdl_seq_SET(kwdefaults, j, expression);
1275                     i += 2; /* '=' and test */
1276                 }
1277                 else { /* setting NULL if no default value exists */
1278                     asdl_seq_SET(kwdefaults, j, NULL);
1279                 }
1280                 if (NCH(ch) == 3) {
1281                     /* ch is NAME ':' test */
1282                     annotation = ast_for_expr(c, CHILD(ch, 2));
1283                     if (!annotation)
1284                         goto error;
1285                 }
1286                 else {
1287                     annotation = NULL;
1288                 }
1289                 ch = CHILD(ch, 0);
1290                 argname = NEW_IDENTIFIER(ch);
1291                 if (!argname)
1292                     goto error;
1293                 if (forbidden_name(c, argname, ch, 0))
1294                     goto error;
1295                 arg = arg(argname, annotation, LINENO(ch), ch->n_col_offset,
1296                           c->c_arena);
1297                 if (!arg)
1298                     goto error;
1299                 asdl_seq_SET(kwonlyargs, j++, arg);
1300                 i += 2; /* the name and the comma */
1301                 break;
1302             case DOUBLESTAR:
1303                 return i;
1304             default:
1305                 ast_error(c, ch, "unexpected node");
1306                 goto error;
1307         }
1308     }
1309     return i;
1310  error:
1311     return -1;
1312 }
1313 
1314 /* Create AST for argument list. */
1315 
1316 static arguments_ty
ast_for_arguments(struct compiling * c,const node * n)1317 ast_for_arguments(struct compiling *c, const node *n)
1318 {
1319     /* This function handles both typedargslist (function definition)
1320        and varargslist (lambda definition).
1321 
1322        parameters: '(' [typedargslist] ')'
1323        typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
1324                '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
1325              | '**' tfpdef [',']]]
1326          | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
1327          | '**' tfpdef [','])
1328        tfpdef: NAME [':' test]
1329        varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
1330                '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
1331              | '**' vfpdef [',']]]
1332          | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
1333          | '**' vfpdef [',']
1334        )
1335        vfpdef: NAME
1336 
1337     */
1338     int i, j, k, nposargs = 0, nkwonlyargs = 0;
1339     int nposdefaults = 0, found_default = 0;
1340     asdl_seq *posargs, *posdefaults, *kwonlyargs, *kwdefaults;
1341     arg_ty vararg = NULL, kwarg = NULL;
1342     arg_ty arg;
1343     node *ch;
1344 
1345     if (TYPE(n) == parameters) {
1346         if (NCH(n) == 2) /* () as argument list */
1347             return arguments(NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena);
1348         n = CHILD(n, 1);
1349     }
1350     assert(TYPE(n) == typedargslist || TYPE(n) == varargslist);
1351 
1352     /* First count the number of positional args & defaults.  The
1353        variable i is the loop index for this for loop and the next.
1354        The next loop picks up where the first leaves off.
1355     */
1356     for (i = 0; i < NCH(n); i++) {
1357         ch = CHILD(n, i);
1358         if (TYPE(ch) == STAR) {
1359             /* skip star */
1360             i++;
1361             if (i < NCH(n) && /* skip argument following star */
1362                 (TYPE(CHILD(n, i)) == tfpdef ||
1363                  TYPE(CHILD(n, i)) == vfpdef)) {
1364                 i++;
1365             }
1366             break;
1367         }
1368         if (TYPE(ch) == DOUBLESTAR) break;
1369         if (TYPE(ch) == vfpdef || TYPE(ch) == tfpdef) nposargs++;
1370         if (TYPE(ch) == EQUAL) nposdefaults++;
1371     }
1372     /* count the number of keyword only args &
1373        defaults for keyword only args */
1374     for ( ; i < NCH(n); ++i) {
1375         ch = CHILD(n, i);
1376         if (TYPE(ch) == DOUBLESTAR) break;
1377         if (TYPE(ch) == tfpdef || TYPE(ch) == vfpdef) nkwonlyargs++;
1378     }
1379     posargs = (nposargs ? _Py_asdl_seq_new(nposargs, c->c_arena) : NULL);
1380     if (!posargs && nposargs)
1381         return NULL;
1382     kwonlyargs = (nkwonlyargs ?
1383                    _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
1384     if (!kwonlyargs && nkwonlyargs)
1385         return NULL;
1386     posdefaults = (nposdefaults ?
1387                     _Py_asdl_seq_new(nposdefaults, c->c_arena) : NULL);
1388     if (!posdefaults && nposdefaults)
1389         return NULL;
1390     /* The length of kwonlyargs and kwdefaults are same
1391        since we set NULL as default for keyword only argument w/o default
1392        - we have sequence data structure, but no dictionary */
1393     kwdefaults = (nkwonlyargs ?
1394                    _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
1395     if (!kwdefaults && nkwonlyargs)
1396         return NULL;
1397 
1398     /* tfpdef: NAME [':' test]
1399        vfpdef: NAME
1400     */
1401     i = 0;
1402     j = 0;  /* index for defaults */
1403     k = 0;  /* index for args */
1404     while (i < NCH(n)) {
1405         ch = CHILD(n, i);
1406         switch (TYPE(ch)) {
1407             case tfpdef:
1408             case vfpdef:
1409                 /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is
1410                    anything other than EQUAL or a comma? */
1411                 /* XXX Should NCH(n) check be made a separate check? */
1412                 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
1413                     expr_ty expression = ast_for_expr(c, CHILD(n, i + 2));
1414                     if (!expression)
1415                         return NULL;
1416                     assert(posdefaults != NULL);
1417                     asdl_seq_SET(posdefaults, j++, expression);
1418                     i += 2;
1419                     found_default = 1;
1420                 }
1421                 else if (found_default) {
1422                     ast_error(c, n,
1423                              "non-default argument follows default argument");
1424                     return NULL;
1425                 }
1426                 arg = ast_for_arg(c, ch);
1427                 if (!arg)
1428                     return NULL;
1429                 asdl_seq_SET(posargs, k++, arg);
1430                 i += 2; /* the name and the comma */
1431                 break;
1432             case STAR:
1433                 if (i+1 >= NCH(n) ||
1434                     (i+2 == NCH(n) && TYPE(CHILD(n, i+1)) == COMMA)) {
1435                     ast_error(c, CHILD(n, i),
1436                         "named arguments must follow bare *");
1437                     return NULL;
1438                 }
1439                 ch = CHILD(n, i+1);  /* tfpdef or COMMA */
1440                 if (TYPE(ch) == COMMA) {
1441                     int res = 0;
1442                     i += 2; /* now follows keyword only arguments */
1443                     res = handle_keywordonly_args(c, n, i,
1444                                                   kwonlyargs, kwdefaults);
1445                     if (res == -1) return NULL;
1446                     i = res; /* res has new position to process */
1447                 }
1448                 else {
1449                     vararg = ast_for_arg(c, ch);
1450                     if (!vararg)
1451                         return NULL;
1452 
1453                     i += 3;
1454                     if (i < NCH(n) && (TYPE(CHILD(n, i)) == tfpdef
1455                                     || TYPE(CHILD(n, i)) == vfpdef)) {
1456                         int res = 0;
1457                         res = handle_keywordonly_args(c, n, i,
1458                                                       kwonlyargs, kwdefaults);
1459                         if (res == -1) return NULL;
1460                         i = res; /* res has new position to process */
1461                     }
1462                 }
1463                 break;
1464             case DOUBLESTAR:
1465                 ch = CHILD(n, i+1);  /* tfpdef */
1466                 assert(TYPE(ch) == tfpdef || TYPE(ch) == vfpdef);
1467                 kwarg = ast_for_arg(c, ch);
1468                 if (!kwarg)
1469                     return NULL;
1470                 i += 3;
1471                 break;
1472             default:
1473                 PyErr_Format(PyExc_SystemError,
1474                              "unexpected node in varargslist: %d @ %d",
1475                              TYPE(ch), i);
1476                 return NULL;
1477         }
1478     }
1479     return arguments(posargs, vararg, kwonlyargs, kwdefaults, kwarg, posdefaults, c->c_arena);
1480 }
1481 
1482 static expr_ty
ast_for_dotted_name(struct compiling * c,const node * n)1483 ast_for_dotted_name(struct compiling *c, const node *n)
1484 {
1485     expr_ty e;
1486     identifier id;
1487     int lineno, col_offset;
1488     int i;
1489 
1490     REQ(n, dotted_name);
1491 
1492     lineno = LINENO(n);
1493     col_offset = n->n_col_offset;
1494 
1495     id = NEW_IDENTIFIER(CHILD(n, 0));
1496     if (!id)
1497         return NULL;
1498     e = Name(id, Load, lineno, col_offset, c->c_arena);
1499     if (!e)
1500         return NULL;
1501 
1502     for (i = 2; i < NCH(n); i+=2) {
1503         id = NEW_IDENTIFIER(CHILD(n, i));
1504         if (!id)
1505             return NULL;
1506         e = Attribute(e, id, Load, lineno, col_offset, c->c_arena);
1507         if (!e)
1508             return NULL;
1509     }
1510 
1511     return e;
1512 }
1513 
1514 static expr_ty
ast_for_decorator(struct compiling * c,const node * n)1515 ast_for_decorator(struct compiling *c, const node *n)
1516 {
1517     /* decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE */
1518     expr_ty d = NULL;
1519     expr_ty name_expr;
1520 
1521     REQ(n, decorator);
1522     REQ(CHILD(n, 0), AT);
1523     REQ(RCHILD(n, -1), NEWLINE);
1524 
1525     name_expr = ast_for_dotted_name(c, CHILD(n, 1));
1526     if (!name_expr)
1527         return NULL;
1528 
1529     if (NCH(n) == 3) { /* No arguments */
1530         d = name_expr;
1531         name_expr = NULL;
1532     }
1533     else if (NCH(n) == 5) { /* Call with no arguments */
1534         d = Call(name_expr, NULL, NULL, LINENO(n),
1535                  n->n_col_offset, c->c_arena);
1536         if (!d)
1537             return NULL;
1538         name_expr = NULL;
1539     }
1540     else {
1541         d = ast_for_call(c, CHILD(n, 3), name_expr, true);
1542         if (!d)
1543             return NULL;
1544         name_expr = NULL;
1545     }
1546 
1547     return d;
1548 }
1549 
1550 static asdl_seq*
ast_for_decorators(struct compiling * c,const node * n)1551 ast_for_decorators(struct compiling *c, const node *n)
1552 {
1553     asdl_seq* decorator_seq;
1554     expr_ty d;
1555     int i;
1556 
1557     REQ(n, decorators);
1558     decorator_seq = _Py_asdl_seq_new(NCH(n), c->c_arena);
1559     if (!decorator_seq)
1560         return NULL;
1561 
1562     for (i = 0; i < NCH(n); i++) {
1563         d = ast_for_decorator(c, CHILD(n, i));
1564         if (!d)
1565             return NULL;
1566         asdl_seq_SET(decorator_seq, i, d);
1567     }
1568     return decorator_seq;
1569 }
1570 
1571 static stmt_ty
ast_for_funcdef_impl(struct compiling * c,const node * n0,asdl_seq * decorator_seq,bool is_async)1572 ast_for_funcdef_impl(struct compiling *c, const node *n0,
1573                      asdl_seq *decorator_seq, bool is_async)
1574 {
1575     /* funcdef: 'def' NAME parameters ['->' test] ':' suite */
1576     const node * const n = is_async ? CHILD(n0, 1) : n0;
1577     identifier name;
1578     arguments_ty args;
1579     asdl_seq *body;
1580     expr_ty returns = NULL;
1581     int name_i = 1;
1582 
1583     REQ(n, funcdef);
1584 
1585     name = NEW_IDENTIFIER(CHILD(n, name_i));
1586     if (!name)
1587         return NULL;
1588     if (forbidden_name(c, name, CHILD(n, name_i), 0))
1589         return NULL;
1590     args = ast_for_arguments(c, CHILD(n, name_i + 1));
1591     if (!args)
1592         return NULL;
1593     if (TYPE(CHILD(n, name_i+2)) == RARROW) {
1594         returns = ast_for_expr(c, CHILD(n, name_i + 3));
1595         if (!returns)
1596             return NULL;
1597         name_i += 2;
1598     }
1599     body = ast_for_suite(c, CHILD(n, name_i + 3));
1600     if (!body)
1601         return NULL;
1602 
1603     if (is_async)
1604         return AsyncFunctionDef(name, args, body, decorator_seq, returns,
1605                                 LINENO(n0), n0->n_col_offset, c->c_arena);
1606     else
1607         return FunctionDef(name, args, body, decorator_seq, returns,
1608                            LINENO(n), n->n_col_offset, c->c_arena);
1609 }
1610 
1611 static stmt_ty
ast_for_async_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)1612 ast_for_async_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
1613 {
1614     /* async_funcdef: 'async' funcdef */
1615     REQ(n, async_funcdef);
1616     REQ(CHILD(n, 0), NAME);
1617     assert(strcmp(STR(CHILD(n, 0)), "async") == 0);
1618     REQ(CHILD(n, 1), funcdef);
1619 
1620     return ast_for_funcdef_impl(c, n, decorator_seq,
1621                                 true /* is_async */);
1622 }
1623 
1624 static stmt_ty
ast_for_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)1625 ast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
1626 {
1627     /* funcdef: 'def' NAME parameters ['->' test] ':' suite */
1628     return ast_for_funcdef_impl(c, n, decorator_seq,
1629                                 false /* is_async */);
1630 }
1631 
1632 
1633 static stmt_ty
ast_for_async_stmt(struct compiling * c,const node * n)1634 ast_for_async_stmt(struct compiling *c, const node *n)
1635 {
1636     /* async_stmt: 'async' (funcdef | with_stmt | for_stmt) */
1637     REQ(n, async_stmt);
1638     REQ(CHILD(n, 0), NAME);
1639     assert(strcmp(STR(CHILD(n, 0)), "async") == 0);
1640 
1641     switch (TYPE(CHILD(n, 1))) {
1642         case funcdef:
1643             return ast_for_funcdef_impl(c, n, NULL,
1644                                         true /* is_async */);
1645         case with_stmt:
1646             return ast_for_with_stmt(c, n,
1647                                      true /* is_async */);
1648 
1649         case for_stmt:
1650             return ast_for_for_stmt(c, n,
1651                                     true /* is_async */);
1652 
1653         default:
1654             PyErr_Format(PyExc_SystemError,
1655                          "invalid async stament: %s",
1656                          STR(CHILD(n, 1)));
1657             return NULL;
1658     }
1659 }
1660 
1661 static stmt_ty
ast_for_decorated(struct compiling * c,const node * n)1662 ast_for_decorated(struct compiling *c, const node *n)
1663 {
1664     /* decorated: decorators (classdef | funcdef | async_funcdef) */
1665     stmt_ty thing = NULL;
1666     asdl_seq *decorator_seq = NULL;
1667 
1668     REQ(n, decorated);
1669 
1670     decorator_seq = ast_for_decorators(c, CHILD(n, 0));
1671     if (!decorator_seq)
1672       return NULL;
1673 
1674     assert(TYPE(CHILD(n, 1)) == funcdef ||
1675            TYPE(CHILD(n, 1)) == async_funcdef ||
1676            TYPE(CHILD(n, 1)) == classdef);
1677 
1678     if (TYPE(CHILD(n, 1)) == funcdef) {
1679       thing = ast_for_funcdef(c, CHILD(n, 1), decorator_seq);
1680     } else if (TYPE(CHILD(n, 1)) == classdef) {
1681       thing = ast_for_classdef(c, CHILD(n, 1), decorator_seq);
1682     } else if (TYPE(CHILD(n, 1)) == async_funcdef) {
1683       thing = ast_for_async_funcdef(c, CHILD(n, 1), decorator_seq);
1684     }
1685     /* we count the decorators in when talking about the class' or
1686      * function's line number */
1687     if (thing) {
1688         thing->lineno = LINENO(n);
1689         thing->col_offset = n->n_col_offset;
1690     }
1691     return thing;
1692 }
1693 
1694 static expr_ty
ast_for_lambdef(struct compiling * c,const node * n)1695 ast_for_lambdef(struct compiling *c, const node *n)
1696 {
1697     /* lambdef: 'lambda' [varargslist] ':' test
1698        lambdef_nocond: 'lambda' [varargslist] ':' test_nocond */
1699     arguments_ty args;
1700     expr_ty expression;
1701 
1702     if (NCH(n) == 3) {
1703         args = arguments(NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena);
1704         if (!args)
1705             return NULL;
1706         expression = ast_for_expr(c, CHILD(n, 2));
1707         if (!expression)
1708             return NULL;
1709     }
1710     else {
1711         args = ast_for_arguments(c, CHILD(n, 1));
1712         if (!args)
1713             return NULL;
1714         expression = ast_for_expr(c, CHILD(n, 3));
1715         if (!expression)
1716             return NULL;
1717     }
1718 
1719     return Lambda(args, expression, LINENO(n), n->n_col_offset, c->c_arena);
1720 }
1721 
1722 static expr_ty
ast_for_ifexpr(struct compiling * c,const node * n)1723 ast_for_ifexpr(struct compiling *c, const node *n)
1724 {
1725     /* test: or_test 'if' or_test 'else' test */
1726     expr_ty expression, body, orelse;
1727 
1728     assert(NCH(n) == 5);
1729     body = ast_for_expr(c, CHILD(n, 0));
1730     if (!body)
1731         return NULL;
1732     expression = ast_for_expr(c, CHILD(n, 2));
1733     if (!expression)
1734         return NULL;
1735     orelse = ast_for_expr(c, CHILD(n, 4));
1736     if (!orelse)
1737         return NULL;
1738     return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset,
1739                  c->c_arena);
1740 }
1741 
1742 /*
1743    Count the number of 'for' loops in a comprehension.
1744 
1745    Helper for ast_for_comprehension().
1746 */
1747 
1748 static int
count_comp_fors(struct compiling * c,const node * n)1749 count_comp_fors(struct compiling *c, const node *n)
1750 {
1751     int n_fors = 0;
1752 
1753   count_comp_for:
1754     n_fors++;
1755     REQ(n, comp_for);
1756     if (NCH(n) == 2) {
1757         REQ(CHILD(n, 0), NAME);
1758         assert(strcmp(STR(CHILD(n, 0)), "async") == 0);
1759         n = CHILD(n, 1);
1760     }
1761     else if (NCH(n) == 1) {
1762         n = CHILD(n, 0);
1763     }
1764     else {
1765         goto error;
1766     }
1767     if (NCH(n) == (5)) {
1768         n = CHILD(n, 4);
1769     }
1770     else {
1771         return n_fors;
1772     }
1773   count_comp_iter:
1774     REQ(n, comp_iter);
1775     n = CHILD(n, 0);
1776     if (TYPE(n) == comp_for)
1777         goto count_comp_for;
1778     else if (TYPE(n) == comp_if) {
1779         if (NCH(n) == 3) {
1780             n = CHILD(n, 2);
1781             goto count_comp_iter;
1782         }
1783         else
1784             return n_fors;
1785     }
1786 
1787   error:
1788     /* Should never be reached */
1789     PyErr_SetString(PyExc_SystemError,
1790                     "logic error in count_comp_fors");
1791     return -1;
1792 }
1793 
1794 /* Count the number of 'if' statements in a comprehension.
1795 
1796    Helper for ast_for_comprehension().
1797 */
1798 
1799 static int
count_comp_ifs(struct compiling * c,const node * n)1800 count_comp_ifs(struct compiling *c, const node *n)
1801 {
1802     int n_ifs = 0;
1803 
1804     while (1) {
1805         REQ(n, comp_iter);
1806         if (TYPE(CHILD(n, 0)) == comp_for)
1807             return n_ifs;
1808         n = CHILD(n, 0);
1809         REQ(n, comp_if);
1810         n_ifs++;
1811         if (NCH(n) == 2)
1812             return n_ifs;
1813         n = CHILD(n, 2);
1814     }
1815 }
1816 
1817 static asdl_seq *
ast_for_comprehension(struct compiling * c,const node * n)1818 ast_for_comprehension(struct compiling *c, const node *n)
1819 {
1820     int i, n_fors;
1821     asdl_seq *comps;
1822 
1823     n_fors = count_comp_fors(c, n);
1824     if (n_fors == -1)
1825         return NULL;
1826 
1827     comps = _Py_asdl_seq_new(n_fors, c->c_arena);
1828     if (!comps)
1829         return NULL;
1830 
1831     for (i = 0; i < n_fors; i++) {
1832         comprehension_ty comp;
1833         asdl_seq *t;
1834         expr_ty expression, first;
1835         node *for_ch;
1836         node *sync_n;
1837         int is_async = 0;
1838 
1839         REQ(n, comp_for);
1840 
1841         if (NCH(n) == 2) {
1842             is_async = 1;
1843             REQ(CHILD(n, 0), NAME);
1844             assert(strcmp(STR(CHILD(n, 0)), "async") == 0);
1845             sync_n = CHILD(n, 1);
1846         }
1847         else {
1848             sync_n = CHILD(n, 0);
1849         }
1850         REQ(sync_n, sync_comp_for);
1851 
1852         for_ch = CHILD(sync_n, 1);
1853         t = ast_for_exprlist(c, for_ch, Store);
1854         if (!t)
1855             return NULL;
1856         expression = ast_for_expr(c, CHILD(sync_n, 3));
1857         if (!expression)
1858             return NULL;
1859 
1860         /* Check the # of children rather than the length of t, since
1861            (x for x, in ...) has 1 element in t, but still requires a Tuple. */
1862         first = (expr_ty)asdl_seq_GET(t, 0);
1863         if (NCH(for_ch) == 1)
1864             comp = comprehension(first, expression, NULL,
1865                                  is_async, c->c_arena);
1866         else
1867             comp = comprehension(Tuple(t, Store, first->lineno,
1868                                        first->col_offset, c->c_arena),
1869                                  expression, NULL, is_async, c->c_arena);
1870         if (!comp)
1871             return NULL;
1872 
1873         if (NCH(sync_n) == 5) {
1874             int j, n_ifs;
1875             asdl_seq *ifs;
1876 
1877             n = CHILD(sync_n, 4);
1878             n_ifs = count_comp_ifs(c, n);
1879             if (n_ifs == -1)
1880                 return NULL;
1881 
1882             ifs = _Py_asdl_seq_new(n_ifs, c->c_arena);
1883             if (!ifs)
1884                 return NULL;
1885 
1886             for (j = 0; j < n_ifs; j++) {
1887                 REQ(n, comp_iter);
1888                 n = CHILD(n, 0);
1889                 REQ(n, comp_if);
1890 
1891                 expression = ast_for_expr(c, CHILD(n, 1));
1892                 if (!expression)
1893                     return NULL;
1894                 asdl_seq_SET(ifs, j, expression);
1895                 if (NCH(n) == 3)
1896                     n = CHILD(n, 2);
1897             }
1898             /* on exit, must guarantee that n is a comp_for */
1899             if (TYPE(n) == comp_iter)
1900                 n = CHILD(n, 0);
1901             comp->ifs = ifs;
1902         }
1903         asdl_seq_SET(comps, i, comp);
1904     }
1905     return comps;
1906 }
1907 
1908 static expr_ty
ast_for_itercomp(struct compiling * c,const node * n,int type)1909 ast_for_itercomp(struct compiling *c, const node *n, int type)
1910 {
1911     /* testlist_comp: (test|star_expr)
1912      *                ( comp_for | (',' (test|star_expr))* [','] ) */
1913     expr_ty elt;
1914     asdl_seq *comps;
1915     node *ch;
1916 
1917     assert(NCH(n) > 1);
1918 
1919     ch = CHILD(n, 0);
1920     elt = ast_for_expr(c, ch);
1921     if (!elt)
1922         return NULL;
1923     if (elt->kind == Starred_kind) {
1924         ast_error(c, ch, "iterable unpacking cannot be used in comprehension");
1925         return NULL;
1926     }
1927 
1928     comps = ast_for_comprehension(c, CHILD(n, 1));
1929     if (!comps)
1930         return NULL;
1931 
1932     if (type == COMP_GENEXP)
1933         return GeneratorExp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1934     else if (type == COMP_LISTCOMP)
1935         return ListComp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1936     else if (type == COMP_SETCOMP)
1937         return SetComp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1938     else
1939         /* Should never happen */
1940         return NULL;
1941 }
1942 
1943 /* Fills in the key, value pair corresponding to the dict element.  In case
1944  * of an unpacking, key is NULL.  *i is advanced by the number of ast
1945  * elements.  Iff successful, nonzero is returned.
1946  */
1947 static int
ast_for_dictelement(struct compiling * c,const node * n,int * i,expr_ty * key,expr_ty * value)1948 ast_for_dictelement(struct compiling *c, const node *n, int *i,
1949                     expr_ty *key, expr_ty *value)
1950 {
1951     expr_ty expression;
1952     if (TYPE(CHILD(n, *i)) == DOUBLESTAR) {
1953         assert(NCH(n) - *i >= 2);
1954 
1955         expression = ast_for_expr(c, CHILD(n, *i + 1));
1956         if (!expression)
1957             return 0;
1958         *key = NULL;
1959         *value = expression;
1960 
1961         *i += 2;
1962     }
1963     else {
1964         assert(NCH(n) - *i >= 3);
1965 
1966         expression = ast_for_expr(c, CHILD(n, *i));
1967         if (!expression)
1968             return 0;
1969         *key = expression;
1970 
1971         REQ(CHILD(n, *i + 1), COLON);
1972 
1973         expression = ast_for_expr(c, CHILD(n, *i + 2));
1974         if (!expression)
1975             return 0;
1976         *value = expression;
1977 
1978         *i += 3;
1979     }
1980     return 1;
1981 }
1982 
1983 static expr_ty
ast_for_dictcomp(struct compiling * c,const node * n)1984 ast_for_dictcomp(struct compiling *c, const node *n)
1985 {
1986     expr_ty key, value;
1987     asdl_seq *comps;
1988     int i = 0;
1989 
1990     if (!ast_for_dictelement(c, n, &i, &key, &value))
1991         return NULL;
1992     assert(key);
1993     assert(NCH(n) - i >= 1);
1994 
1995     comps = ast_for_comprehension(c, CHILD(n, i));
1996     if (!comps)
1997         return NULL;
1998 
1999     return DictComp(key, value, comps, LINENO(n), n->n_col_offset, c->c_arena);
2000 }
2001 
2002 static expr_ty
ast_for_dictdisplay(struct compiling * c,const node * n)2003 ast_for_dictdisplay(struct compiling *c, const node *n)
2004 {
2005     int i;
2006     int j;
2007     int size;
2008     asdl_seq *keys, *values;
2009 
2010     size = (NCH(n) + 1) / 3; /* +1 in case no trailing comma */
2011     keys = _Py_asdl_seq_new(size, c->c_arena);
2012     if (!keys)
2013         return NULL;
2014 
2015     values = _Py_asdl_seq_new(size, c->c_arena);
2016     if (!values)
2017         return NULL;
2018 
2019     j = 0;
2020     for (i = 0; i < NCH(n); i++) {
2021         expr_ty key, value;
2022 
2023         if (!ast_for_dictelement(c, n, &i, &key, &value))
2024             return NULL;
2025         asdl_seq_SET(keys, j, key);
2026         asdl_seq_SET(values, j, value);
2027 
2028         j++;
2029     }
2030     keys->size = j;
2031     values->size = j;
2032     return Dict(keys, values, LINENO(n), n->n_col_offset, c->c_arena);
2033 }
2034 
2035 static expr_ty
ast_for_genexp(struct compiling * c,const node * n)2036 ast_for_genexp(struct compiling *c, const node *n)
2037 {
2038     assert(TYPE(n) == (testlist_comp) || TYPE(n) == (argument));
2039     return ast_for_itercomp(c, n, COMP_GENEXP);
2040 }
2041 
2042 static expr_ty
ast_for_listcomp(struct compiling * c,const node * n)2043 ast_for_listcomp(struct compiling *c, const node *n)
2044 {
2045     assert(TYPE(n) == (testlist_comp));
2046     return ast_for_itercomp(c, n, COMP_LISTCOMP);
2047 }
2048 
2049 static expr_ty
ast_for_setcomp(struct compiling * c,const node * n)2050 ast_for_setcomp(struct compiling *c, const node *n)
2051 {
2052     assert(TYPE(n) == (dictorsetmaker));
2053     return ast_for_itercomp(c, n, COMP_SETCOMP);
2054 }
2055 
2056 static expr_ty
ast_for_setdisplay(struct compiling * c,const node * n)2057 ast_for_setdisplay(struct compiling *c, const node *n)
2058 {
2059     int i;
2060     int size;
2061     asdl_seq *elts;
2062 
2063     assert(TYPE(n) == (dictorsetmaker));
2064     size = (NCH(n) + 1) / 2; /* +1 in case no trailing comma */
2065     elts = _Py_asdl_seq_new(size, c->c_arena);
2066     if (!elts)
2067         return NULL;
2068     for (i = 0; i < NCH(n); i += 2) {
2069         expr_ty expression;
2070         expression = ast_for_expr(c, CHILD(n, i));
2071         if (!expression)
2072             return NULL;
2073         asdl_seq_SET(elts, i / 2, expression);
2074     }
2075     return Set(elts, LINENO(n), n->n_col_offset, c->c_arena);
2076 }
2077 
2078 static expr_ty
ast_for_atom(struct compiling * c,const node * n)2079 ast_for_atom(struct compiling *c, const node *n)
2080 {
2081     /* atom: '(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']'
2082        | '{' [dictmaker|testlist_comp] '}' | NAME | NUMBER | STRING+
2083        | '...' | 'None' | 'True' | 'False'
2084     */
2085     node *ch = CHILD(n, 0);
2086 
2087     switch (TYPE(ch)) {
2088     case NAME: {
2089         PyObject *name;
2090         const char *s = STR(ch);
2091         size_t len = strlen(s);
2092         if (len >= 4 && len <= 5) {
2093             if (!strcmp(s, "None"))
2094                 return NameConstant(Py_None, LINENO(n), n->n_col_offset, c->c_arena);
2095             if (!strcmp(s, "True"))
2096                 return NameConstant(Py_True, LINENO(n), n->n_col_offset, c->c_arena);
2097             if (!strcmp(s, "False"))
2098                 return NameConstant(Py_False, LINENO(n), n->n_col_offset, c->c_arena);
2099         }
2100         name = new_identifier(s, c);
2101         if (!name)
2102             return NULL;
2103         /* All names start in Load context, but may later be changed. */
2104         return Name(name, Load, LINENO(n), n->n_col_offset, c->c_arena);
2105     }
2106     case STRING: {
2107         expr_ty str = parsestrplus(c, n);
2108         if (!str) {
2109             const char *errtype = NULL;
2110             if (PyErr_ExceptionMatches(PyExc_UnicodeError))
2111                 errtype = "unicode error";
2112             else if (PyErr_ExceptionMatches(PyExc_ValueError))
2113                 errtype = "value error";
2114             if (errtype) {
2115                 char buf[128];
2116                 const char *s = NULL;
2117                 PyObject *type, *value, *tback, *errstr;
2118                 PyErr_Fetch(&type, &value, &tback);
2119                 errstr = PyObject_Str(value);
2120                 if (errstr)
2121                     s = PyUnicode_AsUTF8(errstr);
2122                 if (s) {
2123                     PyOS_snprintf(buf, sizeof(buf), "(%s) %s", errtype, s);
2124                 } else {
2125                     PyErr_Clear();
2126                     PyOS_snprintf(buf, sizeof(buf), "(%s) unknown error", errtype);
2127                 }
2128                 Py_XDECREF(errstr);
2129                 ast_error(c, n, buf);
2130                 Py_DECREF(type);
2131                 Py_XDECREF(value);
2132                 Py_XDECREF(tback);
2133             }
2134             return NULL;
2135         }
2136         return str;
2137     }
2138     case NUMBER: {
2139         PyObject *pynum = parsenumber(c, STR(ch));
2140         if (!pynum)
2141             return NULL;
2142 
2143         if (PyArena_AddPyObject(c->c_arena, pynum) < 0) {
2144             Py_DECREF(pynum);
2145             return NULL;
2146         }
2147         return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena);
2148     }
2149     case ELLIPSIS: /* Ellipsis */
2150         return Ellipsis(LINENO(n), n->n_col_offset, c->c_arena);
2151     case LPAR: /* some parenthesized expressions */
2152         ch = CHILD(n, 1);
2153 
2154         if (TYPE(ch) == RPAR)
2155             return Tuple(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
2156 
2157         if (TYPE(ch) == yield_expr)
2158             return ast_for_expr(c, ch);
2159 
2160         /* testlist_comp: test ( comp_for | (',' test)* [','] ) */
2161         if ((NCH(ch) > 1) && (TYPE(CHILD(ch, 1)) == comp_for))
2162             return ast_for_genexp(c, ch);
2163 
2164         return ast_for_testlist(c, ch);
2165     case LSQB: /* list (or list comprehension) */
2166         ch = CHILD(n, 1);
2167 
2168         if (TYPE(ch) == RSQB)
2169             return List(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
2170 
2171         REQ(ch, testlist_comp);
2172         if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
2173             asdl_seq *elts = seq_for_testlist(c, ch);
2174             if (!elts)
2175                 return NULL;
2176 
2177             return List(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
2178         }
2179         else
2180             return ast_for_listcomp(c, ch);
2181     case LBRACE: {
2182         /* dictorsetmaker: ( ((test ':' test | '**' test)
2183          *                    (comp_for | (',' (test ':' test | '**' test))* [','])) |
2184          *                   ((test | '*' test)
2185          *                    (comp_for | (',' (test | '*' test))* [','])) ) */
2186         expr_ty res;
2187         ch = CHILD(n, 1);
2188         if (TYPE(ch) == RBRACE) {
2189             /* It's an empty dict. */
2190             return Dict(NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena);
2191         }
2192         else {
2193             int is_dict = (TYPE(CHILD(ch, 0)) == DOUBLESTAR);
2194             if (NCH(ch) == 1 ||
2195                     (NCH(ch) > 1 &&
2196                      TYPE(CHILD(ch, 1)) == COMMA)) {
2197                 /* It's a set display. */
2198                 res = ast_for_setdisplay(c, ch);
2199             }
2200             else if (NCH(ch) > 1 &&
2201                     TYPE(CHILD(ch, 1)) == comp_for) {
2202                 /* It's a set comprehension. */
2203                 res = ast_for_setcomp(c, ch);
2204             }
2205             else if (NCH(ch) > 3 - is_dict &&
2206                     TYPE(CHILD(ch, 3 - is_dict)) == comp_for) {
2207                 /* It's a dictionary comprehension. */
2208                 if (is_dict) {
2209                     ast_error(c, n, "dict unpacking cannot be used in "
2210                             "dict comprehension");
2211                     return NULL;
2212                 }
2213                 res = ast_for_dictcomp(c, ch);
2214             }
2215             else {
2216                 /* It's a dictionary display. */
2217                 res = ast_for_dictdisplay(c, ch);
2218             }
2219             if (res) {
2220                 res->lineno = LINENO(n);
2221                 res->col_offset = n->n_col_offset;
2222             }
2223             return res;
2224         }
2225     }
2226     default:
2227         PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch));
2228         return NULL;
2229     }
2230 }
2231 
2232 static slice_ty
ast_for_slice(struct compiling * c,const node * n)2233 ast_for_slice(struct compiling *c, const node *n)
2234 {
2235     node *ch;
2236     expr_ty lower = NULL, upper = NULL, step = NULL;
2237 
2238     REQ(n, subscript);
2239 
2240     /*
2241        subscript: test | [test] ':' [test] [sliceop]
2242        sliceop: ':' [test]
2243     */
2244     ch = CHILD(n, 0);
2245     if (NCH(n) == 1 && TYPE(ch) == test) {
2246         /* 'step' variable hold no significance in terms of being used over
2247            other vars */
2248         step = ast_for_expr(c, ch);
2249         if (!step)
2250             return NULL;
2251 
2252         return Index(step, c->c_arena);
2253     }
2254 
2255     if (TYPE(ch) == test) {
2256         lower = ast_for_expr(c, ch);
2257         if (!lower)
2258             return NULL;
2259     }
2260 
2261     /* If there's an upper bound it's in the second or third position. */
2262     if (TYPE(ch) == COLON) {
2263         if (NCH(n) > 1) {
2264             node *n2 = CHILD(n, 1);
2265 
2266             if (TYPE(n2) == test) {
2267                 upper = ast_for_expr(c, n2);
2268                 if (!upper)
2269                     return NULL;
2270             }
2271         }
2272     } else if (NCH(n) > 2) {
2273         node *n2 = CHILD(n, 2);
2274 
2275         if (TYPE(n2) == test) {
2276             upper = ast_for_expr(c, n2);
2277             if (!upper)
2278                 return NULL;
2279         }
2280     }
2281 
2282     ch = CHILD(n, NCH(n) - 1);
2283     if (TYPE(ch) == sliceop) {
2284         if (NCH(ch) != 1) {
2285             ch = CHILD(ch, 1);
2286             if (TYPE(ch) == test) {
2287                 step = ast_for_expr(c, ch);
2288                 if (!step)
2289                     return NULL;
2290             }
2291         }
2292     }
2293 
2294     return Slice(lower, upper, step, c->c_arena);
2295 }
2296 
2297 static expr_ty
ast_for_binop(struct compiling * c,const node * n)2298 ast_for_binop(struct compiling *c, const node *n)
2299 {
2300     /* Must account for a sequence of expressions.
2301        How should A op B op C by represented?
2302        BinOp(BinOp(A, op, B), op, C).
2303     */
2304 
2305     int i, nops;
2306     expr_ty expr1, expr2, result;
2307     operator_ty newoperator;
2308 
2309     expr1 = ast_for_expr(c, CHILD(n, 0));
2310     if (!expr1)
2311         return NULL;
2312 
2313     expr2 = ast_for_expr(c, CHILD(n, 2));
2314     if (!expr2)
2315         return NULL;
2316 
2317     newoperator = get_operator(CHILD(n, 1));
2318     if (!newoperator)
2319         return NULL;
2320 
2321     result = BinOp(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
2322                    c->c_arena);
2323     if (!result)
2324         return NULL;
2325 
2326     nops = (NCH(n) - 1) / 2;
2327     for (i = 1; i < nops; i++) {
2328         expr_ty tmp_result, tmp;
2329         const node* next_oper = CHILD(n, i * 2 + 1);
2330 
2331         newoperator = get_operator(next_oper);
2332         if (!newoperator)
2333             return NULL;
2334 
2335         tmp = ast_for_expr(c, CHILD(n, i * 2 + 2));
2336         if (!tmp)
2337             return NULL;
2338 
2339         tmp_result = BinOp(result, newoperator, tmp,
2340                            LINENO(next_oper), next_oper->n_col_offset,
2341                            c->c_arena);
2342         if (!tmp_result)
2343             return NULL;
2344         result = tmp_result;
2345     }
2346     return result;
2347 }
2348 
2349 static expr_ty
ast_for_trailer(struct compiling * c,const node * n,expr_ty left_expr)2350 ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr)
2351 {
2352     /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
2353        subscriptlist: subscript (',' subscript)* [',']
2354        subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
2355      */
2356     REQ(n, trailer);
2357     if (TYPE(CHILD(n, 0)) == LPAR) {
2358         if (NCH(n) == 2)
2359             return Call(left_expr, NULL, NULL, LINENO(n),
2360                         n->n_col_offset, c->c_arena);
2361         else
2362             return ast_for_call(c, CHILD(n, 1), left_expr, true);
2363     }
2364     else if (TYPE(CHILD(n, 0)) == DOT) {
2365         PyObject *attr_id = NEW_IDENTIFIER(CHILD(n, 1));
2366         if (!attr_id)
2367             return NULL;
2368         return Attribute(left_expr, attr_id, Load,
2369                          LINENO(n), n->n_col_offset, c->c_arena);
2370     }
2371     else {
2372         REQ(CHILD(n, 0), LSQB);
2373         REQ(CHILD(n, 2), RSQB);
2374         n = CHILD(n, 1);
2375         if (NCH(n) == 1) {
2376             slice_ty slc = ast_for_slice(c, CHILD(n, 0));
2377             if (!slc)
2378                 return NULL;
2379             return Subscript(left_expr, slc, Load, LINENO(n), n->n_col_offset,
2380                              c->c_arena);
2381         }
2382         else {
2383             /* The grammar is ambiguous here. The ambiguity is resolved
2384                by treating the sequence as a tuple literal if there are
2385                no slice features.
2386             */
2387             int j;
2388             slice_ty slc;
2389             expr_ty e;
2390             int simple = 1;
2391             asdl_seq *slices, *elts;
2392             slices = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2393             if (!slices)
2394                 return NULL;
2395             for (j = 0; j < NCH(n); j += 2) {
2396                 slc = ast_for_slice(c, CHILD(n, j));
2397                 if (!slc)
2398                     return NULL;
2399                 if (slc->kind != Index_kind)
2400                     simple = 0;
2401                 asdl_seq_SET(slices, j / 2, slc);
2402             }
2403             if (!simple) {
2404                 return Subscript(left_expr, ExtSlice(slices, c->c_arena),
2405                                  Load, LINENO(n), n->n_col_offset, c->c_arena);
2406             }
2407             /* extract Index values and put them in a Tuple */
2408             elts = _Py_asdl_seq_new(asdl_seq_LEN(slices), c->c_arena);
2409             if (!elts)
2410                 return NULL;
2411             for (j = 0; j < asdl_seq_LEN(slices); ++j) {
2412                 slc = (slice_ty)asdl_seq_GET(slices, j);
2413                 assert(slc->kind == Index_kind  && slc->v.Index.value);
2414                 asdl_seq_SET(elts, j, slc->v.Index.value);
2415             }
2416             e = Tuple(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
2417             if (!e)
2418                 return NULL;
2419             return Subscript(left_expr, Index(e, c->c_arena),
2420                              Load, LINENO(n), n->n_col_offset, c->c_arena);
2421         }
2422     }
2423 }
2424 
2425 static expr_ty
ast_for_factor(struct compiling * c,const node * n)2426 ast_for_factor(struct compiling *c, const node *n)
2427 {
2428     expr_ty expression;
2429 
2430     expression = ast_for_expr(c, CHILD(n, 1));
2431     if (!expression)
2432         return NULL;
2433 
2434     switch (TYPE(CHILD(n, 0))) {
2435         case PLUS:
2436             return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset,
2437                            c->c_arena);
2438         case MINUS:
2439             return UnaryOp(USub, expression, LINENO(n), n->n_col_offset,
2440                            c->c_arena);
2441         case TILDE:
2442             return UnaryOp(Invert, expression, LINENO(n),
2443                            n->n_col_offset, c->c_arena);
2444     }
2445     PyErr_Format(PyExc_SystemError, "unhandled factor: %d",
2446                  TYPE(CHILD(n, 0)));
2447     return NULL;
2448 }
2449 
2450 static expr_ty
ast_for_atom_expr(struct compiling * c,const node * n)2451 ast_for_atom_expr(struct compiling *c, const node *n)
2452 {
2453     int i, nch, start = 0;
2454     expr_ty e, tmp;
2455 
2456     REQ(n, atom_expr);
2457     nch = NCH(n);
2458 
2459     if (TYPE(CHILD(n, 0)) == NAME && strcmp(STR(CHILD(n, 0)), "await") == 0) {
2460         start = 1;
2461         assert(nch > 1);
2462     }
2463 
2464     e = ast_for_atom(c, CHILD(n, start));
2465     if (!e)
2466         return NULL;
2467     if (nch == 1)
2468         return e;
2469     if (start && nch == 2) {
2470         return Await(e, LINENO(n), n->n_col_offset, c->c_arena);
2471     }
2472 
2473     for (i = start + 1; i < nch; i++) {
2474         node *ch = CHILD(n, i);
2475         if (TYPE(ch) != trailer)
2476             break;
2477         tmp = ast_for_trailer(c, ch, e);
2478         if (!tmp)
2479             return NULL;
2480         tmp->lineno = e->lineno;
2481         tmp->col_offset = e->col_offset;
2482         e = tmp;
2483     }
2484 
2485     if (start) {
2486         /* there was an 'await' */
2487         return Await(e, LINENO(n), n->n_col_offset, c->c_arena);
2488     }
2489     else {
2490         return e;
2491     }
2492 }
2493 
2494 static expr_ty
ast_for_power(struct compiling * c,const node * n)2495 ast_for_power(struct compiling *c, const node *n)
2496 {
2497     /* power: atom trailer* ('**' factor)*
2498      */
2499     expr_ty e;
2500     REQ(n, power);
2501     e = ast_for_atom_expr(c, CHILD(n, 0));
2502     if (!e)
2503         return NULL;
2504     if (NCH(n) == 1)
2505         return e;
2506     if (TYPE(CHILD(n, NCH(n) - 1)) == factor) {
2507         expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1));
2508         if (!f)
2509             return NULL;
2510         e = BinOp(e, Pow, f, LINENO(n), n->n_col_offset, c->c_arena);
2511     }
2512     return e;
2513 }
2514 
2515 static expr_ty
ast_for_starred(struct compiling * c,const node * n)2516 ast_for_starred(struct compiling *c, const node *n)
2517 {
2518     expr_ty tmp;
2519     REQ(n, star_expr);
2520 
2521     tmp = ast_for_expr(c, CHILD(n, 1));
2522     if (!tmp)
2523         return NULL;
2524 
2525     /* The Load context is changed later. */
2526     return Starred(tmp, Load, LINENO(n), n->n_col_offset, c->c_arena);
2527 }
2528 
2529 
2530 /* Do not name a variable 'expr'!  Will cause a compile error.
2531 */
2532 
2533 static expr_ty
ast_for_expr(struct compiling * c,const node * n)2534 ast_for_expr(struct compiling *c, const node *n)
2535 {
2536     /* handle the full range of simple expressions
2537        test: or_test ['if' or_test 'else' test] | lambdef
2538        test_nocond: or_test | lambdef_nocond
2539        or_test: and_test ('or' and_test)*
2540        and_test: not_test ('and' not_test)*
2541        not_test: 'not' not_test | comparison
2542        comparison: expr (comp_op expr)*
2543        expr: xor_expr ('|' xor_expr)*
2544        xor_expr: and_expr ('^' and_expr)*
2545        and_expr: shift_expr ('&' shift_expr)*
2546        shift_expr: arith_expr (('<<'|'>>') arith_expr)*
2547        arith_expr: term (('+'|'-') term)*
2548        term: factor (('*'|'@'|'/'|'%'|'//') factor)*
2549        factor: ('+'|'-'|'~') factor | power
2550        power: atom_expr ['**' factor]
2551        atom_expr: ['await'] atom trailer*
2552        yield_expr: 'yield' [yield_arg]
2553     */
2554 
2555     asdl_seq *seq;
2556     int i;
2557 
2558  loop:
2559     switch (TYPE(n)) {
2560         case test:
2561         case test_nocond:
2562             if (TYPE(CHILD(n, 0)) == lambdef ||
2563                 TYPE(CHILD(n, 0)) == lambdef_nocond)
2564                 return ast_for_lambdef(c, CHILD(n, 0));
2565             else if (NCH(n) > 1)
2566                 return ast_for_ifexpr(c, n);
2567             /* Fallthrough */
2568         case or_test:
2569         case and_test:
2570             if (NCH(n) == 1) {
2571                 n = CHILD(n, 0);
2572                 goto loop;
2573             }
2574             seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2575             if (!seq)
2576                 return NULL;
2577             for (i = 0; i < NCH(n); i += 2) {
2578                 expr_ty e = ast_for_expr(c, CHILD(n, i));
2579                 if (!e)
2580                     return NULL;
2581                 asdl_seq_SET(seq, i / 2, e);
2582             }
2583             if (!strcmp(STR(CHILD(n, 1)), "and"))
2584                 return BoolOp(And, seq, LINENO(n), n->n_col_offset,
2585                               c->c_arena);
2586             assert(!strcmp(STR(CHILD(n, 1)), "or"));
2587             return BoolOp(Or, seq, LINENO(n), n->n_col_offset, c->c_arena);
2588         case not_test:
2589             if (NCH(n) == 1) {
2590                 n = CHILD(n, 0);
2591                 goto loop;
2592             }
2593             else {
2594                 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
2595                 if (!expression)
2596                     return NULL;
2597 
2598                 return UnaryOp(Not, expression, LINENO(n), n->n_col_offset,
2599                                c->c_arena);
2600             }
2601         case comparison:
2602             if (NCH(n) == 1) {
2603                 n = CHILD(n, 0);
2604                 goto loop;
2605             }
2606             else {
2607                 expr_ty expression;
2608                 asdl_int_seq *ops;
2609                 asdl_seq *cmps;
2610                 ops = _Py_asdl_int_seq_new(NCH(n) / 2, c->c_arena);
2611                 if (!ops)
2612                     return NULL;
2613                 cmps = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
2614                 if (!cmps) {
2615                     return NULL;
2616                 }
2617                 for (i = 1; i < NCH(n); i += 2) {
2618                     cmpop_ty newoperator;
2619 
2620                     newoperator = ast_for_comp_op(c, CHILD(n, i));
2621                     if (!newoperator) {
2622                         return NULL;
2623                     }
2624 
2625                     expression = ast_for_expr(c, CHILD(n, i + 1));
2626                     if (!expression) {
2627                         return NULL;
2628                     }
2629 
2630                     asdl_seq_SET(ops, i / 2, newoperator);
2631                     asdl_seq_SET(cmps, i / 2, expression);
2632                 }
2633                 expression = ast_for_expr(c, CHILD(n, 0));
2634                 if (!expression) {
2635                     return NULL;
2636                 }
2637 
2638                 return Compare(expression, ops, cmps, LINENO(n),
2639                                n->n_col_offset, c->c_arena);
2640             }
2641             break;
2642 
2643         case star_expr:
2644             return ast_for_starred(c, n);
2645         /* The next five cases all handle BinOps.  The main body of code
2646            is the same in each case, but the switch turned inside out to
2647            reuse the code for each type of operator.
2648          */
2649         case expr:
2650         case xor_expr:
2651         case and_expr:
2652         case shift_expr:
2653         case arith_expr:
2654         case term:
2655             if (NCH(n) == 1) {
2656                 n = CHILD(n, 0);
2657                 goto loop;
2658             }
2659             return ast_for_binop(c, n);
2660         case yield_expr: {
2661             node *an = NULL;
2662             node *en = NULL;
2663             int is_from = 0;
2664             expr_ty exp = NULL;
2665             if (NCH(n) > 1)
2666                 an = CHILD(n, 1); /* yield_arg */
2667             if (an) {
2668                 en = CHILD(an, NCH(an) - 1);
2669                 if (NCH(an) == 2) {
2670                     is_from = 1;
2671                     exp = ast_for_expr(c, en);
2672                 }
2673                 else
2674                     exp = ast_for_testlist(c, en);
2675                 if (!exp)
2676                     return NULL;
2677             }
2678             if (is_from)
2679                 return YieldFrom(exp, LINENO(n), n->n_col_offset, c->c_arena);
2680             return Yield(exp, LINENO(n), n->n_col_offset, c->c_arena);
2681         }
2682         case factor:
2683             if (NCH(n) == 1) {
2684                 n = CHILD(n, 0);
2685                 goto loop;
2686             }
2687             return ast_for_factor(c, n);
2688         case power:
2689             return ast_for_power(c, n);
2690         default:
2691             PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n));
2692             return NULL;
2693     }
2694     /* should never get here unless if error is set */
2695     return NULL;
2696 }
2697 
2698 static expr_ty
ast_for_call(struct compiling * c,const node * n,expr_ty func,bool allowgen)2699 ast_for_call(struct compiling *c, const node *n, expr_ty func, bool allowgen)
2700 {
2701     /*
2702       arglist: argument (',' argument)*  [',']
2703       argument: ( test [comp_for] | '*' test | test '=' test | '**' test )
2704     */
2705 
2706     int i, nargs, nkeywords;
2707     int ndoublestars;
2708     asdl_seq *args;
2709     asdl_seq *keywords;
2710 
2711     REQ(n, arglist);
2712 
2713     nargs = 0;
2714     nkeywords = 0;
2715     for (i = 0; i < NCH(n); i++) {
2716         node *ch = CHILD(n, i);
2717         if (TYPE(ch) == argument) {
2718             if (NCH(ch) == 1)
2719                 nargs++;
2720             else if (TYPE(CHILD(ch, 1)) == comp_for) {
2721                 nargs++;
2722                 if (!allowgen) {
2723                     ast_error(c, ch, "invalid syntax");
2724                     return NULL;
2725                 }
2726                 if (NCH(n) > 1) {
2727                     ast_error(c, ch, "Generator expression must be parenthesized");
2728                     return NULL;
2729                 }
2730             }
2731             else if (TYPE(CHILD(ch, 0)) == STAR)
2732                 nargs++;
2733             else
2734                 /* TYPE(CHILD(ch, 0)) == DOUBLESTAR or keyword argument */
2735                 nkeywords++;
2736         }
2737     }
2738 
2739     args = _Py_asdl_seq_new(nargs, c->c_arena);
2740     if (!args)
2741         return NULL;
2742     keywords = _Py_asdl_seq_new(nkeywords, c->c_arena);
2743     if (!keywords)
2744         return NULL;
2745 
2746     nargs = 0;  /* positional arguments + iterable argument unpackings */
2747     nkeywords = 0;  /* keyword arguments + keyword argument unpackings */
2748     ndoublestars = 0;  /* just keyword argument unpackings */
2749     for (i = 0; i < NCH(n); i++) {
2750         node *ch = CHILD(n, i);
2751         if (TYPE(ch) == argument) {
2752             expr_ty e;
2753             node *chch = CHILD(ch, 0);
2754             if (NCH(ch) == 1) {
2755                 /* a positional argument */
2756                 if (nkeywords) {
2757                     if (ndoublestars) {
2758                         ast_error(c, chch,
2759                                 "positional argument follows "
2760                                 "keyword argument unpacking");
2761                     }
2762                     else {
2763                         ast_error(c, chch,
2764                                 "positional argument follows "
2765                                 "keyword argument");
2766                     }
2767                     return NULL;
2768                 }
2769                 e = ast_for_expr(c, chch);
2770                 if (!e)
2771                     return NULL;
2772                 asdl_seq_SET(args, nargs++, e);
2773             }
2774             else if (TYPE(chch) == STAR) {
2775                 /* an iterable argument unpacking */
2776                 expr_ty starred;
2777                 if (ndoublestars) {
2778                     ast_error(c, chch,
2779                             "iterable argument unpacking follows "
2780                             "keyword argument unpacking");
2781                     return NULL;
2782                 }
2783                 e = ast_for_expr(c, CHILD(ch, 1));
2784                 if (!e)
2785                     return NULL;
2786                 starred = Starred(e, Load, LINENO(chch),
2787                         chch->n_col_offset,
2788                         c->c_arena);
2789                 if (!starred)
2790                     return NULL;
2791                 asdl_seq_SET(args, nargs++, starred);
2792 
2793             }
2794             else if (TYPE(chch) == DOUBLESTAR) {
2795                 /* a keyword argument unpacking */
2796                 keyword_ty kw;
2797                 i++;
2798                 e = ast_for_expr(c, CHILD(ch, 1));
2799                 if (!e)
2800                     return NULL;
2801                 kw = keyword(NULL, e, c->c_arena);
2802                 asdl_seq_SET(keywords, nkeywords++, kw);
2803                 ndoublestars++;
2804             }
2805             else if (TYPE(CHILD(ch, 1)) == comp_for) {
2806                 /* the lone generator expression */
2807                 e = ast_for_genexp(c, ch);
2808                 if (!e)
2809                     return NULL;
2810                 asdl_seq_SET(args, nargs++, e);
2811             }
2812             else {
2813                 /* a keyword argument */
2814                 keyword_ty kw;
2815                 identifier key, tmp;
2816                 int k;
2817 
2818                 /* chch is test, but must be an identifier? */
2819                 e = ast_for_expr(c, chch);
2820                 if (!e)
2821                     return NULL;
2822                 /* f(lambda x: x[0] = 3) ends up getting parsed with
2823                  * LHS test = lambda x: x[0], and RHS test = 3.
2824                  * SF bug 132313 points out that complaining about a keyword
2825                  * then is very confusing.
2826                  */
2827                 if (e->kind == Lambda_kind) {
2828                     ast_error(c, chch,
2829                             "lambda cannot contain assignment");
2830                     return NULL;
2831                 }
2832                 else if (e->kind != Name_kind) {
2833                     ast_error(c, chch,
2834                             "keyword can't be an expression");
2835                     return NULL;
2836                 }
2837                 else if (forbidden_name(c, e->v.Name.id, ch, 1)) {
2838                     return NULL;
2839                 }
2840                 key = e->v.Name.id;
2841                 for (k = 0; k < nkeywords; k++) {
2842                     tmp = ((keyword_ty)asdl_seq_GET(keywords, k))->arg;
2843                     if (tmp && !PyUnicode_Compare(tmp, key)) {
2844                         ast_error(c, chch,
2845                                 "keyword argument repeated");
2846                         return NULL;
2847                     }
2848                 }
2849                 e = ast_for_expr(c, CHILD(ch, 2));
2850                 if (!e)
2851                     return NULL;
2852                 kw = keyword(key, e, c->c_arena);
2853                 if (!kw)
2854                     return NULL;
2855                 asdl_seq_SET(keywords, nkeywords++, kw);
2856             }
2857         }
2858     }
2859 
2860     return Call(func, args, keywords, func->lineno, func->col_offset, c->c_arena);
2861 }
2862 
2863 static expr_ty
ast_for_testlist(struct compiling * c,const node * n)2864 ast_for_testlist(struct compiling *c, const node* n)
2865 {
2866     /* testlist_comp: test (comp_for | (',' test)* [',']) */
2867     /* testlist: test (',' test)* [','] */
2868     assert(NCH(n) > 0);
2869     if (TYPE(n) == testlist_comp) {
2870         if (NCH(n) > 1)
2871             assert(TYPE(CHILD(n, 1)) != comp_for);
2872     }
2873     else {
2874         assert(TYPE(n) == testlist ||
2875                TYPE(n) == testlist_star_expr);
2876     }
2877     if (NCH(n) == 1)
2878         return ast_for_expr(c, CHILD(n, 0));
2879     else {
2880         asdl_seq *tmp = seq_for_testlist(c, n);
2881         if (!tmp)
2882             return NULL;
2883         return Tuple(tmp, Load, LINENO(n), n->n_col_offset, c->c_arena);
2884     }
2885 }
2886 
2887 static stmt_ty
ast_for_expr_stmt(struct compiling * c,const node * n)2888 ast_for_expr_stmt(struct compiling *c, const node *n)
2889 {
2890     REQ(n, expr_stmt);
2891     /* expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
2892                             ('=' (yield_expr|testlist_star_expr))*)
2893        annassign: ':' test ['=' test]
2894        testlist_star_expr: (test|star_expr) (',' test|star_expr)* [',']
2895        augassign: '+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^='
2896                 | '<<=' | '>>=' | '**=' | '//='
2897        test: ... here starts the operator precedence dance
2898      */
2899 
2900     if (NCH(n) == 1) {
2901         expr_ty e = ast_for_testlist(c, CHILD(n, 0));
2902         if (!e)
2903             return NULL;
2904 
2905         return Expr(e, LINENO(n), n->n_col_offset, c->c_arena);
2906     }
2907     else if (TYPE(CHILD(n, 1)) == augassign) {
2908         expr_ty expr1, expr2;
2909         operator_ty newoperator;
2910         node *ch = CHILD(n, 0);
2911 
2912         expr1 = ast_for_testlist(c, ch);
2913         if (!expr1)
2914             return NULL;
2915         if(!set_context(c, expr1, Store, ch))
2916             return NULL;
2917         /* set_context checks that most expressions are not the left side.
2918           Augmented assignments can only have a name, a subscript, or an
2919           attribute on the left, though, so we have to explicitly check for
2920           those. */
2921         switch (expr1->kind) {
2922             case Name_kind:
2923             case Attribute_kind:
2924             case Subscript_kind:
2925                 break;
2926             default:
2927                 ast_error(c, ch, "illegal expression for augmented assignment");
2928                 return NULL;
2929         }
2930 
2931         ch = CHILD(n, 2);
2932         if (TYPE(ch) == testlist)
2933             expr2 = ast_for_testlist(c, ch);
2934         else
2935             expr2 = ast_for_expr(c, ch);
2936         if (!expr2)
2937             return NULL;
2938 
2939         newoperator = ast_for_augassign(c, CHILD(n, 1));
2940         if (!newoperator)
2941             return NULL;
2942 
2943         return AugAssign(expr1, newoperator, expr2, LINENO(n), n->n_col_offset, c->c_arena);
2944     }
2945     else if (TYPE(CHILD(n, 1)) == annassign) {
2946         expr_ty expr1, expr2, expr3;
2947         node *ch = CHILD(n, 0);
2948         node *deep, *ann = CHILD(n, 1);
2949         int simple = 1;
2950 
2951         /* we keep track of parens to qualify (x) as expression not name */
2952         deep = ch;
2953         while (NCH(deep) == 1) {
2954             deep = CHILD(deep, 0);
2955         }
2956         if (NCH(deep) > 0 && TYPE(CHILD(deep, 0)) == LPAR) {
2957             simple = 0;
2958         }
2959         expr1 = ast_for_testlist(c, ch);
2960         if (!expr1) {
2961             return NULL;
2962         }
2963         switch (expr1->kind) {
2964             case Name_kind:
2965                 if (forbidden_name(c, expr1->v.Name.id, n, 0)) {
2966                     return NULL;
2967                 }
2968                 expr1->v.Name.ctx = Store;
2969                 break;
2970             case Attribute_kind:
2971                 if (forbidden_name(c, expr1->v.Attribute.attr, n, 1)) {
2972                     return NULL;
2973                 }
2974                 expr1->v.Attribute.ctx = Store;
2975                 break;
2976             case Subscript_kind:
2977                 expr1->v.Subscript.ctx = Store;
2978                 break;
2979             case List_kind:
2980                 ast_error(c, ch,
2981                           "only single target (not list) can be annotated");
2982                 return NULL;
2983             case Tuple_kind:
2984                 ast_error(c, ch,
2985                           "only single target (not tuple) can be annotated");
2986                 return NULL;
2987             default:
2988                 ast_error(c, ch,
2989                           "illegal target for annotation");
2990                 return NULL;
2991         }
2992 
2993         if (expr1->kind != Name_kind) {
2994             simple = 0;
2995         }
2996         ch = CHILD(ann, 1);
2997         expr2 = ast_for_expr(c, ch);
2998         if (!expr2) {
2999             return NULL;
3000         }
3001         if (NCH(ann) == 2) {
3002             return AnnAssign(expr1, expr2, NULL, simple,
3003                              LINENO(n), n->n_col_offset, c->c_arena);
3004         }
3005         else {
3006             ch = CHILD(ann, 3);
3007             expr3 = ast_for_expr(c, ch);
3008             if (!expr3) {
3009                 return NULL;
3010             }
3011             return AnnAssign(expr1, expr2, expr3, simple,
3012                              LINENO(n), n->n_col_offset, c->c_arena);
3013         }
3014     }
3015     else {
3016         int i;
3017         asdl_seq *targets;
3018         node *value;
3019         expr_ty expression;
3020 
3021         /* a normal assignment */
3022         REQ(CHILD(n, 1), EQUAL);
3023         targets = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
3024         if (!targets)
3025             return NULL;
3026         for (i = 0; i < NCH(n) - 2; i += 2) {
3027             expr_ty e;
3028             node *ch = CHILD(n, i);
3029             if (TYPE(ch) == yield_expr) {
3030                 ast_error(c, ch, "assignment to yield expression not possible");
3031                 return NULL;
3032             }
3033             e = ast_for_testlist(c, ch);
3034             if (!e)
3035               return NULL;
3036 
3037             /* set context to assign */
3038             if (!set_context(c, e, Store, CHILD(n, i)))
3039               return NULL;
3040 
3041             asdl_seq_SET(targets, i / 2, e);
3042         }
3043         value = CHILD(n, NCH(n) - 1);
3044         if (TYPE(value) == testlist_star_expr)
3045             expression = ast_for_testlist(c, value);
3046         else
3047             expression = ast_for_expr(c, value);
3048         if (!expression)
3049             return NULL;
3050         return Assign(targets, expression, LINENO(n), n->n_col_offset, c->c_arena);
3051     }
3052 }
3053 
3054 
3055 static asdl_seq *
ast_for_exprlist(struct compiling * c,const node * n,expr_context_ty context)3056 ast_for_exprlist(struct compiling *c, const node *n, expr_context_ty context)
3057 {
3058     asdl_seq *seq;
3059     int i;
3060     expr_ty e;
3061 
3062     REQ(n, exprlist);
3063 
3064     seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
3065     if (!seq)
3066         return NULL;
3067     for (i = 0; i < NCH(n); i += 2) {
3068         e = ast_for_expr(c, CHILD(n, i));
3069         if (!e)
3070             return NULL;
3071         asdl_seq_SET(seq, i / 2, e);
3072         if (context && !set_context(c, e, context, CHILD(n, i)))
3073             return NULL;
3074     }
3075     return seq;
3076 }
3077 
3078 static stmt_ty
ast_for_del_stmt(struct compiling * c,const node * n)3079 ast_for_del_stmt(struct compiling *c, const node *n)
3080 {
3081     asdl_seq *expr_list;
3082 
3083     /* del_stmt: 'del' exprlist */
3084     REQ(n, del_stmt);
3085 
3086     expr_list = ast_for_exprlist(c, CHILD(n, 1), Del);
3087     if (!expr_list)
3088         return NULL;
3089     return Delete(expr_list, LINENO(n), n->n_col_offset, c->c_arena);
3090 }
3091 
3092 static stmt_ty
ast_for_flow_stmt(struct compiling * c,const node * n)3093 ast_for_flow_stmt(struct compiling *c, const node *n)
3094 {
3095     /*
3096       flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt
3097                  | yield_stmt
3098       break_stmt: 'break'
3099       continue_stmt: 'continue'
3100       return_stmt: 'return' [testlist]
3101       yield_stmt: yield_expr
3102       yield_expr: 'yield' testlist | 'yield' 'from' test
3103       raise_stmt: 'raise' [test [',' test [',' test]]]
3104     */
3105     node *ch;
3106 
3107     REQ(n, flow_stmt);
3108     ch = CHILD(n, 0);
3109     switch (TYPE(ch)) {
3110         case break_stmt:
3111             return Break(LINENO(n), n->n_col_offset, c->c_arena);
3112         case continue_stmt:
3113             return Continue(LINENO(n), n->n_col_offset, c->c_arena);
3114         case yield_stmt: { /* will reduce to yield_expr */
3115             expr_ty exp = ast_for_expr(c, CHILD(ch, 0));
3116             if (!exp)
3117                 return NULL;
3118             return Expr(exp, LINENO(n), n->n_col_offset, c->c_arena);
3119         }
3120         case return_stmt:
3121             if (NCH(ch) == 1)
3122                 return Return(NULL, LINENO(n), n->n_col_offset, c->c_arena);
3123             else {
3124                 expr_ty expression = ast_for_testlist(c, CHILD(ch, 1));
3125                 if (!expression)
3126                     return NULL;
3127                 return Return(expression, LINENO(n), n->n_col_offset, c->c_arena);
3128             }
3129         case raise_stmt:
3130             if (NCH(ch) == 1)
3131                 return Raise(NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena);
3132             else if (NCH(ch) >= 2) {
3133                 expr_ty cause = NULL;
3134                 expr_ty expression = ast_for_expr(c, CHILD(ch, 1));
3135                 if (!expression)
3136                     return NULL;
3137                 if (NCH(ch) == 4) {
3138                     cause = ast_for_expr(c, CHILD(ch, 3));
3139                     if (!cause)
3140                         return NULL;
3141                 }
3142                 return Raise(expression, cause, LINENO(n), n->n_col_offset, c->c_arena);
3143             }
3144             /* fall through */
3145         default:
3146             PyErr_Format(PyExc_SystemError,
3147                          "unexpected flow_stmt: %d", TYPE(ch));
3148             return NULL;
3149     }
3150 }
3151 
3152 static alias_ty
alias_for_import_name(struct compiling * c,const node * n,int store)3153 alias_for_import_name(struct compiling *c, const node *n, int store)
3154 {
3155     /*
3156       import_as_name: NAME ['as' NAME]
3157       dotted_as_name: dotted_name ['as' NAME]
3158       dotted_name: NAME ('.' NAME)*
3159     */
3160     identifier str, name;
3161 
3162  loop:
3163     switch (TYPE(n)) {
3164         case import_as_name: {
3165             node *name_node = CHILD(n, 0);
3166             str = NULL;
3167             name = NEW_IDENTIFIER(name_node);
3168             if (!name)
3169                 return NULL;
3170             if (NCH(n) == 3) {
3171                 node *str_node = CHILD(n, 2);
3172                 str = NEW_IDENTIFIER(str_node);
3173                 if (!str)
3174                     return NULL;
3175                 if (store && forbidden_name(c, str, str_node, 0))
3176                     return NULL;
3177             }
3178             else {
3179                 if (forbidden_name(c, name, name_node, 0))
3180                     return NULL;
3181             }
3182             return alias(name, str, c->c_arena);
3183         }
3184         case dotted_as_name:
3185             if (NCH(n) == 1) {
3186                 n = CHILD(n, 0);
3187                 goto loop;
3188             }
3189             else {
3190                 node *asname_node = CHILD(n, 2);
3191                 alias_ty a = alias_for_import_name(c, CHILD(n, 0), 0);
3192                 if (!a)
3193                     return NULL;
3194                 assert(!a->asname);
3195                 a->asname = NEW_IDENTIFIER(asname_node);
3196                 if (!a->asname)
3197                     return NULL;
3198                 if (forbidden_name(c, a->asname, asname_node, 0))
3199                     return NULL;
3200                 return a;
3201             }
3202             break;
3203         case dotted_name:
3204             if (NCH(n) == 1) {
3205                 node *name_node = CHILD(n, 0);
3206                 name = NEW_IDENTIFIER(name_node);
3207                 if (!name)
3208                     return NULL;
3209                 if (store && forbidden_name(c, name, name_node, 0))
3210                     return NULL;
3211                 return alias(name, NULL, c->c_arena);
3212             }
3213             else {
3214                 /* Create a string of the form "a.b.c" */
3215                 int i;
3216                 size_t len;
3217                 char *s;
3218                 PyObject *uni;
3219 
3220                 len = 0;
3221                 for (i = 0; i < NCH(n); i += 2)
3222                     /* length of string plus one for the dot */
3223                     len += strlen(STR(CHILD(n, i))) + 1;
3224                 len--; /* the last name doesn't have a dot */
3225                 str = PyBytes_FromStringAndSize(NULL, len);
3226                 if (!str)
3227                     return NULL;
3228                 s = PyBytes_AS_STRING(str);
3229                 if (!s)
3230                     return NULL;
3231                 for (i = 0; i < NCH(n); i += 2) {
3232                     char *sch = STR(CHILD(n, i));
3233                     strcpy(s, STR(CHILD(n, i)));
3234                     s += strlen(sch);
3235                     *s++ = '.';
3236                 }
3237                 --s;
3238                 *s = '\0';
3239                 uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str),
3240                                            PyBytes_GET_SIZE(str),
3241                                            NULL);
3242                 Py_DECREF(str);
3243                 if (!uni)
3244                     return NULL;
3245                 str = uni;
3246                 PyUnicode_InternInPlace(&str);
3247                 if (PyArena_AddPyObject(c->c_arena, str) < 0) {
3248                     Py_DECREF(str);
3249                     return NULL;
3250                 }
3251                 return alias(str, NULL, c->c_arena);
3252             }
3253             break;
3254         case STAR:
3255             str = PyUnicode_InternFromString("*");
3256             if (!str)
3257                 return NULL;
3258             if (PyArena_AddPyObject(c->c_arena, str) < 0) {
3259                 Py_DECREF(str);
3260                 return NULL;
3261             }
3262             return alias(str, NULL, c->c_arena);
3263         default:
3264             PyErr_Format(PyExc_SystemError,
3265                          "unexpected import name: %d", TYPE(n));
3266             return NULL;
3267     }
3268 
3269     PyErr_SetString(PyExc_SystemError, "unhandled import name condition");
3270     return NULL;
3271 }
3272 
3273 static stmt_ty
ast_for_import_stmt(struct compiling * c,const node * n)3274 ast_for_import_stmt(struct compiling *c, const node *n)
3275 {
3276     /*
3277       import_stmt: import_name | import_from
3278       import_name: 'import' dotted_as_names
3279       import_from: 'from' (('.' | '...')* dotted_name | ('.' | '...')+)
3280                    'import' ('*' | '(' import_as_names ')' | import_as_names)
3281     */
3282     int lineno;
3283     int col_offset;
3284     int i;
3285     asdl_seq *aliases;
3286 
3287     REQ(n, import_stmt);
3288     lineno = LINENO(n);
3289     col_offset = n->n_col_offset;
3290     n = CHILD(n, 0);
3291     if (TYPE(n) == import_name) {
3292         n = CHILD(n, 1);
3293         REQ(n, dotted_as_names);
3294         aliases = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
3295         if (!aliases)
3296                 return NULL;
3297         for (i = 0; i < NCH(n); i += 2) {
3298             alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
3299             if (!import_alias)
3300                 return NULL;
3301             asdl_seq_SET(aliases, i / 2, import_alias);
3302         }
3303         return Import(aliases, lineno, col_offset, c->c_arena);
3304     }
3305     else if (TYPE(n) == import_from) {
3306         int n_children;
3307         int idx, ndots = 0;
3308         alias_ty mod = NULL;
3309         identifier modname = NULL;
3310 
3311        /* Count the number of dots (for relative imports) and check for the
3312           optional module name */
3313         for (idx = 1; idx < NCH(n); idx++) {
3314             if (TYPE(CHILD(n, idx)) == dotted_name) {
3315                 mod = alias_for_import_name(c, CHILD(n, idx), 0);
3316                 if (!mod)
3317                     return NULL;
3318                 idx++;
3319                 break;
3320             } else if (TYPE(CHILD(n, idx)) == ELLIPSIS) {
3321                 /* three consecutive dots are tokenized as one ELLIPSIS */
3322                 ndots += 3;
3323                 continue;
3324             } else if (TYPE(CHILD(n, idx)) != DOT) {
3325                 break;
3326             }
3327             ndots++;
3328         }
3329         idx++; /* skip over the 'import' keyword */
3330         switch (TYPE(CHILD(n, idx))) {
3331         case STAR:
3332             /* from ... import * */
3333             n = CHILD(n, idx);
3334             n_children = 1;
3335             break;
3336         case LPAR:
3337             /* from ... import (x, y, z) */
3338             n = CHILD(n, idx + 1);
3339             n_children = NCH(n);
3340             break;
3341         case import_as_names:
3342             /* from ... import x, y, z */
3343             n = CHILD(n, idx);
3344             n_children = NCH(n);
3345             if (n_children % 2 == 0) {
3346                 ast_error(c, n, "trailing comma not allowed without"
3347                              " surrounding parentheses");
3348                 return NULL;
3349             }
3350             break;
3351         default:
3352             ast_error(c, n, "Unexpected node-type in from-import");
3353             return NULL;
3354         }
3355 
3356         aliases = _Py_asdl_seq_new((n_children + 1) / 2, c->c_arena);
3357         if (!aliases)
3358             return NULL;
3359 
3360         /* handle "from ... import *" special b/c there's no children */
3361         if (TYPE(n) == STAR) {
3362             alias_ty import_alias = alias_for_import_name(c, n, 1);
3363             if (!import_alias)
3364                 return NULL;
3365             asdl_seq_SET(aliases, 0, import_alias);
3366         }
3367         else {
3368             for (i = 0; i < NCH(n); i += 2) {
3369                 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
3370                 if (!import_alias)
3371                     return NULL;
3372                 asdl_seq_SET(aliases, i / 2, import_alias);
3373             }
3374         }
3375         if (mod != NULL)
3376             modname = mod->name;
3377         return ImportFrom(modname, aliases, ndots, lineno, col_offset,
3378                           c->c_arena);
3379     }
3380     PyErr_Format(PyExc_SystemError,
3381                  "unknown import statement: starts with command '%s'",
3382                  STR(CHILD(n, 0)));
3383     return NULL;
3384 }
3385 
3386 static stmt_ty
ast_for_global_stmt(struct compiling * c,const node * n)3387 ast_for_global_stmt(struct compiling *c, const node *n)
3388 {
3389     /* global_stmt: 'global' NAME (',' NAME)* */
3390     identifier name;
3391     asdl_seq *s;
3392     int i;
3393 
3394     REQ(n, global_stmt);
3395     s = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
3396     if (!s)
3397         return NULL;
3398     for (i = 1; i < NCH(n); i += 2) {
3399         name = NEW_IDENTIFIER(CHILD(n, i));
3400         if (!name)
3401             return NULL;
3402         asdl_seq_SET(s, i / 2, name);
3403     }
3404     return Global(s, LINENO(n), n->n_col_offset, c->c_arena);
3405 }
3406 
3407 static stmt_ty
ast_for_nonlocal_stmt(struct compiling * c,const node * n)3408 ast_for_nonlocal_stmt(struct compiling *c, const node *n)
3409 {
3410     /* nonlocal_stmt: 'nonlocal' NAME (',' NAME)* */
3411     identifier name;
3412     asdl_seq *s;
3413     int i;
3414 
3415     REQ(n, nonlocal_stmt);
3416     s = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
3417     if (!s)
3418         return NULL;
3419     for (i = 1; i < NCH(n); i += 2) {
3420         name = NEW_IDENTIFIER(CHILD(n, i));
3421         if (!name)
3422             return NULL;
3423         asdl_seq_SET(s, i / 2, name);
3424     }
3425     return Nonlocal(s, LINENO(n), n->n_col_offset, c->c_arena);
3426 }
3427 
3428 static stmt_ty
ast_for_assert_stmt(struct compiling * c,const node * n)3429 ast_for_assert_stmt(struct compiling *c, const node *n)
3430 {
3431     /* assert_stmt: 'assert' test [',' test] */
3432     REQ(n, assert_stmt);
3433     if (NCH(n) == 2) {
3434         expr_ty expression = ast_for_expr(c, CHILD(n, 1));
3435         if (!expression)
3436             return NULL;
3437         return Assert(expression, NULL, LINENO(n), n->n_col_offset, c->c_arena);
3438     }
3439     else if (NCH(n) == 4) {
3440         expr_ty expr1, expr2;
3441 
3442         expr1 = ast_for_expr(c, CHILD(n, 1));
3443         if (!expr1)
3444             return NULL;
3445         expr2 = ast_for_expr(c, CHILD(n, 3));
3446         if (!expr2)
3447             return NULL;
3448 
3449         return Assert(expr1, expr2, LINENO(n), n->n_col_offset, c->c_arena);
3450     }
3451     PyErr_Format(PyExc_SystemError,
3452                  "improper number of parts to 'assert' statement: %d",
3453                  NCH(n));
3454     return NULL;
3455 }
3456 
3457 static asdl_seq *
ast_for_suite(struct compiling * c,const node * n)3458 ast_for_suite(struct compiling *c, const node *n)
3459 {
3460     /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */
3461     asdl_seq *seq;
3462     stmt_ty s;
3463     int i, total, num, end, pos = 0;
3464     node *ch;
3465 
3466     REQ(n, suite);
3467 
3468     total = num_stmts(n);
3469     seq = _Py_asdl_seq_new(total, c->c_arena);
3470     if (!seq)
3471         return NULL;
3472     if (TYPE(CHILD(n, 0)) == simple_stmt) {
3473         n = CHILD(n, 0);
3474         /* simple_stmt always ends with a NEWLINE,
3475            and may have a trailing SEMI
3476         */
3477         end = NCH(n) - 1;
3478         if (TYPE(CHILD(n, end - 1)) == SEMI)
3479             end--;
3480         /* loop by 2 to skip semi-colons */
3481         for (i = 0; i < end; i += 2) {
3482             ch = CHILD(n, i);
3483             s = ast_for_stmt(c, ch);
3484             if (!s)
3485                 return NULL;
3486             asdl_seq_SET(seq, pos++, s);
3487         }
3488     }
3489     else {
3490         for (i = 2; i < (NCH(n) - 1); i++) {
3491             ch = CHILD(n, i);
3492             REQ(ch, stmt);
3493             num = num_stmts(ch);
3494             if (num == 1) {
3495                 /* small_stmt or compound_stmt with only one child */
3496                 s = ast_for_stmt(c, ch);
3497                 if (!s)
3498                     return NULL;
3499                 asdl_seq_SET(seq, pos++, s);
3500             }
3501             else {
3502                 int j;
3503                 ch = CHILD(ch, 0);
3504                 REQ(ch, simple_stmt);
3505                 for (j = 0; j < NCH(ch); j += 2) {
3506                     /* statement terminates with a semi-colon ';' */
3507                     if (NCH(CHILD(ch, j)) == 0) {
3508                         assert((j + 1) == NCH(ch));
3509                         break;
3510                     }
3511                     s = ast_for_stmt(c, CHILD(ch, j));
3512                     if (!s)
3513                         return NULL;
3514                     asdl_seq_SET(seq, pos++, s);
3515                 }
3516             }
3517         }
3518     }
3519     assert(pos == seq->size);
3520     return seq;
3521 }
3522 
3523 static stmt_ty
ast_for_if_stmt(struct compiling * c,const node * n)3524 ast_for_if_stmt(struct compiling *c, const node *n)
3525 {
3526     /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)*
3527        ['else' ':' suite]
3528     */
3529     char *s;
3530 
3531     REQ(n, if_stmt);
3532 
3533     if (NCH(n) == 4) {
3534         expr_ty expression;
3535         asdl_seq *suite_seq;
3536 
3537         expression = ast_for_expr(c, CHILD(n, 1));
3538         if (!expression)
3539             return NULL;
3540         suite_seq = ast_for_suite(c, CHILD(n, 3));
3541         if (!suite_seq)
3542             return NULL;
3543 
3544         return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
3545                   c->c_arena);
3546     }
3547 
3548     s = STR(CHILD(n, 4));
3549     /* s[2], the third character in the string, will be
3550        's' for el_s_e, or
3551        'i' for el_i_f
3552     */
3553     if (s[2] == 's') {
3554         expr_ty expression;
3555         asdl_seq *seq1, *seq2;
3556 
3557         expression = ast_for_expr(c, CHILD(n, 1));
3558         if (!expression)
3559             return NULL;
3560         seq1 = ast_for_suite(c, CHILD(n, 3));
3561         if (!seq1)
3562             return NULL;
3563         seq2 = ast_for_suite(c, CHILD(n, 6));
3564         if (!seq2)
3565             return NULL;
3566 
3567         return If(expression, seq1, seq2, LINENO(n), n->n_col_offset,
3568                   c->c_arena);
3569     }
3570     else if (s[2] == 'i') {
3571         int i, n_elif, has_else = 0;
3572         expr_ty expression;
3573         asdl_seq *suite_seq;
3574         asdl_seq *orelse = NULL;
3575         n_elif = NCH(n) - 4;
3576         /* must reference the child n_elif+1 since 'else' token is third,
3577            not fourth, child from the end. */
3578         if (TYPE(CHILD(n, (n_elif + 1))) == NAME
3579             && STR(CHILD(n, (n_elif + 1)))[2] == 's') {
3580             has_else = 1;
3581             n_elif -= 3;
3582         }
3583         n_elif /= 4;
3584 
3585         if (has_else) {
3586             asdl_seq *suite_seq2;
3587 
3588             orelse = _Py_asdl_seq_new(1, c->c_arena);
3589             if (!orelse)
3590                 return NULL;
3591             expression = ast_for_expr(c, CHILD(n, NCH(n) - 6));
3592             if (!expression)
3593                 return NULL;
3594             suite_seq = ast_for_suite(c, CHILD(n, NCH(n) - 4));
3595             if (!suite_seq)
3596                 return NULL;
3597             suite_seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1));
3598             if (!suite_seq2)
3599                 return NULL;
3600 
3601             asdl_seq_SET(orelse, 0,
3602                          If(expression, suite_seq, suite_seq2,
3603                             LINENO(CHILD(n, NCH(n) - 6)),
3604                             CHILD(n, NCH(n) - 6)->n_col_offset,
3605                             c->c_arena));
3606             /* the just-created orelse handled the last elif */
3607             n_elif--;
3608         }
3609 
3610         for (i = 0; i < n_elif; i++) {
3611             int off = 5 + (n_elif - i - 1) * 4;
3612             asdl_seq *newobj = _Py_asdl_seq_new(1, c->c_arena);
3613             if (!newobj)
3614                 return NULL;
3615             expression = ast_for_expr(c, CHILD(n, off));
3616             if (!expression)
3617                 return NULL;
3618             suite_seq = ast_for_suite(c, CHILD(n, off + 2));
3619             if (!suite_seq)
3620                 return NULL;
3621 
3622             asdl_seq_SET(newobj, 0,
3623                          If(expression, suite_seq, orelse,
3624                             LINENO(CHILD(n, off)),
3625                             CHILD(n, off)->n_col_offset, c->c_arena));
3626             orelse = newobj;
3627         }
3628         expression = ast_for_expr(c, CHILD(n, 1));
3629         if (!expression)
3630             return NULL;
3631         suite_seq = ast_for_suite(c, CHILD(n, 3));
3632         if (!suite_seq)
3633             return NULL;
3634         return If(expression, suite_seq, orelse,
3635                   LINENO(n), n->n_col_offset, c->c_arena);
3636     }
3637 
3638     PyErr_Format(PyExc_SystemError,
3639                  "unexpected token in 'if' statement: %s", s);
3640     return NULL;
3641 }
3642 
3643 static stmt_ty
ast_for_while_stmt(struct compiling * c,const node * n)3644 ast_for_while_stmt(struct compiling *c, const node *n)
3645 {
3646     /* while_stmt: 'while' test ':' suite ['else' ':' suite] */
3647     REQ(n, while_stmt);
3648 
3649     if (NCH(n) == 4) {
3650         expr_ty expression;
3651         asdl_seq *suite_seq;
3652 
3653         expression = ast_for_expr(c, CHILD(n, 1));
3654         if (!expression)
3655             return NULL;
3656         suite_seq = ast_for_suite(c, CHILD(n, 3));
3657         if (!suite_seq)
3658             return NULL;
3659         return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset, c->c_arena);
3660     }
3661     else if (NCH(n) == 7) {
3662         expr_ty expression;
3663         asdl_seq *seq1, *seq2;
3664 
3665         expression = ast_for_expr(c, CHILD(n, 1));
3666         if (!expression)
3667             return NULL;
3668         seq1 = ast_for_suite(c, CHILD(n, 3));
3669         if (!seq1)
3670             return NULL;
3671         seq2 = ast_for_suite(c, CHILD(n, 6));
3672         if (!seq2)
3673             return NULL;
3674 
3675         return While(expression, seq1, seq2, LINENO(n), n->n_col_offset, c->c_arena);
3676     }
3677 
3678     PyErr_Format(PyExc_SystemError,
3679                  "wrong number of tokens for 'while' statement: %d",
3680                  NCH(n));
3681     return NULL;
3682 }
3683 
3684 static stmt_ty
ast_for_for_stmt(struct compiling * c,const node * n0,bool is_async)3685 ast_for_for_stmt(struct compiling *c, const node *n0, bool is_async)
3686 {
3687     const node * const n = is_async ? CHILD(n0, 1) : n0;
3688     asdl_seq *_target, *seq = NULL, *suite_seq;
3689     expr_ty expression;
3690     expr_ty target, first;
3691     const node *node_target;
3692     /* for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] */
3693     REQ(n, for_stmt);
3694 
3695     if (NCH(n) == 9) {
3696         seq = ast_for_suite(c, CHILD(n, 8));
3697         if (!seq)
3698             return NULL;
3699     }
3700 
3701     node_target = CHILD(n, 1);
3702     _target = ast_for_exprlist(c, node_target, Store);
3703     if (!_target)
3704         return NULL;
3705     /* Check the # of children rather than the length of _target, since
3706        for x, in ... has 1 element in _target, but still requires a Tuple. */
3707     first = (expr_ty)asdl_seq_GET(_target, 0);
3708     if (NCH(node_target) == 1)
3709         target = first;
3710     else
3711         target = Tuple(_target, Store, first->lineno, first->col_offset, c->c_arena);
3712 
3713     expression = ast_for_testlist(c, CHILD(n, 3));
3714     if (!expression)
3715         return NULL;
3716     suite_seq = ast_for_suite(c, CHILD(n, 5));
3717     if (!suite_seq)
3718         return NULL;
3719 
3720     if (is_async)
3721         return AsyncFor(target, expression, suite_seq, seq,
3722                         LINENO(n0), n0->n_col_offset,
3723                         c->c_arena);
3724     else
3725         return For(target, expression, suite_seq, seq,
3726                    LINENO(n), n->n_col_offset,
3727                    c->c_arena);
3728 }
3729 
3730 static excepthandler_ty
ast_for_except_clause(struct compiling * c,const node * exc,node * body)3731 ast_for_except_clause(struct compiling *c, const node *exc, node *body)
3732 {
3733     /* except_clause: 'except' [test ['as' test]] */
3734     REQ(exc, except_clause);
3735     REQ(body, suite);
3736 
3737     if (NCH(exc) == 1) {
3738         asdl_seq *suite_seq = ast_for_suite(c, body);
3739         if (!suite_seq)
3740             return NULL;
3741 
3742         return ExceptHandler(NULL, NULL, suite_seq, LINENO(exc),
3743                              exc->n_col_offset, c->c_arena);
3744     }
3745     else if (NCH(exc) == 2) {
3746         expr_ty expression;
3747         asdl_seq *suite_seq;
3748 
3749         expression = ast_for_expr(c, CHILD(exc, 1));
3750         if (!expression)
3751             return NULL;
3752         suite_seq = ast_for_suite(c, body);
3753         if (!suite_seq)
3754             return NULL;
3755 
3756         return ExceptHandler(expression, NULL, suite_seq, LINENO(exc),
3757                              exc->n_col_offset, c->c_arena);
3758     }
3759     else if (NCH(exc) == 4) {
3760         asdl_seq *suite_seq;
3761         expr_ty expression;
3762         identifier e = NEW_IDENTIFIER(CHILD(exc, 3));
3763         if (!e)
3764             return NULL;
3765         if (forbidden_name(c, e, CHILD(exc, 3), 0))
3766             return NULL;
3767         expression = ast_for_expr(c, CHILD(exc, 1));
3768         if (!expression)
3769             return NULL;
3770         suite_seq = ast_for_suite(c, body);
3771         if (!suite_seq)
3772             return NULL;
3773 
3774         return ExceptHandler(expression, e, suite_seq, LINENO(exc),
3775                              exc->n_col_offset, c->c_arena);
3776     }
3777 
3778     PyErr_Format(PyExc_SystemError,
3779                  "wrong number of children for 'except' clause: %d",
3780                  NCH(exc));
3781     return NULL;
3782 }
3783 
3784 static stmt_ty
ast_for_try_stmt(struct compiling * c,const node * n)3785 ast_for_try_stmt(struct compiling *c, const node *n)
3786 {
3787     const int nch = NCH(n);
3788     int n_except = (nch - 3)/3;
3789     asdl_seq *body, *handlers = NULL, *orelse = NULL, *finally = NULL;
3790 
3791     REQ(n, try_stmt);
3792 
3793     body = ast_for_suite(c, CHILD(n, 2));
3794     if (body == NULL)
3795         return NULL;
3796 
3797     if (TYPE(CHILD(n, nch - 3)) == NAME) {
3798         if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) {
3799             if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) {
3800                 /* we can assume it's an "else",
3801                    because nch >= 9 for try-else-finally and
3802                    it would otherwise have a type of except_clause */
3803                 orelse = ast_for_suite(c, CHILD(n, nch - 4));
3804                 if (orelse == NULL)
3805                     return NULL;
3806                 n_except--;
3807             }
3808 
3809             finally = ast_for_suite(c, CHILD(n, nch - 1));
3810             if (finally == NULL)
3811                 return NULL;
3812             n_except--;
3813         }
3814         else {
3815             /* we can assume it's an "else",
3816                otherwise it would have a type of except_clause */
3817             orelse = ast_for_suite(c, CHILD(n, nch - 1));
3818             if (orelse == NULL)
3819                 return NULL;
3820             n_except--;
3821         }
3822     }
3823     else if (TYPE(CHILD(n, nch - 3)) != except_clause) {
3824         ast_error(c, n, "malformed 'try' statement");
3825         return NULL;
3826     }
3827 
3828     if (n_except > 0) {
3829         int i;
3830         /* process except statements to create a try ... except */
3831         handlers = _Py_asdl_seq_new(n_except, c->c_arena);
3832         if (handlers == NULL)
3833             return NULL;
3834 
3835         for (i = 0; i < n_except; i++) {
3836             excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3),
3837                                                        CHILD(n, 5 + i * 3));
3838             if (!e)
3839                 return NULL;
3840             asdl_seq_SET(handlers, i, e);
3841         }
3842     }
3843 
3844     assert(finally != NULL || asdl_seq_LEN(handlers));
3845     return Try(body, handlers, orelse, finally, LINENO(n), n->n_col_offset, c->c_arena);
3846 }
3847 
3848 /* with_item: test ['as' expr] */
3849 static withitem_ty
ast_for_with_item(struct compiling * c,const node * n)3850 ast_for_with_item(struct compiling *c, const node *n)
3851 {
3852     expr_ty context_expr, optional_vars = NULL;
3853 
3854     REQ(n, with_item);
3855     context_expr = ast_for_expr(c, CHILD(n, 0));
3856     if (!context_expr)
3857         return NULL;
3858     if (NCH(n) == 3) {
3859         optional_vars = ast_for_expr(c, CHILD(n, 2));
3860 
3861         if (!optional_vars) {
3862             return NULL;
3863         }
3864         if (!set_context(c, optional_vars, Store, n)) {
3865             return NULL;
3866         }
3867     }
3868 
3869     return withitem(context_expr, optional_vars, c->c_arena);
3870 }
3871 
3872 /* with_stmt: 'with' with_item (',' with_item)* ':' suite */
3873 static stmt_ty
ast_for_with_stmt(struct compiling * c,const node * n0,bool is_async)3874 ast_for_with_stmt(struct compiling *c, const node *n0, bool is_async)
3875 {
3876     const node * const n = is_async ? CHILD(n0, 1) : n0;
3877     int i, n_items;
3878     asdl_seq *items, *body;
3879 
3880     REQ(n, with_stmt);
3881 
3882     n_items = (NCH(n) - 2) / 2;
3883     items = _Py_asdl_seq_new(n_items, c->c_arena);
3884     if (!items)
3885         return NULL;
3886     for (i = 1; i < NCH(n) - 2; i += 2) {
3887         withitem_ty item = ast_for_with_item(c, CHILD(n, i));
3888         if (!item)
3889             return NULL;
3890         asdl_seq_SET(items, (i - 1) / 2, item);
3891     }
3892 
3893     body = ast_for_suite(c, CHILD(n, NCH(n) - 1));
3894     if (!body)
3895         return NULL;
3896 
3897     if (is_async)
3898         return AsyncWith(items, body, LINENO(n0), n0->n_col_offset, c->c_arena);
3899     else
3900         return With(items, body, LINENO(n), n->n_col_offset, c->c_arena);
3901 }
3902 
3903 static stmt_ty
ast_for_classdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)3904 ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
3905 {
3906     /* classdef: 'class' NAME ['(' arglist ')'] ':' suite */
3907     PyObject *classname;
3908     asdl_seq *s;
3909     expr_ty call;
3910 
3911     REQ(n, classdef);
3912 
3913     if (NCH(n) == 4) { /* class NAME ':' suite */
3914         s = ast_for_suite(c, CHILD(n, 3));
3915         if (!s)
3916             return NULL;
3917         classname = NEW_IDENTIFIER(CHILD(n, 1));
3918         if (!classname)
3919             return NULL;
3920         if (forbidden_name(c, classname, CHILD(n, 3), 0))
3921             return NULL;
3922         return ClassDef(classname, NULL, NULL, s, decorator_seq,
3923                         LINENO(n), n->n_col_offset, c->c_arena);
3924     }
3925 
3926     if (TYPE(CHILD(n, 3)) == RPAR) { /* class NAME '(' ')' ':' suite */
3927         s = ast_for_suite(c, CHILD(n, 5));
3928         if (!s)
3929             return NULL;
3930         classname = NEW_IDENTIFIER(CHILD(n, 1));
3931         if (!classname)
3932             return NULL;
3933         if (forbidden_name(c, classname, CHILD(n, 3), 0))
3934             return NULL;
3935         return ClassDef(classname, NULL, NULL, s, decorator_seq,
3936                         LINENO(n), n->n_col_offset, c->c_arena);
3937     }
3938 
3939     /* class NAME '(' arglist ')' ':' suite */
3940     /* build up a fake Call node so we can extract its pieces */
3941     {
3942         PyObject *dummy_name;
3943         expr_ty dummy;
3944         dummy_name = NEW_IDENTIFIER(CHILD(n, 1));
3945         if (!dummy_name)
3946             return NULL;
3947         dummy = Name(dummy_name, Load, LINENO(n), n->n_col_offset, c->c_arena);
3948         call = ast_for_call(c, CHILD(n, 3), dummy, false);
3949         if (!call)
3950             return NULL;
3951     }
3952     s = ast_for_suite(c, CHILD(n, 6));
3953     if (!s)
3954         return NULL;
3955     classname = NEW_IDENTIFIER(CHILD(n, 1));
3956     if (!classname)
3957         return NULL;
3958     if (forbidden_name(c, classname, CHILD(n, 1), 0))
3959         return NULL;
3960 
3961     return ClassDef(classname, call->v.Call.args, call->v.Call.keywords, s,
3962                     decorator_seq, LINENO(n), n->n_col_offset, c->c_arena);
3963 }
3964 
3965 static stmt_ty
ast_for_stmt(struct compiling * c,const node * n)3966 ast_for_stmt(struct compiling *c, const node *n)
3967 {
3968     if (TYPE(n) == stmt) {
3969         assert(NCH(n) == 1);
3970         n = CHILD(n, 0);
3971     }
3972     if (TYPE(n) == simple_stmt) {
3973         assert(num_stmts(n) == 1);
3974         n = CHILD(n, 0);
3975     }
3976     if (TYPE(n) == small_stmt) {
3977         n = CHILD(n, 0);
3978         /* small_stmt: expr_stmt | del_stmt | pass_stmt | flow_stmt
3979                   | import_stmt | global_stmt | nonlocal_stmt | assert_stmt
3980         */
3981         switch (TYPE(n)) {
3982             case expr_stmt:
3983                 return ast_for_expr_stmt(c, n);
3984             case del_stmt:
3985                 return ast_for_del_stmt(c, n);
3986             case pass_stmt:
3987                 return Pass(LINENO(n), n->n_col_offset, c->c_arena);
3988             case flow_stmt:
3989                 return ast_for_flow_stmt(c, n);
3990             case import_stmt:
3991                 return ast_for_import_stmt(c, n);
3992             case global_stmt:
3993                 return ast_for_global_stmt(c, n);
3994             case nonlocal_stmt:
3995                 return ast_for_nonlocal_stmt(c, n);
3996             case assert_stmt:
3997                 return ast_for_assert_stmt(c, n);
3998             default:
3999                 PyErr_Format(PyExc_SystemError,
4000                              "unhandled small_stmt: TYPE=%d NCH=%d\n",
4001                              TYPE(n), NCH(n));
4002                 return NULL;
4003         }
4004     }
4005     else {
4006         /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt
4007                         | funcdef | classdef | decorated | async_stmt
4008         */
4009         node *ch = CHILD(n, 0);
4010         REQ(n, compound_stmt);
4011         switch (TYPE(ch)) {
4012             case if_stmt:
4013                 return ast_for_if_stmt(c, ch);
4014             case while_stmt:
4015                 return ast_for_while_stmt(c, ch);
4016             case for_stmt:
4017                 return ast_for_for_stmt(c, ch, 0);
4018             case try_stmt:
4019                 return ast_for_try_stmt(c, ch);
4020             case with_stmt:
4021                 return ast_for_with_stmt(c, ch, 0);
4022             case funcdef:
4023                 return ast_for_funcdef(c, ch, NULL);
4024             case classdef:
4025                 return ast_for_classdef(c, ch, NULL);
4026             case decorated:
4027                 return ast_for_decorated(c, ch);
4028             case async_stmt:
4029                 return ast_for_async_stmt(c, ch);
4030             default:
4031                 PyErr_Format(PyExc_SystemError,
4032                              "unhandled small_stmt: TYPE=%d NCH=%d\n",
4033                              TYPE(n), NCH(n));
4034                 return NULL;
4035         }
4036     }
4037 }
4038 
4039 static PyObject *
parsenumber_raw(struct compiling * c,const char * s)4040 parsenumber_raw(struct compiling *c, const char *s)
4041 {
4042     const char *end;
4043     long x;
4044     double dx;
4045     Py_complex compl;
4046     int imflag;
4047 
4048     assert(s != NULL);
4049     errno = 0;
4050     end = s + strlen(s) - 1;
4051     imflag = *end == 'j' || *end == 'J';
4052     if (s[0] == '0') {
4053         x = (long) PyOS_strtoul(s, (char **)&end, 0);
4054         if (x < 0 && errno == 0) {
4055             return PyLong_FromString(s, (char **)0, 0);
4056         }
4057     }
4058     else
4059         x = PyOS_strtol(s, (char **)&end, 0);
4060     if (*end == '\0') {
4061         if (errno != 0)
4062             return PyLong_FromString(s, (char **)0, 0);
4063         return PyLong_FromLong(x);
4064     }
4065     /* XXX Huge floats may silently fail */
4066     if (imflag) {
4067         compl.real = 0.;
4068         compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
4069         if (compl.imag == -1.0 && PyErr_Occurred())
4070             return NULL;
4071         return PyComplex_FromCComplex(compl);
4072     }
4073     else
4074     {
4075         dx = PyOS_string_to_double(s, NULL, NULL);
4076         if (dx == -1.0 && PyErr_Occurred())
4077             return NULL;
4078         return PyFloat_FromDouble(dx);
4079     }
4080 }
4081 
4082 static PyObject *
parsenumber(struct compiling * c,const char * s)4083 parsenumber(struct compiling *c, const char *s)
4084 {
4085     char *dup, *end;
4086     PyObject *res = NULL;
4087 
4088     assert(s != NULL);
4089 
4090     if (strchr(s, '_') == NULL) {
4091         return parsenumber_raw(c, s);
4092     }
4093     /* Create a duplicate without underscores. */
4094     dup = PyMem_Malloc(strlen(s) + 1);
4095     if (dup == NULL) {
4096         return PyErr_NoMemory();
4097     }
4098     end = dup;
4099     for (; *s; s++) {
4100         if (*s != '_') {
4101             *end++ = *s;
4102         }
4103     }
4104     *end = '\0';
4105     res = parsenumber_raw(c, dup);
4106     PyMem_Free(dup);
4107     return res;
4108 }
4109 
4110 static PyObject *
decode_utf8(struct compiling * c,const char ** sPtr,const char * end)4111 decode_utf8(struct compiling *c, const char **sPtr, const char *end)
4112 {
4113     const char *s, *t;
4114     t = s = *sPtr;
4115     /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
4116     while (s < end && (*s & 0x80)) s++;
4117     *sPtr = s;
4118     return PyUnicode_DecodeUTF8(t, s - t, NULL);
4119 }
4120 
4121 static int
warn_invalid_escape_sequence(struct compiling * c,const node * n,unsigned char first_invalid_escape_char)4122 warn_invalid_escape_sequence(struct compiling *c, const node *n,
4123                              unsigned char first_invalid_escape_char)
4124 {
4125     PyObject *msg = PyUnicode_FromFormat("invalid escape sequence \\%c",
4126                                          first_invalid_escape_char);
4127     if (msg == NULL) {
4128         return -1;
4129     }
4130     if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg,
4131                                    c->c_filename, LINENO(n),
4132                                    NULL, NULL) < 0)
4133     {
4134         if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
4135             const char *s;
4136 
4137             /* Replace the DeprecationWarning exception with a SyntaxError
4138                to get a more accurate error report */
4139             PyErr_Clear();
4140 
4141             s = PyUnicode_AsUTF8(msg);
4142             if (s != NULL) {
4143                 ast_error(c, n, s);
4144             }
4145         }
4146         Py_DECREF(msg);
4147         return -1;
4148     }
4149     Py_DECREF(msg);
4150     return 0;
4151 }
4152 
4153 static PyObject *
decode_unicode_with_escapes(struct compiling * c,const node * n,const char * s,size_t len)4154 decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
4155                             size_t len)
4156 {
4157     PyObject *v, *u;
4158     char *buf;
4159     char *p;
4160     const char *end;
4161 
4162     /* check for integer overflow */
4163     if (len > SIZE_MAX / 6)
4164         return NULL;
4165     /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
4166        "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
4167     u = PyBytes_FromStringAndSize((char *)NULL, len * 6);
4168     if (u == NULL)
4169         return NULL;
4170     p = buf = PyBytes_AsString(u);
4171     end = s + len;
4172     while (s < end) {
4173         if (*s == '\\') {
4174             *p++ = *s++;
4175             if (s >= end || *s & 0x80) {
4176                 strcpy(p, "u005c");
4177                 p += 5;
4178                 if (s >= end)
4179                     break;
4180             }
4181         }
4182         if (*s & 0x80) { /* XXX inefficient */
4183             PyObject *w;
4184             int kind;
4185             void *data;
4186             Py_ssize_t len, i;
4187             w = decode_utf8(c, &s, end);
4188             if (w == NULL) {
4189                 Py_DECREF(u);
4190                 return NULL;
4191             }
4192             kind = PyUnicode_KIND(w);
4193             data = PyUnicode_DATA(w);
4194             len = PyUnicode_GET_LENGTH(w);
4195             for (i = 0; i < len; i++) {
4196                 Py_UCS4 chr = PyUnicode_READ(kind, data, i);
4197                 sprintf(p, "\\U%08x", chr);
4198                 p += 10;
4199             }
4200             /* Should be impossible to overflow */
4201             assert(p - buf <= PyBytes_GET_SIZE(u));
4202             Py_DECREF(w);
4203         } else {
4204             *p++ = *s++;
4205         }
4206     }
4207     len = p - buf;
4208     s = buf;
4209 
4210     const char *first_invalid_escape;
4211     v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape);
4212 
4213     if (v != NULL && first_invalid_escape != NULL) {
4214         if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4215             /* We have not decref u before because first_invalid_escape points
4216                inside u. */
4217             Py_XDECREF(u);
4218             Py_DECREF(v);
4219             return NULL;
4220         }
4221     }
4222     Py_XDECREF(u);
4223     return v;
4224 }
4225 
4226 static PyObject *
decode_bytes_with_escapes(struct compiling * c,const node * n,const char * s,size_t len)4227 decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
4228                           size_t len)
4229 {
4230     const char *first_invalid_escape;
4231     PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, 0, NULL,
4232                                              &first_invalid_escape);
4233     if (result == NULL)
4234         return NULL;
4235 
4236     if (first_invalid_escape != NULL) {
4237         if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4238             Py_DECREF(result);
4239             return NULL;
4240         }
4241     }
4242     return result;
4243 }
4244 
4245 /* Shift locations for the given node and all its children by adding `lineno`
4246    and `col_offset` to existing locations. */
fstring_shift_node_locations(node * n,int lineno,int col_offset)4247 static void fstring_shift_node_locations(node *n, int lineno, int col_offset)
4248 {
4249     n->n_col_offset = n->n_col_offset + col_offset;
4250     for (int i = 0; i < NCH(n); ++i) {
4251         if (n->n_lineno && n->n_lineno < CHILD(n, i)->n_lineno) {
4252             /* Shifting column offsets unnecessary if there's been newlines. */
4253             col_offset = 0;
4254         }
4255         fstring_shift_node_locations(CHILD(n, i), lineno, col_offset);
4256     }
4257     n->n_lineno = n->n_lineno + lineno;
4258 }
4259 
4260 /* Fix locations for the given node and its children.
4261 
4262    `parent` is the enclosing node.
4263    `n` is the node which locations are going to be fixed relative to parent.
4264    `expr_str` is the child node's string representation, including braces.
4265 */
4266 static void
fstring_fix_node_location(const node * parent,node * n,char * expr_str)4267 fstring_fix_node_location(const node *parent, node *n, char *expr_str)
4268 {
4269     char *substr = NULL;
4270     char *start;
4271     int lines = LINENO(parent) - 1;
4272     int cols = parent->n_col_offset;
4273     /* Find the full fstring to fix location information in `n`. */
4274     while (parent && parent->n_type != STRING)
4275         parent = parent->n_child;
4276     if (parent && parent->n_str) {
4277         substr = strstr(parent->n_str, expr_str);
4278         if (substr) {
4279             start = substr;
4280             while (start > parent->n_str) {
4281                 if (start[0] == '\n')
4282                     break;
4283                 start--;
4284             }
4285             cols += substr - start;
4286             /* Fix lineno in mulitline strings. */
4287             while ((substr = strchr(substr + 1, '\n')))
4288                 lines--;
4289         }
4290     }
4291     fstring_shift_node_locations(n, lines, cols);
4292 }
4293 
4294 /* Compile this expression in to an expr_ty.  Add parens around the
4295    expression, in order to allow leading spaces in the expression. */
4296 static expr_ty
fstring_compile_expr(const char * expr_start,const char * expr_end,struct compiling * c,const node * n)4297 fstring_compile_expr(const char *expr_start, const char *expr_end,
4298                      struct compiling *c, const node *n)
4299 
4300 {
4301     PyCompilerFlags cf;
4302     node *mod_n;
4303     mod_ty mod;
4304     char *str;
4305     Py_ssize_t len;
4306     const char *s;
4307 
4308     assert(expr_end >= expr_start);
4309     assert(*(expr_start-1) == '{');
4310     assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':');
4311 
4312     /* If the substring is all whitespace, it's an error.  We need to catch this
4313        here, and not when we call PyParser_SimpleParseStringFlagsFilename,
4314        because turning the expression '' in to '()' would go from being invalid
4315        to valid. */
4316     for (s = expr_start; s != expr_end; s++) {
4317         char c = *s;
4318         /* The Python parser ignores only the following whitespace
4319            characters (\r already is converted to \n). */
4320         if (!(c == ' ' || c == '\t' || c == '\n' || c == '\f')) {
4321             break;
4322         }
4323     }
4324     if (s == expr_end) {
4325         ast_error(c, n, "f-string: empty expression not allowed");
4326         return NULL;
4327     }
4328 
4329     len = expr_end - expr_start;
4330     /* Allocate 3 extra bytes: open paren, close paren, null byte. */
4331     str = PyMem_RawMalloc(len + 3);
4332     if (str == NULL) {
4333         PyErr_NoMemory();
4334         return NULL;
4335     }
4336 
4337     str[0] = '(';
4338     memcpy(str+1, expr_start, len);
4339     str[len+1] = ')';
4340     str[len+2] = 0;
4341 
4342     cf.cf_flags = PyCF_ONLY_AST;
4343     mod_n = PyParser_SimpleParseStringFlagsFilename(str, "<fstring>",
4344                                                     Py_eval_input, 0);
4345     if (!mod_n) {
4346         PyMem_RawFree(str);
4347         return NULL;
4348     }
4349     /* Reuse str to find the correct column offset. */
4350     str[0] = '{';
4351     str[len+1] = '}';
4352     fstring_fix_node_location(n, mod_n, str);
4353     mod = PyAST_FromNode(mod_n, &cf, "<fstring>", c->c_arena);
4354     PyMem_RawFree(str);
4355     PyNode_Free(mod_n);
4356     if (!mod)
4357         return NULL;
4358     return mod->v.Expression.body;
4359 }
4360 
4361 /* Return -1 on error.
4362 
4363    Return 0 if we reached the end of the literal.
4364 
4365    Return 1 if we haven't reached the end of the literal, but we want
4366    the caller to process the literal up to this point. Used for
4367    doubled braces.
4368 */
4369 static int
fstring_find_literal(const char ** str,const char * end,int raw,PyObject ** literal,int recurse_lvl,struct compiling * c,const node * n)4370 fstring_find_literal(const char **str, const char *end, int raw,
4371                      PyObject **literal, int recurse_lvl,
4372                      struct compiling *c, const node *n)
4373 {
4374     /* Get any literal string. It ends when we hit an un-doubled left
4375        brace (which isn't part of a unicode name escape such as
4376        "\N{EULER CONSTANT}"), or the end of the string. */
4377 
4378     const char *s = *str;
4379     const char *literal_start = s;
4380     int result = 0;
4381 
4382     assert(*literal == NULL);
4383     while (s < end) {
4384         char ch = *s++;
4385         if (!raw && ch == '\\' && s < end) {
4386             ch = *s++;
4387             if (ch == 'N') {
4388                 if (s < end && *s++ == '{') {
4389                     while (s < end && *s++ != '}') {
4390                     }
4391                     continue;
4392                 }
4393                 break;
4394             }
4395             if (ch == '{' && warn_invalid_escape_sequence(c, n, ch) < 0) {
4396                 return -1;
4397             }
4398         }
4399         if (ch == '{' || ch == '}') {
4400             /* Check for doubled braces, but only at the top level. If
4401                we checked at every level, then f'{0:{3}}' would fail
4402                with the two closing braces. */
4403             if (recurse_lvl == 0) {
4404                 if (s < end && *s == ch) {
4405                     /* We're going to tell the caller that the literal ends
4406                        here, but that they should continue scanning. But also
4407                        skip over the second brace when we resume scanning. */
4408                     *str = s + 1;
4409                     result = 1;
4410                     goto done;
4411                 }
4412 
4413                 /* Where a single '{' is the start of a new expression, a
4414                    single '}' is not allowed. */
4415                 if (ch == '}') {
4416                     *str = s - 1;
4417                     ast_error(c, n, "f-string: single '}' is not allowed");
4418                     return -1;
4419                 }
4420             }
4421             /* We're either at a '{', which means we're starting another
4422                expression; or a '}', which means we're at the end of this
4423                f-string (for a nested format_spec). */
4424             s--;
4425             break;
4426         }
4427     }
4428     *str = s;
4429     assert(s <= end);
4430     assert(s == end || *s == '{' || *s == '}');
4431 done:
4432     if (literal_start != s) {
4433         if (raw)
4434             *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
4435                                                     s - literal_start,
4436                                                     NULL, NULL);
4437         else
4438             *literal = decode_unicode_with_escapes(c, n, literal_start,
4439                                                    s - literal_start);
4440         if (!*literal)
4441             return -1;
4442     }
4443     return result;
4444 }
4445 
4446 /* Forward declaration because parsing is recursive. */
4447 static expr_ty
4448 fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
4449               struct compiling *c, const node *n);
4450 
4451 /* Parse the f-string at *str, ending at end.  We know *str starts an
4452    expression (so it must be a '{'). Returns the FormattedValue node,
4453    which includes the expression, conversion character, and
4454    format_spec expression.
4455 
4456    Note that I don't do a perfect job here: I don't make sure that a
4457    closing brace doesn't match an opening paren, for example. It
4458    doesn't need to error on all invalid expressions, just correctly
4459    find the end of all valid ones. Any errors inside the expression
4460    will be caught when we parse it later. */
4461 static int
fstring_find_expr(const char ** str,const char * end,int raw,int recurse_lvl,expr_ty * expression,struct compiling * c,const node * n)4462 fstring_find_expr(const char **str, const char *end, int raw, int recurse_lvl,
4463                   expr_ty *expression, struct compiling *c, const node *n)
4464 {
4465     /* Return -1 on error, else 0. */
4466 
4467     const char *expr_start;
4468     const char *expr_end;
4469     expr_ty simple_expression;
4470     expr_ty format_spec = NULL; /* Optional format specifier. */
4471     int conversion = -1; /* The conversion char. -1 if not specified. */
4472 
4473     /* 0 if we're not in a string, else the quote char we're trying to
4474        match (single or double quote). */
4475     char quote_char = 0;
4476 
4477     /* If we're inside a string, 1=normal, 3=triple-quoted. */
4478     int string_type = 0;
4479 
4480     /* Keep track of nesting level for braces/parens/brackets in
4481        expressions. */
4482     Py_ssize_t nested_depth = 0;
4483 
4484     /* Can only nest one level deep. */
4485     if (recurse_lvl >= 2) {
4486         ast_error(c, n, "f-string: expressions nested too deeply");
4487         return -1;
4488     }
4489 
4490     /* The first char must be a left brace, or we wouldn't have gotten
4491        here. Skip over it. */
4492     assert(**str == '{');
4493     *str += 1;
4494 
4495     expr_start = *str;
4496     for (; *str < end; (*str)++) {
4497         char ch;
4498 
4499         /* Loop invariants. */
4500         assert(nested_depth >= 0);
4501         assert(*str >= expr_start && *str < end);
4502         if (quote_char)
4503             assert(string_type == 1 || string_type == 3);
4504         else
4505             assert(string_type == 0);
4506 
4507         ch = **str;
4508         /* Nowhere inside an expression is a backslash allowed. */
4509         if (ch == '\\') {
4510             /* Error: can't include a backslash character, inside
4511                parens or strings or not. */
4512             ast_error(c, n, "f-string expression part "
4513                             "cannot include a backslash");
4514             return -1;
4515         }
4516         if (quote_char) {
4517             /* We're inside a string. See if we're at the end. */
4518             /* This code needs to implement the same non-error logic
4519                as tok_get from tokenizer.c, at the letter_quote
4520                label. To actually share that code would be a
4521                nightmare. But, it's unlikely to change and is small,
4522                so duplicate it here. Note we don't need to catch all
4523                of the errors, since they'll be caught when parsing the
4524                expression. We just need to match the non-error
4525                cases. Thus we can ignore \n in single-quoted strings,
4526                for example. Or non-terminated strings. */
4527             if (ch == quote_char) {
4528                 /* Does this match the string_type (single or triple
4529                    quoted)? */
4530                 if (string_type == 3) {
4531                     if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
4532                         /* We're at the end of a triple quoted string. */
4533                         *str += 2;
4534                         string_type = 0;
4535                         quote_char = 0;
4536                         continue;
4537                     }
4538                 } else {
4539                     /* We're at the end of a normal string. */
4540                     quote_char = 0;
4541                     string_type = 0;
4542                     continue;
4543                 }
4544             }
4545         } else if (ch == '\'' || ch == '"') {
4546             /* Is this a triple quoted string? */
4547             if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
4548                 string_type = 3;
4549                 *str += 2;
4550             } else {
4551                 /* Start of a normal string. */
4552                 string_type = 1;
4553             }
4554             /* Start looking for the end of the string. */
4555             quote_char = ch;
4556         } else if (ch == '[' || ch == '{' || ch == '(') {
4557             nested_depth++;
4558         } else if (nested_depth != 0 &&
4559                    (ch == ']' || ch == '}' || ch == ')')) {
4560             nested_depth--;
4561         } else if (ch == '#') {
4562             /* Error: can't include a comment character, inside parens
4563                or not. */
4564             ast_error(c, n, "f-string expression part cannot include '#'");
4565             return -1;
4566         } else if (nested_depth == 0 &&
4567                    (ch == '!' || ch == ':' || ch == '}')) {
4568             /* First, test for the special case of "!=". Since '=' is
4569                not an allowed conversion character, nothing is lost in
4570                this test. */
4571             if (ch == '!' && *str+1 < end && *(*str+1) == '=') {
4572                 /* This isn't a conversion character, just continue. */
4573                 continue;
4574             }
4575             /* Normal way out of this loop. */
4576             break;
4577         } else {
4578             /* Just consume this char and loop around. */
4579         }
4580     }
4581     expr_end = *str;
4582     /* If we leave this loop in a string or with mismatched parens, we
4583        don't care. We'll get a syntax error when compiling the
4584        expression. But, we can produce a better error message, so
4585        let's just do that.*/
4586     if (quote_char) {
4587         ast_error(c, n, "f-string: unterminated string");
4588         return -1;
4589     }
4590     if (nested_depth) {
4591         ast_error(c, n, "f-string: mismatched '(', '{', or '['");
4592         return -1;
4593     }
4594 
4595     if (*str >= end)
4596         goto unexpected_end_of_string;
4597 
4598     /* Compile the expression as soon as possible, so we show errors
4599        related to the expression before errors related to the
4600        conversion or format_spec. */
4601     simple_expression = fstring_compile_expr(expr_start, expr_end, c, n);
4602     if (!simple_expression)
4603         return -1;
4604 
4605     /* Check for a conversion char, if present. */
4606     if (**str == '!') {
4607         *str += 1;
4608         if (*str >= end)
4609             goto unexpected_end_of_string;
4610 
4611         conversion = **str;
4612         *str += 1;
4613 
4614         /* Validate the conversion. */
4615         if (!(conversion == 's' || conversion == 'r'
4616               || conversion == 'a')) {
4617             ast_error(c, n, "f-string: invalid conversion character: "
4618                             "expected 's', 'r', or 'a'");
4619             return -1;
4620         }
4621     }
4622 
4623     /* Check for the format spec, if present. */
4624     if (*str >= end)
4625         goto unexpected_end_of_string;
4626     if (**str == ':') {
4627         *str += 1;
4628         if (*str >= end)
4629             goto unexpected_end_of_string;
4630 
4631         /* Parse the format spec. */
4632         format_spec = fstring_parse(str, end, raw, recurse_lvl+1, c, n);
4633         if (!format_spec)
4634             return -1;
4635     }
4636 
4637     if (*str >= end || **str != '}')
4638         goto unexpected_end_of_string;
4639 
4640     /* We're at a right brace. Consume it. */
4641     assert(*str < end);
4642     assert(**str == '}');
4643     *str += 1;
4644 
4645     /* And now create the FormattedValue node that represents this
4646        entire expression with the conversion and format spec. */
4647     *expression = FormattedValue(simple_expression, conversion,
4648                                  format_spec, LINENO(n), n->n_col_offset,
4649                                  c->c_arena);
4650     if (!*expression)
4651         return -1;
4652 
4653     return 0;
4654 
4655 unexpected_end_of_string:
4656     ast_error(c, n, "f-string: expecting '}'");
4657     return -1;
4658 }
4659 
4660 /* Return -1 on error.
4661 
4662    Return 0 if we have a literal (possible zero length) and an
4663    expression (zero length if at the end of the string.
4664 
4665    Return 1 if we have a literal, but no expression, and we want the
4666    caller to call us again. This is used to deal with doubled
4667    braces.
4668 
4669    When called multiple times on the string 'a{{b{0}c', this function
4670    will return:
4671 
4672    1. the literal 'a{' with no expression, and a return value
4673       of 1. Despite the fact that there's no expression, the return
4674       value of 1 means we're not finished yet.
4675 
4676    2. the literal 'b' and the expression '0', with a return value of
4677       0. The fact that there's an expression means we're not finished.
4678 
4679    3. literal 'c' with no expression and a return value of 0. The
4680       combination of the return value of 0 with no expression means
4681       we're finished.
4682 */
4683 static int
fstring_find_literal_and_expr(const char ** str,const char * end,int raw,int recurse_lvl,PyObject ** literal,expr_ty * expression,struct compiling * c,const node * n)4684 fstring_find_literal_and_expr(const char **str, const char *end, int raw,
4685                               int recurse_lvl, PyObject **literal,
4686                               expr_ty *expression,
4687                               struct compiling *c, const node *n)
4688 {
4689     int result;
4690 
4691     assert(*literal == NULL && *expression == NULL);
4692 
4693     /* Get any literal string. */
4694     result = fstring_find_literal(str, end, raw, literal, recurse_lvl, c, n);
4695     if (result < 0)
4696         goto error;
4697 
4698     assert(result == 0 || result == 1);
4699 
4700     if (result == 1)
4701         /* We have a literal, but don't look at the expression. */
4702         return 1;
4703 
4704     if (*str >= end || **str == '}')
4705         /* We're at the end of the string or the end of a nested
4706            f-string: no expression. The top-level error case where we
4707            expect to be at the end of the string but we're at a '}' is
4708            handled later. */
4709         return 0;
4710 
4711     /* We must now be the start of an expression, on a '{'. */
4712     assert(**str == '{');
4713 
4714     if (fstring_find_expr(str, end, raw, recurse_lvl, expression, c, n) < 0)
4715         goto error;
4716 
4717     return 0;
4718 
4719 error:
4720     Py_CLEAR(*literal);
4721     return -1;
4722 }
4723 
4724 #define EXPRLIST_N_CACHED  64
4725 
4726 typedef struct {
4727     /* Incrementally build an array of expr_ty, so be used in an
4728        asdl_seq. Cache some small but reasonably sized number of
4729        expr_ty's, and then after that start dynamically allocating,
4730        doubling the number allocated each time. Note that the f-string
4731        f'{0}a{1}' contains 3 expr_ty's: 2 FormattedValue's, and one
4732        Str for the literal 'a'. So you add expr_ty's about twice as
4733        fast as you add exressions in an f-string. */
4734 
4735     Py_ssize_t allocated;  /* Number we've allocated. */
4736     Py_ssize_t size;       /* Number we've used. */
4737     expr_ty    *p;         /* Pointer to the memory we're actually
4738                               using. Will point to 'data' until we
4739                               start dynamically allocating. */
4740     expr_ty    data[EXPRLIST_N_CACHED];
4741 } ExprList;
4742 
4743 #ifdef NDEBUG
4744 #define ExprList_check_invariants(l)
4745 #else
4746 static void
ExprList_check_invariants(ExprList * l)4747 ExprList_check_invariants(ExprList *l)
4748 {
4749     /* Check our invariants. Make sure this object is "live", and
4750        hasn't been deallocated. */
4751     assert(l->size >= 0);
4752     assert(l->p != NULL);
4753     if (l->size <= EXPRLIST_N_CACHED)
4754         assert(l->data == l->p);
4755 }
4756 #endif
4757 
4758 static void
ExprList_Init(ExprList * l)4759 ExprList_Init(ExprList *l)
4760 {
4761     l->allocated = EXPRLIST_N_CACHED;
4762     l->size = 0;
4763 
4764     /* Until we start allocating dynamically, p points to data. */
4765     l->p = l->data;
4766 
4767     ExprList_check_invariants(l);
4768 }
4769 
4770 static int
ExprList_Append(ExprList * l,expr_ty exp)4771 ExprList_Append(ExprList *l, expr_ty exp)
4772 {
4773     ExprList_check_invariants(l);
4774     if (l->size >= l->allocated) {
4775         /* We need to alloc (or realloc) the memory. */
4776         Py_ssize_t new_size = l->allocated * 2;
4777 
4778         /* See if we've ever allocated anything dynamically. */
4779         if (l->p == l->data) {
4780             Py_ssize_t i;
4781             /* We're still using the cached data. Switch to
4782                alloc-ing. */
4783             l->p = PyMem_RawMalloc(sizeof(expr_ty) * new_size);
4784             if (!l->p)
4785                 return -1;
4786             /* Copy the cached data into the new buffer. */
4787             for (i = 0; i < l->size; i++)
4788                 l->p[i] = l->data[i];
4789         } else {
4790             /* Just realloc. */
4791             expr_ty *tmp = PyMem_RawRealloc(l->p, sizeof(expr_ty) * new_size);
4792             if (!tmp) {
4793                 PyMem_RawFree(l->p);
4794                 l->p = NULL;
4795                 return -1;
4796             }
4797             l->p = tmp;
4798         }
4799 
4800         l->allocated = new_size;
4801         assert(l->allocated == 2 * l->size);
4802     }
4803 
4804     l->p[l->size++] = exp;
4805 
4806     ExprList_check_invariants(l);
4807     return 0;
4808 }
4809 
4810 static void
ExprList_Dealloc(ExprList * l)4811 ExprList_Dealloc(ExprList *l)
4812 {
4813     ExprList_check_invariants(l);
4814 
4815     /* If there's been an error, or we've never dynamically allocated,
4816        do nothing. */
4817     if (!l->p || l->p == l->data) {
4818         /* Do nothing. */
4819     } else {
4820         /* We have dynamically allocated. Free the memory. */
4821         PyMem_RawFree(l->p);
4822     }
4823     l->p = NULL;
4824     l->size = -1;
4825 }
4826 
4827 static asdl_seq *
ExprList_Finish(ExprList * l,PyArena * arena)4828 ExprList_Finish(ExprList *l, PyArena *arena)
4829 {
4830     asdl_seq *seq;
4831 
4832     ExprList_check_invariants(l);
4833 
4834     /* Allocate the asdl_seq and copy the expressions in to it. */
4835     seq = _Py_asdl_seq_new(l->size, arena);
4836     if (seq) {
4837         Py_ssize_t i;
4838         for (i = 0; i < l->size; i++)
4839             asdl_seq_SET(seq, i, l->p[i]);
4840     }
4841     ExprList_Dealloc(l);
4842     return seq;
4843 }
4844 
4845 /* The FstringParser is designed to add a mix of strings and
4846    f-strings, and concat them together as needed. Ultimately, it
4847    generates an expr_ty. */
4848 typedef struct {
4849     PyObject *last_str;
4850     ExprList expr_list;
4851     int fmode;
4852 } FstringParser;
4853 
4854 #ifdef NDEBUG
4855 #define FstringParser_check_invariants(state)
4856 #else
4857 static void
FstringParser_check_invariants(FstringParser * state)4858 FstringParser_check_invariants(FstringParser *state)
4859 {
4860     if (state->last_str)
4861         assert(PyUnicode_CheckExact(state->last_str));
4862     ExprList_check_invariants(&state->expr_list);
4863 }
4864 #endif
4865 
4866 static void
FstringParser_Init(FstringParser * state)4867 FstringParser_Init(FstringParser *state)
4868 {
4869     state->last_str = NULL;
4870     state->fmode = 0;
4871     ExprList_Init(&state->expr_list);
4872     FstringParser_check_invariants(state);
4873 }
4874 
4875 static void
FstringParser_Dealloc(FstringParser * state)4876 FstringParser_Dealloc(FstringParser *state)
4877 {
4878     FstringParser_check_invariants(state);
4879 
4880     Py_XDECREF(state->last_str);
4881     ExprList_Dealloc(&state->expr_list);
4882 }
4883 
4884 /* Make a Str node, but decref the PyUnicode object being added. */
4885 static expr_ty
make_str_node_and_del(PyObject ** str,struct compiling * c,const node * n)4886 make_str_node_and_del(PyObject **str, struct compiling *c, const node* n)
4887 {
4888     PyObject *s = *str;
4889     *str = NULL;
4890     assert(PyUnicode_CheckExact(s));
4891     if (PyArena_AddPyObject(c->c_arena, s) < 0) {
4892         Py_DECREF(s);
4893         return NULL;
4894     }
4895     return Str(s, LINENO(n), n->n_col_offset, c->c_arena);
4896 }
4897 
4898 /* Add a non-f-string (that is, a regular literal string). str is
4899    decref'd. */
4900 static int
FstringParser_ConcatAndDel(FstringParser * state,PyObject * str)4901 FstringParser_ConcatAndDel(FstringParser *state, PyObject *str)
4902 {
4903     FstringParser_check_invariants(state);
4904 
4905     assert(PyUnicode_CheckExact(str));
4906 
4907     if (PyUnicode_GET_LENGTH(str) == 0) {
4908         Py_DECREF(str);
4909         return 0;
4910     }
4911 
4912     if (!state->last_str) {
4913         /* We didn't have a string before, so just remember this one. */
4914         state->last_str = str;
4915     } else {
4916         /* Concatenate this with the previous string. */
4917         PyUnicode_AppendAndDel(&state->last_str, str);
4918         if (!state->last_str)
4919             return -1;
4920     }
4921     FstringParser_check_invariants(state);
4922     return 0;
4923 }
4924 
4925 /* Parse an f-string. The f-string is in *str to end, with no
4926    'f' or quotes. */
4927 static int
FstringParser_ConcatFstring(FstringParser * state,const char ** str,const char * end,int raw,int recurse_lvl,struct compiling * c,const node * n)4928 FstringParser_ConcatFstring(FstringParser *state, const char **str,
4929                             const char *end, int raw, int recurse_lvl,
4930                             struct compiling *c, const node *n)
4931 {
4932     FstringParser_check_invariants(state);
4933     state->fmode = 1;
4934 
4935     /* Parse the f-string. */
4936     while (1) {
4937         PyObject *literal = NULL;
4938         expr_ty expression = NULL;
4939 
4940         /* If there's a zero length literal in front of the
4941            expression, literal will be NULL. If we're at the end of
4942            the f-string, expression will be NULL (unless result == 1,
4943            see below). */
4944         int result = fstring_find_literal_and_expr(str, end, raw, recurse_lvl,
4945                                                    &literal, &expression,
4946                                                    c, n);
4947         if (result < 0)
4948             return -1;
4949 
4950         /* Add the literal, if any. */
4951         if (!literal) {
4952             /* Do nothing. Just leave last_str alone (and possibly
4953                NULL). */
4954         } else if (!state->last_str) {
4955             /*  Note that the literal can be zero length, if the
4956                 input string is "\\\n" or "\\\r", among others. */
4957             state->last_str = literal;
4958             literal = NULL;
4959         } else {
4960             /* We have a literal, concatenate it. */
4961             assert(PyUnicode_GET_LENGTH(literal) != 0);
4962             if (FstringParser_ConcatAndDel(state, literal) < 0)
4963                 return -1;
4964             literal = NULL;
4965         }
4966 
4967         /* We've dealt with the literal now. It can't be leaked on further
4968            errors. */
4969         assert(literal == NULL);
4970 
4971         /* See if we should just loop around to get the next literal
4972            and expression, while ignoring the expression this
4973            time. This is used for un-doubling braces, as an
4974            optimization. */
4975         if (result == 1)
4976             continue;
4977 
4978         if (!expression)
4979             /* We're done with this f-string. */
4980             break;
4981 
4982         /* We know we have an expression. Convert any existing string
4983            to a Str node. */
4984         if (!state->last_str) {
4985             /* Do nothing. No previous literal. */
4986         } else {
4987             /* Convert the existing last_str literal to a Str node. */
4988             expr_ty str = make_str_node_and_del(&state->last_str, c, n);
4989             if (!str || ExprList_Append(&state->expr_list, str) < 0)
4990                 return -1;
4991         }
4992 
4993         if (ExprList_Append(&state->expr_list, expression) < 0)
4994             return -1;
4995     }
4996 
4997     /* If recurse_lvl is zero, then we must be at the end of the
4998        string. Otherwise, we must be at a right brace. */
4999 
5000     if (recurse_lvl == 0 && *str < end-1) {
5001         ast_error(c, n, "f-string: unexpected end of string");
5002         return -1;
5003     }
5004     if (recurse_lvl != 0 && **str != '}') {
5005         ast_error(c, n, "f-string: expecting '}'");
5006         return -1;
5007     }
5008 
5009     FstringParser_check_invariants(state);
5010     return 0;
5011 }
5012 
5013 /* Convert the partial state reflected in last_str and expr_list to an
5014    expr_ty. The expr_ty can be a Str, or a JoinedStr. */
5015 static expr_ty
FstringParser_Finish(FstringParser * state,struct compiling * c,const node * n)5016 FstringParser_Finish(FstringParser *state, struct compiling *c,
5017                      const node *n)
5018 {
5019     asdl_seq *seq;
5020 
5021     FstringParser_check_invariants(state);
5022 
5023     /* If we're just a constant string with no expressions, return
5024        that. */
5025     if (!state->fmode) {
5026         assert(!state->expr_list.size);
5027         if (!state->last_str) {
5028             /* Create a zero length string. */
5029             state->last_str = PyUnicode_FromStringAndSize(NULL, 0);
5030             if (!state->last_str)
5031                 goto error;
5032         }
5033         return make_str_node_and_del(&state->last_str, c, n);
5034     }
5035 
5036     /* Create a Str node out of last_str, if needed. It will be the
5037        last node in our expression list. */
5038     if (state->last_str) {
5039         expr_ty str = make_str_node_and_del(&state->last_str, c, n);
5040         if (!str || ExprList_Append(&state->expr_list, str) < 0)
5041             goto error;
5042     }
5043     /* This has already been freed. */
5044     assert(state->last_str == NULL);
5045 
5046     seq = ExprList_Finish(&state->expr_list, c->c_arena);
5047     if (!seq)
5048         goto error;
5049 
5050     return JoinedStr(seq, LINENO(n), n->n_col_offset, c->c_arena);
5051 
5052 error:
5053     FstringParser_Dealloc(state);
5054     return NULL;
5055 }
5056 
5057 /* Given an f-string (with no 'f' or quotes) that's in *str and ends
5058    at end, parse it into an expr_ty.  Return NULL on error.  Adjust
5059    str to point past the parsed portion. */
5060 static expr_ty
fstring_parse(const char ** str,const char * end,int raw,int recurse_lvl,struct compiling * c,const node * n)5061 fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
5062               struct compiling *c, const node *n)
5063 {
5064     FstringParser state;
5065 
5066     FstringParser_Init(&state);
5067     if (FstringParser_ConcatFstring(&state, str, end, raw, recurse_lvl,
5068                                     c, n) < 0) {
5069         FstringParser_Dealloc(&state);
5070         return NULL;
5071     }
5072 
5073     return FstringParser_Finish(&state, c, n);
5074 }
5075 
5076 /* n is a Python string literal, including the bracketing quote
5077    characters, and r, b, u, &/or f prefixes (if any), and embedded
5078    escape sequences (if any). parsestr parses it, and sets *result to
5079    decoded Python string object.  If the string is an f-string, set
5080    *fstr and *fstrlen to the unparsed string object.  Return 0 if no
5081    errors occurred.
5082 */
5083 static int
parsestr(struct compiling * c,const node * n,int * bytesmode,int * rawmode,PyObject ** result,const char ** fstr,Py_ssize_t * fstrlen)5084 parsestr(struct compiling *c, const node *n, int *bytesmode, int *rawmode,
5085          PyObject **result, const char **fstr, Py_ssize_t *fstrlen)
5086 {
5087     size_t len;
5088     const char *s = STR(n);
5089     int quote = Py_CHARMASK(*s);
5090     int fmode = 0;
5091     *bytesmode = 0;
5092     *rawmode = 0;
5093     *result = NULL;
5094     *fstr = NULL;
5095     if (Py_ISALPHA(quote)) {
5096         while (!*bytesmode || !*rawmode) {
5097             if (quote == 'b' || quote == 'B') {
5098                 quote = *++s;
5099                 *bytesmode = 1;
5100             }
5101             else if (quote == 'u' || quote == 'U') {
5102                 quote = *++s;
5103             }
5104             else if (quote == 'r' || quote == 'R') {
5105                 quote = *++s;
5106                 *rawmode = 1;
5107             }
5108             else if (quote == 'f' || quote == 'F') {
5109                 quote = *++s;
5110                 fmode = 1;
5111             }
5112             else {
5113                 break;
5114             }
5115         }
5116     }
5117     if (fmode && *bytesmode) {
5118         PyErr_BadInternalCall();
5119         return -1;
5120     }
5121     if (quote != '\'' && quote != '\"') {
5122         PyErr_BadInternalCall();
5123         return -1;
5124     }
5125     /* Skip the leading quote char. */
5126     s++;
5127     len = strlen(s);
5128     if (len > INT_MAX) {
5129         PyErr_SetString(PyExc_OverflowError,
5130                         "string to parse is too long");
5131         return -1;
5132     }
5133     if (s[--len] != quote) {
5134         /* Last quote char must match the first. */
5135         PyErr_BadInternalCall();
5136         return -1;
5137     }
5138     if (len >= 4 && s[0] == quote && s[1] == quote) {
5139         /* A triple quoted string. We've already skipped one quote at
5140            the start and one at the end of the string. Now skip the
5141            two at the start. */
5142         s += 2;
5143         len -= 2;
5144         /* And check that the last two match. */
5145         if (s[--len] != quote || s[--len] != quote) {
5146             PyErr_BadInternalCall();
5147             return -1;
5148         }
5149     }
5150 
5151     if (fmode) {
5152         /* Just return the bytes. The caller will parse the resulting
5153            string. */
5154         *fstr = s;
5155         *fstrlen = len;
5156         return 0;
5157     }
5158 
5159     /* Not an f-string. */
5160     /* Avoid invoking escape decoding routines if possible. */
5161     *rawmode = *rawmode || strchr(s, '\\') == NULL;
5162     if (*bytesmode) {
5163         /* Disallow non-ASCII characters. */
5164         const char *ch;
5165         for (ch = s; *ch; ch++) {
5166             if (Py_CHARMASK(*ch) >= 0x80) {
5167                 ast_error(c, n, "bytes can only contain ASCII "
5168                           "literal characters.");
5169                 return -1;
5170             }
5171         }
5172         if (*rawmode)
5173             *result = PyBytes_FromStringAndSize(s, len);
5174         else
5175             *result = decode_bytes_with_escapes(c, n, s, len);
5176     } else {
5177         if (*rawmode)
5178             *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
5179         else
5180             *result = decode_unicode_with_escapes(c, n, s, len);
5181     }
5182     return *result == NULL ? -1 : 0;
5183 }
5184 
5185 /* Accepts a STRING+ atom, and produces an expr_ty node. Run through
5186    each STRING atom, and process it as needed. For bytes, just
5187    concatenate them together, and the result will be a Bytes node. For
5188    normal strings and f-strings, concatenate them together. The result
5189    will be a Str node if there were no f-strings; a FormattedValue
5190    node if there's just an f-string (with no leading or trailing
5191    literals), or a JoinedStr node if there are multiple f-strings or
5192    any literals involved. */
5193 static expr_ty
parsestrplus(struct compiling * c,const node * n)5194 parsestrplus(struct compiling *c, const node *n)
5195 {
5196     int bytesmode = 0;
5197     PyObject *bytes_str = NULL;
5198     int i;
5199 
5200     FstringParser state;
5201     FstringParser_Init(&state);
5202 
5203     for (i = 0; i < NCH(n); i++) {
5204         int this_bytesmode;
5205         int this_rawmode;
5206         PyObject *s;
5207         const char *fstr;
5208         Py_ssize_t fstrlen = -1;  /* Silence a compiler warning. */
5209 
5210         REQ(CHILD(n, i), STRING);
5211         if (parsestr(c, CHILD(n, i), &this_bytesmode, &this_rawmode, &s,
5212                      &fstr, &fstrlen) != 0)
5213             goto error;
5214 
5215         /* Check that we're not mixing bytes with unicode. */
5216         if (i != 0 && bytesmode != this_bytesmode) {
5217             ast_error(c, n, "cannot mix bytes and nonbytes literals");
5218             /* s is NULL if the current string part is an f-string. */
5219             Py_XDECREF(s);
5220             goto error;
5221         }
5222         bytesmode = this_bytesmode;
5223 
5224         if (fstr != NULL) {
5225             int result;
5226             assert(s == NULL && !bytesmode);
5227             /* This is an f-string. Parse and concatenate it. */
5228             result = FstringParser_ConcatFstring(&state, &fstr, fstr+fstrlen,
5229                                                  this_rawmode, 0, c, n);
5230             if (result < 0)
5231                 goto error;
5232         } else {
5233             /* A string or byte string. */
5234             assert(s != NULL && fstr == NULL);
5235 
5236             assert(bytesmode ? PyBytes_CheckExact(s) :
5237                    PyUnicode_CheckExact(s));
5238 
5239             if (bytesmode) {
5240                 /* For bytes, concat as we go. */
5241                 if (i == 0) {
5242                     /* First time, just remember this value. */
5243                     bytes_str = s;
5244                 } else {
5245                     PyBytes_ConcatAndDel(&bytes_str, s);
5246                     if (!bytes_str)
5247                         goto error;
5248                 }
5249             } else {
5250                 /* This is a regular string. Concatenate it. */
5251                 if (FstringParser_ConcatAndDel(&state, s) < 0)
5252                     goto error;
5253             }
5254         }
5255     }
5256     if (bytesmode) {
5257         /* Just return the bytes object and we're done. */
5258         if (PyArena_AddPyObject(c->c_arena, bytes_str) < 0)
5259             goto error;
5260         return Bytes(bytes_str, LINENO(n), n->n_col_offset, c->c_arena);
5261     }
5262 
5263     /* We're not a bytes string, bytes_str should never have been set. */
5264     assert(bytes_str == NULL);
5265 
5266     return FstringParser_Finish(&state, c, n);
5267 
5268 error:
5269     Py_XDECREF(bytes_str);
5270     FstringParser_Dealloc(&state);
5271     return NULL;
5272 }
5273