1 /*
2 * This file includes functions to transform a concrete syntax tree (CST) to
3 * an abstract syntax tree (AST). The main function is PyAST_FromNode().
4 *
5 */
6 #include "Python.h"
7 #include "Python-ast.h"
8 #include "node.h"
9 #include "ast.h"
10 #include "token.h"
11 #include "pythonrun.h"
12
13 #include <assert.h>
14 #include <stdbool.h>
15
16 static int validate_stmts(asdl_seq *);
17 static int validate_exprs(asdl_seq *, expr_context_ty, int);
18 static int validate_nonempty_seq(asdl_seq *, const char *, const char *);
19 static int validate_stmt(stmt_ty);
20 static int validate_expr(expr_ty, expr_context_ty);
21
22 static int
validate_comprehension(asdl_seq * gens)23 validate_comprehension(asdl_seq *gens)
24 {
25 int i;
26 if (!asdl_seq_LEN(gens)) {
27 PyErr_SetString(PyExc_ValueError, "comprehension with no generators");
28 return 0;
29 }
30 for (i = 0; i < asdl_seq_LEN(gens); i++) {
31 comprehension_ty comp = asdl_seq_GET(gens, i);
32 if (!validate_expr(comp->target, Store) ||
33 !validate_expr(comp->iter, Load) ||
34 !validate_exprs(comp->ifs, Load, 0))
35 return 0;
36 }
37 return 1;
38 }
39
40 static int
validate_slice(slice_ty slice)41 validate_slice(slice_ty slice)
42 {
43 switch (slice->kind) {
44 case Slice_kind:
45 return (!slice->v.Slice.lower || validate_expr(slice->v.Slice.lower, Load)) &&
46 (!slice->v.Slice.upper || validate_expr(slice->v.Slice.upper, Load)) &&
47 (!slice->v.Slice.step || validate_expr(slice->v.Slice.step, Load));
48 case ExtSlice_kind: {
49 int i;
50 if (!validate_nonempty_seq(slice->v.ExtSlice.dims, "dims", "ExtSlice"))
51 return 0;
52 for (i = 0; i < asdl_seq_LEN(slice->v.ExtSlice.dims); i++)
53 if (!validate_slice(asdl_seq_GET(slice->v.ExtSlice.dims, i)))
54 return 0;
55 return 1;
56 }
57 case Index_kind:
58 return validate_expr(slice->v.Index.value, Load);
59 default:
60 PyErr_SetString(PyExc_SystemError, "unknown slice node");
61 return 0;
62 }
63 }
64
65 static int
validate_keywords(asdl_seq * keywords)66 validate_keywords(asdl_seq *keywords)
67 {
68 int i;
69 for (i = 0; i < asdl_seq_LEN(keywords); i++)
70 if (!validate_expr(((keyword_ty)asdl_seq_GET(keywords, i))->value, Load))
71 return 0;
72 return 1;
73 }
74
75 static int
validate_args(asdl_seq * args)76 validate_args(asdl_seq *args)
77 {
78 int i;
79 for (i = 0; i < asdl_seq_LEN(args); i++) {
80 arg_ty arg = asdl_seq_GET(args, i);
81 if (arg->annotation && !validate_expr(arg->annotation, Load))
82 return 0;
83 }
84 return 1;
85 }
86
87 static const char *
expr_context_name(expr_context_ty ctx)88 expr_context_name(expr_context_ty ctx)
89 {
90 switch (ctx) {
91 case Load:
92 return "Load";
93 case Store:
94 return "Store";
95 case Del:
96 return "Del";
97 case AugLoad:
98 return "AugLoad";
99 case AugStore:
100 return "AugStore";
101 case Param:
102 return "Param";
103 default:
104 Py_UNREACHABLE();
105 }
106 }
107
108 static int
validate_arguments(arguments_ty args)109 validate_arguments(arguments_ty args)
110 {
111 if (!validate_args(args->args))
112 return 0;
113 if (args->vararg && args->vararg->annotation
114 && !validate_expr(args->vararg->annotation, Load)) {
115 return 0;
116 }
117 if (!validate_args(args->kwonlyargs))
118 return 0;
119 if (args->kwarg && args->kwarg->annotation
120 && !validate_expr(args->kwarg->annotation, Load)) {
121 return 0;
122 }
123 if (asdl_seq_LEN(args->defaults) > asdl_seq_LEN(args->args)) {
124 PyErr_SetString(PyExc_ValueError, "more positional defaults than args on arguments");
125 return 0;
126 }
127 if (asdl_seq_LEN(args->kw_defaults) != asdl_seq_LEN(args->kwonlyargs)) {
128 PyErr_SetString(PyExc_ValueError, "length of kwonlyargs is not the same as "
129 "kw_defaults on arguments");
130 return 0;
131 }
132 return validate_exprs(args->defaults, Load, 0) && validate_exprs(args->kw_defaults, Load, 1);
133 }
134
135 static int
validate_constant(PyObject * value)136 validate_constant(PyObject *value)
137 {
138 if (value == Py_None || value == Py_Ellipsis)
139 return 1;
140
141 if (PyLong_CheckExact(value)
142 || PyFloat_CheckExact(value)
143 || PyComplex_CheckExact(value)
144 || PyBool_Check(value)
145 || PyUnicode_CheckExact(value)
146 || PyBytes_CheckExact(value))
147 return 1;
148
149 if (PyTuple_CheckExact(value) || PyFrozenSet_CheckExact(value)) {
150 PyObject *it;
151
152 it = PyObject_GetIter(value);
153 if (it == NULL)
154 return 0;
155
156 while (1) {
157 PyObject *item = PyIter_Next(it);
158 if (item == NULL) {
159 if (PyErr_Occurred()) {
160 Py_DECREF(it);
161 return 0;
162 }
163 break;
164 }
165
166 if (!validate_constant(item)) {
167 Py_DECREF(it);
168 Py_DECREF(item);
169 return 0;
170 }
171 Py_DECREF(item);
172 }
173
174 Py_DECREF(it);
175 return 1;
176 }
177
178 return 0;
179 }
180
181 static int
validate_expr(expr_ty exp,expr_context_ty ctx)182 validate_expr(expr_ty exp, expr_context_ty ctx)
183 {
184 int check_ctx = 1;
185 expr_context_ty actual_ctx;
186
187 /* First check expression context. */
188 switch (exp->kind) {
189 case Attribute_kind:
190 actual_ctx = exp->v.Attribute.ctx;
191 break;
192 case Subscript_kind:
193 actual_ctx = exp->v.Subscript.ctx;
194 break;
195 case Starred_kind:
196 actual_ctx = exp->v.Starred.ctx;
197 break;
198 case Name_kind:
199 actual_ctx = exp->v.Name.ctx;
200 break;
201 case List_kind:
202 actual_ctx = exp->v.List.ctx;
203 break;
204 case Tuple_kind:
205 actual_ctx = exp->v.Tuple.ctx;
206 break;
207 default:
208 if (ctx != Load) {
209 PyErr_Format(PyExc_ValueError, "expression which can't be "
210 "assigned to in %s context", expr_context_name(ctx));
211 return 0;
212 }
213 check_ctx = 0;
214 /* set actual_ctx to prevent gcc warning */
215 actual_ctx = 0;
216 }
217 if (check_ctx && actual_ctx != ctx) {
218 PyErr_Format(PyExc_ValueError, "expression must have %s context but has %s instead",
219 expr_context_name(ctx), expr_context_name(actual_ctx));
220 return 0;
221 }
222
223 /* Now validate expression. */
224 switch (exp->kind) {
225 case BoolOp_kind:
226 if (asdl_seq_LEN(exp->v.BoolOp.values) < 2) {
227 PyErr_SetString(PyExc_ValueError, "BoolOp with less than 2 values");
228 return 0;
229 }
230 return validate_exprs(exp->v.BoolOp.values, Load, 0);
231 case BinOp_kind:
232 return validate_expr(exp->v.BinOp.left, Load) &&
233 validate_expr(exp->v.BinOp.right, Load);
234 case UnaryOp_kind:
235 return validate_expr(exp->v.UnaryOp.operand, Load);
236 case Lambda_kind:
237 return validate_arguments(exp->v.Lambda.args) &&
238 validate_expr(exp->v.Lambda.body, Load);
239 case IfExp_kind:
240 return validate_expr(exp->v.IfExp.test, Load) &&
241 validate_expr(exp->v.IfExp.body, Load) &&
242 validate_expr(exp->v.IfExp.orelse, Load);
243 case Dict_kind:
244 if (asdl_seq_LEN(exp->v.Dict.keys) != asdl_seq_LEN(exp->v.Dict.values)) {
245 PyErr_SetString(PyExc_ValueError,
246 "Dict doesn't have the same number of keys as values");
247 return 0;
248 }
249 /* null_ok=1 for keys expressions to allow dict unpacking to work in
250 dict literals, i.e. ``{**{a:b}}`` */
251 return validate_exprs(exp->v.Dict.keys, Load, /*null_ok=*/ 1) &&
252 validate_exprs(exp->v.Dict.values, Load, /*null_ok=*/ 0);
253 case Set_kind:
254 return validate_exprs(exp->v.Set.elts, Load, 0);
255 #define COMP(NAME) \
256 case NAME ## _kind: \
257 return validate_comprehension(exp->v.NAME.generators) && \
258 validate_expr(exp->v.NAME.elt, Load);
259 COMP(ListComp)
260 COMP(SetComp)
261 COMP(GeneratorExp)
262 #undef COMP
263 case DictComp_kind:
264 return validate_comprehension(exp->v.DictComp.generators) &&
265 validate_expr(exp->v.DictComp.key, Load) &&
266 validate_expr(exp->v.DictComp.value, Load);
267 case Yield_kind:
268 return !exp->v.Yield.value || validate_expr(exp->v.Yield.value, Load);
269 case YieldFrom_kind:
270 return validate_expr(exp->v.YieldFrom.value, Load);
271 case Await_kind:
272 return validate_expr(exp->v.Await.value, Load);
273 case Compare_kind:
274 if (!asdl_seq_LEN(exp->v.Compare.comparators)) {
275 PyErr_SetString(PyExc_ValueError, "Compare with no comparators");
276 return 0;
277 }
278 if (asdl_seq_LEN(exp->v.Compare.comparators) !=
279 asdl_seq_LEN(exp->v.Compare.ops)) {
280 PyErr_SetString(PyExc_ValueError, "Compare has a different number "
281 "of comparators and operands");
282 return 0;
283 }
284 return validate_exprs(exp->v.Compare.comparators, Load, 0) &&
285 validate_expr(exp->v.Compare.left, Load);
286 case Call_kind:
287 return validate_expr(exp->v.Call.func, Load) &&
288 validate_exprs(exp->v.Call.args, Load, 0) &&
289 validate_keywords(exp->v.Call.keywords);
290 case Constant_kind:
291 if (!validate_constant(exp->v.Constant.value)) {
292 PyErr_Format(PyExc_TypeError,
293 "got an invalid type in Constant: %s",
294 Py_TYPE(exp->v.Constant.value)->tp_name);
295 return 0;
296 }
297 return 1;
298 case Num_kind: {
299 PyObject *n = exp->v.Num.n;
300 if (!PyLong_CheckExact(n) && !PyFloat_CheckExact(n) &&
301 !PyComplex_CheckExact(n)) {
302 PyErr_SetString(PyExc_TypeError, "non-numeric type in Num");
303 return 0;
304 }
305 return 1;
306 }
307 case Str_kind: {
308 PyObject *s = exp->v.Str.s;
309 if (!PyUnicode_CheckExact(s)) {
310 PyErr_SetString(PyExc_TypeError, "non-string type in Str");
311 return 0;
312 }
313 return 1;
314 }
315 case JoinedStr_kind:
316 return validate_exprs(exp->v.JoinedStr.values, Load, 0);
317 case FormattedValue_kind:
318 if (validate_expr(exp->v.FormattedValue.value, Load) == 0)
319 return 0;
320 if (exp->v.FormattedValue.format_spec)
321 return validate_expr(exp->v.FormattedValue.format_spec, Load);
322 return 1;
323 case Bytes_kind: {
324 PyObject *b = exp->v.Bytes.s;
325 if (!PyBytes_CheckExact(b)) {
326 PyErr_SetString(PyExc_TypeError, "non-bytes type in Bytes");
327 return 0;
328 }
329 return 1;
330 }
331 case Attribute_kind:
332 return validate_expr(exp->v.Attribute.value, Load);
333 case Subscript_kind:
334 return validate_slice(exp->v.Subscript.slice) &&
335 validate_expr(exp->v.Subscript.value, Load);
336 case Starred_kind:
337 return validate_expr(exp->v.Starred.value, ctx);
338 case List_kind:
339 return validate_exprs(exp->v.List.elts, ctx, 0);
340 case Tuple_kind:
341 return validate_exprs(exp->v.Tuple.elts, ctx, 0);
342 /* These last cases don't have any checking. */
343 case Name_kind:
344 case NameConstant_kind:
345 case Ellipsis_kind:
346 return 1;
347 default:
348 PyErr_SetString(PyExc_SystemError, "unexpected expression");
349 return 0;
350 }
351 }
352
353 static int
validate_nonempty_seq(asdl_seq * seq,const char * what,const char * owner)354 validate_nonempty_seq(asdl_seq *seq, const char *what, const char *owner)
355 {
356 if (asdl_seq_LEN(seq))
357 return 1;
358 PyErr_Format(PyExc_ValueError, "empty %s on %s", what, owner);
359 return 0;
360 }
361
362 static int
validate_assignlist(asdl_seq * targets,expr_context_ty ctx)363 validate_assignlist(asdl_seq *targets, expr_context_ty ctx)
364 {
365 return validate_nonempty_seq(targets, "targets", ctx == Del ? "Delete" : "Assign") &&
366 validate_exprs(targets, ctx, 0);
367 }
368
369 static int
validate_body(asdl_seq * body,const char * owner)370 validate_body(asdl_seq *body, const char *owner)
371 {
372 return validate_nonempty_seq(body, "body", owner) && validate_stmts(body);
373 }
374
375 static int
validate_stmt(stmt_ty stmt)376 validate_stmt(stmt_ty stmt)
377 {
378 int i;
379 switch (stmt->kind) {
380 case FunctionDef_kind:
381 return validate_body(stmt->v.FunctionDef.body, "FunctionDef") &&
382 validate_arguments(stmt->v.FunctionDef.args) &&
383 validate_exprs(stmt->v.FunctionDef.decorator_list, Load, 0) &&
384 (!stmt->v.FunctionDef.returns ||
385 validate_expr(stmt->v.FunctionDef.returns, Load));
386 case ClassDef_kind:
387 return validate_body(stmt->v.ClassDef.body, "ClassDef") &&
388 validate_exprs(stmt->v.ClassDef.bases, Load, 0) &&
389 validate_keywords(stmt->v.ClassDef.keywords) &&
390 validate_exprs(stmt->v.ClassDef.decorator_list, Load, 0);
391 case Return_kind:
392 return !stmt->v.Return.value || validate_expr(stmt->v.Return.value, Load);
393 case Delete_kind:
394 return validate_assignlist(stmt->v.Delete.targets, Del);
395 case Assign_kind:
396 return validate_assignlist(stmt->v.Assign.targets, Store) &&
397 validate_expr(stmt->v.Assign.value, Load);
398 case AugAssign_kind:
399 return validate_expr(stmt->v.AugAssign.target, Store) &&
400 validate_expr(stmt->v.AugAssign.value, Load);
401 case AnnAssign_kind:
402 if (stmt->v.AnnAssign.target->kind != Name_kind &&
403 stmt->v.AnnAssign.simple) {
404 PyErr_SetString(PyExc_TypeError,
405 "AnnAssign with simple non-Name target");
406 return 0;
407 }
408 return validate_expr(stmt->v.AnnAssign.target, Store) &&
409 (!stmt->v.AnnAssign.value ||
410 validate_expr(stmt->v.AnnAssign.value, Load)) &&
411 validate_expr(stmt->v.AnnAssign.annotation, Load);
412 case For_kind:
413 return validate_expr(stmt->v.For.target, Store) &&
414 validate_expr(stmt->v.For.iter, Load) &&
415 validate_body(stmt->v.For.body, "For") &&
416 validate_stmts(stmt->v.For.orelse);
417 case AsyncFor_kind:
418 return validate_expr(stmt->v.AsyncFor.target, Store) &&
419 validate_expr(stmt->v.AsyncFor.iter, Load) &&
420 validate_body(stmt->v.AsyncFor.body, "AsyncFor") &&
421 validate_stmts(stmt->v.AsyncFor.orelse);
422 case While_kind:
423 return validate_expr(stmt->v.While.test, Load) &&
424 validate_body(stmt->v.While.body, "While") &&
425 validate_stmts(stmt->v.While.orelse);
426 case If_kind:
427 return validate_expr(stmt->v.If.test, Load) &&
428 validate_body(stmt->v.If.body, "If") &&
429 validate_stmts(stmt->v.If.orelse);
430 case With_kind:
431 if (!validate_nonempty_seq(stmt->v.With.items, "items", "With"))
432 return 0;
433 for (i = 0; i < asdl_seq_LEN(stmt->v.With.items); i++) {
434 withitem_ty item = asdl_seq_GET(stmt->v.With.items, i);
435 if (!validate_expr(item->context_expr, Load) ||
436 (item->optional_vars && !validate_expr(item->optional_vars, Store)))
437 return 0;
438 }
439 return validate_body(stmt->v.With.body, "With");
440 case AsyncWith_kind:
441 if (!validate_nonempty_seq(stmt->v.AsyncWith.items, "items", "AsyncWith"))
442 return 0;
443 for (i = 0; i < asdl_seq_LEN(stmt->v.AsyncWith.items); i++) {
444 withitem_ty item = asdl_seq_GET(stmt->v.AsyncWith.items, i);
445 if (!validate_expr(item->context_expr, Load) ||
446 (item->optional_vars && !validate_expr(item->optional_vars, Store)))
447 return 0;
448 }
449 return validate_body(stmt->v.AsyncWith.body, "AsyncWith");
450 case Raise_kind:
451 if (stmt->v.Raise.exc) {
452 return validate_expr(stmt->v.Raise.exc, Load) &&
453 (!stmt->v.Raise.cause || validate_expr(stmt->v.Raise.cause, Load));
454 }
455 if (stmt->v.Raise.cause) {
456 PyErr_SetString(PyExc_ValueError, "Raise with cause but no exception");
457 return 0;
458 }
459 return 1;
460 case Try_kind:
461 if (!validate_body(stmt->v.Try.body, "Try"))
462 return 0;
463 if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
464 !asdl_seq_LEN(stmt->v.Try.finalbody)) {
465 PyErr_SetString(PyExc_ValueError, "Try has neither except handlers nor finalbody");
466 return 0;
467 }
468 if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
469 asdl_seq_LEN(stmt->v.Try.orelse)) {
470 PyErr_SetString(PyExc_ValueError, "Try has orelse but no except handlers");
471 return 0;
472 }
473 for (i = 0; i < asdl_seq_LEN(stmt->v.Try.handlers); i++) {
474 excepthandler_ty handler = asdl_seq_GET(stmt->v.Try.handlers, i);
475 if ((handler->v.ExceptHandler.type &&
476 !validate_expr(handler->v.ExceptHandler.type, Load)) ||
477 !validate_body(handler->v.ExceptHandler.body, "ExceptHandler"))
478 return 0;
479 }
480 return (!asdl_seq_LEN(stmt->v.Try.finalbody) ||
481 validate_stmts(stmt->v.Try.finalbody)) &&
482 (!asdl_seq_LEN(stmt->v.Try.orelse) ||
483 validate_stmts(stmt->v.Try.orelse));
484 case Assert_kind:
485 return validate_expr(stmt->v.Assert.test, Load) &&
486 (!stmt->v.Assert.msg || validate_expr(stmt->v.Assert.msg, Load));
487 case Import_kind:
488 return validate_nonempty_seq(stmt->v.Import.names, "names", "Import");
489 case ImportFrom_kind:
490 if (stmt->v.ImportFrom.level < 0) {
491 PyErr_SetString(PyExc_ValueError, "Negative ImportFrom level");
492 return 0;
493 }
494 return validate_nonempty_seq(stmt->v.ImportFrom.names, "names", "ImportFrom");
495 case Global_kind:
496 return validate_nonempty_seq(stmt->v.Global.names, "names", "Global");
497 case Nonlocal_kind:
498 return validate_nonempty_seq(stmt->v.Nonlocal.names, "names", "Nonlocal");
499 case Expr_kind:
500 return validate_expr(stmt->v.Expr.value, Load);
501 case AsyncFunctionDef_kind:
502 return validate_body(stmt->v.AsyncFunctionDef.body, "AsyncFunctionDef") &&
503 validate_arguments(stmt->v.AsyncFunctionDef.args) &&
504 validate_exprs(stmt->v.AsyncFunctionDef.decorator_list, Load, 0) &&
505 (!stmt->v.AsyncFunctionDef.returns ||
506 validate_expr(stmt->v.AsyncFunctionDef.returns, Load));
507 case Pass_kind:
508 case Break_kind:
509 case Continue_kind:
510 return 1;
511 default:
512 PyErr_SetString(PyExc_SystemError, "unexpected statement");
513 return 0;
514 }
515 }
516
517 static int
validate_stmts(asdl_seq * seq)518 validate_stmts(asdl_seq *seq)
519 {
520 int i;
521 for (i = 0; i < asdl_seq_LEN(seq); i++) {
522 stmt_ty stmt = asdl_seq_GET(seq, i);
523 if (stmt) {
524 if (!validate_stmt(stmt))
525 return 0;
526 }
527 else {
528 PyErr_SetString(PyExc_ValueError,
529 "None disallowed in statement list");
530 return 0;
531 }
532 }
533 return 1;
534 }
535
536 static int
validate_exprs(asdl_seq * exprs,expr_context_ty ctx,int null_ok)537 validate_exprs(asdl_seq *exprs, expr_context_ty ctx, int null_ok)
538 {
539 int i;
540 for (i = 0; i < asdl_seq_LEN(exprs); i++) {
541 expr_ty expr = asdl_seq_GET(exprs, i);
542 if (expr) {
543 if (!validate_expr(expr, ctx))
544 return 0;
545 }
546 else if (!null_ok) {
547 PyErr_SetString(PyExc_ValueError,
548 "None disallowed in expression list");
549 return 0;
550 }
551
552 }
553 return 1;
554 }
555
556 int
PyAST_Validate(mod_ty mod)557 PyAST_Validate(mod_ty mod)
558 {
559 int res = 0;
560
561 switch (mod->kind) {
562 case Module_kind:
563 res = validate_stmts(mod->v.Module.body);
564 break;
565 case Interactive_kind:
566 res = validate_stmts(mod->v.Interactive.body);
567 break;
568 case Expression_kind:
569 res = validate_expr(mod->v.Expression.body, Load);
570 break;
571 case Suite_kind:
572 PyErr_SetString(PyExc_ValueError, "Suite is not valid in the CPython compiler");
573 break;
574 default:
575 PyErr_SetString(PyExc_SystemError, "impossible module node");
576 res = 0;
577 break;
578 }
579 return res;
580 }
581
582 /* This is done here, so defines like "test" don't interfere with AST use above. */
583 #include "grammar.h"
584 #include "parsetok.h"
585 #include "graminit.h"
586
587 /* Data structure used internally */
588 struct compiling {
589 PyArena *c_arena; /* Arena for allocating memory. */
590 PyObject *c_filename; /* filename */
591 PyObject *c_normalize; /* Normalization function from unicodedata. */
592 };
593
594 static asdl_seq *seq_for_testlist(struct compiling *, const node *);
595 static expr_ty ast_for_expr(struct compiling *, const node *);
596 static stmt_ty ast_for_stmt(struct compiling *, const node *);
597 static asdl_seq *ast_for_suite(struct compiling *c, const node *n);
598 static asdl_seq *ast_for_exprlist(struct compiling *, const node *,
599 expr_context_ty);
600 static expr_ty ast_for_testlist(struct compiling *, const node *);
601 static stmt_ty ast_for_classdef(struct compiling *, const node *, asdl_seq *);
602
603 static stmt_ty ast_for_with_stmt(struct compiling *, const node *, bool);
604 static stmt_ty ast_for_for_stmt(struct compiling *, const node *, bool);
605
606 /* Note different signature for ast_for_call */
607 static expr_ty ast_for_call(struct compiling *, const node *, expr_ty, bool);
608
609 static PyObject *parsenumber(struct compiling *, const char *);
610 static expr_ty parsestrplus(struct compiling *, const node *n);
611
612 #define COMP_GENEXP 0
613 #define COMP_LISTCOMP 1
614 #define COMP_SETCOMP 2
615
616 static int
init_normalization(struct compiling * c)617 init_normalization(struct compiling *c)
618 {
619 PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
620 if (!m)
621 return 0;
622 c->c_normalize = PyObject_GetAttrString(m, "normalize");
623 Py_DECREF(m);
624 if (!c->c_normalize)
625 return 0;
626 return 1;
627 }
628
629 static identifier
new_identifier(const char * n,struct compiling * c)630 new_identifier(const char *n, struct compiling *c)
631 {
632 PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
633 if (!id)
634 return NULL;
635 /* PyUnicode_DecodeUTF8 should always return a ready string. */
636 assert(PyUnicode_IS_READY(id));
637 /* Check whether there are non-ASCII characters in the
638 identifier; if so, normalize to NFKC. */
639 if (!PyUnicode_IS_ASCII(id)) {
640 PyObject *id2;
641 _Py_IDENTIFIER(NFKC);
642 if (!c->c_normalize && !init_normalization(c)) {
643 Py_DECREF(id);
644 return NULL;
645 }
646 PyObject *form = _PyUnicode_FromId(&PyId_NFKC);
647 if (form == NULL) {
648 Py_DECREF(id);
649 return NULL;
650 }
651 PyObject *args[2] = {form, id};
652 id2 = _PyObject_FastCall(c->c_normalize, args, 2);
653 Py_DECREF(id);
654 if (!id2)
655 return NULL;
656 if (!PyUnicode_Check(id2)) {
657 PyErr_Format(PyExc_TypeError,
658 "unicodedata.normalize() must return a string, not "
659 "%.200s",
660 Py_TYPE(id2)->tp_name);
661 Py_DECREF(id2);
662 return NULL;
663 }
664 id = id2;
665 }
666 PyUnicode_InternInPlace(&id);
667 if (PyArena_AddPyObject(c->c_arena, id) < 0) {
668 Py_DECREF(id);
669 return NULL;
670 }
671 return id;
672 }
673
674 #define NEW_IDENTIFIER(n) new_identifier(STR(n), c)
675
676 static int
ast_error(struct compiling * c,const node * n,const char * errmsg)677 ast_error(struct compiling *c, const node *n, const char *errmsg)
678 {
679 PyObject *value, *errstr, *loc, *tmp;
680
681 loc = PyErr_ProgramTextObject(c->c_filename, LINENO(n));
682 if (!loc) {
683 Py_INCREF(Py_None);
684 loc = Py_None;
685 }
686 tmp = Py_BuildValue("(OiiN)", c->c_filename, LINENO(n), n->n_col_offset, loc);
687 if (!tmp)
688 return 0;
689 errstr = PyUnicode_FromString(errmsg);
690 if (!errstr) {
691 Py_DECREF(tmp);
692 return 0;
693 }
694 value = PyTuple_Pack(2, errstr, tmp);
695 Py_DECREF(errstr);
696 Py_DECREF(tmp);
697 if (value) {
698 PyErr_SetObject(PyExc_SyntaxError, value);
699 Py_DECREF(value);
700 }
701 return 0;
702 }
703
704 /* num_stmts() returns number of contained statements.
705
706 Use this routine to determine how big a sequence is needed for
707 the statements in a parse tree. Its raison d'etre is this bit of
708 grammar:
709
710 stmt: simple_stmt | compound_stmt
711 simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
712
713 A simple_stmt can contain multiple small_stmt elements joined
714 by semicolons. If the arg is a simple_stmt, the number of
715 small_stmt elements is returned.
716 */
717
718 static int
num_stmts(const node * n)719 num_stmts(const node *n)
720 {
721 int i, l;
722 node *ch;
723
724 switch (TYPE(n)) {
725 case single_input:
726 if (TYPE(CHILD(n, 0)) == NEWLINE)
727 return 0;
728 else
729 return num_stmts(CHILD(n, 0));
730 case file_input:
731 l = 0;
732 for (i = 0; i < NCH(n); i++) {
733 ch = CHILD(n, i);
734 if (TYPE(ch) == stmt)
735 l += num_stmts(ch);
736 }
737 return l;
738 case stmt:
739 return num_stmts(CHILD(n, 0));
740 case compound_stmt:
741 return 1;
742 case simple_stmt:
743 return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */
744 case suite:
745 if (NCH(n) == 1)
746 return num_stmts(CHILD(n, 0));
747 else {
748 l = 0;
749 for (i = 2; i < (NCH(n) - 1); i++)
750 l += num_stmts(CHILD(n, i));
751 return l;
752 }
753 default: {
754 char buf[128];
755
756 sprintf(buf, "Non-statement found: %d %d",
757 TYPE(n), NCH(n));
758 Py_FatalError(buf);
759 }
760 }
761 Py_UNREACHABLE();
762 }
763
764 /* Transform the CST rooted at node * to the appropriate AST
765 */
766
767 mod_ty
PyAST_FromNodeObject(const node * n,PyCompilerFlags * flags,PyObject * filename,PyArena * arena)768 PyAST_FromNodeObject(const node *n, PyCompilerFlags *flags,
769 PyObject *filename, PyArena *arena)
770 {
771 int i, j, k, num;
772 asdl_seq *stmts = NULL;
773 stmt_ty s;
774 node *ch;
775 struct compiling c;
776 mod_ty res = NULL;
777
778 c.c_arena = arena;
779 /* borrowed reference */
780 c.c_filename = filename;
781 c.c_normalize = NULL;
782
783 if (TYPE(n) == encoding_decl)
784 n = CHILD(n, 0);
785
786 k = 0;
787 switch (TYPE(n)) {
788 case file_input:
789 stmts = _Py_asdl_seq_new(num_stmts(n), arena);
790 if (!stmts)
791 goto out;
792 for (i = 0; i < NCH(n) - 1; i++) {
793 ch = CHILD(n, i);
794 if (TYPE(ch) == NEWLINE)
795 continue;
796 REQ(ch, stmt);
797 num = num_stmts(ch);
798 if (num == 1) {
799 s = ast_for_stmt(&c, ch);
800 if (!s)
801 goto out;
802 asdl_seq_SET(stmts, k++, s);
803 }
804 else {
805 ch = CHILD(ch, 0);
806 REQ(ch, simple_stmt);
807 for (j = 0; j < num; j++) {
808 s = ast_for_stmt(&c, CHILD(ch, j * 2));
809 if (!s)
810 goto out;
811 asdl_seq_SET(stmts, k++, s);
812 }
813 }
814 }
815 res = Module(stmts, arena);
816 break;
817 case eval_input: {
818 expr_ty testlist_ast;
819
820 /* XXX Why not comp_for here? */
821 testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
822 if (!testlist_ast)
823 goto out;
824 res = Expression(testlist_ast, arena);
825 break;
826 }
827 case single_input:
828 if (TYPE(CHILD(n, 0)) == NEWLINE) {
829 stmts = _Py_asdl_seq_new(1, arena);
830 if (!stmts)
831 goto out;
832 asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
833 arena));
834 if (!asdl_seq_GET(stmts, 0))
835 goto out;
836 res = Interactive(stmts, arena);
837 }
838 else {
839 n = CHILD(n, 0);
840 num = num_stmts(n);
841 stmts = _Py_asdl_seq_new(num, arena);
842 if (!stmts)
843 goto out;
844 if (num == 1) {
845 s = ast_for_stmt(&c, n);
846 if (!s)
847 goto out;
848 asdl_seq_SET(stmts, 0, s);
849 }
850 else {
851 /* Only a simple_stmt can contain multiple statements. */
852 REQ(n, simple_stmt);
853 for (i = 0; i < NCH(n); i += 2) {
854 if (TYPE(CHILD(n, i)) == NEWLINE)
855 break;
856 s = ast_for_stmt(&c, CHILD(n, i));
857 if (!s)
858 goto out;
859 asdl_seq_SET(stmts, i / 2, s);
860 }
861 }
862
863 res = Interactive(stmts, arena);
864 }
865 break;
866 default:
867 PyErr_Format(PyExc_SystemError,
868 "invalid node %d for PyAST_FromNode", TYPE(n));
869 goto out;
870 }
871 out:
872 if (c.c_normalize) {
873 Py_DECREF(c.c_normalize);
874 }
875 return res;
876 }
877
878 mod_ty
PyAST_FromNode(const node * n,PyCompilerFlags * flags,const char * filename_str,PyArena * arena)879 PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename_str,
880 PyArena *arena)
881 {
882 mod_ty mod;
883 PyObject *filename;
884 filename = PyUnicode_DecodeFSDefault(filename_str);
885 if (filename == NULL)
886 return NULL;
887 mod = PyAST_FromNodeObject(n, flags, filename, arena);
888 Py_DECREF(filename);
889 return mod;
890
891 }
892
893 /* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
894 */
895
896 static operator_ty
get_operator(const node * n)897 get_operator(const node *n)
898 {
899 switch (TYPE(n)) {
900 case VBAR:
901 return BitOr;
902 case CIRCUMFLEX:
903 return BitXor;
904 case AMPER:
905 return BitAnd;
906 case LEFTSHIFT:
907 return LShift;
908 case RIGHTSHIFT:
909 return RShift;
910 case PLUS:
911 return Add;
912 case MINUS:
913 return Sub;
914 case STAR:
915 return Mult;
916 case AT:
917 return MatMult;
918 case SLASH:
919 return Div;
920 case DOUBLESLASH:
921 return FloorDiv;
922 case PERCENT:
923 return Mod;
924 default:
925 return (operator_ty)0;
926 }
927 }
928
929 static const char * const FORBIDDEN[] = {
930 "None",
931 "True",
932 "False",
933 NULL,
934 };
935
936 static int
forbidden_name(struct compiling * c,identifier name,const node * n,int full_checks)937 forbidden_name(struct compiling *c, identifier name, const node *n,
938 int full_checks)
939 {
940 assert(PyUnicode_Check(name));
941 if (_PyUnicode_EqualToASCIIString(name, "__debug__")) {
942 ast_error(c, n, "assignment to keyword");
943 return 1;
944 }
945 if (full_checks) {
946 const char * const *p;
947 for (p = FORBIDDEN; *p; p++) {
948 if (_PyUnicode_EqualToASCIIString(name, *p)) {
949 ast_error(c, n, "assignment to keyword");
950 return 1;
951 }
952 }
953 }
954 return 0;
955 }
956
957 /* Set the context ctx for expr_ty e, recursively traversing e.
958
959 Only sets context for expr kinds that "can appear in assignment context"
960 (according to ../Parser/Python.asdl). For other expr kinds, it sets
961 an appropriate syntax error and returns false.
962 */
963
964 static int
set_context(struct compiling * c,expr_ty e,expr_context_ty ctx,const node * n)965 set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)
966 {
967 asdl_seq *s = NULL;
968 /* If a particular expression type can't be used for assign / delete,
969 set expr_name to its name and an error message will be generated.
970 */
971 const char* expr_name = NULL;
972
973 /* The ast defines augmented store and load contexts, but the
974 implementation here doesn't actually use them. The code may be
975 a little more complex than necessary as a result. It also means
976 that expressions in an augmented assignment have a Store context.
977 Consider restructuring so that augmented assignment uses
978 set_context(), too.
979 */
980 assert(ctx != AugStore && ctx != AugLoad);
981
982 switch (e->kind) {
983 case Attribute_kind:
984 e->v.Attribute.ctx = ctx;
985 if (ctx == Store && forbidden_name(c, e->v.Attribute.attr, n, 1))
986 return 0;
987 break;
988 case Subscript_kind:
989 e->v.Subscript.ctx = ctx;
990 break;
991 case Starred_kind:
992 e->v.Starred.ctx = ctx;
993 if (!set_context(c, e->v.Starred.value, ctx, n))
994 return 0;
995 break;
996 case Name_kind:
997 if (ctx == Store) {
998 if (forbidden_name(c, e->v.Name.id, n, 0))
999 return 0; /* forbidden_name() calls ast_error() */
1000 }
1001 e->v.Name.ctx = ctx;
1002 break;
1003 case List_kind:
1004 e->v.List.ctx = ctx;
1005 s = e->v.List.elts;
1006 break;
1007 case Tuple_kind:
1008 e->v.Tuple.ctx = ctx;
1009 s = e->v.Tuple.elts;
1010 break;
1011 case Lambda_kind:
1012 expr_name = "lambda";
1013 break;
1014 case Call_kind:
1015 expr_name = "function call";
1016 break;
1017 case BoolOp_kind:
1018 case BinOp_kind:
1019 case UnaryOp_kind:
1020 expr_name = "operator";
1021 break;
1022 case GeneratorExp_kind:
1023 expr_name = "generator expression";
1024 break;
1025 case Yield_kind:
1026 case YieldFrom_kind:
1027 expr_name = "yield expression";
1028 break;
1029 case Await_kind:
1030 expr_name = "await expression";
1031 break;
1032 case ListComp_kind:
1033 expr_name = "list comprehension";
1034 break;
1035 case SetComp_kind:
1036 expr_name = "set comprehension";
1037 break;
1038 case DictComp_kind:
1039 expr_name = "dict comprehension";
1040 break;
1041 case Dict_kind:
1042 case Set_kind:
1043 case Num_kind:
1044 case Str_kind:
1045 case Bytes_kind:
1046 case JoinedStr_kind:
1047 case FormattedValue_kind:
1048 expr_name = "literal";
1049 break;
1050 case NameConstant_kind:
1051 expr_name = "keyword";
1052 break;
1053 case Ellipsis_kind:
1054 expr_name = "Ellipsis";
1055 break;
1056 case Compare_kind:
1057 expr_name = "comparison";
1058 break;
1059 case IfExp_kind:
1060 expr_name = "conditional expression";
1061 break;
1062 default:
1063 PyErr_Format(PyExc_SystemError,
1064 "unexpected expression in assignment %d (line %d)",
1065 e->kind, e->lineno);
1066 return 0;
1067 }
1068 /* Check for error string set by switch */
1069 if (expr_name) {
1070 char buf[300];
1071 PyOS_snprintf(buf, sizeof(buf),
1072 "can't %s %s",
1073 ctx == Store ? "assign to" : "delete",
1074 expr_name);
1075 return ast_error(c, n, buf);
1076 }
1077
1078 /* If the LHS is a list or tuple, we need to set the assignment
1079 context for all the contained elements.
1080 */
1081 if (s) {
1082 int i;
1083
1084 for (i = 0; i < asdl_seq_LEN(s); i++) {
1085 if (!set_context(c, (expr_ty)asdl_seq_GET(s, i), ctx, n))
1086 return 0;
1087 }
1088 }
1089 return 1;
1090 }
1091
1092 static operator_ty
ast_for_augassign(struct compiling * c,const node * n)1093 ast_for_augassign(struct compiling *c, const node *n)
1094 {
1095 REQ(n, augassign);
1096 n = CHILD(n, 0);
1097 switch (STR(n)[0]) {
1098 case '+':
1099 return Add;
1100 case '-':
1101 return Sub;
1102 case '/':
1103 if (STR(n)[1] == '/')
1104 return FloorDiv;
1105 else
1106 return Div;
1107 case '%':
1108 return Mod;
1109 case '<':
1110 return LShift;
1111 case '>':
1112 return RShift;
1113 case '&':
1114 return BitAnd;
1115 case '^':
1116 return BitXor;
1117 case '|':
1118 return BitOr;
1119 case '*':
1120 if (STR(n)[1] == '*')
1121 return Pow;
1122 else
1123 return Mult;
1124 case '@':
1125 return MatMult;
1126 default:
1127 PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n));
1128 return (operator_ty)0;
1129 }
1130 }
1131
1132 static cmpop_ty
ast_for_comp_op(struct compiling * c,const node * n)1133 ast_for_comp_op(struct compiling *c, const node *n)
1134 {
1135 /* comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'
1136 |'is' 'not'
1137 */
1138 REQ(n, comp_op);
1139 if (NCH(n) == 1) {
1140 n = CHILD(n, 0);
1141 switch (TYPE(n)) {
1142 case LESS:
1143 return Lt;
1144 case GREATER:
1145 return Gt;
1146 case EQEQUAL: /* == */
1147 return Eq;
1148 case LESSEQUAL:
1149 return LtE;
1150 case GREATEREQUAL:
1151 return GtE;
1152 case NOTEQUAL:
1153 return NotEq;
1154 case NAME:
1155 if (strcmp(STR(n), "in") == 0)
1156 return In;
1157 if (strcmp(STR(n), "is") == 0)
1158 return Is;
1159 /* fall through */
1160 default:
1161 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s",
1162 STR(n));
1163 return (cmpop_ty)0;
1164 }
1165 }
1166 else if (NCH(n) == 2) {
1167 /* handle "not in" and "is not" */
1168 switch (TYPE(CHILD(n, 0))) {
1169 case NAME:
1170 if (strcmp(STR(CHILD(n, 1)), "in") == 0)
1171 return NotIn;
1172 if (strcmp(STR(CHILD(n, 0)), "is") == 0)
1173 return IsNot;
1174 /* fall through */
1175 default:
1176 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s",
1177 STR(CHILD(n, 0)), STR(CHILD(n, 1)));
1178 return (cmpop_ty)0;
1179 }
1180 }
1181 PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children",
1182 NCH(n));
1183 return (cmpop_ty)0;
1184 }
1185
1186 static asdl_seq *
seq_for_testlist(struct compiling * c,const node * n)1187 seq_for_testlist(struct compiling *c, const node *n)
1188 {
1189 /* testlist: test (',' test)* [',']
1190 testlist_star_expr: test|star_expr (',' test|star_expr)* [',']
1191 */
1192 asdl_seq *seq;
1193 expr_ty expression;
1194 int i;
1195 assert(TYPE(n) == testlist || TYPE(n) == testlist_star_expr || TYPE(n) == testlist_comp);
1196
1197 seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1198 if (!seq)
1199 return NULL;
1200
1201 for (i = 0; i < NCH(n); i += 2) {
1202 const node *ch = CHILD(n, i);
1203 assert(TYPE(ch) == test || TYPE(ch) == test_nocond || TYPE(ch) == star_expr);
1204
1205 expression = ast_for_expr(c, ch);
1206 if (!expression)
1207 return NULL;
1208
1209 assert(i / 2 < seq->size);
1210 asdl_seq_SET(seq, i / 2, expression);
1211 }
1212 return seq;
1213 }
1214
1215 static arg_ty
ast_for_arg(struct compiling * c,const node * n)1216 ast_for_arg(struct compiling *c, const node *n)
1217 {
1218 identifier name;
1219 expr_ty annotation = NULL;
1220 node *ch;
1221 arg_ty ret;
1222
1223 assert(TYPE(n) == tfpdef || TYPE(n) == vfpdef);
1224 ch = CHILD(n, 0);
1225 name = NEW_IDENTIFIER(ch);
1226 if (!name)
1227 return NULL;
1228 if (forbidden_name(c, name, ch, 0))
1229 return NULL;
1230
1231 if (NCH(n) == 3 && TYPE(CHILD(n, 1)) == COLON) {
1232 annotation = ast_for_expr(c, CHILD(n, 2));
1233 if (!annotation)
1234 return NULL;
1235 }
1236
1237 ret = arg(name, annotation, LINENO(n), n->n_col_offset, c->c_arena);
1238 if (!ret)
1239 return NULL;
1240 return ret;
1241 }
1242
1243 /* returns -1 if failed to handle keyword only arguments
1244 returns new position to keep processing if successful
1245 (',' tfpdef ['=' test])*
1246 ^^^
1247 start pointing here
1248 */
1249 static int
handle_keywordonly_args(struct compiling * c,const node * n,int start,asdl_seq * kwonlyargs,asdl_seq * kwdefaults)1250 handle_keywordonly_args(struct compiling *c, const node *n, int start,
1251 asdl_seq *kwonlyargs, asdl_seq *kwdefaults)
1252 {
1253 PyObject *argname;
1254 node *ch;
1255 expr_ty expression, annotation;
1256 arg_ty arg;
1257 int i = start;
1258 int j = 0; /* index for kwdefaults and kwonlyargs */
1259
1260 if (kwonlyargs == NULL) {
1261 ast_error(c, CHILD(n, start), "named arguments must follow bare *");
1262 return -1;
1263 }
1264 assert(kwdefaults != NULL);
1265 while (i < NCH(n)) {
1266 ch = CHILD(n, i);
1267 switch (TYPE(ch)) {
1268 case vfpdef:
1269 case tfpdef:
1270 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
1271 expression = ast_for_expr(c, CHILD(n, i + 2));
1272 if (!expression)
1273 goto error;
1274 asdl_seq_SET(kwdefaults, j, expression);
1275 i += 2; /* '=' and test */
1276 }
1277 else { /* setting NULL if no default value exists */
1278 asdl_seq_SET(kwdefaults, j, NULL);
1279 }
1280 if (NCH(ch) == 3) {
1281 /* ch is NAME ':' test */
1282 annotation = ast_for_expr(c, CHILD(ch, 2));
1283 if (!annotation)
1284 goto error;
1285 }
1286 else {
1287 annotation = NULL;
1288 }
1289 ch = CHILD(ch, 0);
1290 argname = NEW_IDENTIFIER(ch);
1291 if (!argname)
1292 goto error;
1293 if (forbidden_name(c, argname, ch, 0))
1294 goto error;
1295 arg = arg(argname, annotation, LINENO(ch), ch->n_col_offset,
1296 c->c_arena);
1297 if (!arg)
1298 goto error;
1299 asdl_seq_SET(kwonlyargs, j++, arg);
1300 i += 2; /* the name and the comma */
1301 break;
1302 case DOUBLESTAR:
1303 return i;
1304 default:
1305 ast_error(c, ch, "unexpected node");
1306 goto error;
1307 }
1308 }
1309 return i;
1310 error:
1311 return -1;
1312 }
1313
1314 /* Create AST for argument list. */
1315
1316 static arguments_ty
ast_for_arguments(struct compiling * c,const node * n)1317 ast_for_arguments(struct compiling *c, const node *n)
1318 {
1319 /* This function handles both typedargslist (function definition)
1320 and varargslist (lambda definition).
1321
1322 parameters: '(' [typedargslist] ')'
1323 typedargslist: (tfpdef ['=' test] (',' tfpdef ['=' test])* [',' [
1324 '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
1325 | '**' tfpdef [',']]]
1326 | '*' [tfpdef] (',' tfpdef ['=' test])* [',' ['**' tfpdef [',']]]
1327 | '**' tfpdef [','])
1328 tfpdef: NAME [':' test]
1329 varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [',' [
1330 '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
1331 | '**' vfpdef [',']]]
1332 | '*' [vfpdef] (',' vfpdef ['=' test])* [',' ['**' vfpdef [',']]]
1333 | '**' vfpdef [',']
1334 )
1335 vfpdef: NAME
1336
1337 */
1338 int i, j, k, nposargs = 0, nkwonlyargs = 0;
1339 int nposdefaults = 0, found_default = 0;
1340 asdl_seq *posargs, *posdefaults, *kwonlyargs, *kwdefaults;
1341 arg_ty vararg = NULL, kwarg = NULL;
1342 arg_ty arg;
1343 node *ch;
1344
1345 if (TYPE(n) == parameters) {
1346 if (NCH(n) == 2) /* () as argument list */
1347 return arguments(NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena);
1348 n = CHILD(n, 1);
1349 }
1350 assert(TYPE(n) == typedargslist || TYPE(n) == varargslist);
1351
1352 /* First count the number of positional args & defaults. The
1353 variable i is the loop index for this for loop and the next.
1354 The next loop picks up where the first leaves off.
1355 */
1356 for (i = 0; i < NCH(n); i++) {
1357 ch = CHILD(n, i);
1358 if (TYPE(ch) == STAR) {
1359 /* skip star */
1360 i++;
1361 if (i < NCH(n) && /* skip argument following star */
1362 (TYPE(CHILD(n, i)) == tfpdef ||
1363 TYPE(CHILD(n, i)) == vfpdef)) {
1364 i++;
1365 }
1366 break;
1367 }
1368 if (TYPE(ch) == DOUBLESTAR) break;
1369 if (TYPE(ch) == vfpdef || TYPE(ch) == tfpdef) nposargs++;
1370 if (TYPE(ch) == EQUAL) nposdefaults++;
1371 }
1372 /* count the number of keyword only args &
1373 defaults for keyword only args */
1374 for ( ; i < NCH(n); ++i) {
1375 ch = CHILD(n, i);
1376 if (TYPE(ch) == DOUBLESTAR) break;
1377 if (TYPE(ch) == tfpdef || TYPE(ch) == vfpdef) nkwonlyargs++;
1378 }
1379 posargs = (nposargs ? _Py_asdl_seq_new(nposargs, c->c_arena) : NULL);
1380 if (!posargs && nposargs)
1381 return NULL;
1382 kwonlyargs = (nkwonlyargs ?
1383 _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
1384 if (!kwonlyargs && nkwonlyargs)
1385 return NULL;
1386 posdefaults = (nposdefaults ?
1387 _Py_asdl_seq_new(nposdefaults, c->c_arena) : NULL);
1388 if (!posdefaults && nposdefaults)
1389 return NULL;
1390 /* The length of kwonlyargs and kwdefaults are same
1391 since we set NULL as default for keyword only argument w/o default
1392 - we have sequence data structure, but no dictionary */
1393 kwdefaults = (nkwonlyargs ?
1394 _Py_asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
1395 if (!kwdefaults && nkwonlyargs)
1396 return NULL;
1397
1398 /* tfpdef: NAME [':' test]
1399 vfpdef: NAME
1400 */
1401 i = 0;
1402 j = 0; /* index for defaults */
1403 k = 0; /* index for args */
1404 while (i < NCH(n)) {
1405 ch = CHILD(n, i);
1406 switch (TYPE(ch)) {
1407 case tfpdef:
1408 case vfpdef:
1409 /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is
1410 anything other than EQUAL or a comma? */
1411 /* XXX Should NCH(n) check be made a separate check? */
1412 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
1413 expr_ty expression = ast_for_expr(c, CHILD(n, i + 2));
1414 if (!expression)
1415 return NULL;
1416 assert(posdefaults != NULL);
1417 asdl_seq_SET(posdefaults, j++, expression);
1418 i += 2;
1419 found_default = 1;
1420 }
1421 else if (found_default) {
1422 ast_error(c, n,
1423 "non-default argument follows default argument");
1424 return NULL;
1425 }
1426 arg = ast_for_arg(c, ch);
1427 if (!arg)
1428 return NULL;
1429 asdl_seq_SET(posargs, k++, arg);
1430 i += 2; /* the name and the comma */
1431 break;
1432 case STAR:
1433 if (i+1 >= NCH(n) ||
1434 (i+2 == NCH(n) && TYPE(CHILD(n, i+1)) == COMMA)) {
1435 ast_error(c, CHILD(n, i),
1436 "named arguments must follow bare *");
1437 return NULL;
1438 }
1439 ch = CHILD(n, i+1); /* tfpdef or COMMA */
1440 if (TYPE(ch) == COMMA) {
1441 int res = 0;
1442 i += 2; /* now follows keyword only arguments */
1443 res = handle_keywordonly_args(c, n, i,
1444 kwonlyargs, kwdefaults);
1445 if (res == -1) return NULL;
1446 i = res; /* res has new position to process */
1447 }
1448 else {
1449 vararg = ast_for_arg(c, ch);
1450 if (!vararg)
1451 return NULL;
1452
1453 i += 3;
1454 if (i < NCH(n) && (TYPE(CHILD(n, i)) == tfpdef
1455 || TYPE(CHILD(n, i)) == vfpdef)) {
1456 int res = 0;
1457 res = handle_keywordonly_args(c, n, i,
1458 kwonlyargs, kwdefaults);
1459 if (res == -1) return NULL;
1460 i = res; /* res has new position to process */
1461 }
1462 }
1463 break;
1464 case DOUBLESTAR:
1465 ch = CHILD(n, i+1); /* tfpdef */
1466 assert(TYPE(ch) == tfpdef || TYPE(ch) == vfpdef);
1467 kwarg = ast_for_arg(c, ch);
1468 if (!kwarg)
1469 return NULL;
1470 i += 3;
1471 break;
1472 default:
1473 PyErr_Format(PyExc_SystemError,
1474 "unexpected node in varargslist: %d @ %d",
1475 TYPE(ch), i);
1476 return NULL;
1477 }
1478 }
1479 return arguments(posargs, vararg, kwonlyargs, kwdefaults, kwarg, posdefaults, c->c_arena);
1480 }
1481
1482 static expr_ty
ast_for_dotted_name(struct compiling * c,const node * n)1483 ast_for_dotted_name(struct compiling *c, const node *n)
1484 {
1485 expr_ty e;
1486 identifier id;
1487 int lineno, col_offset;
1488 int i;
1489
1490 REQ(n, dotted_name);
1491
1492 lineno = LINENO(n);
1493 col_offset = n->n_col_offset;
1494
1495 id = NEW_IDENTIFIER(CHILD(n, 0));
1496 if (!id)
1497 return NULL;
1498 e = Name(id, Load, lineno, col_offset, c->c_arena);
1499 if (!e)
1500 return NULL;
1501
1502 for (i = 2; i < NCH(n); i+=2) {
1503 id = NEW_IDENTIFIER(CHILD(n, i));
1504 if (!id)
1505 return NULL;
1506 e = Attribute(e, id, Load, lineno, col_offset, c->c_arena);
1507 if (!e)
1508 return NULL;
1509 }
1510
1511 return e;
1512 }
1513
1514 static expr_ty
ast_for_decorator(struct compiling * c,const node * n)1515 ast_for_decorator(struct compiling *c, const node *n)
1516 {
1517 /* decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE */
1518 expr_ty d = NULL;
1519 expr_ty name_expr;
1520
1521 REQ(n, decorator);
1522 REQ(CHILD(n, 0), AT);
1523 REQ(RCHILD(n, -1), NEWLINE);
1524
1525 name_expr = ast_for_dotted_name(c, CHILD(n, 1));
1526 if (!name_expr)
1527 return NULL;
1528
1529 if (NCH(n) == 3) { /* No arguments */
1530 d = name_expr;
1531 name_expr = NULL;
1532 }
1533 else if (NCH(n) == 5) { /* Call with no arguments */
1534 d = Call(name_expr, NULL, NULL, LINENO(n),
1535 n->n_col_offset, c->c_arena);
1536 if (!d)
1537 return NULL;
1538 name_expr = NULL;
1539 }
1540 else {
1541 d = ast_for_call(c, CHILD(n, 3), name_expr, true);
1542 if (!d)
1543 return NULL;
1544 name_expr = NULL;
1545 }
1546
1547 return d;
1548 }
1549
1550 static asdl_seq*
ast_for_decorators(struct compiling * c,const node * n)1551 ast_for_decorators(struct compiling *c, const node *n)
1552 {
1553 asdl_seq* decorator_seq;
1554 expr_ty d;
1555 int i;
1556
1557 REQ(n, decorators);
1558 decorator_seq = _Py_asdl_seq_new(NCH(n), c->c_arena);
1559 if (!decorator_seq)
1560 return NULL;
1561
1562 for (i = 0; i < NCH(n); i++) {
1563 d = ast_for_decorator(c, CHILD(n, i));
1564 if (!d)
1565 return NULL;
1566 asdl_seq_SET(decorator_seq, i, d);
1567 }
1568 return decorator_seq;
1569 }
1570
1571 static stmt_ty
ast_for_funcdef_impl(struct compiling * c,const node * n0,asdl_seq * decorator_seq,bool is_async)1572 ast_for_funcdef_impl(struct compiling *c, const node *n0,
1573 asdl_seq *decorator_seq, bool is_async)
1574 {
1575 /* funcdef: 'def' NAME parameters ['->' test] ':' suite */
1576 const node * const n = is_async ? CHILD(n0, 1) : n0;
1577 identifier name;
1578 arguments_ty args;
1579 asdl_seq *body;
1580 expr_ty returns = NULL;
1581 int name_i = 1;
1582
1583 REQ(n, funcdef);
1584
1585 name = NEW_IDENTIFIER(CHILD(n, name_i));
1586 if (!name)
1587 return NULL;
1588 if (forbidden_name(c, name, CHILD(n, name_i), 0))
1589 return NULL;
1590 args = ast_for_arguments(c, CHILD(n, name_i + 1));
1591 if (!args)
1592 return NULL;
1593 if (TYPE(CHILD(n, name_i+2)) == RARROW) {
1594 returns = ast_for_expr(c, CHILD(n, name_i + 3));
1595 if (!returns)
1596 return NULL;
1597 name_i += 2;
1598 }
1599 body = ast_for_suite(c, CHILD(n, name_i + 3));
1600 if (!body)
1601 return NULL;
1602
1603 if (is_async)
1604 return AsyncFunctionDef(name, args, body, decorator_seq, returns,
1605 LINENO(n0), n0->n_col_offset, c->c_arena);
1606 else
1607 return FunctionDef(name, args, body, decorator_seq, returns,
1608 LINENO(n), n->n_col_offset, c->c_arena);
1609 }
1610
1611 static stmt_ty
ast_for_async_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)1612 ast_for_async_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
1613 {
1614 /* async_funcdef: 'async' funcdef */
1615 REQ(n, async_funcdef);
1616 REQ(CHILD(n, 0), NAME);
1617 assert(strcmp(STR(CHILD(n, 0)), "async") == 0);
1618 REQ(CHILD(n, 1), funcdef);
1619
1620 return ast_for_funcdef_impl(c, n, decorator_seq,
1621 true /* is_async */);
1622 }
1623
1624 static stmt_ty
ast_for_funcdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)1625 ast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
1626 {
1627 /* funcdef: 'def' NAME parameters ['->' test] ':' suite */
1628 return ast_for_funcdef_impl(c, n, decorator_seq,
1629 false /* is_async */);
1630 }
1631
1632
1633 static stmt_ty
ast_for_async_stmt(struct compiling * c,const node * n)1634 ast_for_async_stmt(struct compiling *c, const node *n)
1635 {
1636 /* async_stmt: 'async' (funcdef | with_stmt | for_stmt) */
1637 REQ(n, async_stmt);
1638 REQ(CHILD(n, 0), NAME);
1639 assert(strcmp(STR(CHILD(n, 0)), "async") == 0);
1640
1641 switch (TYPE(CHILD(n, 1))) {
1642 case funcdef:
1643 return ast_for_funcdef_impl(c, n, NULL,
1644 true /* is_async */);
1645 case with_stmt:
1646 return ast_for_with_stmt(c, n,
1647 true /* is_async */);
1648
1649 case for_stmt:
1650 return ast_for_for_stmt(c, n,
1651 true /* is_async */);
1652
1653 default:
1654 PyErr_Format(PyExc_SystemError,
1655 "invalid async stament: %s",
1656 STR(CHILD(n, 1)));
1657 return NULL;
1658 }
1659 }
1660
1661 static stmt_ty
ast_for_decorated(struct compiling * c,const node * n)1662 ast_for_decorated(struct compiling *c, const node *n)
1663 {
1664 /* decorated: decorators (classdef | funcdef | async_funcdef) */
1665 stmt_ty thing = NULL;
1666 asdl_seq *decorator_seq = NULL;
1667
1668 REQ(n, decorated);
1669
1670 decorator_seq = ast_for_decorators(c, CHILD(n, 0));
1671 if (!decorator_seq)
1672 return NULL;
1673
1674 assert(TYPE(CHILD(n, 1)) == funcdef ||
1675 TYPE(CHILD(n, 1)) == async_funcdef ||
1676 TYPE(CHILD(n, 1)) == classdef);
1677
1678 if (TYPE(CHILD(n, 1)) == funcdef) {
1679 thing = ast_for_funcdef(c, CHILD(n, 1), decorator_seq);
1680 } else if (TYPE(CHILD(n, 1)) == classdef) {
1681 thing = ast_for_classdef(c, CHILD(n, 1), decorator_seq);
1682 } else if (TYPE(CHILD(n, 1)) == async_funcdef) {
1683 thing = ast_for_async_funcdef(c, CHILD(n, 1), decorator_seq);
1684 }
1685 /* we count the decorators in when talking about the class' or
1686 * function's line number */
1687 if (thing) {
1688 thing->lineno = LINENO(n);
1689 thing->col_offset = n->n_col_offset;
1690 }
1691 return thing;
1692 }
1693
1694 static expr_ty
ast_for_lambdef(struct compiling * c,const node * n)1695 ast_for_lambdef(struct compiling *c, const node *n)
1696 {
1697 /* lambdef: 'lambda' [varargslist] ':' test
1698 lambdef_nocond: 'lambda' [varargslist] ':' test_nocond */
1699 arguments_ty args;
1700 expr_ty expression;
1701
1702 if (NCH(n) == 3) {
1703 args = arguments(NULL, NULL, NULL, NULL, NULL, NULL, c->c_arena);
1704 if (!args)
1705 return NULL;
1706 expression = ast_for_expr(c, CHILD(n, 2));
1707 if (!expression)
1708 return NULL;
1709 }
1710 else {
1711 args = ast_for_arguments(c, CHILD(n, 1));
1712 if (!args)
1713 return NULL;
1714 expression = ast_for_expr(c, CHILD(n, 3));
1715 if (!expression)
1716 return NULL;
1717 }
1718
1719 return Lambda(args, expression, LINENO(n), n->n_col_offset, c->c_arena);
1720 }
1721
1722 static expr_ty
ast_for_ifexpr(struct compiling * c,const node * n)1723 ast_for_ifexpr(struct compiling *c, const node *n)
1724 {
1725 /* test: or_test 'if' or_test 'else' test */
1726 expr_ty expression, body, orelse;
1727
1728 assert(NCH(n) == 5);
1729 body = ast_for_expr(c, CHILD(n, 0));
1730 if (!body)
1731 return NULL;
1732 expression = ast_for_expr(c, CHILD(n, 2));
1733 if (!expression)
1734 return NULL;
1735 orelse = ast_for_expr(c, CHILD(n, 4));
1736 if (!orelse)
1737 return NULL;
1738 return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset,
1739 c->c_arena);
1740 }
1741
1742 /*
1743 Count the number of 'for' loops in a comprehension.
1744
1745 Helper for ast_for_comprehension().
1746 */
1747
1748 static int
count_comp_fors(struct compiling * c,const node * n)1749 count_comp_fors(struct compiling *c, const node *n)
1750 {
1751 int n_fors = 0;
1752
1753 count_comp_for:
1754 n_fors++;
1755 REQ(n, comp_for);
1756 if (NCH(n) == 2) {
1757 REQ(CHILD(n, 0), NAME);
1758 assert(strcmp(STR(CHILD(n, 0)), "async") == 0);
1759 n = CHILD(n, 1);
1760 }
1761 else if (NCH(n) == 1) {
1762 n = CHILD(n, 0);
1763 }
1764 else {
1765 goto error;
1766 }
1767 if (NCH(n) == (5)) {
1768 n = CHILD(n, 4);
1769 }
1770 else {
1771 return n_fors;
1772 }
1773 count_comp_iter:
1774 REQ(n, comp_iter);
1775 n = CHILD(n, 0);
1776 if (TYPE(n) == comp_for)
1777 goto count_comp_for;
1778 else if (TYPE(n) == comp_if) {
1779 if (NCH(n) == 3) {
1780 n = CHILD(n, 2);
1781 goto count_comp_iter;
1782 }
1783 else
1784 return n_fors;
1785 }
1786
1787 error:
1788 /* Should never be reached */
1789 PyErr_SetString(PyExc_SystemError,
1790 "logic error in count_comp_fors");
1791 return -1;
1792 }
1793
1794 /* Count the number of 'if' statements in a comprehension.
1795
1796 Helper for ast_for_comprehension().
1797 */
1798
1799 static int
count_comp_ifs(struct compiling * c,const node * n)1800 count_comp_ifs(struct compiling *c, const node *n)
1801 {
1802 int n_ifs = 0;
1803
1804 while (1) {
1805 REQ(n, comp_iter);
1806 if (TYPE(CHILD(n, 0)) == comp_for)
1807 return n_ifs;
1808 n = CHILD(n, 0);
1809 REQ(n, comp_if);
1810 n_ifs++;
1811 if (NCH(n) == 2)
1812 return n_ifs;
1813 n = CHILD(n, 2);
1814 }
1815 }
1816
1817 static asdl_seq *
ast_for_comprehension(struct compiling * c,const node * n)1818 ast_for_comprehension(struct compiling *c, const node *n)
1819 {
1820 int i, n_fors;
1821 asdl_seq *comps;
1822
1823 n_fors = count_comp_fors(c, n);
1824 if (n_fors == -1)
1825 return NULL;
1826
1827 comps = _Py_asdl_seq_new(n_fors, c->c_arena);
1828 if (!comps)
1829 return NULL;
1830
1831 for (i = 0; i < n_fors; i++) {
1832 comprehension_ty comp;
1833 asdl_seq *t;
1834 expr_ty expression, first;
1835 node *for_ch;
1836 node *sync_n;
1837 int is_async = 0;
1838
1839 REQ(n, comp_for);
1840
1841 if (NCH(n) == 2) {
1842 is_async = 1;
1843 REQ(CHILD(n, 0), NAME);
1844 assert(strcmp(STR(CHILD(n, 0)), "async") == 0);
1845 sync_n = CHILD(n, 1);
1846 }
1847 else {
1848 sync_n = CHILD(n, 0);
1849 }
1850 REQ(sync_n, sync_comp_for);
1851
1852 for_ch = CHILD(sync_n, 1);
1853 t = ast_for_exprlist(c, for_ch, Store);
1854 if (!t)
1855 return NULL;
1856 expression = ast_for_expr(c, CHILD(sync_n, 3));
1857 if (!expression)
1858 return NULL;
1859
1860 /* Check the # of children rather than the length of t, since
1861 (x for x, in ...) has 1 element in t, but still requires a Tuple. */
1862 first = (expr_ty)asdl_seq_GET(t, 0);
1863 if (NCH(for_ch) == 1)
1864 comp = comprehension(first, expression, NULL,
1865 is_async, c->c_arena);
1866 else
1867 comp = comprehension(Tuple(t, Store, first->lineno,
1868 first->col_offset, c->c_arena),
1869 expression, NULL, is_async, c->c_arena);
1870 if (!comp)
1871 return NULL;
1872
1873 if (NCH(sync_n) == 5) {
1874 int j, n_ifs;
1875 asdl_seq *ifs;
1876
1877 n = CHILD(sync_n, 4);
1878 n_ifs = count_comp_ifs(c, n);
1879 if (n_ifs == -1)
1880 return NULL;
1881
1882 ifs = _Py_asdl_seq_new(n_ifs, c->c_arena);
1883 if (!ifs)
1884 return NULL;
1885
1886 for (j = 0; j < n_ifs; j++) {
1887 REQ(n, comp_iter);
1888 n = CHILD(n, 0);
1889 REQ(n, comp_if);
1890
1891 expression = ast_for_expr(c, CHILD(n, 1));
1892 if (!expression)
1893 return NULL;
1894 asdl_seq_SET(ifs, j, expression);
1895 if (NCH(n) == 3)
1896 n = CHILD(n, 2);
1897 }
1898 /* on exit, must guarantee that n is a comp_for */
1899 if (TYPE(n) == comp_iter)
1900 n = CHILD(n, 0);
1901 comp->ifs = ifs;
1902 }
1903 asdl_seq_SET(comps, i, comp);
1904 }
1905 return comps;
1906 }
1907
1908 static expr_ty
ast_for_itercomp(struct compiling * c,const node * n,int type)1909 ast_for_itercomp(struct compiling *c, const node *n, int type)
1910 {
1911 /* testlist_comp: (test|star_expr)
1912 * ( comp_for | (',' (test|star_expr))* [','] ) */
1913 expr_ty elt;
1914 asdl_seq *comps;
1915 node *ch;
1916
1917 assert(NCH(n) > 1);
1918
1919 ch = CHILD(n, 0);
1920 elt = ast_for_expr(c, ch);
1921 if (!elt)
1922 return NULL;
1923 if (elt->kind == Starred_kind) {
1924 ast_error(c, ch, "iterable unpacking cannot be used in comprehension");
1925 return NULL;
1926 }
1927
1928 comps = ast_for_comprehension(c, CHILD(n, 1));
1929 if (!comps)
1930 return NULL;
1931
1932 if (type == COMP_GENEXP)
1933 return GeneratorExp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1934 else if (type == COMP_LISTCOMP)
1935 return ListComp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1936 else if (type == COMP_SETCOMP)
1937 return SetComp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1938 else
1939 /* Should never happen */
1940 return NULL;
1941 }
1942
1943 /* Fills in the key, value pair corresponding to the dict element. In case
1944 * of an unpacking, key is NULL. *i is advanced by the number of ast
1945 * elements. Iff successful, nonzero is returned.
1946 */
1947 static int
ast_for_dictelement(struct compiling * c,const node * n,int * i,expr_ty * key,expr_ty * value)1948 ast_for_dictelement(struct compiling *c, const node *n, int *i,
1949 expr_ty *key, expr_ty *value)
1950 {
1951 expr_ty expression;
1952 if (TYPE(CHILD(n, *i)) == DOUBLESTAR) {
1953 assert(NCH(n) - *i >= 2);
1954
1955 expression = ast_for_expr(c, CHILD(n, *i + 1));
1956 if (!expression)
1957 return 0;
1958 *key = NULL;
1959 *value = expression;
1960
1961 *i += 2;
1962 }
1963 else {
1964 assert(NCH(n) - *i >= 3);
1965
1966 expression = ast_for_expr(c, CHILD(n, *i));
1967 if (!expression)
1968 return 0;
1969 *key = expression;
1970
1971 REQ(CHILD(n, *i + 1), COLON);
1972
1973 expression = ast_for_expr(c, CHILD(n, *i + 2));
1974 if (!expression)
1975 return 0;
1976 *value = expression;
1977
1978 *i += 3;
1979 }
1980 return 1;
1981 }
1982
1983 static expr_ty
ast_for_dictcomp(struct compiling * c,const node * n)1984 ast_for_dictcomp(struct compiling *c, const node *n)
1985 {
1986 expr_ty key, value;
1987 asdl_seq *comps;
1988 int i = 0;
1989
1990 if (!ast_for_dictelement(c, n, &i, &key, &value))
1991 return NULL;
1992 assert(key);
1993 assert(NCH(n) - i >= 1);
1994
1995 comps = ast_for_comprehension(c, CHILD(n, i));
1996 if (!comps)
1997 return NULL;
1998
1999 return DictComp(key, value, comps, LINENO(n), n->n_col_offset, c->c_arena);
2000 }
2001
2002 static expr_ty
ast_for_dictdisplay(struct compiling * c,const node * n)2003 ast_for_dictdisplay(struct compiling *c, const node *n)
2004 {
2005 int i;
2006 int j;
2007 int size;
2008 asdl_seq *keys, *values;
2009
2010 size = (NCH(n) + 1) / 3; /* +1 in case no trailing comma */
2011 keys = _Py_asdl_seq_new(size, c->c_arena);
2012 if (!keys)
2013 return NULL;
2014
2015 values = _Py_asdl_seq_new(size, c->c_arena);
2016 if (!values)
2017 return NULL;
2018
2019 j = 0;
2020 for (i = 0; i < NCH(n); i++) {
2021 expr_ty key, value;
2022
2023 if (!ast_for_dictelement(c, n, &i, &key, &value))
2024 return NULL;
2025 asdl_seq_SET(keys, j, key);
2026 asdl_seq_SET(values, j, value);
2027
2028 j++;
2029 }
2030 keys->size = j;
2031 values->size = j;
2032 return Dict(keys, values, LINENO(n), n->n_col_offset, c->c_arena);
2033 }
2034
2035 static expr_ty
ast_for_genexp(struct compiling * c,const node * n)2036 ast_for_genexp(struct compiling *c, const node *n)
2037 {
2038 assert(TYPE(n) == (testlist_comp) || TYPE(n) == (argument));
2039 return ast_for_itercomp(c, n, COMP_GENEXP);
2040 }
2041
2042 static expr_ty
ast_for_listcomp(struct compiling * c,const node * n)2043 ast_for_listcomp(struct compiling *c, const node *n)
2044 {
2045 assert(TYPE(n) == (testlist_comp));
2046 return ast_for_itercomp(c, n, COMP_LISTCOMP);
2047 }
2048
2049 static expr_ty
ast_for_setcomp(struct compiling * c,const node * n)2050 ast_for_setcomp(struct compiling *c, const node *n)
2051 {
2052 assert(TYPE(n) == (dictorsetmaker));
2053 return ast_for_itercomp(c, n, COMP_SETCOMP);
2054 }
2055
2056 static expr_ty
ast_for_setdisplay(struct compiling * c,const node * n)2057 ast_for_setdisplay(struct compiling *c, const node *n)
2058 {
2059 int i;
2060 int size;
2061 asdl_seq *elts;
2062
2063 assert(TYPE(n) == (dictorsetmaker));
2064 size = (NCH(n) + 1) / 2; /* +1 in case no trailing comma */
2065 elts = _Py_asdl_seq_new(size, c->c_arena);
2066 if (!elts)
2067 return NULL;
2068 for (i = 0; i < NCH(n); i += 2) {
2069 expr_ty expression;
2070 expression = ast_for_expr(c, CHILD(n, i));
2071 if (!expression)
2072 return NULL;
2073 asdl_seq_SET(elts, i / 2, expression);
2074 }
2075 return Set(elts, LINENO(n), n->n_col_offset, c->c_arena);
2076 }
2077
2078 static expr_ty
ast_for_atom(struct compiling * c,const node * n)2079 ast_for_atom(struct compiling *c, const node *n)
2080 {
2081 /* atom: '(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']'
2082 | '{' [dictmaker|testlist_comp] '}' | NAME | NUMBER | STRING+
2083 | '...' | 'None' | 'True' | 'False'
2084 */
2085 node *ch = CHILD(n, 0);
2086
2087 switch (TYPE(ch)) {
2088 case NAME: {
2089 PyObject *name;
2090 const char *s = STR(ch);
2091 size_t len = strlen(s);
2092 if (len >= 4 && len <= 5) {
2093 if (!strcmp(s, "None"))
2094 return NameConstant(Py_None, LINENO(n), n->n_col_offset, c->c_arena);
2095 if (!strcmp(s, "True"))
2096 return NameConstant(Py_True, LINENO(n), n->n_col_offset, c->c_arena);
2097 if (!strcmp(s, "False"))
2098 return NameConstant(Py_False, LINENO(n), n->n_col_offset, c->c_arena);
2099 }
2100 name = new_identifier(s, c);
2101 if (!name)
2102 return NULL;
2103 /* All names start in Load context, but may later be changed. */
2104 return Name(name, Load, LINENO(n), n->n_col_offset, c->c_arena);
2105 }
2106 case STRING: {
2107 expr_ty str = parsestrplus(c, n);
2108 if (!str) {
2109 const char *errtype = NULL;
2110 if (PyErr_ExceptionMatches(PyExc_UnicodeError))
2111 errtype = "unicode error";
2112 else if (PyErr_ExceptionMatches(PyExc_ValueError))
2113 errtype = "value error";
2114 if (errtype) {
2115 char buf[128];
2116 const char *s = NULL;
2117 PyObject *type, *value, *tback, *errstr;
2118 PyErr_Fetch(&type, &value, &tback);
2119 errstr = PyObject_Str(value);
2120 if (errstr)
2121 s = PyUnicode_AsUTF8(errstr);
2122 if (s) {
2123 PyOS_snprintf(buf, sizeof(buf), "(%s) %s", errtype, s);
2124 } else {
2125 PyErr_Clear();
2126 PyOS_snprintf(buf, sizeof(buf), "(%s) unknown error", errtype);
2127 }
2128 Py_XDECREF(errstr);
2129 ast_error(c, n, buf);
2130 Py_DECREF(type);
2131 Py_XDECREF(value);
2132 Py_XDECREF(tback);
2133 }
2134 return NULL;
2135 }
2136 return str;
2137 }
2138 case NUMBER: {
2139 PyObject *pynum = parsenumber(c, STR(ch));
2140 if (!pynum)
2141 return NULL;
2142
2143 if (PyArena_AddPyObject(c->c_arena, pynum) < 0) {
2144 Py_DECREF(pynum);
2145 return NULL;
2146 }
2147 return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena);
2148 }
2149 case ELLIPSIS: /* Ellipsis */
2150 return Ellipsis(LINENO(n), n->n_col_offset, c->c_arena);
2151 case LPAR: /* some parenthesized expressions */
2152 ch = CHILD(n, 1);
2153
2154 if (TYPE(ch) == RPAR)
2155 return Tuple(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
2156
2157 if (TYPE(ch) == yield_expr)
2158 return ast_for_expr(c, ch);
2159
2160 /* testlist_comp: test ( comp_for | (',' test)* [','] ) */
2161 if ((NCH(ch) > 1) && (TYPE(CHILD(ch, 1)) == comp_for))
2162 return ast_for_genexp(c, ch);
2163
2164 return ast_for_testlist(c, ch);
2165 case LSQB: /* list (or list comprehension) */
2166 ch = CHILD(n, 1);
2167
2168 if (TYPE(ch) == RSQB)
2169 return List(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
2170
2171 REQ(ch, testlist_comp);
2172 if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
2173 asdl_seq *elts = seq_for_testlist(c, ch);
2174 if (!elts)
2175 return NULL;
2176
2177 return List(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
2178 }
2179 else
2180 return ast_for_listcomp(c, ch);
2181 case LBRACE: {
2182 /* dictorsetmaker: ( ((test ':' test | '**' test)
2183 * (comp_for | (',' (test ':' test | '**' test))* [','])) |
2184 * ((test | '*' test)
2185 * (comp_for | (',' (test | '*' test))* [','])) ) */
2186 expr_ty res;
2187 ch = CHILD(n, 1);
2188 if (TYPE(ch) == RBRACE) {
2189 /* It's an empty dict. */
2190 return Dict(NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena);
2191 }
2192 else {
2193 int is_dict = (TYPE(CHILD(ch, 0)) == DOUBLESTAR);
2194 if (NCH(ch) == 1 ||
2195 (NCH(ch) > 1 &&
2196 TYPE(CHILD(ch, 1)) == COMMA)) {
2197 /* It's a set display. */
2198 res = ast_for_setdisplay(c, ch);
2199 }
2200 else if (NCH(ch) > 1 &&
2201 TYPE(CHILD(ch, 1)) == comp_for) {
2202 /* It's a set comprehension. */
2203 res = ast_for_setcomp(c, ch);
2204 }
2205 else if (NCH(ch) > 3 - is_dict &&
2206 TYPE(CHILD(ch, 3 - is_dict)) == comp_for) {
2207 /* It's a dictionary comprehension. */
2208 if (is_dict) {
2209 ast_error(c, n, "dict unpacking cannot be used in "
2210 "dict comprehension");
2211 return NULL;
2212 }
2213 res = ast_for_dictcomp(c, ch);
2214 }
2215 else {
2216 /* It's a dictionary display. */
2217 res = ast_for_dictdisplay(c, ch);
2218 }
2219 if (res) {
2220 res->lineno = LINENO(n);
2221 res->col_offset = n->n_col_offset;
2222 }
2223 return res;
2224 }
2225 }
2226 default:
2227 PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch));
2228 return NULL;
2229 }
2230 }
2231
2232 static slice_ty
ast_for_slice(struct compiling * c,const node * n)2233 ast_for_slice(struct compiling *c, const node *n)
2234 {
2235 node *ch;
2236 expr_ty lower = NULL, upper = NULL, step = NULL;
2237
2238 REQ(n, subscript);
2239
2240 /*
2241 subscript: test | [test] ':' [test] [sliceop]
2242 sliceop: ':' [test]
2243 */
2244 ch = CHILD(n, 0);
2245 if (NCH(n) == 1 && TYPE(ch) == test) {
2246 /* 'step' variable hold no significance in terms of being used over
2247 other vars */
2248 step = ast_for_expr(c, ch);
2249 if (!step)
2250 return NULL;
2251
2252 return Index(step, c->c_arena);
2253 }
2254
2255 if (TYPE(ch) == test) {
2256 lower = ast_for_expr(c, ch);
2257 if (!lower)
2258 return NULL;
2259 }
2260
2261 /* If there's an upper bound it's in the second or third position. */
2262 if (TYPE(ch) == COLON) {
2263 if (NCH(n) > 1) {
2264 node *n2 = CHILD(n, 1);
2265
2266 if (TYPE(n2) == test) {
2267 upper = ast_for_expr(c, n2);
2268 if (!upper)
2269 return NULL;
2270 }
2271 }
2272 } else if (NCH(n) > 2) {
2273 node *n2 = CHILD(n, 2);
2274
2275 if (TYPE(n2) == test) {
2276 upper = ast_for_expr(c, n2);
2277 if (!upper)
2278 return NULL;
2279 }
2280 }
2281
2282 ch = CHILD(n, NCH(n) - 1);
2283 if (TYPE(ch) == sliceop) {
2284 if (NCH(ch) != 1) {
2285 ch = CHILD(ch, 1);
2286 if (TYPE(ch) == test) {
2287 step = ast_for_expr(c, ch);
2288 if (!step)
2289 return NULL;
2290 }
2291 }
2292 }
2293
2294 return Slice(lower, upper, step, c->c_arena);
2295 }
2296
2297 static expr_ty
ast_for_binop(struct compiling * c,const node * n)2298 ast_for_binop(struct compiling *c, const node *n)
2299 {
2300 /* Must account for a sequence of expressions.
2301 How should A op B op C by represented?
2302 BinOp(BinOp(A, op, B), op, C).
2303 */
2304
2305 int i, nops;
2306 expr_ty expr1, expr2, result;
2307 operator_ty newoperator;
2308
2309 expr1 = ast_for_expr(c, CHILD(n, 0));
2310 if (!expr1)
2311 return NULL;
2312
2313 expr2 = ast_for_expr(c, CHILD(n, 2));
2314 if (!expr2)
2315 return NULL;
2316
2317 newoperator = get_operator(CHILD(n, 1));
2318 if (!newoperator)
2319 return NULL;
2320
2321 result = BinOp(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
2322 c->c_arena);
2323 if (!result)
2324 return NULL;
2325
2326 nops = (NCH(n) - 1) / 2;
2327 for (i = 1; i < nops; i++) {
2328 expr_ty tmp_result, tmp;
2329 const node* next_oper = CHILD(n, i * 2 + 1);
2330
2331 newoperator = get_operator(next_oper);
2332 if (!newoperator)
2333 return NULL;
2334
2335 tmp = ast_for_expr(c, CHILD(n, i * 2 + 2));
2336 if (!tmp)
2337 return NULL;
2338
2339 tmp_result = BinOp(result, newoperator, tmp,
2340 LINENO(next_oper), next_oper->n_col_offset,
2341 c->c_arena);
2342 if (!tmp_result)
2343 return NULL;
2344 result = tmp_result;
2345 }
2346 return result;
2347 }
2348
2349 static expr_ty
ast_for_trailer(struct compiling * c,const node * n,expr_ty left_expr)2350 ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr)
2351 {
2352 /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
2353 subscriptlist: subscript (',' subscript)* [',']
2354 subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
2355 */
2356 REQ(n, trailer);
2357 if (TYPE(CHILD(n, 0)) == LPAR) {
2358 if (NCH(n) == 2)
2359 return Call(left_expr, NULL, NULL, LINENO(n),
2360 n->n_col_offset, c->c_arena);
2361 else
2362 return ast_for_call(c, CHILD(n, 1), left_expr, true);
2363 }
2364 else if (TYPE(CHILD(n, 0)) == DOT) {
2365 PyObject *attr_id = NEW_IDENTIFIER(CHILD(n, 1));
2366 if (!attr_id)
2367 return NULL;
2368 return Attribute(left_expr, attr_id, Load,
2369 LINENO(n), n->n_col_offset, c->c_arena);
2370 }
2371 else {
2372 REQ(CHILD(n, 0), LSQB);
2373 REQ(CHILD(n, 2), RSQB);
2374 n = CHILD(n, 1);
2375 if (NCH(n) == 1) {
2376 slice_ty slc = ast_for_slice(c, CHILD(n, 0));
2377 if (!slc)
2378 return NULL;
2379 return Subscript(left_expr, slc, Load, LINENO(n), n->n_col_offset,
2380 c->c_arena);
2381 }
2382 else {
2383 /* The grammar is ambiguous here. The ambiguity is resolved
2384 by treating the sequence as a tuple literal if there are
2385 no slice features.
2386 */
2387 int j;
2388 slice_ty slc;
2389 expr_ty e;
2390 int simple = 1;
2391 asdl_seq *slices, *elts;
2392 slices = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2393 if (!slices)
2394 return NULL;
2395 for (j = 0; j < NCH(n); j += 2) {
2396 slc = ast_for_slice(c, CHILD(n, j));
2397 if (!slc)
2398 return NULL;
2399 if (slc->kind != Index_kind)
2400 simple = 0;
2401 asdl_seq_SET(slices, j / 2, slc);
2402 }
2403 if (!simple) {
2404 return Subscript(left_expr, ExtSlice(slices, c->c_arena),
2405 Load, LINENO(n), n->n_col_offset, c->c_arena);
2406 }
2407 /* extract Index values and put them in a Tuple */
2408 elts = _Py_asdl_seq_new(asdl_seq_LEN(slices), c->c_arena);
2409 if (!elts)
2410 return NULL;
2411 for (j = 0; j < asdl_seq_LEN(slices); ++j) {
2412 slc = (slice_ty)asdl_seq_GET(slices, j);
2413 assert(slc->kind == Index_kind && slc->v.Index.value);
2414 asdl_seq_SET(elts, j, slc->v.Index.value);
2415 }
2416 e = Tuple(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
2417 if (!e)
2418 return NULL;
2419 return Subscript(left_expr, Index(e, c->c_arena),
2420 Load, LINENO(n), n->n_col_offset, c->c_arena);
2421 }
2422 }
2423 }
2424
2425 static expr_ty
ast_for_factor(struct compiling * c,const node * n)2426 ast_for_factor(struct compiling *c, const node *n)
2427 {
2428 expr_ty expression;
2429
2430 expression = ast_for_expr(c, CHILD(n, 1));
2431 if (!expression)
2432 return NULL;
2433
2434 switch (TYPE(CHILD(n, 0))) {
2435 case PLUS:
2436 return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset,
2437 c->c_arena);
2438 case MINUS:
2439 return UnaryOp(USub, expression, LINENO(n), n->n_col_offset,
2440 c->c_arena);
2441 case TILDE:
2442 return UnaryOp(Invert, expression, LINENO(n),
2443 n->n_col_offset, c->c_arena);
2444 }
2445 PyErr_Format(PyExc_SystemError, "unhandled factor: %d",
2446 TYPE(CHILD(n, 0)));
2447 return NULL;
2448 }
2449
2450 static expr_ty
ast_for_atom_expr(struct compiling * c,const node * n)2451 ast_for_atom_expr(struct compiling *c, const node *n)
2452 {
2453 int i, nch, start = 0;
2454 expr_ty e, tmp;
2455
2456 REQ(n, atom_expr);
2457 nch = NCH(n);
2458
2459 if (TYPE(CHILD(n, 0)) == NAME && strcmp(STR(CHILD(n, 0)), "await") == 0) {
2460 start = 1;
2461 assert(nch > 1);
2462 }
2463
2464 e = ast_for_atom(c, CHILD(n, start));
2465 if (!e)
2466 return NULL;
2467 if (nch == 1)
2468 return e;
2469 if (start && nch == 2) {
2470 return Await(e, LINENO(n), n->n_col_offset, c->c_arena);
2471 }
2472
2473 for (i = start + 1; i < nch; i++) {
2474 node *ch = CHILD(n, i);
2475 if (TYPE(ch) != trailer)
2476 break;
2477 tmp = ast_for_trailer(c, ch, e);
2478 if (!tmp)
2479 return NULL;
2480 tmp->lineno = e->lineno;
2481 tmp->col_offset = e->col_offset;
2482 e = tmp;
2483 }
2484
2485 if (start) {
2486 /* there was an 'await' */
2487 return Await(e, LINENO(n), n->n_col_offset, c->c_arena);
2488 }
2489 else {
2490 return e;
2491 }
2492 }
2493
2494 static expr_ty
ast_for_power(struct compiling * c,const node * n)2495 ast_for_power(struct compiling *c, const node *n)
2496 {
2497 /* power: atom trailer* ('**' factor)*
2498 */
2499 expr_ty e;
2500 REQ(n, power);
2501 e = ast_for_atom_expr(c, CHILD(n, 0));
2502 if (!e)
2503 return NULL;
2504 if (NCH(n) == 1)
2505 return e;
2506 if (TYPE(CHILD(n, NCH(n) - 1)) == factor) {
2507 expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1));
2508 if (!f)
2509 return NULL;
2510 e = BinOp(e, Pow, f, LINENO(n), n->n_col_offset, c->c_arena);
2511 }
2512 return e;
2513 }
2514
2515 static expr_ty
ast_for_starred(struct compiling * c,const node * n)2516 ast_for_starred(struct compiling *c, const node *n)
2517 {
2518 expr_ty tmp;
2519 REQ(n, star_expr);
2520
2521 tmp = ast_for_expr(c, CHILD(n, 1));
2522 if (!tmp)
2523 return NULL;
2524
2525 /* The Load context is changed later. */
2526 return Starred(tmp, Load, LINENO(n), n->n_col_offset, c->c_arena);
2527 }
2528
2529
2530 /* Do not name a variable 'expr'! Will cause a compile error.
2531 */
2532
2533 static expr_ty
ast_for_expr(struct compiling * c,const node * n)2534 ast_for_expr(struct compiling *c, const node *n)
2535 {
2536 /* handle the full range of simple expressions
2537 test: or_test ['if' or_test 'else' test] | lambdef
2538 test_nocond: or_test | lambdef_nocond
2539 or_test: and_test ('or' and_test)*
2540 and_test: not_test ('and' not_test)*
2541 not_test: 'not' not_test | comparison
2542 comparison: expr (comp_op expr)*
2543 expr: xor_expr ('|' xor_expr)*
2544 xor_expr: and_expr ('^' and_expr)*
2545 and_expr: shift_expr ('&' shift_expr)*
2546 shift_expr: arith_expr (('<<'|'>>') arith_expr)*
2547 arith_expr: term (('+'|'-') term)*
2548 term: factor (('*'|'@'|'/'|'%'|'//') factor)*
2549 factor: ('+'|'-'|'~') factor | power
2550 power: atom_expr ['**' factor]
2551 atom_expr: ['await'] atom trailer*
2552 yield_expr: 'yield' [yield_arg]
2553 */
2554
2555 asdl_seq *seq;
2556 int i;
2557
2558 loop:
2559 switch (TYPE(n)) {
2560 case test:
2561 case test_nocond:
2562 if (TYPE(CHILD(n, 0)) == lambdef ||
2563 TYPE(CHILD(n, 0)) == lambdef_nocond)
2564 return ast_for_lambdef(c, CHILD(n, 0));
2565 else if (NCH(n) > 1)
2566 return ast_for_ifexpr(c, n);
2567 /* Fallthrough */
2568 case or_test:
2569 case and_test:
2570 if (NCH(n) == 1) {
2571 n = CHILD(n, 0);
2572 goto loop;
2573 }
2574 seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2575 if (!seq)
2576 return NULL;
2577 for (i = 0; i < NCH(n); i += 2) {
2578 expr_ty e = ast_for_expr(c, CHILD(n, i));
2579 if (!e)
2580 return NULL;
2581 asdl_seq_SET(seq, i / 2, e);
2582 }
2583 if (!strcmp(STR(CHILD(n, 1)), "and"))
2584 return BoolOp(And, seq, LINENO(n), n->n_col_offset,
2585 c->c_arena);
2586 assert(!strcmp(STR(CHILD(n, 1)), "or"));
2587 return BoolOp(Or, seq, LINENO(n), n->n_col_offset, c->c_arena);
2588 case not_test:
2589 if (NCH(n) == 1) {
2590 n = CHILD(n, 0);
2591 goto loop;
2592 }
2593 else {
2594 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
2595 if (!expression)
2596 return NULL;
2597
2598 return UnaryOp(Not, expression, LINENO(n), n->n_col_offset,
2599 c->c_arena);
2600 }
2601 case comparison:
2602 if (NCH(n) == 1) {
2603 n = CHILD(n, 0);
2604 goto loop;
2605 }
2606 else {
2607 expr_ty expression;
2608 asdl_int_seq *ops;
2609 asdl_seq *cmps;
2610 ops = _Py_asdl_int_seq_new(NCH(n) / 2, c->c_arena);
2611 if (!ops)
2612 return NULL;
2613 cmps = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
2614 if (!cmps) {
2615 return NULL;
2616 }
2617 for (i = 1; i < NCH(n); i += 2) {
2618 cmpop_ty newoperator;
2619
2620 newoperator = ast_for_comp_op(c, CHILD(n, i));
2621 if (!newoperator) {
2622 return NULL;
2623 }
2624
2625 expression = ast_for_expr(c, CHILD(n, i + 1));
2626 if (!expression) {
2627 return NULL;
2628 }
2629
2630 asdl_seq_SET(ops, i / 2, newoperator);
2631 asdl_seq_SET(cmps, i / 2, expression);
2632 }
2633 expression = ast_for_expr(c, CHILD(n, 0));
2634 if (!expression) {
2635 return NULL;
2636 }
2637
2638 return Compare(expression, ops, cmps, LINENO(n),
2639 n->n_col_offset, c->c_arena);
2640 }
2641 break;
2642
2643 case star_expr:
2644 return ast_for_starred(c, n);
2645 /* The next five cases all handle BinOps. The main body of code
2646 is the same in each case, but the switch turned inside out to
2647 reuse the code for each type of operator.
2648 */
2649 case expr:
2650 case xor_expr:
2651 case and_expr:
2652 case shift_expr:
2653 case arith_expr:
2654 case term:
2655 if (NCH(n) == 1) {
2656 n = CHILD(n, 0);
2657 goto loop;
2658 }
2659 return ast_for_binop(c, n);
2660 case yield_expr: {
2661 node *an = NULL;
2662 node *en = NULL;
2663 int is_from = 0;
2664 expr_ty exp = NULL;
2665 if (NCH(n) > 1)
2666 an = CHILD(n, 1); /* yield_arg */
2667 if (an) {
2668 en = CHILD(an, NCH(an) - 1);
2669 if (NCH(an) == 2) {
2670 is_from = 1;
2671 exp = ast_for_expr(c, en);
2672 }
2673 else
2674 exp = ast_for_testlist(c, en);
2675 if (!exp)
2676 return NULL;
2677 }
2678 if (is_from)
2679 return YieldFrom(exp, LINENO(n), n->n_col_offset, c->c_arena);
2680 return Yield(exp, LINENO(n), n->n_col_offset, c->c_arena);
2681 }
2682 case factor:
2683 if (NCH(n) == 1) {
2684 n = CHILD(n, 0);
2685 goto loop;
2686 }
2687 return ast_for_factor(c, n);
2688 case power:
2689 return ast_for_power(c, n);
2690 default:
2691 PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n));
2692 return NULL;
2693 }
2694 /* should never get here unless if error is set */
2695 return NULL;
2696 }
2697
2698 static expr_ty
ast_for_call(struct compiling * c,const node * n,expr_ty func,bool allowgen)2699 ast_for_call(struct compiling *c, const node *n, expr_ty func, bool allowgen)
2700 {
2701 /*
2702 arglist: argument (',' argument)* [',']
2703 argument: ( test [comp_for] | '*' test | test '=' test | '**' test )
2704 */
2705
2706 int i, nargs, nkeywords;
2707 int ndoublestars;
2708 asdl_seq *args;
2709 asdl_seq *keywords;
2710
2711 REQ(n, arglist);
2712
2713 nargs = 0;
2714 nkeywords = 0;
2715 for (i = 0; i < NCH(n); i++) {
2716 node *ch = CHILD(n, i);
2717 if (TYPE(ch) == argument) {
2718 if (NCH(ch) == 1)
2719 nargs++;
2720 else if (TYPE(CHILD(ch, 1)) == comp_for) {
2721 nargs++;
2722 if (!allowgen) {
2723 ast_error(c, ch, "invalid syntax");
2724 return NULL;
2725 }
2726 if (NCH(n) > 1) {
2727 ast_error(c, ch, "Generator expression must be parenthesized");
2728 return NULL;
2729 }
2730 }
2731 else if (TYPE(CHILD(ch, 0)) == STAR)
2732 nargs++;
2733 else
2734 /* TYPE(CHILD(ch, 0)) == DOUBLESTAR or keyword argument */
2735 nkeywords++;
2736 }
2737 }
2738
2739 args = _Py_asdl_seq_new(nargs, c->c_arena);
2740 if (!args)
2741 return NULL;
2742 keywords = _Py_asdl_seq_new(nkeywords, c->c_arena);
2743 if (!keywords)
2744 return NULL;
2745
2746 nargs = 0; /* positional arguments + iterable argument unpackings */
2747 nkeywords = 0; /* keyword arguments + keyword argument unpackings */
2748 ndoublestars = 0; /* just keyword argument unpackings */
2749 for (i = 0; i < NCH(n); i++) {
2750 node *ch = CHILD(n, i);
2751 if (TYPE(ch) == argument) {
2752 expr_ty e;
2753 node *chch = CHILD(ch, 0);
2754 if (NCH(ch) == 1) {
2755 /* a positional argument */
2756 if (nkeywords) {
2757 if (ndoublestars) {
2758 ast_error(c, chch,
2759 "positional argument follows "
2760 "keyword argument unpacking");
2761 }
2762 else {
2763 ast_error(c, chch,
2764 "positional argument follows "
2765 "keyword argument");
2766 }
2767 return NULL;
2768 }
2769 e = ast_for_expr(c, chch);
2770 if (!e)
2771 return NULL;
2772 asdl_seq_SET(args, nargs++, e);
2773 }
2774 else if (TYPE(chch) == STAR) {
2775 /* an iterable argument unpacking */
2776 expr_ty starred;
2777 if (ndoublestars) {
2778 ast_error(c, chch,
2779 "iterable argument unpacking follows "
2780 "keyword argument unpacking");
2781 return NULL;
2782 }
2783 e = ast_for_expr(c, CHILD(ch, 1));
2784 if (!e)
2785 return NULL;
2786 starred = Starred(e, Load, LINENO(chch),
2787 chch->n_col_offset,
2788 c->c_arena);
2789 if (!starred)
2790 return NULL;
2791 asdl_seq_SET(args, nargs++, starred);
2792
2793 }
2794 else if (TYPE(chch) == DOUBLESTAR) {
2795 /* a keyword argument unpacking */
2796 keyword_ty kw;
2797 i++;
2798 e = ast_for_expr(c, CHILD(ch, 1));
2799 if (!e)
2800 return NULL;
2801 kw = keyword(NULL, e, c->c_arena);
2802 asdl_seq_SET(keywords, nkeywords++, kw);
2803 ndoublestars++;
2804 }
2805 else if (TYPE(CHILD(ch, 1)) == comp_for) {
2806 /* the lone generator expression */
2807 e = ast_for_genexp(c, ch);
2808 if (!e)
2809 return NULL;
2810 asdl_seq_SET(args, nargs++, e);
2811 }
2812 else {
2813 /* a keyword argument */
2814 keyword_ty kw;
2815 identifier key, tmp;
2816 int k;
2817
2818 /* chch is test, but must be an identifier? */
2819 e = ast_for_expr(c, chch);
2820 if (!e)
2821 return NULL;
2822 /* f(lambda x: x[0] = 3) ends up getting parsed with
2823 * LHS test = lambda x: x[0], and RHS test = 3.
2824 * SF bug 132313 points out that complaining about a keyword
2825 * then is very confusing.
2826 */
2827 if (e->kind == Lambda_kind) {
2828 ast_error(c, chch,
2829 "lambda cannot contain assignment");
2830 return NULL;
2831 }
2832 else if (e->kind != Name_kind) {
2833 ast_error(c, chch,
2834 "keyword can't be an expression");
2835 return NULL;
2836 }
2837 else if (forbidden_name(c, e->v.Name.id, ch, 1)) {
2838 return NULL;
2839 }
2840 key = e->v.Name.id;
2841 for (k = 0; k < nkeywords; k++) {
2842 tmp = ((keyword_ty)asdl_seq_GET(keywords, k))->arg;
2843 if (tmp && !PyUnicode_Compare(tmp, key)) {
2844 ast_error(c, chch,
2845 "keyword argument repeated");
2846 return NULL;
2847 }
2848 }
2849 e = ast_for_expr(c, CHILD(ch, 2));
2850 if (!e)
2851 return NULL;
2852 kw = keyword(key, e, c->c_arena);
2853 if (!kw)
2854 return NULL;
2855 asdl_seq_SET(keywords, nkeywords++, kw);
2856 }
2857 }
2858 }
2859
2860 return Call(func, args, keywords, func->lineno, func->col_offset, c->c_arena);
2861 }
2862
2863 static expr_ty
ast_for_testlist(struct compiling * c,const node * n)2864 ast_for_testlist(struct compiling *c, const node* n)
2865 {
2866 /* testlist_comp: test (comp_for | (',' test)* [',']) */
2867 /* testlist: test (',' test)* [','] */
2868 assert(NCH(n) > 0);
2869 if (TYPE(n) == testlist_comp) {
2870 if (NCH(n) > 1)
2871 assert(TYPE(CHILD(n, 1)) != comp_for);
2872 }
2873 else {
2874 assert(TYPE(n) == testlist ||
2875 TYPE(n) == testlist_star_expr);
2876 }
2877 if (NCH(n) == 1)
2878 return ast_for_expr(c, CHILD(n, 0));
2879 else {
2880 asdl_seq *tmp = seq_for_testlist(c, n);
2881 if (!tmp)
2882 return NULL;
2883 return Tuple(tmp, Load, LINENO(n), n->n_col_offset, c->c_arena);
2884 }
2885 }
2886
2887 static stmt_ty
ast_for_expr_stmt(struct compiling * c,const node * n)2888 ast_for_expr_stmt(struct compiling *c, const node *n)
2889 {
2890 REQ(n, expr_stmt);
2891 /* expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
2892 ('=' (yield_expr|testlist_star_expr))*)
2893 annassign: ':' test ['=' test]
2894 testlist_star_expr: (test|star_expr) (',' test|star_expr)* [',']
2895 augassign: '+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^='
2896 | '<<=' | '>>=' | '**=' | '//='
2897 test: ... here starts the operator precedence dance
2898 */
2899
2900 if (NCH(n) == 1) {
2901 expr_ty e = ast_for_testlist(c, CHILD(n, 0));
2902 if (!e)
2903 return NULL;
2904
2905 return Expr(e, LINENO(n), n->n_col_offset, c->c_arena);
2906 }
2907 else if (TYPE(CHILD(n, 1)) == augassign) {
2908 expr_ty expr1, expr2;
2909 operator_ty newoperator;
2910 node *ch = CHILD(n, 0);
2911
2912 expr1 = ast_for_testlist(c, ch);
2913 if (!expr1)
2914 return NULL;
2915 if(!set_context(c, expr1, Store, ch))
2916 return NULL;
2917 /* set_context checks that most expressions are not the left side.
2918 Augmented assignments can only have a name, a subscript, or an
2919 attribute on the left, though, so we have to explicitly check for
2920 those. */
2921 switch (expr1->kind) {
2922 case Name_kind:
2923 case Attribute_kind:
2924 case Subscript_kind:
2925 break;
2926 default:
2927 ast_error(c, ch, "illegal expression for augmented assignment");
2928 return NULL;
2929 }
2930
2931 ch = CHILD(n, 2);
2932 if (TYPE(ch) == testlist)
2933 expr2 = ast_for_testlist(c, ch);
2934 else
2935 expr2 = ast_for_expr(c, ch);
2936 if (!expr2)
2937 return NULL;
2938
2939 newoperator = ast_for_augassign(c, CHILD(n, 1));
2940 if (!newoperator)
2941 return NULL;
2942
2943 return AugAssign(expr1, newoperator, expr2, LINENO(n), n->n_col_offset, c->c_arena);
2944 }
2945 else if (TYPE(CHILD(n, 1)) == annassign) {
2946 expr_ty expr1, expr2, expr3;
2947 node *ch = CHILD(n, 0);
2948 node *deep, *ann = CHILD(n, 1);
2949 int simple = 1;
2950
2951 /* we keep track of parens to qualify (x) as expression not name */
2952 deep = ch;
2953 while (NCH(deep) == 1) {
2954 deep = CHILD(deep, 0);
2955 }
2956 if (NCH(deep) > 0 && TYPE(CHILD(deep, 0)) == LPAR) {
2957 simple = 0;
2958 }
2959 expr1 = ast_for_testlist(c, ch);
2960 if (!expr1) {
2961 return NULL;
2962 }
2963 switch (expr1->kind) {
2964 case Name_kind:
2965 if (forbidden_name(c, expr1->v.Name.id, n, 0)) {
2966 return NULL;
2967 }
2968 expr1->v.Name.ctx = Store;
2969 break;
2970 case Attribute_kind:
2971 if (forbidden_name(c, expr1->v.Attribute.attr, n, 1)) {
2972 return NULL;
2973 }
2974 expr1->v.Attribute.ctx = Store;
2975 break;
2976 case Subscript_kind:
2977 expr1->v.Subscript.ctx = Store;
2978 break;
2979 case List_kind:
2980 ast_error(c, ch,
2981 "only single target (not list) can be annotated");
2982 return NULL;
2983 case Tuple_kind:
2984 ast_error(c, ch,
2985 "only single target (not tuple) can be annotated");
2986 return NULL;
2987 default:
2988 ast_error(c, ch,
2989 "illegal target for annotation");
2990 return NULL;
2991 }
2992
2993 if (expr1->kind != Name_kind) {
2994 simple = 0;
2995 }
2996 ch = CHILD(ann, 1);
2997 expr2 = ast_for_expr(c, ch);
2998 if (!expr2) {
2999 return NULL;
3000 }
3001 if (NCH(ann) == 2) {
3002 return AnnAssign(expr1, expr2, NULL, simple,
3003 LINENO(n), n->n_col_offset, c->c_arena);
3004 }
3005 else {
3006 ch = CHILD(ann, 3);
3007 expr3 = ast_for_expr(c, ch);
3008 if (!expr3) {
3009 return NULL;
3010 }
3011 return AnnAssign(expr1, expr2, expr3, simple,
3012 LINENO(n), n->n_col_offset, c->c_arena);
3013 }
3014 }
3015 else {
3016 int i;
3017 asdl_seq *targets;
3018 node *value;
3019 expr_ty expression;
3020
3021 /* a normal assignment */
3022 REQ(CHILD(n, 1), EQUAL);
3023 targets = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
3024 if (!targets)
3025 return NULL;
3026 for (i = 0; i < NCH(n) - 2; i += 2) {
3027 expr_ty e;
3028 node *ch = CHILD(n, i);
3029 if (TYPE(ch) == yield_expr) {
3030 ast_error(c, ch, "assignment to yield expression not possible");
3031 return NULL;
3032 }
3033 e = ast_for_testlist(c, ch);
3034 if (!e)
3035 return NULL;
3036
3037 /* set context to assign */
3038 if (!set_context(c, e, Store, CHILD(n, i)))
3039 return NULL;
3040
3041 asdl_seq_SET(targets, i / 2, e);
3042 }
3043 value = CHILD(n, NCH(n) - 1);
3044 if (TYPE(value) == testlist_star_expr)
3045 expression = ast_for_testlist(c, value);
3046 else
3047 expression = ast_for_expr(c, value);
3048 if (!expression)
3049 return NULL;
3050 return Assign(targets, expression, LINENO(n), n->n_col_offset, c->c_arena);
3051 }
3052 }
3053
3054
3055 static asdl_seq *
ast_for_exprlist(struct compiling * c,const node * n,expr_context_ty context)3056 ast_for_exprlist(struct compiling *c, const node *n, expr_context_ty context)
3057 {
3058 asdl_seq *seq;
3059 int i;
3060 expr_ty e;
3061
3062 REQ(n, exprlist);
3063
3064 seq = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
3065 if (!seq)
3066 return NULL;
3067 for (i = 0; i < NCH(n); i += 2) {
3068 e = ast_for_expr(c, CHILD(n, i));
3069 if (!e)
3070 return NULL;
3071 asdl_seq_SET(seq, i / 2, e);
3072 if (context && !set_context(c, e, context, CHILD(n, i)))
3073 return NULL;
3074 }
3075 return seq;
3076 }
3077
3078 static stmt_ty
ast_for_del_stmt(struct compiling * c,const node * n)3079 ast_for_del_stmt(struct compiling *c, const node *n)
3080 {
3081 asdl_seq *expr_list;
3082
3083 /* del_stmt: 'del' exprlist */
3084 REQ(n, del_stmt);
3085
3086 expr_list = ast_for_exprlist(c, CHILD(n, 1), Del);
3087 if (!expr_list)
3088 return NULL;
3089 return Delete(expr_list, LINENO(n), n->n_col_offset, c->c_arena);
3090 }
3091
3092 static stmt_ty
ast_for_flow_stmt(struct compiling * c,const node * n)3093 ast_for_flow_stmt(struct compiling *c, const node *n)
3094 {
3095 /*
3096 flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt
3097 | yield_stmt
3098 break_stmt: 'break'
3099 continue_stmt: 'continue'
3100 return_stmt: 'return' [testlist]
3101 yield_stmt: yield_expr
3102 yield_expr: 'yield' testlist | 'yield' 'from' test
3103 raise_stmt: 'raise' [test [',' test [',' test]]]
3104 */
3105 node *ch;
3106
3107 REQ(n, flow_stmt);
3108 ch = CHILD(n, 0);
3109 switch (TYPE(ch)) {
3110 case break_stmt:
3111 return Break(LINENO(n), n->n_col_offset, c->c_arena);
3112 case continue_stmt:
3113 return Continue(LINENO(n), n->n_col_offset, c->c_arena);
3114 case yield_stmt: { /* will reduce to yield_expr */
3115 expr_ty exp = ast_for_expr(c, CHILD(ch, 0));
3116 if (!exp)
3117 return NULL;
3118 return Expr(exp, LINENO(n), n->n_col_offset, c->c_arena);
3119 }
3120 case return_stmt:
3121 if (NCH(ch) == 1)
3122 return Return(NULL, LINENO(n), n->n_col_offset, c->c_arena);
3123 else {
3124 expr_ty expression = ast_for_testlist(c, CHILD(ch, 1));
3125 if (!expression)
3126 return NULL;
3127 return Return(expression, LINENO(n), n->n_col_offset, c->c_arena);
3128 }
3129 case raise_stmt:
3130 if (NCH(ch) == 1)
3131 return Raise(NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena);
3132 else if (NCH(ch) >= 2) {
3133 expr_ty cause = NULL;
3134 expr_ty expression = ast_for_expr(c, CHILD(ch, 1));
3135 if (!expression)
3136 return NULL;
3137 if (NCH(ch) == 4) {
3138 cause = ast_for_expr(c, CHILD(ch, 3));
3139 if (!cause)
3140 return NULL;
3141 }
3142 return Raise(expression, cause, LINENO(n), n->n_col_offset, c->c_arena);
3143 }
3144 /* fall through */
3145 default:
3146 PyErr_Format(PyExc_SystemError,
3147 "unexpected flow_stmt: %d", TYPE(ch));
3148 return NULL;
3149 }
3150 }
3151
3152 static alias_ty
alias_for_import_name(struct compiling * c,const node * n,int store)3153 alias_for_import_name(struct compiling *c, const node *n, int store)
3154 {
3155 /*
3156 import_as_name: NAME ['as' NAME]
3157 dotted_as_name: dotted_name ['as' NAME]
3158 dotted_name: NAME ('.' NAME)*
3159 */
3160 identifier str, name;
3161
3162 loop:
3163 switch (TYPE(n)) {
3164 case import_as_name: {
3165 node *name_node = CHILD(n, 0);
3166 str = NULL;
3167 name = NEW_IDENTIFIER(name_node);
3168 if (!name)
3169 return NULL;
3170 if (NCH(n) == 3) {
3171 node *str_node = CHILD(n, 2);
3172 str = NEW_IDENTIFIER(str_node);
3173 if (!str)
3174 return NULL;
3175 if (store && forbidden_name(c, str, str_node, 0))
3176 return NULL;
3177 }
3178 else {
3179 if (forbidden_name(c, name, name_node, 0))
3180 return NULL;
3181 }
3182 return alias(name, str, c->c_arena);
3183 }
3184 case dotted_as_name:
3185 if (NCH(n) == 1) {
3186 n = CHILD(n, 0);
3187 goto loop;
3188 }
3189 else {
3190 node *asname_node = CHILD(n, 2);
3191 alias_ty a = alias_for_import_name(c, CHILD(n, 0), 0);
3192 if (!a)
3193 return NULL;
3194 assert(!a->asname);
3195 a->asname = NEW_IDENTIFIER(asname_node);
3196 if (!a->asname)
3197 return NULL;
3198 if (forbidden_name(c, a->asname, asname_node, 0))
3199 return NULL;
3200 return a;
3201 }
3202 break;
3203 case dotted_name:
3204 if (NCH(n) == 1) {
3205 node *name_node = CHILD(n, 0);
3206 name = NEW_IDENTIFIER(name_node);
3207 if (!name)
3208 return NULL;
3209 if (store && forbidden_name(c, name, name_node, 0))
3210 return NULL;
3211 return alias(name, NULL, c->c_arena);
3212 }
3213 else {
3214 /* Create a string of the form "a.b.c" */
3215 int i;
3216 size_t len;
3217 char *s;
3218 PyObject *uni;
3219
3220 len = 0;
3221 for (i = 0; i < NCH(n); i += 2)
3222 /* length of string plus one for the dot */
3223 len += strlen(STR(CHILD(n, i))) + 1;
3224 len--; /* the last name doesn't have a dot */
3225 str = PyBytes_FromStringAndSize(NULL, len);
3226 if (!str)
3227 return NULL;
3228 s = PyBytes_AS_STRING(str);
3229 if (!s)
3230 return NULL;
3231 for (i = 0; i < NCH(n); i += 2) {
3232 char *sch = STR(CHILD(n, i));
3233 strcpy(s, STR(CHILD(n, i)));
3234 s += strlen(sch);
3235 *s++ = '.';
3236 }
3237 --s;
3238 *s = '\0';
3239 uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str),
3240 PyBytes_GET_SIZE(str),
3241 NULL);
3242 Py_DECREF(str);
3243 if (!uni)
3244 return NULL;
3245 str = uni;
3246 PyUnicode_InternInPlace(&str);
3247 if (PyArena_AddPyObject(c->c_arena, str) < 0) {
3248 Py_DECREF(str);
3249 return NULL;
3250 }
3251 return alias(str, NULL, c->c_arena);
3252 }
3253 break;
3254 case STAR:
3255 str = PyUnicode_InternFromString("*");
3256 if (!str)
3257 return NULL;
3258 if (PyArena_AddPyObject(c->c_arena, str) < 0) {
3259 Py_DECREF(str);
3260 return NULL;
3261 }
3262 return alias(str, NULL, c->c_arena);
3263 default:
3264 PyErr_Format(PyExc_SystemError,
3265 "unexpected import name: %d", TYPE(n));
3266 return NULL;
3267 }
3268
3269 PyErr_SetString(PyExc_SystemError, "unhandled import name condition");
3270 return NULL;
3271 }
3272
3273 static stmt_ty
ast_for_import_stmt(struct compiling * c,const node * n)3274 ast_for_import_stmt(struct compiling *c, const node *n)
3275 {
3276 /*
3277 import_stmt: import_name | import_from
3278 import_name: 'import' dotted_as_names
3279 import_from: 'from' (('.' | '...')* dotted_name | ('.' | '...')+)
3280 'import' ('*' | '(' import_as_names ')' | import_as_names)
3281 */
3282 int lineno;
3283 int col_offset;
3284 int i;
3285 asdl_seq *aliases;
3286
3287 REQ(n, import_stmt);
3288 lineno = LINENO(n);
3289 col_offset = n->n_col_offset;
3290 n = CHILD(n, 0);
3291 if (TYPE(n) == import_name) {
3292 n = CHILD(n, 1);
3293 REQ(n, dotted_as_names);
3294 aliases = _Py_asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
3295 if (!aliases)
3296 return NULL;
3297 for (i = 0; i < NCH(n); i += 2) {
3298 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
3299 if (!import_alias)
3300 return NULL;
3301 asdl_seq_SET(aliases, i / 2, import_alias);
3302 }
3303 return Import(aliases, lineno, col_offset, c->c_arena);
3304 }
3305 else if (TYPE(n) == import_from) {
3306 int n_children;
3307 int idx, ndots = 0;
3308 alias_ty mod = NULL;
3309 identifier modname = NULL;
3310
3311 /* Count the number of dots (for relative imports) and check for the
3312 optional module name */
3313 for (idx = 1; idx < NCH(n); idx++) {
3314 if (TYPE(CHILD(n, idx)) == dotted_name) {
3315 mod = alias_for_import_name(c, CHILD(n, idx), 0);
3316 if (!mod)
3317 return NULL;
3318 idx++;
3319 break;
3320 } else if (TYPE(CHILD(n, idx)) == ELLIPSIS) {
3321 /* three consecutive dots are tokenized as one ELLIPSIS */
3322 ndots += 3;
3323 continue;
3324 } else if (TYPE(CHILD(n, idx)) != DOT) {
3325 break;
3326 }
3327 ndots++;
3328 }
3329 idx++; /* skip over the 'import' keyword */
3330 switch (TYPE(CHILD(n, idx))) {
3331 case STAR:
3332 /* from ... import * */
3333 n = CHILD(n, idx);
3334 n_children = 1;
3335 break;
3336 case LPAR:
3337 /* from ... import (x, y, z) */
3338 n = CHILD(n, idx + 1);
3339 n_children = NCH(n);
3340 break;
3341 case import_as_names:
3342 /* from ... import x, y, z */
3343 n = CHILD(n, idx);
3344 n_children = NCH(n);
3345 if (n_children % 2 == 0) {
3346 ast_error(c, n, "trailing comma not allowed without"
3347 " surrounding parentheses");
3348 return NULL;
3349 }
3350 break;
3351 default:
3352 ast_error(c, n, "Unexpected node-type in from-import");
3353 return NULL;
3354 }
3355
3356 aliases = _Py_asdl_seq_new((n_children + 1) / 2, c->c_arena);
3357 if (!aliases)
3358 return NULL;
3359
3360 /* handle "from ... import *" special b/c there's no children */
3361 if (TYPE(n) == STAR) {
3362 alias_ty import_alias = alias_for_import_name(c, n, 1);
3363 if (!import_alias)
3364 return NULL;
3365 asdl_seq_SET(aliases, 0, import_alias);
3366 }
3367 else {
3368 for (i = 0; i < NCH(n); i += 2) {
3369 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i), 1);
3370 if (!import_alias)
3371 return NULL;
3372 asdl_seq_SET(aliases, i / 2, import_alias);
3373 }
3374 }
3375 if (mod != NULL)
3376 modname = mod->name;
3377 return ImportFrom(modname, aliases, ndots, lineno, col_offset,
3378 c->c_arena);
3379 }
3380 PyErr_Format(PyExc_SystemError,
3381 "unknown import statement: starts with command '%s'",
3382 STR(CHILD(n, 0)));
3383 return NULL;
3384 }
3385
3386 static stmt_ty
ast_for_global_stmt(struct compiling * c,const node * n)3387 ast_for_global_stmt(struct compiling *c, const node *n)
3388 {
3389 /* global_stmt: 'global' NAME (',' NAME)* */
3390 identifier name;
3391 asdl_seq *s;
3392 int i;
3393
3394 REQ(n, global_stmt);
3395 s = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
3396 if (!s)
3397 return NULL;
3398 for (i = 1; i < NCH(n); i += 2) {
3399 name = NEW_IDENTIFIER(CHILD(n, i));
3400 if (!name)
3401 return NULL;
3402 asdl_seq_SET(s, i / 2, name);
3403 }
3404 return Global(s, LINENO(n), n->n_col_offset, c->c_arena);
3405 }
3406
3407 static stmt_ty
ast_for_nonlocal_stmt(struct compiling * c,const node * n)3408 ast_for_nonlocal_stmt(struct compiling *c, const node *n)
3409 {
3410 /* nonlocal_stmt: 'nonlocal' NAME (',' NAME)* */
3411 identifier name;
3412 asdl_seq *s;
3413 int i;
3414
3415 REQ(n, nonlocal_stmt);
3416 s = _Py_asdl_seq_new(NCH(n) / 2, c->c_arena);
3417 if (!s)
3418 return NULL;
3419 for (i = 1; i < NCH(n); i += 2) {
3420 name = NEW_IDENTIFIER(CHILD(n, i));
3421 if (!name)
3422 return NULL;
3423 asdl_seq_SET(s, i / 2, name);
3424 }
3425 return Nonlocal(s, LINENO(n), n->n_col_offset, c->c_arena);
3426 }
3427
3428 static stmt_ty
ast_for_assert_stmt(struct compiling * c,const node * n)3429 ast_for_assert_stmt(struct compiling *c, const node *n)
3430 {
3431 /* assert_stmt: 'assert' test [',' test] */
3432 REQ(n, assert_stmt);
3433 if (NCH(n) == 2) {
3434 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
3435 if (!expression)
3436 return NULL;
3437 return Assert(expression, NULL, LINENO(n), n->n_col_offset, c->c_arena);
3438 }
3439 else if (NCH(n) == 4) {
3440 expr_ty expr1, expr2;
3441
3442 expr1 = ast_for_expr(c, CHILD(n, 1));
3443 if (!expr1)
3444 return NULL;
3445 expr2 = ast_for_expr(c, CHILD(n, 3));
3446 if (!expr2)
3447 return NULL;
3448
3449 return Assert(expr1, expr2, LINENO(n), n->n_col_offset, c->c_arena);
3450 }
3451 PyErr_Format(PyExc_SystemError,
3452 "improper number of parts to 'assert' statement: %d",
3453 NCH(n));
3454 return NULL;
3455 }
3456
3457 static asdl_seq *
ast_for_suite(struct compiling * c,const node * n)3458 ast_for_suite(struct compiling *c, const node *n)
3459 {
3460 /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */
3461 asdl_seq *seq;
3462 stmt_ty s;
3463 int i, total, num, end, pos = 0;
3464 node *ch;
3465
3466 REQ(n, suite);
3467
3468 total = num_stmts(n);
3469 seq = _Py_asdl_seq_new(total, c->c_arena);
3470 if (!seq)
3471 return NULL;
3472 if (TYPE(CHILD(n, 0)) == simple_stmt) {
3473 n = CHILD(n, 0);
3474 /* simple_stmt always ends with a NEWLINE,
3475 and may have a trailing SEMI
3476 */
3477 end = NCH(n) - 1;
3478 if (TYPE(CHILD(n, end - 1)) == SEMI)
3479 end--;
3480 /* loop by 2 to skip semi-colons */
3481 for (i = 0; i < end; i += 2) {
3482 ch = CHILD(n, i);
3483 s = ast_for_stmt(c, ch);
3484 if (!s)
3485 return NULL;
3486 asdl_seq_SET(seq, pos++, s);
3487 }
3488 }
3489 else {
3490 for (i = 2; i < (NCH(n) - 1); i++) {
3491 ch = CHILD(n, i);
3492 REQ(ch, stmt);
3493 num = num_stmts(ch);
3494 if (num == 1) {
3495 /* small_stmt or compound_stmt with only one child */
3496 s = ast_for_stmt(c, ch);
3497 if (!s)
3498 return NULL;
3499 asdl_seq_SET(seq, pos++, s);
3500 }
3501 else {
3502 int j;
3503 ch = CHILD(ch, 0);
3504 REQ(ch, simple_stmt);
3505 for (j = 0; j < NCH(ch); j += 2) {
3506 /* statement terminates with a semi-colon ';' */
3507 if (NCH(CHILD(ch, j)) == 0) {
3508 assert((j + 1) == NCH(ch));
3509 break;
3510 }
3511 s = ast_for_stmt(c, CHILD(ch, j));
3512 if (!s)
3513 return NULL;
3514 asdl_seq_SET(seq, pos++, s);
3515 }
3516 }
3517 }
3518 }
3519 assert(pos == seq->size);
3520 return seq;
3521 }
3522
3523 static stmt_ty
ast_for_if_stmt(struct compiling * c,const node * n)3524 ast_for_if_stmt(struct compiling *c, const node *n)
3525 {
3526 /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)*
3527 ['else' ':' suite]
3528 */
3529 char *s;
3530
3531 REQ(n, if_stmt);
3532
3533 if (NCH(n) == 4) {
3534 expr_ty expression;
3535 asdl_seq *suite_seq;
3536
3537 expression = ast_for_expr(c, CHILD(n, 1));
3538 if (!expression)
3539 return NULL;
3540 suite_seq = ast_for_suite(c, CHILD(n, 3));
3541 if (!suite_seq)
3542 return NULL;
3543
3544 return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
3545 c->c_arena);
3546 }
3547
3548 s = STR(CHILD(n, 4));
3549 /* s[2], the third character in the string, will be
3550 's' for el_s_e, or
3551 'i' for el_i_f
3552 */
3553 if (s[2] == 's') {
3554 expr_ty expression;
3555 asdl_seq *seq1, *seq2;
3556
3557 expression = ast_for_expr(c, CHILD(n, 1));
3558 if (!expression)
3559 return NULL;
3560 seq1 = ast_for_suite(c, CHILD(n, 3));
3561 if (!seq1)
3562 return NULL;
3563 seq2 = ast_for_suite(c, CHILD(n, 6));
3564 if (!seq2)
3565 return NULL;
3566
3567 return If(expression, seq1, seq2, LINENO(n), n->n_col_offset,
3568 c->c_arena);
3569 }
3570 else if (s[2] == 'i') {
3571 int i, n_elif, has_else = 0;
3572 expr_ty expression;
3573 asdl_seq *suite_seq;
3574 asdl_seq *orelse = NULL;
3575 n_elif = NCH(n) - 4;
3576 /* must reference the child n_elif+1 since 'else' token is third,
3577 not fourth, child from the end. */
3578 if (TYPE(CHILD(n, (n_elif + 1))) == NAME
3579 && STR(CHILD(n, (n_elif + 1)))[2] == 's') {
3580 has_else = 1;
3581 n_elif -= 3;
3582 }
3583 n_elif /= 4;
3584
3585 if (has_else) {
3586 asdl_seq *suite_seq2;
3587
3588 orelse = _Py_asdl_seq_new(1, c->c_arena);
3589 if (!orelse)
3590 return NULL;
3591 expression = ast_for_expr(c, CHILD(n, NCH(n) - 6));
3592 if (!expression)
3593 return NULL;
3594 suite_seq = ast_for_suite(c, CHILD(n, NCH(n) - 4));
3595 if (!suite_seq)
3596 return NULL;
3597 suite_seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1));
3598 if (!suite_seq2)
3599 return NULL;
3600
3601 asdl_seq_SET(orelse, 0,
3602 If(expression, suite_seq, suite_seq2,
3603 LINENO(CHILD(n, NCH(n) - 6)),
3604 CHILD(n, NCH(n) - 6)->n_col_offset,
3605 c->c_arena));
3606 /* the just-created orelse handled the last elif */
3607 n_elif--;
3608 }
3609
3610 for (i = 0; i < n_elif; i++) {
3611 int off = 5 + (n_elif - i - 1) * 4;
3612 asdl_seq *newobj = _Py_asdl_seq_new(1, c->c_arena);
3613 if (!newobj)
3614 return NULL;
3615 expression = ast_for_expr(c, CHILD(n, off));
3616 if (!expression)
3617 return NULL;
3618 suite_seq = ast_for_suite(c, CHILD(n, off + 2));
3619 if (!suite_seq)
3620 return NULL;
3621
3622 asdl_seq_SET(newobj, 0,
3623 If(expression, suite_seq, orelse,
3624 LINENO(CHILD(n, off)),
3625 CHILD(n, off)->n_col_offset, c->c_arena));
3626 orelse = newobj;
3627 }
3628 expression = ast_for_expr(c, CHILD(n, 1));
3629 if (!expression)
3630 return NULL;
3631 suite_seq = ast_for_suite(c, CHILD(n, 3));
3632 if (!suite_seq)
3633 return NULL;
3634 return If(expression, suite_seq, orelse,
3635 LINENO(n), n->n_col_offset, c->c_arena);
3636 }
3637
3638 PyErr_Format(PyExc_SystemError,
3639 "unexpected token in 'if' statement: %s", s);
3640 return NULL;
3641 }
3642
3643 static stmt_ty
ast_for_while_stmt(struct compiling * c,const node * n)3644 ast_for_while_stmt(struct compiling *c, const node *n)
3645 {
3646 /* while_stmt: 'while' test ':' suite ['else' ':' suite] */
3647 REQ(n, while_stmt);
3648
3649 if (NCH(n) == 4) {
3650 expr_ty expression;
3651 asdl_seq *suite_seq;
3652
3653 expression = ast_for_expr(c, CHILD(n, 1));
3654 if (!expression)
3655 return NULL;
3656 suite_seq = ast_for_suite(c, CHILD(n, 3));
3657 if (!suite_seq)
3658 return NULL;
3659 return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset, c->c_arena);
3660 }
3661 else if (NCH(n) == 7) {
3662 expr_ty expression;
3663 asdl_seq *seq1, *seq2;
3664
3665 expression = ast_for_expr(c, CHILD(n, 1));
3666 if (!expression)
3667 return NULL;
3668 seq1 = ast_for_suite(c, CHILD(n, 3));
3669 if (!seq1)
3670 return NULL;
3671 seq2 = ast_for_suite(c, CHILD(n, 6));
3672 if (!seq2)
3673 return NULL;
3674
3675 return While(expression, seq1, seq2, LINENO(n), n->n_col_offset, c->c_arena);
3676 }
3677
3678 PyErr_Format(PyExc_SystemError,
3679 "wrong number of tokens for 'while' statement: %d",
3680 NCH(n));
3681 return NULL;
3682 }
3683
3684 static stmt_ty
ast_for_for_stmt(struct compiling * c,const node * n0,bool is_async)3685 ast_for_for_stmt(struct compiling *c, const node *n0, bool is_async)
3686 {
3687 const node * const n = is_async ? CHILD(n0, 1) : n0;
3688 asdl_seq *_target, *seq = NULL, *suite_seq;
3689 expr_ty expression;
3690 expr_ty target, first;
3691 const node *node_target;
3692 /* for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] */
3693 REQ(n, for_stmt);
3694
3695 if (NCH(n) == 9) {
3696 seq = ast_for_suite(c, CHILD(n, 8));
3697 if (!seq)
3698 return NULL;
3699 }
3700
3701 node_target = CHILD(n, 1);
3702 _target = ast_for_exprlist(c, node_target, Store);
3703 if (!_target)
3704 return NULL;
3705 /* Check the # of children rather than the length of _target, since
3706 for x, in ... has 1 element in _target, but still requires a Tuple. */
3707 first = (expr_ty)asdl_seq_GET(_target, 0);
3708 if (NCH(node_target) == 1)
3709 target = first;
3710 else
3711 target = Tuple(_target, Store, first->lineno, first->col_offset, c->c_arena);
3712
3713 expression = ast_for_testlist(c, CHILD(n, 3));
3714 if (!expression)
3715 return NULL;
3716 suite_seq = ast_for_suite(c, CHILD(n, 5));
3717 if (!suite_seq)
3718 return NULL;
3719
3720 if (is_async)
3721 return AsyncFor(target, expression, suite_seq, seq,
3722 LINENO(n0), n0->n_col_offset,
3723 c->c_arena);
3724 else
3725 return For(target, expression, suite_seq, seq,
3726 LINENO(n), n->n_col_offset,
3727 c->c_arena);
3728 }
3729
3730 static excepthandler_ty
ast_for_except_clause(struct compiling * c,const node * exc,node * body)3731 ast_for_except_clause(struct compiling *c, const node *exc, node *body)
3732 {
3733 /* except_clause: 'except' [test ['as' test]] */
3734 REQ(exc, except_clause);
3735 REQ(body, suite);
3736
3737 if (NCH(exc) == 1) {
3738 asdl_seq *suite_seq = ast_for_suite(c, body);
3739 if (!suite_seq)
3740 return NULL;
3741
3742 return ExceptHandler(NULL, NULL, suite_seq, LINENO(exc),
3743 exc->n_col_offset, c->c_arena);
3744 }
3745 else if (NCH(exc) == 2) {
3746 expr_ty expression;
3747 asdl_seq *suite_seq;
3748
3749 expression = ast_for_expr(c, CHILD(exc, 1));
3750 if (!expression)
3751 return NULL;
3752 suite_seq = ast_for_suite(c, body);
3753 if (!suite_seq)
3754 return NULL;
3755
3756 return ExceptHandler(expression, NULL, suite_seq, LINENO(exc),
3757 exc->n_col_offset, c->c_arena);
3758 }
3759 else if (NCH(exc) == 4) {
3760 asdl_seq *suite_seq;
3761 expr_ty expression;
3762 identifier e = NEW_IDENTIFIER(CHILD(exc, 3));
3763 if (!e)
3764 return NULL;
3765 if (forbidden_name(c, e, CHILD(exc, 3), 0))
3766 return NULL;
3767 expression = ast_for_expr(c, CHILD(exc, 1));
3768 if (!expression)
3769 return NULL;
3770 suite_seq = ast_for_suite(c, body);
3771 if (!suite_seq)
3772 return NULL;
3773
3774 return ExceptHandler(expression, e, suite_seq, LINENO(exc),
3775 exc->n_col_offset, c->c_arena);
3776 }
3777
3778 PyErr_Format(PyExc_SystemError,
3779 "wrong number of children for 'except' clause: %d",
3780 NCH(exc));
3781 return NULL;
3782 }
3783
3784 static stmt_ty
ast_for_try_stmt(struct compiling * c,const node * n)3785 ast_for_try_stmt(struct compiling *c, const node *n)
3786 {
3787 const int nch = NCH(n);
3788 int n_except = (nch - 3)/3;
3789 asdl_seq *body, *handlers = NULL, *orelse = NULL, *finally = NULL;
3790
3791 REQ(n, try_stmt);
3792
3793 body = ast_for_suite(c, CHILD(n, 2));
3794 if (body == NULL)
3795 return NULL;
3796
3797 if (TYPE(CHILD(n, nch - 3)) == NAME) {
3798 if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) {
3799 if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) {
3800 /* we can assume it's an "else",
3801 because nch >= 9 for try-else-finally and
3802 it would otherwise have a type of except_clause */
3803 orelse = ast_for_suite(c, CHILD(n, nch - 4));
3804 if (orelse == NULL)
3805 return NULL;
3806 n_except--;
3807 }
3808
3809 finally = ast_for_suite(c, CHILD(n, nch - 1));
3810 if (finally == NULL)
3811 return NULL;
3812 n_except--;
3813 }
3814 else {
3815 /* we can assume it's an "else",
3816 otherwise it would have a type of except_clause */
3817 orelse = ast_for_suite(c, CHILD(n, nch - 1));
3818 if (orelse == NULL)
3819 return NULL;
3820 n_except--;
3821 }
3822 }
3823 else if (TYPE(CHILD(n, nch - 3)) != except_clause) {
3824 ast_error(c, n, "malformed 'try' statement");
3825 return NULL;
3826 }
3827
3828 if (n_except > 0) {
3829 int i;
3830 /* process except statements to create a try ... except */
3831 handlers = _Py_asdl_seq_new(n_except, c->c_arena);
3832 if (handlers == NULL)
3833 return NULL;
3834
3835 for (i = 0; i < n_except; i++) {
3836 excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3),
3837 CHILD(n, 5 + i * 3));
3838 if (!e)
3839 return NULL;
3840 asdl_seq_SET(handlers, i, e);
3841 }
3842 }
3843
3844 assert(finally != NULL || asdl_seq_LEN(handlers));
3845 return Try(body, handlers, orelse, finally, LINENO(n), n->n_col_offset, c->c_arena);
3846 }
3847
3848 /* with_item: test ['as' expr] */
3849 static withitem_ty
ast_for_with_item(struct compiling * c,const node * n)3850 ast_for_with_item(struct compiling *c, const node *n)
3851 {
3852 expr_ty context_expr, optional_vars = NULL;
3853
3854 REQ(n, with_item);
3855 context_expr = ast_for_expr(c, CHILD(n, 0));
3856 if (!context_expr)
3857 return NULL;
3858 if (NCH(n) == 3) {
3859 optional_vars = ast_for_expr(c, CHILD(n, 2));
3860
3861 if (!optional_vars) {
3862 return NULL;
3863 }
3864 if (!set_context(c, optional_vars, Store, n)) {
3865 return NULL;
3866 }
3867 }
3868
3869 return withitem(context_expr, optional_vars, c->c_arena);
3870 }
3871
3872 /* with_stmt: 'with' with_item (',' with_item)* ':' suite */
3873 static stmt_ty
ast_for_with_stmt(struct compiling * c,const node * n0,bool is_async)3874 ast_for_with_stmt(struct compiling *c, const node *n0, bool is_async)
3875 {
3876 const node * const n = is_async ? CHILD(n0, 1) : n0;
3877 int i, n_items;
3878 asdl_seq *items, *body;
3879
3880 REQ(n, with_stmt);
3881
3882 n_items = (NCH(n) - 2) / 2;
3883 items = _Py_asdl_seq_new(n_items, c->c_arena);
3884 if (!items)
3885 return NULL;
3886 for (i = 1; i < NCH(n) - 2; i += 2) {
3887 withitem_ty item = ast_for_with_item(c, CHILD(n, i));
3888 if (!item)
3889 return NULL;
3890 asdl_seq_SET(items, (i - 1) / 2, item);
3891 }
3892
3893 body = ast_for_suite(c, CHILD(n, NCH(n) - 1));
3894 if (!body)
3895 return NULL;
3896
3897 if (is_async)
3898 return AsyncWith(items, body, LINENO(n0), n0->n_col_offset, c->c_arena);
3899 else
3900 return With(items, body, LINENO(n), n->n_col_offset, c->c_arena);
3901 }
3902
3903 static stmt_ty
ast_for_classdef(struct compiling * c,const node * n,asdl_seq * decorator_seq)3904 ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
3905 {
3906 /* classdef: 'class' NAME ['(' arglist ')'] ':' suite */
3907 PyObject *classname;
3908 asdl_seq *s;
3909 expr_ty call;
3910
3911 REQ(n, classdef);
3912
3913 if (NCH(n) == 4) { /* class NAME ':' suite */
3914 s = ast_for_suite(c, CHILD(n, 3));
3915 if (!s)
3916 return NULL;
3917 classname = NEW_IDENTIFIER(CHILD(n, 1));
3918 if (!classname)
3919 return NULL;
3920 if (forbidden_name(c, classname, CHILD(n, 3), 0))
3921 return NULL;
3922 return ClassDef(classname, NULL, NULL, s, decorator_seq,
3923 LINENO(n), n->n_col_offset, c->c_arena);
3924 }
3925
3926 if (TYPE(CHILD(n, 3)) == RPAR) { /* class NAME '(' ')' ':' suite */
3927 s = ast_for_suite(c, CHILD(n, 5));
3928 if (!s)
3929 return NULL;
3930 classname = NEW_IDENTIFIER(CHILD(n, 1));
3931 if (!classname)
3932 return NULL;
3933 if (forbidden_name(c, classname, CHILD(n, 3), 0))
3934 return NULL;
3935 return ClassDef(classname, NULL, NULL, s, decorator_seq,
3936 LINENO(n), n->n_col_offset, c->c_arena);
3937 }
3938
3939 /* class NAME '(' arglist ')' ':' suite */
3940 /* build up a fake Call node so we can extract its pieces */
3941 {
3942 PyObject *dummy_name;
3943 expr_ty dummy;
3944 dummy_name = NEW_IDENTIFIER(CHILD(n, 1));
3945 if (!dummy_name)
3946 return NULL;
3947 dummy = Name(dummy_name, Load, LINENO(n), n->n_col_offset, c->c_arena);
3948 call = ast_for_call(c, CHILD(n, 3), dummy, false);
3949 if (!call)
3950 return NULL;
3951 }
3952 s = ast_for_suite(c, CHILD(n, 6));
3953 if (!s)
3954 return NULL;
3955 classname = NEW_IDENTIFIER(CHILD(n, 1));
3956 if (!classname)
3957 return NULL;
3958 if (forbidden_name(c, classname, CHILD(n, 1), 0))
3959 return NULL;
3960
3961 return ClassDef(classname, call->v.Call.args, call->v.Call.keywords, s,
3962 decorator_seq, LINENO(n), n->n_col_offset, c->c_arena);
3963 }
3964
3965 static stmt_ty
ast_for_stmt(struct compiling * c,const node * n)3966 ast_for_stmt(struct compiling *c, const node *n)
3967 {
3968 if (TYPE(n) == stmt) {
3969 assert(NCH(n) == 1);
3970 n = CHILD(n, 0);
3971 }
3972 if (TYPE(n) == simple_stmt) {
3973 assert(num_stmts(n) == 1);
3974 n = CHILD(n, 0);
3975 }
3976 if (TYPE(n) == small_stmt) {
3977 n = CHILD(n, 0);
3978 /* small_stmt: expr_stmt | del_stmt | pass_stmt | flow_stmt
3979 | import_stmt | global_stmt | nonlocal_stmt | assert_stmt
3980 */
3981 switch (TYPE(n)) {
3982 case expr_stmt:
3983 return ast_for_expr_stmt(c, n);
3984 case del_stmt:
3985 return ast_for_del_stmt(c, n);
3986 case pass_stmt:
3987 return Pass(LINENO(n), n->n_col_offset, c->c_arena);
3988 case flow_stmt:
3989 return ast_for_flow_stmt(c, n);
3990 case import_stmt:
3991 return ast_for_import_stmt(c, n);
3992 case global_stmt:
3993 return ast_for_global_stmt(c, n);
3994 case nonlocal_stmt:
3995 return ast_for_nonlocal_stmt(c, n);
3996 case assert_stmt:
3997 return ast_for_assert_stmt(c, n);
3998 default:
3999 PyErr_Format(PyExc_SystemError,
4000 "unhandled small_stmt: TYPE=%d NCH=%d\n",
4001 TYPE(n), NCH(n));
4002 return NULL;
4003 }
4004 }
4005 else {
4006 /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt
4007 | funcdef | classdef | decorated | async_stmt
4008 */
4009 node *ch = CHILD(n, 0);
4010 REQ(n, compound_stmt);
4011 switch (TYPE(ch)) {
4012 case if_stmt:
4013 return ast_for_if_stmt(c, ch);
4014 case while_stmt:
4015 return ast_for_while_stmt(c, ch);
4016 case for_stmt:
4017 return ast_for_for_stmt(c, ch, 0);
4018 case try_stmt:
4019 return ast_for_try_stmt(c, ch);
4020 case with_stmt:
4021 return ast_for_with_stmt(c, ch, 0);
4022 case funcdef:
4023 return ast_for_funcdef(c, ch, NULL);
4024 case classdef:
4025 return ast_for_classdef(c, ch, NULL);
4026 case decorated:
4027 return ast_for_decorated(c, ch);
4028 case async_stmt:
4029 return ast_for_async_stmt(c, ch);
4030 default:
4031 PyErr_Format(PyExc_SystemError,
4032 "unhandled small_stmt: TYPE=%d NCH=%d\n",
4033 TYPE(n), NCH(n));
4034 return NULL;
4035 }
4036 }
4037 }
4038
4039 static PyObject *
parsenumber_raw(struct compiling * c,const char * s)4040 parsenumber_raw(struct compiling *c, const char *s)
4041 {
4042 const char *end;
4043 long x;
4044 double dx;
4045 Py_complex compl;
4046 int imflag;
4047
4048 assert(s != NULL);
4049 errno = 0;
4050 end = s + strlen(s) - 1;
4051 imflag = *end == 'j' || *end == 'J';
4052 if (s[0] == '0') {
4053 x = (long) PyOS_strtoul(s, (char **)&end, 0);
4054 if (x < 0 && errno == 0) {
4055 return PyLong_FromString(s, (char **)0, 0);
4056 }
4057 }
4058 else
4059 x = PyOS_strtol(s, (char **)&end, 0);
4060 if (*end == '\0') {
4061 if (errno != 0)
4062 return PyLong_FromString(s, (char **)0, 0);
4063 return PyLong_FromLong(x);
4064 }
4065 /* XXX Huge floats may silently fail */
4066 if (imflag) {
4067 compl.real = 0.;
4068 compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
4069 if (compl.imag == -1.0 && PyErr_Occurred())
4070 return NULL;
4071 return PyComplex_FromCComplex(compl);
4072 }
4073 else
4074 {
4075 dx = PyOS_string_to_double(s, NULL, NULL);
4076 if (dx == -1.0 && PyErr_Occurred())
4077 return NULL;
4078 return PyFloat_FromDouble(dx);
4079 }
4080 }
4081
4082 static PyObject *
parsenumber(struct compiling * c,const char * s)4083 parsenumber(struct compiling *c, const char *s)
4084 {
4085 char *dup, *end;
4086 PyObject *res = NULL;
4087
4088 assert(s != NULL);
4089
4090 if (strchr(s, '_') == NULL) {
4091 return parsenumber_raw(c, s);
4092 }
4093 /* Create a duplicate without underscores. */
4094 dup = PyMem_Malloc(strlen(s) + 1);
4095 if (dup == NULL) {
4096 return PyErr_NoMemory();
4097 }
4098 end = dup;
4099 for (; *s; s++) {
4100 if (*s != '_') {
4101 *end++ = *s;
4102 }
4103 }
4104 *end = '\0';
4105 res = parsenumber_raw(c, dup);
4106 PyMem_Free(dup);
4107 return res;
4108 }
4109
4110 static PyObject *
decode_utf8(struct compiling * c,const char ** sPtr,const char * end)4111 decode_utf8(struct compiling *c, const char **sPtr, const char *end)
4112 {
4113 const char *s, *t;
4114 t = s = *sPtr;
4115 /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
4116 while (s < end && (*s & 0x80)) s++;
4117 *sPtr = s;
4118 return PyUnicode_DecodeUTF8(t, s - t, NULL);
4119 }
4120
4121 static int
warn_invalid_escape_sequence(struct compiling * c,const node * n,unsigned char first_invalid_escape_char)4122 warn_invalid_escape_sequence(struct compiling *c, const node *n,
4123 unsigned char first_invalid_escape_char)
4124 {
4125 PyObject *msg = PyUnicode_FromFormat("invalid escape sequence \\%c",
4126 first_invalid_escape_char);
4127 if (msg == NULL) {
4128 return -1;
4129 }
4130 if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg,
4131 c->c_filename, LINENO(n),
4132 NULL, NULL) < 0)
4133 {
4134 if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
4135 const char *s;
4136
4137 /* Replace the DeprecationWarning exception with a SyntaxError
4138 to get a more accurate error report */
4139 PyErr_Clear();
4140
4141 s = PyUnicode_AsUTF8(msg);
4142 if (s != NULL) {
4143 ast_error(c, n, s);
4144 }
4145 }
4146 Py_DECREF(msg);
4147 return -1;
4148 }
4149 Py_DECREF(msg);
4150 return 0;
4151 }
4152
4153 static PyObject *
decode_unicode_with_escapes(struct compiling * c,const node * n,const char * s,size_t len)4154 decode_unicode_with_escapes(struct compiling *c, const node *n, const char *s,
4155 size_t len)
4156 {
4157 PyObject *v, *u;
4158 char *buf;
4159 char *p;
4160 const char *end;
4161
4162 /* check for integer overflow */
4163 if (len > SIZE_MAX / 6)
4164 return NULL;
4165 /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
4166 "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
4167 u = PyBytes_FromStringAndSize((char *)NULL, len * 6);
4168 if (u == NULL)
4169 return NULL;
4170 p = buf = PyBytes_AsString(u);
4171 end = s + len;
4172 while (s < end) {
4173 if (*s == '\\') {
4174 *p++ = *s++;
4175 if (s >= end || *s & 0x80) {
4176 strcpy(p, "u005c");
4177 p += 5;
4178 if (s >= end)
4179 break;
4180 }
4181 }
4182 if (*s & 0x80) { /* XXX inefficient */
4183 PyObject *w;
4184 int kind;
4185 void *data;
4186 Py_ssize_t len, i;
4187 w = decode_utf8(c, &s, end);
4188 if (w == NULL) {
4189 Py_DECREF(u);
4190 return NULL;
4191 }
4192 kind = PyUnicode_KIND(w);
4193 data = PyUnicode_DATA(w);
4194 len = PyUnicode_GET_LENGTH(w);
4195 for (i = 0; i < len; i++) {
4196 Py_UCS4 chr = PyUnicode_READ(kind, data, i);
4197 sprintf(p, "\\U%08x", chr);
4198 p += 10;
4199 }
4200 /* Should be impossible to overflow */
4201 assert(p - buf <= PyBytes_GET_SIZE(u));
4202 Py_DECREF(w);
4203 } else {
4204 *p++ = *s++;
4205 }
4206 }
4207 len = p - buf;
4208 s = buf;
4209
4210 const char *first_invalid_escape;
4211 v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape);
4212
4213 if (v != NULL && first_invalid_escape != NULL) {
4214 if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4215 /* We have not decref u before because first_invalid_escape points
4216 inside u. */
4217 Py_XDECREF(u);
4218 Py_DECREF(v);
4219 return NULL;
4220 }
4221 }
4222 Py_XDECREF(u);
4223 return v;
4224 }
4225
4226 static PyObject *
decode_bytes_with_escapes(struct compiling * c,const node * n,const char * s,size_t len)4227 decode_bytes_with_escapes(struct compiling *c, const node *n, const char *s,
4228 size_t len)
4229 {
4230 const char *first_invalid_escape;
4231 PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, 0, NULL,
4232 &first_invalid_escape);
4233 if (result == NULL)
4234 return NULL;
4235
4236 if (first_invalid_escape != NULL) {
4237 if (warn_invalid_escape_sequence(c, n, *first_invalid_escape) < 0) {
4238 Py_DECREF(result);
4239 return NULL;
4240 }
4241 }
4242 return result;
4243 }
4244
4245 /* Shift locations for the given node and all its children by adding `lineno`
4246 and `col_offset` to existing locations. */
fstring_shift_node_locations(node * n,int lineno,int col_offset)4247 static void fstring_shift_node_locations(node *n, int lineno, int col_offset)
4248 {
4249 n->n_col_offset = n->n_col_offset + col_offset;
4250 for (int i = 0; i < NCH(n); ++i) {
4251 if (n->n_lineno && n->n_lineno < CHILD(n, i)->n_lineno) {
4252 /* Shifting column offsets unnecessary if there's been newlines. */
4253 col_offset = 0;
4254 }
4255 fstring_shift_node_locations(CHILD(n, i), lineno, col_offset);
4256 }
4257 n->n_lineno = n->n_lineno + lineno;
4258 }
4259
4260 /* Fix locations for the given node and its children.
4261
4262 `parent` is the enclosing node.
4263 `n` is the node which locations are going to be fixed relative to parent.
4264 `expr_str` is the child node's string representation, including braces.
4265 */
4266 static void
fstring_fix_node_location(const node * parent,node * n,char * expr_str)4267 fstring_fix_node_location(const node *parent, node *n, char *expr_str)
4268 {
4269 char *substr = NULL;
4270 char *start;
4271 int lines = LINENO(parent) - 1;
4272 int cols = parent->n_col_offset;
4273 /* Find the full fstring to fix location information in `n`. */
4274 while (parent && parent->n_type != STRING)
4275 parent = parent->n_child;
4276 if (parent && parent->n_str) {
4277 substr = strstr(parent->n_str, expr_str);
4278 if (substr) {
4279 start = substr;
4280 while (start > parent->n_str) {
4281 if (start[0] == '\n')
4282 break;
4283 start--;
4284 }
4285 cols += substr - start;
4286 /* Fix lineno in mulitline strings. */
4287 while ((substr = strchr(substr + 1, '\n')))
4288 lines--;
4289 }
4290 }
4291 fstring_shift_node_locations(n, lines, cols);
4292 }
4293
4294 /* Compile this expression in to an expr_ty. Add parens around the
4295 expression, in order to allow leading spaces in the expression. */
4296 static expr_ty
fstring_compile_expr(const char * expr_start,const char * expr_end,struct compiling * c,const node * n)4297 fstring_compile_expr(const char *expr_start, const char *expr_end,
4298 struct compiling *c, const node *n)
4299
4300 {
4301 PyCompilerFlags cf;
4302 node *mod_n;
4303 mod_ty mod;
4304 char *str;
4305 Py_ssize_t len;
4306 const char *s;
4307
4308 assert(expr_end >= expr_start);
4309 assert(*(expr_start-1) == '{');
4310 assert(*expr_end == '}' || *expr_end == '!' || *expr_end == ':');
4311
4312 /* If the substring is all whitespace, it's an error. We need to catch this
4313 here, and not when we call PyParser_SimpleParseStringFlagsFilename,
4314 because turning the expression '' in to '()' would go from being invalid
4315 to valid. */
4316 for (s = expr_start; s != expr_end; s++) {
4317 char c = *s;
4318 /* The Python parser ignores only the following whitespace
4319 characters (\r already is converted to \n). */
4320 if (!(c == ' ' || c == '\t' || c == '\n' || c == '\f')) {
4321 break;
4322 }
4323 }
4324 if (s == expr_end) {
4325 ast_error(c, n, "f-string: empty expression not allowed");
4326 return NULL;
4327 }
4328
4329 len = expr_end - expr_start;
4330 /* Allocate 3 extra bytes: open paren, close paren, null byte. */
4331 str = PyMem_RawMalloc(len + 3);
4332 if (str == NULL) {
4333 PyErr_NoMemory();
4334 return NULL;
4335 }
4336
4337 str[0] = '(';
4338 memcpy(str+1, expr_start, len);
4339 str[len+1] = ')';
4340 str[len+2] = 0;
4341
4342 cf.cf_flags = PyCF_ONLY_AST;
4343 mod_n = PyParser_SimpleParseStringFlagsFilename(str, "<fstring>",
4344 Py_eval_input, 0);
4345 if (!mod_n) {
4346 PyMem_RawFree(str);
4347 return NULL;
4348 }
4349 /* Reuse str to find the correct column offset. */
4350 str[0] = '{';
4351 str[len+1] = '}';
4352 fstring_fix_node_location(n, mod_n, str);
4353 mod = PyAST_FromNode(mod_n, &cf, "<fstring>", c->c_arena);
4354 PyMem_RawFree(str);
4355 PyNode_Free(mod_n);
4356 if (!mod)
4357 return NULL;
4358 return mod->v.Expression.body;
4359 }
4360
4361 /* Return -1 on error.
4362
4363 Return 0 if we reached the end of the literal.
4364
4365 Return 1 if we haven't reached the end of the literal, but we want
4366 the caller to process the literal up to this point. Used for
4367 doubled braces.
4368 */
4369 static int
fstring_find_literal(const char ** str,const char * end,int raw,PyObject ** literal,int recurse_lvl,struct compiling * c,const node * n)4370 fstring_find_literal(const char **str, const char *end, int raw,
4371 PyObject **literal, int recurse_lvl,
4372 struct compiling *c, const node *n)
4373 {
4374 /* Get any literal string. It ends when we hit an un-doubled left
4375 brace (which isn't part of a unicode name escape such as
4376 "\N{EULER CONSTANT}"), or the end of the string. */
4377
4378 const char *s = *str;
4379 const char *literal_start = s;
4380 int result = 0;
4381
4382 assert(*literal == NULL);
4383 while (s < end) {
4384 char ch = *s++;
4385 if (!raw && ch == '\\' && s < end) {
4386 ch = *s++;
4387 if (ch == 'N') {
4388 if (s < end && *s++ == '{') {
4389 while (s < end && *s++ != '}') {
4390 }
4391 continue;
4392 }
4393 break;
4394 }
4395 if (ch == '{' && warn_invalid_escape_sequence(c, n, ch) < 0) {
4396 return -1;
4397 }
4398 }
4399 if (ch == '{' || ch == '}') {
4400 /* Check for doubled braces, but only at the top level. If
4401 we checked at every level, then f'{0:{3}}' would fail
4402 with the two closing braces. */
4403 if (recurse_lvl == 0) {
4404 if (s < end && *s == ch) {
4405 /* We're going to tell the caller that the literal ends
4406 here, but that they should continue scanning. But also
4407 skip over the second brace when we resume scanning. */
4408 *str = s + 1;
4409 result = 1;
4410 goto done;
4411 }
4412
4413 /* Where a single '{' is the start of a new expression, a
4414 single '}' is not allowed. */
4415 if (ch == '}') {
4416 *str = s - 1;
4417 ast_error(c, n, "f-string: single '}' is not allowed");
4418 return -1;
4419 }
4420 }
4421 /* We're either at a '{', which means we're starting another
4422 expression; or a '}', which means we're at the end of this
4423 f-string (for a nested format_spec). */
4424 s--;
4425 break;
4426 }
4427 }
4428 *str = s;
4429 assert(s <= end);
4430 assert(s == end || *s == '{' || *s == '}');
4431 done:
4432 if (literal_start != s) {
4433 if (raw)
4434 *literal = PyUnicode_DecodeUTF8Stateful(literal_start,
4435 s - literal_start,
4436 NULL, NULL);
4437 else
4438 *literal = decode_unicode_with_escapes(c, n, literal_start,
4439 s - literal_start);
4440 if (!*literal)
4441 return -1;
4442 }
4443 return result;
4444 }
4445
4446 /* Forward declaration because parsing is recursive. */
4447 static expr_ty
4448 fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
4449 struct compiling *c, const node *n);
4450
4451 /* Parse the f-string at *str, ending at end. We know *str starts an
4452 expression (so it must be a '{'). Returns the FormattedValue node,
4453 which includes the expression, conversion character, and
4454 format_spec expression.
4455
4456 Note that I don't do a perfect job here: I don't make sure that a
4457 closing brace doesn't match an opening paren, for example. It
4458 doesn't need to error on all invalid expressions, just correctly
4459 find the end of all valid ones. Any errors inside the expression
4460 will be caught when we parse it later. */
4461 static int
fstring_find_expr(const char ** str,const char * end,int raw,int recurse_lvl,expr_ty * expression,struct compiling * c,const node * n)4462 fstring_find_expr(const char **str, const char *end, int raw, int recurse_lvl,
4463 expr_ty *expression, struct compiling *c, const node *n)
4464 {
4465 /* Return -1 on error, else 0. */
4466
4467 const char *expr_start;
4468 const char *expr_end;
4469 expr_ty simple_expression;
4470 expr_ty format_spec = NULL; /* Optional format specifier. */
4471 int conversion = -1; /* The conversion char. -1 if not specified. */
4472
4473 /* 0 if we're not in a string, else the quote char we're trying to
4474 match (single or double quote). */
4475 char quote_char = 0;
4476
4477 /* If we're inside a string, 1=normal, 3=triple-quoted. */
4478 int string_type = 0;
4479
4480 /* Keep track of nesting level for braces/parens/brackets in
4481 expressions. */
4482 Py_ssize_t nested_depth = 0;
4483
4484 /* Can only nest one level deep. */
4485 if (recurse_lvl >= 2) {
4486 ast_error(c, n, "f-string: expressions nested too deeply");
4487 return -1;
4488 }
4489
4490 /* The first char must be a left brace, or we wouldn't have gotten
4491 here. Skip over it. */
4492 assert(**str == '{');
4493 *str += 1;
4494
4495 expr_start = *str;
4496 for (; *str < end; (*str)++) {
4497 char ch;
4498
4499 /* Loop invariants. */
4500 assert(nested_depth >= 0);
4501 assert(*str >= expr_start && *str < end);
4502 if (quote_char)
4503 assert(string_type == 1 || string_type == 3);
4504 else
4505 assert(string_type == 0);
4506
4507 ch = **str;
4508 /* Nowhere inside an expression is a backslash allowed. */
4509 if (ch == '\\') {
4510 /* Error: can't include a backslash character, inside
4511 parens or strings or not. */
4512 ast_error(c, n, "f-string expression part "
4513 "cannot include a backslash");
4514 return -1;
4515 }
4516 if (quote_char) {
4517 /* We're inside a string. See if we're at the end. */
4518 /* This code needs to implement the same non-error logic
4519 as tok_get from tokenizer.c, at the letter_quote
4520 label. To actually share that code would be a
4521 nightmare. But, it's unlikely to change and is small,
4522 so duplicate it here. Note we don't need to catch all
4523 of the errors, since they'll be caught when parsing the
4524 expression. We just need to match the non-error
4525 cases. Thus we can ignore \n in single-quoted strings,
4526 for example. Or non-terminated strings. */
4527 if (ch == quote_char) {
4528 /* Does this match the string_type (single or triple
4529 quoted)? */
4530 if (string_type == 3) {
4531 if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
4532 /* We're at the end of a triple quoted string. */
4533 *str += 2;
4534 string_type = 0;
4535 quote_char = 0;
4536 continue;
4537 }
4538 } else {
4539 /* We're at the end of a normal string. */
4540 quote_char = 0;
4541 string_type = 0;
4542 continue;
4543 }
4544 }
4545 } else if (ch == '\'' || ch == '"') {
4546 /* Is this a triple quoted string? */
4547 if (*str+2 < end && *(*str+1) == ch && *(*str+2) == ch) {
4548 string_type = 3;
4549 *str += 2;
4550 } else {
4551 /* Start of a normal string. */
4552 string_type = 1;
4553 }
4554 /* Start looking for the end of the string. */
4555 quote_char = ch;
4556 } else if (ch == '[' || ch == '{' || ch == '(') {
4557 nested_depth++;
4558 } else if (nested_depth != 0 &&
4559 (ch == ']' || ch == '}' || ch == ')')) {
4560 nested_depth--;
4561 } else if (ch == '#') {
4562 /* Error: can't include a comment character, inside parens
4563 or not. */
4564 ast_error(c, n, "f-string expression part cannot include '#'");
4565 return -1;
4566 } else if (nested_depth == 0 &&
4567 (ch == '!' || ch == ':' || ch == '}')) {
4568 /* First, test for the special case of "!=". Since '=' is
4569 not an allowed conversion character, nothing is lost in
4570 this test. */
4571 if (ch == '!' && *str+1 < end && *(*str+1) == '=') {
4572 /* This isn't a conversion character, just continue. */
4573 continue;
4574 }
4575 /* Normal way out of this loop. */
4576 break;
4577 } else {
4578 /* Just consume this char and loop around. */
4579 }
4580 }
4581 expr_end = *str;
4582 /* If we leave this loop in a string or with mismatched parens, we
4583 don't care. We'll get a syntax error when compiling the
4584 expression. But, we can produce a better error message, so
4585 let's just do that.*/
4586 if (quote_char) {
4587 ast_error(c, n, "f-string: unterminated string");
4588 return -1;
4589 }
4590 if (nested_depth) {
4591 ast_error(c, n, "f-string: mismatched '(', '{', or '['");
4592 return -1;
4593 }
4594
4595 if (*str >= end)
4596 goto unexpected_end_of_string;
4597
4598 /* Compile the expression as soon as possible, so we show errors
4599 related to the expression before errors related to the
4600 conversion or format_spec. */
4601 simple_expression = fstring_compile_expr(expr_start, expr_end, c, n);
4602 if (!simple_expression)
4603 return -1;
4604
4605 /* Check for a conversion char, if present. */
4606 if (**str == '!') {
4607 *str += 1;
4608 if (*str >= end)
4609 goto unexpected_end_of_string;
4610
4611 conversion = **str;
4612 *str += 1;
4613
4614 /* Validate the conversion. */
4615 if (!(conversion == 's' || conversion == 'r'
4616 || conversion == 'a')) {
4617 ast_error(c, n, "f-string: invalid conversion character: "
4618 "expected 's', 'r', or 'a'");
4619 return -1;
4620 }
4621 }
4622
4623 /* Check for the format spec, if present. */
4624 if (*str >= end)
4625 goto unexpected_end_of_string;
4626 if (**str == ':') {
4627 *str += 1;
4628 if (*str >= end)
4629 goto unexpected_end_of_string;
4630
4631 /* Parse the format spec. */
4632 format_spec = fstring_parse(str, end, raw, recurse_lvl+1, c, n);
4633 if (!format_spec)
4634 return -1;
4635 }
4636
4637 if (*str >= end || **str != '}')
4638 goto unexpected_end_of_string;
4639
4640 /* We're at a right brace. Consume it. */
4641 assert(*str < end);
4642 assert(**str == '}');
4643 *str += 1;
4644
4645 /* And now create the FormattedValue node that represents this
4646 entire expression with the conversion and format spec. */
4647 *expression = FormattedValue(simple_expression, conversion,
4648 format_spec, LINENO(n), n->n_col_offset,
4649 c->c_arena);
4650 if (!*expression)
4651 return -1;
4652
4653 return 0;
4654
4655 unexpected_end_of_string:
4656 ast_error(c, n, "f-string: expecting '}'");
4657 return -1;
4658 }
4659
4660 /* Return -1 on error.
4661
4662 Return 0 if we have a literal (possible zero length) and an
4663 expression (zero length if at the end of the string.
4664
4665 Return 1 if we have a literal, but no expression, and we want the
4666 caller to call us again. This is used to deal with doubled
4667 braces.
4668
4669 When called multiple times on the string 'a{{b{0}c', this function
4670 will return:
4671
4672 1. the literal 'a{' with no expression, and a return value
4673 of 1. Despite the fact that there's no expression, the return
4674 value of 1 means we're not finished yet.
4675
4676 2. the literal 'b' and the expression '0', with a return value of
4677 0. The fact that there's an expression means we're not finished.
4678
4679 3. literal 'c' with no expression and a return value of 0. The
4680 combination of the return value of 0 with no expression means
4681 we're finished.
4682 */
4683 static int
fstring_find_literal_and_expr(const char ** str,const char * end,int raw,int recurse_lvl,PyObject ** literal,expr_ty * expression,struct compiling * c,const node * n)4684 fstring_find_literal_and_expr(const char **str, const char *end, int raw,
4685 int recurse_lvl, PyObject **literal,
4686 expr_ty *expression,
4687 struct compiling *c, const node *n)
4688 {
4689 int result;
4690
4691 assert(*literal == NULL && *expression == NULL);
4692
4693 /* Get any literal string. */
4694 result = fstring_find_literal(str, end, raw, literal, recurse_lvl, c, n);
4695 if (result < 0)
4696 goto error;
4697
4698 assert(result == 0 || result == 1);
4699
4700 if (result == 1)
4701 /* We have a literal, but don't look at the expression. */
4702 return 1;
4703
4704 if (*str >= end || **str == '}')
4705 /* We're at the end of the string or the end of a nested
4706 f-string: no expression. The top-level error case where we
4707 expect to be at the end of the string but we're at a '}' is
4708 handled later. */
4709 return 0;
4710
4711 /* We must now be the start of an expression, on a '{'. */
4712 assert(**str == '{');
4713
4714 if (fstring_find_expr(str, end, raw, recurse_lvl, expression, c, n) < 0)
4715 goto error;
4716
4717 return 0;
4718
4719 error:
4720 Py_CLEAR(*literal);
4721 return -1;
4722 }
4723
4724 #define EXPRLIST_N_CACHED 64
4725
4726 typedef struct {
4727 /* Incrementally build an array of expr_ty, so be used in an
4728 asdl_seq. Cache some small but reasonably sized number of
4729 expr_ty's, and then after that start dynamically allocating,
4730 doubling the number allocated each time. Note that the f-string
4731 f'{0}a{1}' contains 3 expr_ty's: 2 FormattedValue's, and one
4732 Str for the literal 'a'. So you add expr_ty's about twice as
4733 fast as you add exressions in an f-string. */
4734
4735 Py_ssize_t allocated; /* Number we've allocated. */
4736 Py_ssize_t size; /* Number we've used. */
4737 expr_ty *p; /* Pointer to the memory we're actually
4738 using. Will point to 'data' until we
4739 start dynamically allocating. */
4740 expr_ty data[EXPRLIST_N_CACHED];
4741 } ExprList;
4742
4743 #ifdef NDEBUG
4744 #define ExprList_check_invariants(l)
4745 #else
4746 static void
ExprList_check_invariants(ExprList * l)4747 ExprList_check_invariants(ExprList *l)
4748 {
4749 /* Check our invariants. Make sure this object is "live", and
4750 hasn't been deallocated. */
4751 assert(l->size >= 0);
4752 assert(l->p != NULL);
4753 if (l->size <= EXPRLIST_N_CACHED)
4754 assert(l->data == l->p);
4755 }
4756 #endif
4757
4758 static void
ExprList_Init(ExprList * l)4759 ExprList_Init(ExprList *l)
4760 {
4761 l->allocated = EXPRLIST_N_CACHED;
4762 l->size = 0;
4763
4764 /* Until we start allocating dynamically, p points to data. */
4765 l->p = l->data;
4766
4767 ExprList_check_invariants(l);
4768 }
4769
4770 static int
ExprList_Append(ExprList * l,expr_ty exp)4771 ExprList_Append(ExprList *l, expr_ty exp)
4772 {
4773 ExprList_check_invariants(l);
4774 if (l->size >= l->allocated) {
4775 /* We need to alloc (or realloc) the memory. */
4776 Py_ssize_t new_size = l->allocated * 2;
4777
4778 /* See if we've ever allocated anything dynamically. */
4779 if (l->p == l->data) {
4780 Py_ssize_t i;
4781 /* We're still using the cached data. Switch to
4782 alloc-ing. */
4783 l->p = PyMem_RawMalloc(sizeof(expr_ty) * new_size);
4784 if (!l->p)
4785 return -1;
4786 /* Copy the cached data into the new buffer. */
4787 for (i = 0; i < l->size; i++)
4788 l->p[i] = l->data[i];
4789 } else {
4790 /* Just realloc. */
4791 expr_ty *tmp = PyMem_RawRealloc(l->p, sizeof(expr_ty) * new_size);
4792 if (!tmp) {
4793 PyMem_RawFree(l->p);
4794 l->p = NULL;
4795 return -1;
4796 }
4797 l->p = tmp;
4798 }
4799
4800 l->allocated = new_size;
4801 assert(l->allocated == 2 * l->size);
4802 }
4803
4804 l->p[l->size++] = exp;
4805
4806 ExprList_check_invariants(l);
4807 return 0;
4808 }
4809
4810 static void
ExprList_Dealloc(ExprList * l)4811 ExprList_Dealloc(ExprList *l)
4812 {
4813 ExprList_check_invariants(l);
4814
4815 /* If there's been an error, or we've never dynamically allocated,
4816 do nothing. */
4817 if (!l->p || l->p == l->data) {
4818 /* Do nothing. */
4819 } else {
4820 /* We have dynamically allocated. Free the memory. */
4821 PyMem_RawFree(l->p);
4822 }
4823 l->p = NULL;
4824 l->size = -1;
4825 }
4826
4827 static asdl_seq *
ExprList_Finish(ExprList * l,PyArena * arena)4828 ExprList_Finish(ExprList *l, PyArena *arena)
4829 {
4830 asdl_seq *seq;
4831
4832 ExprList_check_invariants(l);
4833
4834 /* Allocate the asdl_seq and copy the expressions in to it. */
4835 seq = _Py_asdl_seq_new(l->size, arena);
4836 if (seq) {
4837 Py_ssize_t i;
4838 for (i = 0; i < l->size; i++)
4839 asdl_seq_SET(seq, i, l->p[i]);
4840 }
4841 ExprList_Dealloc(l);
4842 return seq;
4843 }
4844
4845 /* The FstringParser is designed to add a mix of strings and
4846 f-strings, and concat them together as needed. Ultimately, it
4847 generates an expr_ty. */
4848 typedef struct {
4849 PyObject *last_str;
4850 ExprList expr_list;
4851 int fmode;
4852 } FstringParser;
4853
4854 #ifdef NDEBUG
4855 #define FstringParser_check_invariants(state)
4856 #else
4857 static void
FstringParser_check_invariants(FstringParser * state)4858 FstringParser_check_invariants(FstringParser *state)
4859 {
4860 if (state->last_str)
4861 assert(PyUnicode_CheckExact(state->last_str));
4862 ExprList_check_invariants(&state->expr_list);
4863 }
4864 #endif
4865
4866 static void
FstringParser_Init(FstringParser * state)4867 FstringParser_Init(FstringParser *state)
4868 {
4869 state->last_str = NULL;
4870 state->fmode = 0;
4871 ExprList_Init(&state->expr_list);
4872 FstringParser_check_invariants(state);
4873 }
4874
4875 static void
FstringParser_Dealloc(FstringParser * state)4876 FstringParser_Dealloc(FstringParser *state)
4877 {
4878 FstringParser_check_invariants(state);
4879
4880 Py_XDECREF(state->last_str);
4881 ExprList_Dealloc(&state->expr_list);
4882 }
4883
4884 /* Make a Str node, but decref the PyUnicode object being added. */
4885 static expr_ty
make_str_node_and_del(PyObject ** str,struct compiling * c,const node * n)4886 make_str_node_and_del(PyObject **str, struct compiling *c, const node* n)
4887 {
4888 PyObject *s = *str;
4889 *str = NULL;
4890 assert(PyUnicode_CheckExact(s));
4891 if (PyArena_AddPyObject(c->c_arena, s) < 0) {
4892 Py_DECREF(s);
4893 return NULL;
4894 }
4895 return Str(s, LINENO(n), n->n_col_offset, c->c_arena);
4896 }
4897
4898 /* Add a non-f-string (that is, a regular literal string). str is
4899 decref'd. */
4900 static int
FstringParser_ConcatAndDel(FstringParser * state,PyObject * str)4901 FstringParser_ConcatAndDel(FstringParser *state, PyObject *str)
4902 {
4903 FstringParser_check_invariants(state);
4904
4905 assert(PyUnicode_CheckExact(str));
4906
4907 if (PyUnicode_GET_LENGTH(str) == 0) {
4908 Py_DECREF(str);
4909 return 0;
4910 }
4911
4912 if (!state->last_str) {
4913 /* We didn't have a string before, so just remember this one. */
4914 state->last_str = str;
4915 } else {
4916 /* Concatenate this with the previous string. */
4917 PyUnicode_AppendAndDel(&state->last_str, str);
4918 if (!state->last_str)
4919 return -1;
4920 }
4921 FstringParser_check_invariants(state);
4922 return 0;
4923 }
4924
4925 /* Parse an f-string. The f-string is in *str to end, with no
4926 'f' or quotes. */
4927 static int
FstringParser_ConcatFstring(FstringParser * state,const char ** str,const char * end,int raw,int recurse_lvl,struct compiling * c,const node * n)4928 FstringParser_ConcatFstring(FstringParser *state, const char **str,
4929 const char *end, int raw, int recurse_lvl,
4930 struct compiling *c, const node *n)
4931 {
4932 FstringParser_check_invariants(state);
4933 state->fmode = 1;
4934
4935 /* Parse the f-string. */
4936 while (1) {
4937 PyObject *literal = NULL;
4938 expr_ty expression = NULL;
4939
4940 /* If there's a zero length literal in front of the
4941 expression, literal will be NULL. If we're at the end of
4942 the f-string, expression will be NULL (unless result == 1,
4943 see below). */
4944 int result = fstring_find_literal_and_expr(str, end, raw, recurse_lvl,
4945 &literal, &expression,
4946 c, n);
4947 if (result < 0)
4948 return -1;
4949
4950 /* Add the literal, if any. */
4951 if (!literal) {
4952 /* Do nothing. Just leave last_str alone (and possibly
4953 NULL). */
4954 } else if (!state->last_str) {
4955 /* Note that the literal can be zero length, if the
4956 input string is "\\\n" or "\\\r", among others. */
4957 state->last_str = literal;
4958 literal = NULL;
4959 } else {
4960 /* We have a literal, concatenate it. */
4961 assert(PyUnicode_GET_LENGTH(literal) != 0);
4962 if (FstringParser_ConcatAndDel(state, literal) < 0)
4963 return -1;
4964 literal = NULL;
4965 }
4966
4967 /* We've dealt with the literal now. It can't be leaked on further
4968 errors. */
4969 assert(literal == NULL);
4970
4971 /* See if we should just loop around to get the next literal
4972 and expression, while ignoring the expression this
4973 time. This is used for un-doubling braces, as an
4974 optimization. */
4975 if (result == 1)
4976 continue;
4977
4978 if (!expression)
4979 /* We're done with this f-string. */
4980 break;
4981
4982 /* We know we have an expression. Convert any existing string
4983 to a Str node. */
4984 if (!state->last_str) {
4985 /* Do nothing. No previous literal. */
4986 } else {
4987 /* Convert the existing last_str literal to a Str node. */
4988 expr_ty str = make_str_node_and_del(&state->last_str, c, n);
4989 if (!str || ExprList_Append(&state->expr_list, str) < 0)
4990 return -1;
4991 }
4992
4993 if (ExprList_Append(&state->expr_list, expression) < 0)
4994 return -1;
4995 }
4996
4997 /* If recurse_lvl is zero, then we must be at the end of the
4998 string. Otherwise, we must be at a right brace. */
4999
5000 if (recurse_lvl == 0 && *str < end-1) {
5001 ast_error(c, n, "f-string: unexpected end of string");
5002 return -1;
5003 }
5004 if (recurse_lvl != 0 && **str != '}') {
5005 ast_error(c, n, "f-string: expecting '}'");
5006 return -1;
5007 }
5008
5009 FstringParser_check_invariants(state);
5010 return 0;
5011 }
5012
5013 /* Convert the partial state reflected in last_str and expr_list to an
5014 expr_ty. The expr_ty can be a Str, or a JoinedStr. */
5015 static expr_ty
FstringParser_Finish(FstringParser * state,struct compiling * c,const node * n)5016 FstringParser_Finish(FstringParser *state, struct compiling *c,
5017 const node *n)
5018 {
5019 asdl_seq *seq;
5020
5021 FstringParser_check_invariants(state);
5022
5023 /* If we're just a constant string with no expressions, return
5024 that. */
5025 if (!state->fmode) {
5026 assert(!state->expr_list.size);
5027 if (!state->last_str) {
5028 /* Create a zero length string. */
5029 state->last_str = PyUnicode_FromStringAndSize(NULL, 0);
5030 if (!state->last_str)
5031 goto error;
5032 }
5033 return make_str_node_and_del(&state->last_str, c, n);
5034 }
5035
5036 /* Create a Str node out of last_str, if needed. It will be the
5037 last node in our expression list. */
5038 if (state->last_str) {
5039 expr_ty str = make_str_node_and_del(&state->last_str, c, n);
5040 if (!str || ExprList_Append(&state->expr_list, str) < 0)
5041 goto error;
5042 }
5043 /* This has already been freed. */
5044 assert(state->last_str == NULL);
5045
5046 seq = ExprList_Finish(&state->expr_list, c->c_arena);
5047 if (!seq)
5048 goto error;
5049
5050 return JoinedStr(seq, LINENO(n), n->n_col_offset, c->c_arena);
5051
5052 error:
5053 FstringParser_Dealloc(state);
5054 return NULL;
5055 }
5056
5057 /* Given an f-string (with no 'f' or quotes) that's in *str and ends
5058 at end, parse it into an expr_ty. Return NULL on error. Adjust
5059 str to point past the parsed portion. */
5060 static expr_ty
fstring_parse(const char ** str,const char * end,int raw,int recurse_lvl,struct compiling * c,const node * n)5061 fstring_parse(const char **str, const char *end, int raw, int recurse_lvl,
5062 struct compiling *c, const node *n)
5063 {
5064 FstringParser state;
5065
5066 FstringParser_Init(&state);
5067 if (FstringParser_ConcatFstring(&state, str, end, raw, recurse_lvl,
5068 c, n) < 0) {
5069 FstringParser_Dealloc(&state);
5070 return NULL;
5071 }
5072
5073 return FstringParser_Finish(&state, c, n);
5074 }
5075
5076 /* n is a Python string literal, including the bracketing quote
5077 characters, and r, b, u, &/or f prefixes (if any), and embedded
5078 escape sequences (if any). parsestr parses it, and sets *result to
5079 decoded Python string object. If the string is an f-string, set
5080 *fstr and *fstrlen to the unparsed string object. Return 0 if no
5081 errors occurred.
5082 */
5083 static int
parsestr(struct compiling * c,const node * n,int * bytesmode,int * rawmode,PyObject ** result,const char ** fstr,Py_ssize_t * fstrlen)5084 parsestr(struct compiling *c, const node *n, int *bytesmode, int *rawmode,
5085 PyObject **result, const char **fstr, Py_ssize_t *fstrlen)
5086 {
5087 size_t len;
5088 const char *s = STR(n);
5089 int quote = Py_CHARMASK(*s);
5090 int fmode = 0;
5091 *bytesmode = 0;
5092 *rawmode = 0;
5093 *result = NULL;
5094 *fstr = NULL;
5095 if (Py_ISALPHA(quote)) {
5096 while (!*bytesmode || !*rawmode) {
5097 if (quote == 'b' || quote == 'B') {
5098 quote = *++s;
5099 *bytesmode = 1;
5100 }
5101 else if (quote == 'u' || quote == 'U') {
5102 quote = *++s;
5103 }
5104 else if (quote == 'r' || quote == 'R') {
5105 quote = *++s;
5106 *rawmode = 1;
5107 }
5108 else if (quote == 'f' || quote == 'F') {
5109 quote = *++s;
5110 fmode = 1;
5111 }
5112 else {
5113 break;
5114 }
5115 }
5116 }
5117 if (fmode && *bytesmode) {
5118 PyErr_BadInternalCall();
5119 return -1;
5120 }
5121 if (quote != '\'' && quote != '\"') {
5122 PyErr_BadInternalCall();
5123 return -1;
5124 }
5125 /* Skip the leading quote char. */
5126 s++;
5127 len = strlen(s);
5128 if (len > INT_MAX) {
5129 PyErr_SetString(PyExc_OverflowError,
5130 "string to parse is too long");
5131 return -1;
5132 }
5133 if (s[--len] != quote) {
5134 /* Last quote char must match the first. */
5135 PyErr_BadInternalCall();
5136 return -1;
5137 }
5138 if (len >= 4 && s[0] == quote && s[1] == quote) {
5139 /* A triple quoted string. We've already skipped one quote at
5140 the start and one at the end of the string. Now skip the
5141 two at the start. */
5142 s += 2;
5143 len -= 2;
5144 /* And check that the last two match. */
5145 if (s[--len] != quote || s[--len] != quote) {
5146 PyErr_BadInternalCall();
5147 return -1;
5148 }
5149 }
5150
5151 if (fmode) {
5152 /* Just return the bytes. The caller will parse the resulting
5153 string. */
5154 *fstr = s;
5155 *fstrlen = len;
5156 return 0;
5157 }
5158
5159 /* Not an f-string. */
5160 /* Avoid invoking escape decoding routines if possible. */
5161 *rawmode = *rawmode || strchr(s, '\\') == NULL;
5162 if (*bytesmode) {
5163 /* Disallow non-ASCII characters. */
5164 const char *ch;
5165 for (ch = s; *ch; ch++) {
5166 if (Py_CHARMASK(*ch) >= 0x80) {
5167 ast_error(c, n, "bytes can only contain ASCII "
5168 "literal characters.");
5169 return -1;
5170 }
5171 }
5172 if (*rawmode)
5173 *result = PyBytes_FromStringAndSize(s, len);
5174 else
5175 *result = decode_bytes_with_escapes(c, n, s, len);
5176 } else {
5177 if (*rawmode)
5178 *result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
5179 else
5180 *result = decode_unicode_with_escapes(c, n, s, len);
5181 }
5182 return *result == NULL ? -1 : 0;
5183 }
5184
5185 /* Accepts a STRING+ atom, and produces an expr_ty node. Run through
5186 each STRING atom, and process it as needed. For bytes, just
5187 concatenate them together, and the result will be a Bytes node. For
5188 normal strings and f-strings, concatenate them together. The result
5189 will be a Str node if there were no f-strings; a FormattedValue
5190 node if there's just an f-string (with no leading or trailing
5191 literals), or a JoinedStr node if there are multiple f-strings or
5192 any literals involved. */
5193 static expr_ty
parsestrplus(struct compiling * c,const node * n)5194 parsestrplus(struct compiling *c, const node *n)
5195 {
5196 int bytesmode = 0;
5197 PyObject *bytes_str = NULL;
5198 int i;
5199
5200 FstringParser state;
5201 FstringParser_Init(&state);
5202
5203 for (i = 0; i < NCH(n); i++) {
5204 int this_bytesmode;
5205 int this_rawmode;
5206 PyObject *s;
5207 const char *fstr;
5208 Py_ssize_t fstrlen = -1; /* Silence a compiler warning. */
5209
5210 REQ(CHILD(n, i), STRING);
5211 if (parsestr(c, CHILD(n, i), &this_bytesmode, &this_rawmode, &s,
5212 &fstr, &fstrlen) != 0)
5213 goto error;
5214
5215 /* Check that we're not mixing bytes with unicode. */
5216 if (i != 0 && bytesmode != this_bytesmode) {
5217 ast_error(c, n, "cannot mix bytes and nonbytes literals");
5218 /* s is NULL if the current string part is an f-string. */
5219 Py_XDECREF(s);
5220 goto error;
5221 }
5222 bytesmode = this_bytesmode;
5223
5224 if (fstr != NULL) {
5225 int result;
5226 assert(s == NULL && !bytesmode);
5227 /* This is an f-string. Parse and concatenate it. */
5228 result = FstringParser_ConcatFstring(&state, &fstr, fstr+fstrlen,
5229 this_rawmode, 0, c, n);
5230 if (result < 0)
5231 goto error;
5232 } else {
5233 /* A string or byte string. */
5234 assert(s != NULL && fstr == NULL);
5235
5236 assert(bytesmode ? PyBytes_CheckExact(s) :
5237 PyUnicode_CheckExact(s));
5238
5239 if (bytesmode) {
5240 /* For bytes, concat as we go. */
5241 if (i == 0) {
5242 /* First time, just remember this value. */
5243 bytes_str = s;
5244 } else {
5245 PyBytes_ConcatAndDel(&bytes_str, s);
5246 if (!bytes_str)
5247 goto error;
5248 }
5249 } else {
5250 /* This is a regular string. Concatenate it. */
5251 if (FstringParser_ConcatAndDel(&state, s) < 0)
5252 goto error;
5253 }
5254 }
5255 }
5256 if (bytesmode) {
5257 /* Just return the bytes object and we're done. */
5258 if (PyArena_AddPyObject(c->c_arena, bytes_str) < 0)
5259 goto error;
5260 return Bytes(bytes_str, LINENO(n), n->n_col_offset, c->c_arena);
5261 }
5262
5263 /* We're not a bytes string, bytes_str should never have been set. */
5264 assert(bytes_str == NULL);
5265
5266 return FstringParser_Finish(&state, c, n);
5267
5268 error:
5269 Py_XDECREF(bytes_str);
5270 FstringParser_Dealloc(&state);
5271 return NULL;
5272 }
5273